diff options
Diffstat (limited to 'src/Text')
124 files changed, 6294 insertions, 5922 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 0d34eca11..ecbdeecd8 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} {- | @@ -58,7 +59,7 @@ import Text.Pandoc.Readers.Markdown (yamlToMeta) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput, - defaultUserDataDirs) + defaultUserDataDirs, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString) import qualified Text.Pandoc.UTF8 as UTF8 #ifndef _WINDOWS @@ -66,7 +67,6 @@ import System.Posix.IO (stdOutput) import System.Posix.Terminal (queryTerminal) #endif - convertWithOpts :: Opt -> IO () convertWithOpts opts = do let outputFile = fromMaybe "-" (optOutputFile opts) @@ -141,17 +141,17 @@ convertWithOpts opts = do Nothing -> case formatFromFilePaths sources of Just f' -> return f' Nothing | sources == ["-"] -> return "markdown" - | any isURI sources -> return "html" + | any (isURI . T.pack) sources -> return "html" | otherwise -> do report $ CouldNotDeduceFormat - (map takeExtension sources) "markdown" + (map (T.pack . takeExtension) sources) "markdown" return "markdown" let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" when (pdfOutput && readerName == "latex") $ case (optInputFiles opts) of - (inputFile:_) -> report $ UnusualConversion $ + (inputFile:_) -> report $ UnusualConversion $ T.pack $ "to convert a .tex file to PDF, you get better results by using pdflatex " <> "(or lualatex or xelatex) directly, try `pdflatex " <> inputFile <> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`." @@ -187,15 +187,15 @@ convertWithOpts opts = do when ((pdfOutput || not (isTextFormat format)) && istty && isNothing ( optOutputFile opts)) $ throwError $ PandocAppError $ - "Cannot write " ++ format ++ " output to terminal.\n" ++ - "Specify an output file using the -o option, or " ++ + "Cannot write " <> format <> " output to terminal.\n" <> + "Specify an output file using the -o option, or " <> "use '-o -' to force output to stdout." - abbrevs <- Set.fromList . filter (not . null) . lines <$> + abbrevs <- Set.fromList . filter (not . T.null) . T.lines <$> case optAbbreviations opts of - Nothing -> UTF8.toString <$> readDataFile "abbreviations" - Just f -> UTF8.toString <$> readFileStrict f + Nothing -> UTF8.toText <$> readDataFile "abbreviations" + Just f -> UTF8.toText <$> readFileStrict f metadata <- if format == "jats" && isNothing (lookupMeta "csl" (optMetadata opts)) && @@ -285,7 +285,7 @@ convertWithOpts opts = do >=> return . adjustMetadata (metadataFromFile <>) >=> return . adjustMetadata (<> metadata) >=> applyTransforms transforms - >=> applyFilters readerOpts filters' [format] + >=> applyFilters readerOpts filters' [T.unpack format] >=> maybe return extractMedia (optExtractMedia opts) ) @@ -298,7 +298,7 @@ convertWithOpts opts = do case res of Right pdf -> writeFnBinary outputFile pdf Left err' -> throwError $ PandocPDFError $ - TL.unpack (TE.decodeUtf8With TE.lenientDecode err') + TL.toStrict (TE.decodeUtf8With TE.lenientDecode err') Nothing -> do let ensureNl t @@ -308,18 +308,16 @@ convertWithOpts opts = do output <- ensureNl <$> f writerOptions doc writerFn eol outputFile =<< if optSelfContained opts && htmlFormat format - -- TODO not maximally efficient; change type - -- of makeSelfContained so it works w/ Text - then T.pack <$> makeSelfContained (T.unpack output) + then makeSelfContained output else return output type Transform = Pandoc -> Pandoc -htmlFormat :: String -> Bool +htmlFormat :: Text -> Bool htmlFormat = (`elem` ["html","html4","html5","s5","slidy", "slideous","dzslides","revealjs"]) -isTextFormat :: String -> Bool +isTextFormat :: Text -> Bool isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx"] adjustMetadata :: (Meta -> Meta) -> Pandoc -> Pandoc @@ -335,7 +333,7 @@ readSource src = case parseURI src of Just u | uriScheme u `elem` ["http:","https:"] -> readURI src | uriScheme u == "file:" -> liftIO $ - readTextFile (uriPathToPath $ uriPath u) + readTextFile (uriPathToPath $ T.pack $ uriPath u) _ -> liftIO $ readTextFile src where readTextFile :: FilePath -> IO Text readTextFile fp = do @@ -347,12 +345,12 @@ readSource src = case parseURI src of TSE.DecodeError _ (Just w) -> do case BS.elemIndex w bs of Just offset -> E.throwIO $ - PandocUTF8DecodingError fp offset w - _ -> E.throwIO $ PandocUTF8DecodingError fp 0 w - _ -> E.throwIO $ PandocAppError (show e)) + PandocUTF8DecodingError (T.pack fp) offset w + _ -> E.throwIO $ PandocUTF8DecodingError (T.pack fp) 0 w + _ -> E.throwIO $ PandocAppError (tshow e)) readURI :: FilePath -> PandocIO Text -readURI src = UTF8.toText . fst <$> openURL src +readURI src = UTF8.toText . fst <$> openURL (T.pack src) readFile' :: MonadIO m => FilePath -> m BL.ByteString readFile' "-" = liftIO BL.getContents diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index c6f88af24..56b1f780a 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -2,6 +2,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.App.CommandLineOptions Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -45,7 +46,7 @@ import Text.Pandoc.Builder (setMeta) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..)) import Text.Pandoc.Filter (Filter (..)) import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.Shared (ordNub, safeRead, defaultUserDataDirs) +import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDirs) import Text.Printf #ifdef EMBED_DATA_FILES @@ -78,7 +79,7 @@ parseOptions options' defaults = do unrecognizedOpts unless (null errors && null unknownOptionErrors) $ - E.throwIO $ PandocOptionError $ + E.throwIO $ PandocOptionError $ T.pack $ concat errors ++ unlines unknownOptionErrors ++ ("Try " ++ prg ++ " --help for more information.") @@ -92,7 +93,7 @@ latexEngines = ["pdflatex", "lualatex", "xelatex", "latexmk", "tectonic"] htmlEngines :: [String] htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"] -engines :: [(String, String)] +engines :: [(Text, String)] engines = map ("html",) htmlEngines ++ map ("html5",) htmlEngines ++ map ("latex",) latexEngines ++ @@ -119,13 +120,13 @@ options = [ Option "fr" ["from","read"] (ReqArg (\arg opt -> return opt { optFrom = - Just (map toLower arg) }) + Just (T.toLower $ T.pack arg) }) "FORMAT") "" , Option "tw" ["to","write"] (ReqArg - (\arg opt -> return opt { optTo = Just arg }) + (\arg opt -> return opt { optTo = Just $ T.pack arg }) "FORMAT") "" @@ -218,7 +219,7 @@ options = , Option "" ["toc-depth"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t >= 1 && t <= 6 -> return opt { optTOCDepth = t } _ -> E.throwIO $ PandocOptionError @@ -234,7 +235,7 @@ options = , Option "" ["number-offset"] (ReqArg (\arg opt -> - case safeRead ('[':arg ++ "]") of + case safeStrRead ("[" <> arg <> "]") of Just ns -> return opt { optNumberOffset = ns, optNumberSections = True } _ -> E.throwIO $ PandocOptionError @@ -255,7 +256,7 @@ options = "default" -> return opt{ optTopLevelDivision = TopLevelDefault } _ -> E.throwIO $ PandocOptionError $ - "Top-level division must be " ++ + "Top-level division must be " <> "section, chapter, part, or default" ) "section|chapter|part") "" -- "Use top-level division type in LaTeX, ConTeXt, DocBook" @@ -307,7 +308,7 @@ options = , Option "" ["highlight-style"] (ReqArg (\arg opt -> - return opt{ optHighlightStyle = Just arg }) + return opt{ optHighlightStyle = Just $ T.pack arg }) "STYLE|FILE") "" -- "Style for highlighted code" @@ -328,7 +329,7 @@ options = , Option "" ["dpi"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t > 0 -> return opt { optDpi = t } _ -> E.throwIO $ PandocOptionError "dpi must be a number greater than 0") @@ -351,7 +352,7 @@ options = , Option "" ["columns"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t > 0 -> return opt { optColumns = t } _ -> E.throwIO $ PandocOptionError "columns must be a number greater than 0") @@ -366,7 +367,7 @@ options = , Option "" ["tab-stop"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t > 0 -> return opt { optTabStop = t } _ -> E.throwIO $ PandocOptionError "tab-stop must be a number greater than 0") @@ -379,7 +380,7 @@ options = let b = takeBaseName arg if b `elem` pdfEngines then return opt { optPdfEngine = Just arg } - else E.throwIO $ PandocOptionError $ "pdf-engine must be one of " + else E.throwIO $ PandocOptionError $ T.pack $ "pdf-engine must be one of " ++ intercalate ", " pdfEngines) "PROGRAM") "" -- "Name of program to use in generating PDF" @@ -410,7 +411,7 @@ options = (\arg opt -> do let (key, val) = splitField arg return opt{ optRequestHeaders = - (key, val) : optRequestHeaders opt }) + (T.pack key, T.pack val) : optRequestHeaders opt }) "NAME:VALUE") "" @@ -422,14 +423,15 @@ options = , Option "" ["indented-code-classes"] (ReqArg - (\arg opt -> return opt { optIndentedCodeClasses = words $ - map (\c -> if c == ',' then ' ' else c) arg }) + (\arg opt -> return opt { optIndentedCodeClasses = T.words $ + T.map (\c -> if c == ',' then ' ' else c) $ + T.pack arg }) "STRING") "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" , Option "" ["default-image-extension"] (ReqArg - (\arg opt -> return opt { optDefaultImageExtension = arg }) + (\arg opt -> return opt { optDefaultImageExtension = T.pack arg }) "extension") "" -- "Default extension for extensionless images" @@ -450,7 +452,7 @@ options = , Option "" ["shift-heading-level-by"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t -> return opt{ optShiftHeadingLevelBy = t } _ -> E.throwIO $ PandocOptionError @@ -463,7 +465,7 @@ options = (\arg opt -> do deprecatedOption "--base-header-level" "Use --shift-heading-level-by instead." - case safeRead arg of + case safeStrRead arg of Just t | t > 0 && t < 6 -> return opt{ optShiftHeadingLevelBy = t - 1 } _ -> E.throwIO $ PandocOptionError @@ -486,7 +488,7 @@ options = "accept" -> return AcceptChanges "reject" -> return RejectChanges "all" -> return AllChanges - _ -> E.throwIO $ PandocOptionError + _ -> E.throwIO $ PandocOptionError $ T.pack ("Unknown option for track-changes: " ++ arg) return opt { optTrackChanges = action }) "accept|reject|all") @@ -509,7 +511,7 @@ options = "block" -> return EndOfBlock "section" -> return EndOfSection "document" -> return EndOfDocument - _ -> E.throwIO $ PandocOptionError + _ -> E.throwIO $ PandocOptionError $ T.pack ("Unknown option for reference-location: " ++ arg) return opt { optReferenceLocation = action }) "block|section|document") @@ -533,7 +535,7 @@ options = , Option "" ["slide-level"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t >= 1 && t <= 6 -> return opt { optSlideLevel = Just t } _ -> E.throwIO $ PandocOptionError @@ -559,7 +561,7 @@ options = "references" -> return ReferenceObfuscation "javascript" -> return JavascriptObfuscation "none" -> return NoObfuscation - _ -> E.throwIO $ PandocOptionError + _ -> E.throwIO $ PandocOptionError $ T.pack ("Unknown obfuscation method: " ++ arg) return opt { optEmailObfuscation = method }) "none|javascript|references") @@ -567,7 +569,7 @@ options = , Option "" ["id-prefix"] (ReqArg - (\arg opt -> return opt { optIdentifierPrefix = arg }) + (\arg opt -> return opt { optIdentifierPrefix = T.pack arg }) "STRING") "" -- "Prefix to add to automatically generated HTML identifiers" @@ -620,7 +622,7 @@ options = , Option "" ["epub-chapter-level"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t >= 1 && t <= 6 -> return opt { optEpubChapterLevel = t } _ -> E.throwIO $ PandocOptionError @@ -685,15 +687,15 @@ options = (OptArg (\arg opt -> do let url' = fromMaybe "https://latex.codecogs.com/png.latex?" arg - return opt { optHTMLMathMethod = WebTeX url' }) + return opt { optHTMLMathMethod = WebTeX $ T.pack url' }) "URL") "" -- "Use web service for HTML math" , Option "" ["mathjax"] (OptArg (\arg opt -> do - let url' = fromMaybe (defaultMathJaxURL ++ - "tex-mml-chtml.js") arg + let url' = maybe (defaultMathJaxURL <> + "tex-mml-chtml.js") T.pack arg return opt { optHTMLMathMethod = MathJax url'}) "URL") "" -- "Use MathJax for HTML math" @@ -703,7 +705,7 @@ options = (\arg opt -> return opt { optHTMLMathMethod = KaTeX $ - fromMaybe defaultKaTeXURL arg }) + maybe defaultKaTeXURL T.pack arg }) "URL") "" -- Use KaTeX for HTML Math @@ -763,7 +765,7 @@ options = UTF8.hPutStrLn stdout $ printf tpl allopts (unwords readersNames) (unwords writersNames) - (unwords $ map fst highlightingStyles) + (unwords $ map (T.unpack . fst) highlightingStyles) (unwords datafiles) exitSuccess )) "" -- "Print bash completion script" @@ -790,12 +792,12 @@ options = let allExts = case arg of Nothing -> extensionsFromList extList - Just fmt -> getAllExtensions fmt + Just fmt -> getAllExtensions $ T.pack fmt let defExts = case arg of Nothing -> getDefaultExtensions "markdown" - Just fmt -> getDefaultExtensions fmt + Just fmt -> getDefaultExtensions $ T.pack fmt let showExt x = (if extensionEnabled x defExts then '+' @@ -823,7 +825,7 @@ options = , Option "" ["list-highlight-styles"] (NoArg (\_ -> do - mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles + mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles exitSuccess )) "" @@ -835,11 +837,11 @@ options = Nothing -> UTF8.hPutStr stdout templ <- runIO $ do setUserDataDir Nothing - getDefaultTemplate arg + getDefaultTemplate (T.pack arg) case templ of Right t | T.null t -> -- e.g. for docx, odt, json: - E.throwIO $ PandocCouldNotFindDataFileError + E.throwIO $ PandocCouldNotFindDataFileError $ T.pack ("templates/default." ++ arg) | otherwise -> write . T.unpack $ t Left e -> E.throwIO e @@ -890,7 +892,7 @@ options = (\_ -> do prg <- getProgName defaultDatadirs <- defaultUserDataDirs - UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ + UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++ compileInfo ++ "\nDefault user data directory: " ++ intercalate " or " defaultDatadirs ++ ('\n':copyrightMessage)) @@ -963,14 +965,14 @@ handleUnrecognizedOption x = (("Unknown option " ++ x ++ ".") :) readersNames :: [String] -readersNames = sort (map fst (readers :: [(String, Reader PandocIO)])) +readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)])) writersNames :: [String] -writersNames = sort (map fst (writers :: [(String, Writer PandocIO)])) +writersNames = sort (map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)])) splitField :: String -> (String, String) splitField s = - case break (`elem` ":=") s of + case break (`elemText` ":=") s of (k,_:v) -> (k,v) (k,[]) -> (k,"true") @@ -991,7 +993,7 @@ applyDefaults opt file = runIOorExplode $ do case Y.decode1 inp of Right (f :: Opt -> Opt) -> return $ f opt Left (errpos, errmsg) -> throwError $ - PandocParseError $ + PandocParseError $ T.pack $ "Error parsing " ++ fp' ++ " line " ++ show (Y.posLine errpos) ++ " column " ++ show (Y.posColumn errpos) ++ ":\n" ++ errmsg @@ -1001,18 +1003,18 @@ lookupHighlightStyle s | takeExtension s == ".theme" = -- attempt to load KDE theme do contents <- readFileLazy s case parseTheme contents of - Left _ -> throwError $ PandocOptionError $ + Left _ -> throwError $ PandocOptionError $ T.pack $ "Could not read highlighting theme " ++ s Right sty -> return sty | otherwise = - case lookup (map toLower s) highlightingStyles of + case lookup (T.toLower $ T.pack s) highlightingStyles of Just sty -> return sty - Nothing -> throwError $ PandocOptionError $ + Nothing -> throwError $ PandocOptionError $ T.pack $ "Unknown highlight-style " ++ s deprecatedOption :: String -> String -> IO () deprecatedOption o msg = - runIO (report $ Deprecated o msg) >>= + runIO (report $ Deprecated (T.pack o) (T.pack msg)) >>= \r -> case r of Right () -> return () Left e -> E.throwIO e @@ -1024,13 +1026,14 @@ setVariable key val (Context ctx) = addMeta :: String -> String -> Meta -> Meta addMeta k v meta = - case lookupMeta k meta of - Nothing -> setMeta k v' meta + case lookupMeta k' meta of + Nothing -> setMeta k' v' meta Just (MetaList xs) -> - setMeta k (MetaList (xs ++ [v'])) meta - Just x -> setMeta k (MetaList [x, v']) meta + setMeta k' (MetaList (xs ++ [v'])) meta + Just x -> setMeta k' (MetaList [x, v']) meta where v' = readMetaValue v + k' = T.pack k readMetaValue :: String -> MetaValue readMetaValue s @@ -1040,7 +1043,7 @@ readMetaValue s | s == "false" = MetaBool False | s == "False" = MetaBool False | s == "FALSE" = MetaBool False - | otherwise = MetaString s + | otherwise = MetaString $ T.pack s -- On Windows with ghc 8.6+, we need to rewrite paths -- beginning with \\ to \\?\UNC\. -- See #5127. diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs index a02d8d15e..25e0a303e 100644 --- a/src/Text/Pandoc/App/FormatHeuristics.hs +++ b/src/Text/Pandoc/App/FormatHeuristics.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.App.FormatHeuristics Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -16,10 +17,11 @@ module Text.Pandoc.App.FormatHeuristics import Prelude import Data.Char (toLower) +import Data.Text (Text) import System.FilePath (takeExtension) -- Determine default format based on file extensions. -formatFromFilePaths :: [FilePath] -> Maybe String +formatFromFilePaths :: [FilePath] -> Maybe Text formatFromFilePaths [] = Nothing formatFromFilePaths (x:xs) = case formatFromFilePath x of @@ -27,7 +29,7 @@ formatFromFilePaths (x:xs) = Nothing -> formatFromFilePaths xs -- Determine format based on file extension -formatFromFilePath :: FilePath -> Maybe String +formatFromFilePath :: FilePath -> Maybe Text formatFromFilePath x = case takeExtension (map toLower x) of ".adoc" -> Just "asciidoc" diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index 6db397147..c0d06e0f4 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -33,7 +33,7 @@ import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault), ReferenceLocation (EndOfDocument), ObfuscationMethod (NoObfuscation), CiteMethod (Citeproc)) -import Text.Pandoc.Shared (camelCaseToHyphenated) +import Text.Pandoc.Shared (camelCaseStrToHyphenated) import Text.DocLayout (render) import Text.DocTemplates (Context(..), Val(..)) import Data.Text (Text, unpack) @@ -76,8 +76,8 @@ data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces , optStandalone :: Bool -- ^ Include header, footer - , optFrom :: Maybe String -- ^ Reader format - , optTo :: Maybe String -- ^ Writer format + , optFrom :: Maybe Text -- ^ Reader format + , optTo :: Maybe Text -- ^ Writer format , optTableOfContents :: Bool -- ^ Include table of contents , optShiftHeadingLevelBy :: Int -- ^ Shift heading level by , optTemplate :: Maybe FilePath -- ^ Custom template @@ -92,7 +92,7 @@ data Opt = Opt , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 , optSelfContained :: Bool -- ^ Make HTML accessible offline , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML - , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code + , optHighlightStyle :: Maybe Text -- ^ Style to use for highlighted code , optSyntaxDefinitions :: [FilePath] -- ^ xml syntax defs to load , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math @@ -117,9 +117,9 @@ data Opt = Opt , optColumns :: Int -- ^ Line length in characters , optFilters :: [Filter] -- ^ Filters to apply , optEmailObfuscation :: ObfuscationMethod - , optIdentifierPrefix :: String + , optIdentifierPrefix :: Text , optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs - , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks + , optIndentedCodeClasses :: [Text] -- ^ Default classes for indented code blocks , optDataDir :: Maybe FilePath , optCiteMethod :: CiteMethod -- ^ Method to output cites , optListings :: Bool -- ^ Use listings package for code blocks @@ -128,18 +128,18 @@ data Opt = Opt , optSlideLevel :: Maybe Int -- ^ Header level that creates slides , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 , optAscii :: Bool -- ^ Prefer ascii output - , optDefaultImageExtension :: String -- ^ Default image extension + , optDefaultImageExtension :: Text -- ^ Default image extension , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. , optFileScope :: Bool -- ^ Parse input files before combining - , optTitlePrefix :: Maybe String -- ^ Prefix for title + , optTitlePrefix :: Maybe Text -- ^ Prefix for title , optCss :: [FilePath] -- ^ CSS files to link to , optIpynbOutput :: IpynbOutput -- ^ How to treat ipynb output blocks , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body , optIncludeInHeader :: [FilePath] -- ^ Files to include in header , optResourcePath :: [FilePath] -- ^ Path to search for images etc - , optRequestHeaders :: [(String, String)] -- ^ Headers for HTTP requests + , optRequestHeaders :: [(Text, Text)] -- ^ Headers for HTTP requests , optEol :: LineEnding -- ^ Style of line-endings to use , optStripComments :: Bool -- ^ Skip HTML comments } deriving (Generic, Show) @@ -167,13 +167,13 @@ doOpt (k',v) = do "toc" -> parseYAML v >>= \x -> return (\o -> o{ optTableOfContents = x }) "from" -> - parseYAML v >>= \x -> return (\o -> o{ optFrom = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optFrom = x }) "reader" -> - parseYAML v >>= \x -> return (\o -> o{ optFrom = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optFrom = x }) "to" -> - parseYAML v >>= \x -> return (\o -> o{ optTo = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optTo = x }) "writer" -> - parseYAML v >>= \x -> return (\o -> o{ optTo = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optTo = x }) "shift-heading-level-by" -> parseYAML v >>= \x -> return (\o -> o{ optShiftHeadingLevelBy = x }) "template" -> @@ -211,7 +211,7 @@ doOpt (k',v) = do "html-q-tags" -> parseYAML v >>= \x -> return (\o -> o{ optHtmlQTags = x }) "highlight-style" -> - parseYAML v >>= \x -> return (\o -> o{ optHighlightStyle = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optHighlightStyle = x }) "syntax-definition" -> (parseYAML v >>= \x -> return (\o -> o{ optSyntaxDefinitions = map unpack x })) @@ -274,12 +274,12 @@ doOpt (k',v) = do parseYAML v >>= \x -> return (\o -> o{ optEmailObfuscation = x }) "identifier-prefix" -> parseYAML v >>= \x -> - return (\o -> o{ optIdentifierPrefix = unpack x }) + return (\o -> o{ optIdentifierPrefix = x }) "strip-empty-paragraphs" -> parseYAML v >>= \x -> return (\o -> o{ optStripEmptyParagraphs = x }) "indented-code-classes" -> parseYAML v >>= \x -> - return (\o -> o{ optIndentedCodeClasses = map unpack x }) + return (\o -> o{ optIndentedCodeClasses = x }) "data-dir" -> parseYAML v >>= \x -> return (\o -> o{ optDataDir = unpack <$> x }) "cite-method" -> @@ -305,7 +305,7 @@ doOpt (k',v) = do parseYAML v >>= \x -> return (\o -> o{ optAscii = x }) "default-image-extension" -> parseYAML v >>= \x -> - return (\o -> o{ optDefaultImageExtension = unpack x }) + return (\o -> o{ optDefaultImageExtension = x }) "extract-media" -> parseYAML v >>= \x -> return (\o -> o{ optExtractMedia = unpack <$> x }) @@ -314,7 +314,7 @@ doOpt (k',v) = do "file-scope" -> parseYAML v >>= \x -> return (\o -> o{ optFileScope = x }) "title-prefix" -> - parseYAML v >>= \x -> return (\o -> o{ optTitlePrefix = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optTitlePrefix = x }) "css" -> (parseYAML v >>= \x -> return (\o -> o{ optCss = map unpack x })) <|> @@ -344,9 +344,7 @@ doOpt (k',v) = do return (\o -> o{ optResourcePath = map unpack x }) "request-headers" -> parseYAML v >>= \x -> - return (\o -> o{ optRequestHeaders = - map (\(key,val) -> - (unpack key, unpack val)) x }) + return (\o -> o{ optRequestHeaders = x }) "eol" -> parseYAML v >>= \x -> return (\o -> o{ optEol = x }) "strip-comments" -> @@ -429,13 +427,13 @@ defaultOpts = Opt contextToMeta :: Context Text -> Meta contextToMeta (Context m) = - Meta . M.mapKeys unpack . M.map valToMetaVal $ m + Meta . M.map valToMetaVal $ m valToMetaVal :: Val Text -> MetaValue valToMetaVal (MapVal (Context m)) = - MetaMap . M.mapKeys unpack . M.map valToMetaVal $ m + MetaMap . M.map valToMetaVal $ m valToMetaVal (ListVal xs) = MetaList $ map valToMetaVal xs -valToMetaVal (SimpleVal d) = MetaString (unpack $ render Nothing d) +valToMetaVal (SimpleVal d) = MetaString $ render Nothing d valToMetaVal NullVal = MetaString "" -- see https://github.com/jgm/pandoc/pull/4083 @@ -446,5 +444,5 @@ $(deriveJSON defaultOptions{ fieldLabelModifier = map toLower } ''LineEnding) $(deriveJSON defaultOptions{ fieldLabelModifier = - camelCaseToHyphenated . dropWhile isLower + camelCaseStrToHyphenated . dropWhile isLower } ''Opt) diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index b29860c03..d328a9b6a 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -1,6 +1,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {- | @@ -27,7 +28,7 @@ import Control.Monad import Control.Monad.Except (catchError, throwError) import Control.Monad.Trans import Data.Char (toLower) -import Data.List (find, isPrefixOf, isSuffixOf) +import Data.List (find, isPrefixOf) import Data.Maybe (fromMaybe) import Skylighting (defaultSyntaxMap) import Skylighting.Parser (addSyntaxDefinition, parseSyntaxDefinition) @@ -42,18 +43,18 @@ import Text.Pandoc.App.CommandLineOptions (engines, lookupHighlightStyle, setVariable) import qualified Text.Pandoc.UTF8 as UTF8 +readUtf8File :: PandocMonad m => FilePath -> m T.Text +readUtf8File = fmap UTF8.toText . readFileStrict + -- | Settings specifying how document output should be produced. data OutputSettings = OutputSettings - { outputFormat :: String + { outputFormat :: T.Text , outputWriter :: Writer PandocIO - , outputWriterName :: String + , outputWriterName :: T.Text , outputWriterOptions :: WriterOptions , outputPdfProgram :: Maybe String } -readUtf8File :: PandocMonad m => FilePath -> m String -readUtf8File = fmap UTF8.toString . readFileStrict - -- | Get output settings from command line options. optToOutputSettings :: Opt -> PandocIO OutputSettings optToOutputSettings opts = do @@ -85,33 +86,33 @@ optToOutputSettings opts = do case formatFromFilePaths [outputFile] of Nothing -> do report $ CouldNotDeduceFormat - [takeExtension outputFile] "html" + [T.pack $ takeExtension outputFile] "html" return ("html", Nothing) Just f -> return (f, Nothing) - let format = if ".lua" `isSuffixOf` writerName + let format = if ".lua" `T.isSuffixOf` writerName then writerName - else map toLower $ baseWriterName writerName + else T.toLower $ baseWriterName writerName (writer :: Writer PandocIO, writerExts) <- - if ".lua" `isSuffixOf` format + if ".lua" `T.isSuffixOf` format then return (TextWriter - (\o d -> writeCustom writerName o d) + (\o d -> writeCustom (T.unpack writerName) o d) :: Writer PandocIO, mempty) - else getWriter (map toLower writerName) + else getWriter (T.toLower writerName) let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput let addSyntaxMap existingmap f = do res <- liftIO (parseSyntaxDefinition f) case res of - Left errstr -> throwError $ PandocSyntaxMapError errstr + Left errstr -> throwError $ PandocSyntaxMapError $ T.pack errstr Right syn -> return $ addSyntaxDefinition syn existingmap syntaxMap <- foldM addSyntaxMap defaultSyntaxMap (optSyntaxDefinitions opts) - hlStyle <- maybe (return Nothing) (fmap Just . lookupHighlightStyle) + hlStyle <- maybe (return Nothing) (fmap Just . lookupHighlightStyle . T.unpack) (optHighlightStyle opts) let setVariableM k v = return . setVariable k v @@ -135,15 +136,15 @@ optToOutputSettings opts = do >>= setVariableM "outputfile" outputFile >>= - setFilesVariableM "include-before" (optIncludeBeforeBody opts) + setFilesVariableM "include-before" (T.pack <$> optIncludeBeforeBody opts) >>= - setFilesVariableM "include-after" (optIncludeAfterBody opts) + setFilesVariableM "include-after" (T.pack <$> optIncludeAfterBody opts) >>= - setFilesVariableM "header-includes" (optIncludeInHeader opts) + setFilesVariableM "header-includes" (T.pack <$> optIncludeInHeader opts) >>= setListVariableM "css" (optCss opts) >>= - maybe return (setVariableM "title-prefix") + maybe return (setVariableM "title-prefix" . T.unpack) (optTitlePrefix opts) >>= maybe return (setVariableM "epub-cover-image") @@ -168,7 +169,7 @@ optToOutputSettings opts = do Just tp -> do -- strip off extensions let tp' = case takeExtension tp of - "" -> tp <.> format + "" -> tp <.> T.unpack format _ -> tp Just . UTF8.toText <$> ((do surl <- stSourceURL <$> getCommonState @@ -176,7 +177,7 @@ optToOutputSettings opts = do -- unless the full URL is specified: modifyCommonState $ \st -> st{ stSourceURL = Nothing } - (bs, _) <- fetchItem tp' + (bs, _) <- fetchItem $ T.pack tp' modifyCommonState $ \st -> st{ stSourceURL = surl } return bs) @@ -194,7 +195,7 @@ optToOutputSettings opts = do Just ts -> do res <- compileTemplate templatePath ts case res of - Left e -> throwError $ PandocTemplateError e + Left e -> throwError $ PandocTemplateError $ T.pack e Right t -> return $ Just t let writerOpts = def { @@ -222,7 +223,7 @@ optToOutputSettings opts = do , writerSlideLevel = optSlideLevel opts , writerHighlightStyle = hlStyle , writerSetextHeaders = optSetextHeaders opts - , writerEpubSubdirectory = optEpubSubdirectory opts + , writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts , writerEpubMetadata = epubMetadata , writerEpubFonts = optEpubFonts opts , writerEpubChapterLevel = optEpubChapterLevel opts @@ -239,12 +240,12 @@ optToOutputSettings opts = do , outputPdfProgram = maybePdfProg } -baseWriterName :: String -> String -baseWriterName = takeWhile (\c -> c /= '+' && c /= '-') +baseWriterName :: T.Text -> T.Text +baseWriterName = T.takeWhile (\c -> c /= '+' && c /= '-') -pdfWriterAndProg :: Maybe String -- ^ user-specified writer name +pdfWriterAndProg :: Maybe T.Text -- ^ user-specified writer name -> Maybe String -- ^ user-specified pdf-engine - -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) + -> IO (T.Text, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) pdfWriterAndProg mWriter mEngine = case go mWriter mEngine of Right (writ, prog) -> return (writ, Just prog) @@ -256,20 +257,20 @@ pdfWriterAndProg mWriter mEngine = go (Just writer) (Just engine) = case find (== (baseWriterName writer, takeBaseName engine)) engines of Just _ -> Right (writer, engine) - Nothing -> Left $ "pdf-engine " ++ engine ++ - " is not compatible with output format " ++ writer + Nothing -> Left $ "pdf-engine " <> T.pack engine <> + " is not compatible with output format " <> writer writerForEngine eng = case [f | (f,e) <- engines, e == eng] of fmt : _ -> Right fmt [] -> Left $ - "pdf-engine " ++ eng ++ " not known" + "pdf-engine " <> T.pack eng <> " not known" engineForWriter "pdf" = Left "pdf writer" engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of eng : _ -> Right eng [] -> Left $ - "cannot produce pdf output from " ++ w + "cannot produce pdf output from " <> w -isTextFormat :: String -> Bool +isTextFormat :: T.Text -> Bool isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx","pdf"] diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index ce8aa99ca..f4afec90c 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -20,9 +20,7 @@ module Text.Pandoc.BCP47 ( where import Prelude import Control.Monad (guard) -import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper, toLower, - toUpper) -import Data.List (intercalate) +import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.DocTemplates (FromContext(..)) @@ -30,22 +28,22 @@ import qualified Data.Text as T import qualified Text.Parsec as P -- | Represents BCP 47 language/country code. -data Lang = Lang{ langLanguage :: String - , langScript :: String - , langRegion :: String - , langVariants :: [String] } +data Lang = Lang{ langLanguage :: T.Text + , langScript :: T.Text + , langRegion :: T.Text + , langVariants :: [T.Text] } deriving (Eq, Ord, Show) -- | Render a Lang as BCP 47. -renderLang :: Lang -> String -renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) +renderLang :: Lang -> T.Text +renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null) ([langScript lang, langRegion lang] ++ langVariants lang)) -- | Get the contents of the `lang` metadata field or variable. -getLang :: WriterOptions -> Meta -> Maybe String +getLang :: WriterOptions -> Meta -> Maybe T.Text getLang opts meta = case lookupContext "lang" (writerVariables opts) of - Just s -> Just $ T.unpack s + Just s -> Just s _ -> case lookupMeta "lang" meta of Just (MetaInlines [Str s]) -> Just s @@ -55,11 +53,11 @@ getLang opts meta = -- | Parse a BCP 47 string as a Lang. Currently we parse -- extensions and private-use fields as "variants," even -- though officially they aren't. -parseBCP47 :: String -> Either String Lang +parseBCP47 :: T.Text -> Either T.Text Lang parseBCP47 lang = case P.parse bcp47 "lang" lang of Right r -> Right r - Left e -> Left $ show e + Left e -> Left $ T.pack $ show e where bcp47 = do language <- pLanguage script <- P.option "" pScript @@ -75,19 +73,19 @@ parseBCP47 lang = cs <- P.many1 asciiLetter let lcs = length cs guard $ lcs == 2 || lcs == 3 - return $ map toLower cs + return $ T.toLower $ T.pack $ cs pScript = P.try $ do P.char '-' x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) xs <- P.count 3 (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) - return $ map toLower (x:xs) + return $ T.toLower $ T.pack (x:xs) pRegion = P.try $ do P.char '-' cs <- P.many1 asciiLetter let lcs = length cs guard $ lcs == 2 || lcs == 3 - return $ map toUpper cs + return $ T.toUpper $ T.pack cs pVariant = P.try $ do P.char '-' ds <- P.option "" (P.count 1 P.digit) @@ -96,12 +94,12 @@ parseBCP47 lang = guard $ if null ds then length var >= 5 && length var <= 8 else length var == 4 - return $ map toLower var + return $ T.toLower $ T.pack var pExtension = P.try $ do P.char '-' cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) guard $ length cs >= 2 && length cs <= 8 - return $ map toLower cs + return $ T.toLower $ T.pack cs pPrivateUse = P.try $ do P.char '-' P.char 'x' @@ -109,4 +107,4 @@ parseBCP47 lang = cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) guard $ not (null cs) && length cs <= 8 let var = "x-" ++ cs - return $ map toLower var + return $ T.toLower $ T.pack var diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs index 660ec1b12..47a96b468 100644 --- a/src/Text/Pandoc/CSS.hs +++ b/src/Text/Pandoc/CSS.hs @@ -19,17 +19,18 @@ module Text.Pandoc.CSS ( foldOrElse where import Prelude +import qualified Data.Text as T import Text.Pandoc.Shared (trim) import Text.Parsec -import Text.Parsec.String +import Text.Parsec.Text -ruleParser :: Parser (String, String) +ruleParser :: Parser (T.Text, T.Text) ruleParser = do p <- many1 (noneOf ":") <* char ':' v <- many1 (noneOf ":;") <* optional (char ';') <* spaces - return (trim p, trim v) + return (trim $ T.pack p, trim $ T.pack v) -styleAttrParser :: Parser [(String, String)] +styleAttrParser :: Parser [(T.Text, T.Text)] styleAttrParser = many1 ruleParser orElse :: Eq a => a -> a -> a -> a @@ -44,7 +45,7 @@ eitherToMaybe _ = Nothing -- | takes a list of keys/properties and a CSS string and -- returns the corresponding key-value-pairs. -pickStylesToKVs :: [String] -> String -> [(String, String)] +pickStylesToKVs :: [T.Text] -> T.Text -> [(T.Text, T.Text)] pickStylesToKVs props styleAttr = case parse styleAttrParser "" styleAttr of Left _ -> [] @@ -52,7 +53,7 @@ pickStylesToKVs props styleAttr = -- | takes a list of key/property synonyms and a CSS string and maybe -- returns the value of the first match (in order of the supplied list) -pickStyleAttrProps :: [String] -> String -> Maybe String +pickStyleAttrProps :: [T.Text] -> T.Text -> Maybe T.Text pickStyleAttrProps lookupProps styleAttr = do styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr foldOrElse Nothing $ map (`lookup` styles) lookupProps diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 6d4e8d895..8449e4a0e 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -9,6 +9,8 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Class Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane @@ -79,7 +81,6 @@ import qualified System.Random as IO (newStdGen) import Codec.Archive.Zip import qualified Data.CaseInsensitive as CI import Data.Unique (hashUnique) -import Data.List (stripPrefix) import qualified Data.Unique as IO (newUnique) import qualified Text.Pandoc.UTF8 as UTF8 import qualified System.Directory as Directory @@ -134,6 +135,7 @@ import Data.Default import System.IO.Error import System.IO (stderr) import qualified Data.Map as M +import qualified Data.Text as T import Text.Pandoc.Error import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, @@ -153,7 +155,7 @@ import qualified Paths_pandoc as Paths class (Functor m, Applicative m, Monad m, MonadError PandocError m) => PandocMonad m where -- | Lookup an environment variable. - lookupEnv :: String -> m (Maybe String) + lookupEnv :: T.Text -> m (Maybe T.Text) -- | Get the current (UTC) time. getCurrentTime :: m UTCTime -- | Get the locale's time zone. @@ -164,7 +166,7 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m) newUniqueHash :: m Int -- | Retrieve contents and mime type from a URL, raising -- an error on failure. - openURL :: String -> m (B.ByteString, Maybe MimeType) + openURL :: T.Text -> m (B.ByteString, Maybe MimeType) -- | Read the lazy ByteString contents from a file path, -- raising an error on failure. readFileLazy :: FilePath -> m BL.ByteString @@ -199,10 +201,10 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m) -- Output a debug message to sterr, using 'Debug.Trace.trace', -- if tracing is enabled. Note: this writes to stderr even in -- pure instances. - trace :: String -> m () + trace :: T.Text -> m () trace msg = do tracing <- getsCommonState stTrace - when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ()) + when tracing $ Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ()) -- * Functions defined for all PandocMonad instances @@ -238,8 +240,8 @@ setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing} -- | Set request header to use in HTTP requests. setRequestHeader :: PandocMonad m - => String -- ^ Header name - -> String -- ^ Value + => T.Text -- ^ Header name + -> T.Text -- ^ Value -> m () setRequestHeader name val = modifyCommonState $ \st -> st{ stRequestHeaders = @@ -277,7 +279,7 @@ setInputFiles fs = do _ -> Nothing modifyCommonState $ \st -> st{ stInputFiles = fs - , stSourceURL = sourceURL } + , stSourceURL = T.pack <$> sourceURL } -- Retrieve the output filename. getOutputFile :: PandocMonad m => m (Maybe FilePath) @@ -307,10 +309,10 @@ getZonedTime = do return $ utcToZonedTime tz t -- | Read file, checking in any number of directories. -readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String) +readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text) readFileFromDirs [] _ = return Nothing readFileFromDirs (d:ds) f = catchError - ((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f)) + ((Just . T.pack . UTF8.toStringLazy) <$> readFileLazy (d </> f)) (\_ -> readFileFromDirs ds f) instance TemplateMonad PandocIO where @@ -331,9 +333,9 @@ data CommonState = CommonState { stLog :: [LogMessage] -- ^ A list of log messages in reverse order , stUserDataDir :: Maybe FilePath -- ^ Directory to search for data files - , stSourceURL :: Maybe String + , stSourceURL :: Maybe T.Text -- ^ Absolute URL + dir of 1st source file - , stRequestHeaders :: [(String, String)] + , stRequestHeaders :: [(T.Text, T.Text)] -- ^ Headers to add for HTTP requests , stMediaBag :: MediaBag -- ^ Media parsed from binary containers @@ -370,7 +372,7 @@ instance Default CommonState where -- | Convert BCP47 string to a Lang, issuing warning -- if there are problems. -toLang :: PandocMonad m => Maybe String -> m (Maybe Lang) +toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang) toLang Nothing = return Nothing toLang (Just s) = case parseBCP47 s of @@ -395,14 +397,14 @@ getTranslations = do Nothing -> return mempty -- no language defined Just (_, Just t) -> return t Just (lang, Nothing) -> do -- read from file - let translationFile = "translations/" ++ renderLang lang ++ ".yaml" - let fallbackFile = "translations/" ++ langLanguage lang ++ ".yaml" + let translationFile = "translations/" <> renderLang lang <> ".yaml" + let fallbackFile = "translations/" <> langLanguage lang <> ".yaml" let getTrans fp = do bs <- readDataFile fp - case readTranslations (UTF8.toString bs) of + case readTranslations (UTF8.toText bs) of Left e -> do report $ CouldNotLoadTranslations (renderLang lang) - (fp ++ ": " ++ e) + (T.pack fp <> ": " <> e) -- make sure we don't try again... modifyCommonState $ \st -> st{ stTranslations = Nothing } @@ -411,14 +413,14 @@ getTranslations = do modifyCommonState $ \st -> st{ stTranslations = Just (lang, Just t) } return t - catchError (getTrans translationFile) + catchError (getTrans $ T.unpack translationFile) (\_ -> - catchError (getTrans fallbackFile) + catchError (getTrans $ T.unpack fallbackFile) (\e -> do report $ CouldNotLoadTranslations (renderLang lang) $ case e of PandocCouldNotFindDataFileError _ -> - "data file " ++ fallbackFile ++ " not found" + "data file " <> fallbackFile <> " not found" _ -> "" -- make sure we don't try again... modifyCommonState $ \st -> st{ stTranslations = Nothing } @@ -426,13 +428,13 @@ getTranslations = do -- | Get a translation from the current term map. -- Issue a warning if the term is not defined. -translateTerm :: PandocMonad m => Term -> m String +translateTerm :: PandocMonad m => Term -> m T.Text translateTerm term = do translations <- getTranslations case lookupTerm term translations of Just s -> return s Nothing -> do - report $ NoTranslation (show term) + report $ NoTranslation $ T.pack $ show term return "" -- | Evaluate a 'PandocIO' operation. @@ -458,7 +460,7 @@ liftIOError :: (String -> IO a) -> String -> PandocIO a liftIOError f u = do res <- liftIO $ tryIOError $ f u case res of - Left e -> throwError $ PandocIOError u e + Left e -> throwError $ PandocIOError (T.pack u) e Right r -> return r -- | Show potential IO errors to the user continuing execution anyway @@ -466,24 +468,24 @@ logIOError :: IO () -> PandocIO () logIOError f = do res <- liftIO $ tryIOError f case res of - Left e -> report $ IgnoredIOError (E.displayException e) + Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e Right _ -> pure () instance PandocMonad PandocIO where - lookupEnv = liftIO . IO.lookupEnv + lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack getCurrentTime = liftIO IO.getCurrentTime getCurrentTimeZone = liftIO IO.getCurrentTimeZone newStdGen = liftIO IO.newStdGen newUniqueHash = hashUnique <$> liftIO IO.newUnique openURL u - | Just u'' <- stripPrefix "data:" u = do - let mime = takeWhile (/=',') u'' + | Just u'' <- T.stripPrefix "data:" u = do + let mime = T.takeWhile (/=',') u'' let contents = UTF8.fromString $ - unEscapeString $ drop 1 $ dropWhile (/=',') u'' + unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u'' return (decodeLenient contents, Just mime) | otherwise = do - let toReqHeader (n, v) = (CI.mk (UTF8.fromString n), UTF8.fromString v) + let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v) customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders report $ Fetching u res <- liftIO $ E.try $ withSocketsDo $ do @@ -493,11 +495,11 @@ instance PandocMonad PandocIO where Left _ -> return x Right pr -> parseReq pr >>= \r -> return (addProxy (host r) (port r) x) - req <- parseReq u >>= addProxy' + req <- parseReq (T.unpack u) >>= addProxy' let req' = req{requestHeaders = customHeaders ++ requestHeaders req} resp <- newManager tlsManagerSettings >>= httpLbs req' return (B.concat $ toChunks $ responseBody resp, - UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) + UTF8.toText `fmap` lookup hContentType (responseHeaders resp)) case res of Right r -> return r @@ -519,22 +521,22 @@ instance PandocMonad PandocIO where logOutput msg = liftIO $ do UTF8.hPutStr stderr $ "[" ++ show (messageVerbosity msg) ++ "] " - alertIndent $ lines $ showLogMessage msg + alertIndent $ T.lines $ showLogMessage msg -alertIndent :: [String] -> IO () +alertIndent :: [T.Text] -> IO () alertIndent [] = return () alertIndent (l:ls) = do - UTF8.hPutStrLn stderr l + UTF8.hPutStrLn stderr $ T.unpack l mapM_ go ls where go l' = do UTF8.hPutStr stderr " " - UTF8.hPutStrLn stderr l' + UTF8.hPutStrLn stderr $ T.unpack l' -- | Specialized version of parseURIReference that disallows -- single-letter schemes. Reason: these are usually windows absolute -- paths. -parseURIReference' :: String -> Maybe URI +parseURIReference' :: T.Text -> Maybe URI parseURIReference' s = - case parseURIReference s of + case parseURIReference (T.unpack s) of Just u | length (uriScheme u) > 2 -> Just u | null (uriScheme u) -> Just u -- protocol-relative @@ -554,16 +556,16 @@ getUserDataDir = getsCommonState stUserDataDir -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. fetchItem :: PandocMonad m - => String + => T.Text -> m (B.ByteString, Maybe MimeType) fetchItem s = do mediabag <- getMediaBag - case lookupMedia s mediabag of + case lookupMedia (T.unpack s) mediabag of Just (mime, bs) -> return (BL.toStrict bs, Just mime) Nothing -> downloadOrRead s downloadOrRead :: PandocMonad m - => String + => T.Text -> m (B.ByteString, Maybe MimeType) downloadOrRead s = do sourceURL <- getsCommonState stSourceURL @@ -571,19 +573,19 @@ downloadOrRead s = do ensureEscaped, ensureEscaped s) of (Just u, s') -> -- try fetching from relative path at source case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u + Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u Nothing -> openURL s' -- will throw error - (Nothing, s'@('/':'/':c:_)) | c /= '?' -> -- protocol-relative URI + (Nothing, s'@(T.unpack -> ('/':'/':c:_))) | c /= '?' -> -- protocol-relative URI -- we exclude //? because of //?UNC/ on Windows case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon + Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` httpcolon Nothing -> openURL s' -- will throw error (Nothing, s') -> - case parseURI s' of -- requires absolute URI + case parseURI (T.unpack s') of -- requires absolute URI Just u' | uriScheme u' == "file:" -> - readLocalFile $ uriPathToPath (uriPath u') + readLocalFile $ uriPathToPath (T.pack $ uriPath u') -- We don't want to treat C:/ as a scheme: - Just u' | length (uriScheme u') > 2 -> openURL (show u') + Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u') _ -> readLocalFile fp -- get from local file system where readLocalFile f = do resourcePath <- getResourcePath @@ -596,13 +598,13 @@ downloadOrRead s = do uriPath = "", uriQuery = "", uriFragment = "" } - dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#') - fp = unEscapeString $ dropFragmentAndQuery s + dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#') + fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s mime = case takeExtension fp of ".gz" -> getMimeType $ dropExtension fp ".svgz" -> getMimeType $ dropExtension fp ++ ".svg" x -> getMimeType x - ensureEscaped = escapeURIString isAllowedInURI . map convertSlash + ensureEscaped = T.pack . escapeURIString isAllowedInURI . T.unpack . T.map convertSlash convertSlash '\\' = '/' convertSlash x = x @@ -770,7 +772,7 @@ readDefaultDataFile "reference.odt" = readDefaultDataFile fname = #ifdef EMBED_DATA_FILES case lookup (makeCanonical fname) dataFiles of - Nothing -> throwError $ PandocCouldNotFindDataFileError fname + Nothing -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname Just contents -> return contents #else getDataFileName fname' >>= checkExistence >>= readFileStrict @@ -781,7 +783,7 @@ checkExistence fn = do exists <- fileExists fn if exists then return fn - else throwError $ PandocCouldNotFindDataFileError fn + else throwError $ PandocCouldNotFindDataFileError $ T.pack fn #endif makeCanonical :: FilePath -> FilePath @@ -792,7 +794,7 @@ makeCanonical = Posix.joinPath . transformPathParts . splitDirectories go as x = x : as withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a -withPaths [] _ fp = throwError $ PandocResourceNotFound fp +withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp withPaths (p:ps) action fp = catchError (action (p </> fp)) (\_ -> withPaths ps action fp) @@ -800,14 +802,14 @@ withPaths (p:ps) action fp = -- | Fetch local or remote resource (like an image) and provide data suitable -- for adding it to the MediaBag. fetchMediaResource :: PandocMonad m - => String -> m (FilePath, Maybe MimeType, BL.ByteString) + => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) fetchMediaResource src = do (bs, mt) <- downloadOrRead src - let ext = fromMaybe (takeExtension src) + let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) (mt >>= extensionFromMimeType) let bs' = BL.fromChunks [bs] let basename = showDigest $ sha1 bs' - let fname = basename <.> ext + let fname = basename <.> T.unpack ext return (fname, mt, bs') -- | Traverse tree, filling media bag for any images that @@ -817,12 +819,12 @@ fillMediaBag d = walkM handleImage d where handleImage :: PandocMonad m => Inline -> m Inline handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag - case lookupMedia src mediabag of + case lookupMedia (T.unpack src) mediabag of Just (_, _) -> return $ Image attr lab (src, tit) Nothing -> do (fname, mt, bs) <- fetchMediaResource src insertMedia fname mt bs - return $ Image attr lab (fname, tit)) + return $ Image attr lab (T.pack fname, tit)) (\e -> case e of PandocResourceNotFound _ -> do @@ -832,7 +834,7 @@ fillMediaBag d = walkM handleImage d return $ Span ("",["image"],[]) lab PandocHttpError u er -> do report $ CouldNotFetchResource u - (show er ++ "\rReplacing image with description.") + (T.pack $ show er ++ "\rReplacing image with description.") -- emit alt text return $ Span ("",["image"],[]) lab _ -> throwError e) @@ -856,15 +858,15 @@ writeMedia dir mediabag subpath = do let fullpath = dir </> unEscapeString (normalise subpath) let mbcontents = lookupMedia subpath mediabag case mbcontents of - Nothing -> throwError $ PandocResourceNotFound subpath + Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath Just (_, bs) -> do - report $ Extracting fullpath + report $ Extracting $ T.pack fullpath liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) logIOError $ BL.writeFile fullpath bs adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline adjustImagePath dir paths (Image attr lab (src, tit)) - | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) + | T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit) adjustImagePath _ _ x = x -- | The 'PureState' contains ersatz representations @@ -878,7 +880,7 @@ data PureState = PureState { stStdGen :: StdGen -- contain every -- element at most -- once, e.g. [1..] - , stEnv :: [(String, String)] + , stEnv :: [(T.Text, T.Text)] , stTime :: UTCTime , stTimeZone :: TimeZone , stReferenceDocx :: Archive @@ -996,12 +998,12 @@ instance PandocMonad PandocPure where fps <- getsPureState stFiles case infoFileContents <$> getFileInfo fp fps of Just bs -> return (BL.fromStrict bs) - Nothing -> throwError $ PandocResourceNotFound fp + Nothing -> throwError $ PandocResourceNotFound $ T.pack fp readFileStrict fp = do fps <- getsPureState stFiles case infoFileContents <$> getFileInfo fp fps of Just bs -> return bs - Nothing -> throwError $ PandocResourceNotFound fp + Nothing -> throwError $ PandocResourceNotFound $ T.pack fp glob s = do FileTree ftmap <- getsPureState stFiles @@ -1019,7 +1021,7 @@ instance PandocMonad PandocPure where fps <- getsPureState stFiles case infoFileMTime <$> getFileInfo fp fps of Just tm -> return tm - Nothing -> throwError $ PandocIOError fp + Nothing -> throwError $ PandocIOError (T.pack fp) (userError "Can't get modification time") getCommonState = PandocPure $ lift get @@ -1070,7 +1072,7 @@ instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where when tracing $ do pos <- getPosition Debug.Trace.trace - ("[trace] Parsed " ++ msg ++ " at line " ++ + ("[trace] Parsed " ++ T.unpack msg ++ " at line " ++ show (sourceLine pos) ++ if sourceName pos == "chunk" then " of chunk" diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs index 92a07b4c2..ec544e15d 100644 --- a/src/Text/Pandoc/Emoji.hs +++ b/src/Text/Pandoc/Emoji.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Emoji Copyright : Copyright (C) 2015 John MacFarlane @@ -13,9 +14,10 @@ Emoji symbol lookup from canonical string identifier. module Text.Pandoc.Emoji ( emojis, emojiToInline ) where import Prelude import qualified Data.Map as M +import qualified Data.Text as T import Text.Pandoc.Definition (Inline (Span, Str)) -emojis :: M.Map String String +emojis :: M.Map T.Text T.Text emojis = M.fromList [("+1","\128077") ,("-1","\128078") @@ -1810,6 +1812,6 @@ emojis = M.fromList ,("zzz","\128164") ] -emojiToInline :: String -> Maybe Inline +emojiToInline :: T.Text -> Maybe Inline emojiToInline emojikey = makeSpan <$> M.lookup emojikey emojis where makeSpan = Span ("", ["emoji"], [("data-emoji", emojikey)]) . (:[]) . Str diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 113ab9d6e..38db4fda9 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Error Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -22,6 +23,8 @@ import Prelude import Control.Exception (Exception) import Data.Typeable (Typeable) import Data.Word (Word8) +import Data.Text (Text) +import qualified Data.Text as T import GHC.Generics (Generic) import Network.HTTP.Client (HttpException) import System.Exit (ExitCode (..), exitWith) @@ -31,32 +34,32 @@ import Text.Printf (printf) import Text.Parsec.Error import Text.Parsec.Pos hiding (Line) -type Input = String +type Input = Text -data PandocError = PandocIOError String IOError - | PandocHttpError String HttpException - | PandocShouldNeverHappenError String - | PandocSomeError String - | PandocParseError String +data PandocError = PandocIOError Text IOError + | PandocHttpError Text HttpException + | PandocShouldNeverHappenError Text + | PandocSomeError Text + | PandocParseError Text | PandocParsecError Input ParseError - | PandocMakePDFError String - | PandocOptionError String - | PandocSyntaxMapError String + | PandocMakePDFError Text + | PandocOptionError Text + | PandocSyntaxMapError Text | PandocFailOnWarningError - | PandocPDFProgramNotFoundError String - | PandocPDFError String - | PandocFilterError String String - | PandocCouldNotFindDataFileError String - | PandocResourceNotFound String - | PandocTemplateError String - | PandocAppError String - | PandocEpubSubdirectoryError String - | PandocMacroLoop String - | PandocUTF8DecodingError String Int Word8 - | PandocIpynbDecodingError String - | PandocUnknownReaderError String - | PandocUnknownWriterError String - | PandocUnsupportedExtensionError String String + | PandocPDFProgramNotFoundError Text + | PandocPDFError Text + | PandocFilterError Text Text + | PandocCouldNotFindDataFileError Text + | PandocResourceNotFound Text + | PandocTemplateError Text + | PandocAppError Text + | PandocEpubSubdirectoryError Text + | PandocMacroLoop Text + | PandocUTF8DecodingError Text Int Word8 + | PandocIpynbDecodingError Text + | PandocUnknownReaderError Text + | PandocUnknownWriterError Text + | PandocUnsupportedExtensionError Text Text deriving (Show, Typeable, Generic) instance Exception PandocError @@ -68,23 +71,23 @@ handleError (Left e) = case e of PandocIOError _ err' -> ioError err' PandocHttpError u err' -> err 61 $ - "Could not fetch " ++ u ++ "\n" ++ show err' + "Could not fetch " <> u <> "\n" <> tshow err' PandocShouldNeverHappenError s -> err 62 $ - "Something we thought was impossible happened!\n" ++ - "Please report this to pandoc's developers: " ++ s + "Something we thought was impossible happened!\n" <> + "Please report this to pandoc's developers: " <> s PandocSomeError s -> err 63 s PandocParseError s -> err 64 s PandocParsecError input err' -> let errPos = errorPos err' errLine = sourceLine errPos errColumn = sourceColumn errPos - ls = lines input ++ [""] + ls = T.lines input <> [""] errorInFile = if length ls > errLine - 1 - then concat ["\n", ls !! (errLine - 1) - ,"\n", replicate (errColumn - 1) ' ' - ,"^"] + then T.concat ["\n", ls !! (errLine - 1) + ,"\n", T.replicate (errColumn - 1) " " + ,"^"] else "" - in err 65 $ "\nError at " ++ show err' ++ + in err 65 $ "\nError at " <> tshow err' <> -- if error comes from a chunk or included file, -- then we won't get the right text this way: if sourceName errPos == "source" @@ -95,49 +98,52 @@ handleError (Left e) = PandocSyntaxMapError s -> err 67 s PandocFailOnWarningError -> err 3 "Failing because there were warnings." PandocPDFProgramNotFoundError pdfprog -> err 47 $ - pdfprog ++ " not found. Please select a different --pdf-engine or install " ++ pdfprog - PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" ++ logmsg - PandocFilterError filtername msg -> err 83 $ "Error running filter " ++ - filtername ++ ":\n" ++ msg + pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog + PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg + PandocFilterError filtername msg -> err 83 $ "Error running filter " <> + filtername <> ":\n" <> msg PandocCouldNotFindDataFileError fn -> err 97 $ - "Could not find data file " ++ fn + "Could not find data file " <> fn PandocResourceNotFound fn -> err 99 $ - "File " ++ fn ++ " not found in resource path" - PandocTemplateError s -> err 5 $ "Error compiling template " ++ s + "File " <> fn <> " not found in resource path" + PandocTemplateError s -> err 5 $ "Error compiling template " <> s PandocAppError s -> err 4 s PandocEpubSubdirectoryError s -> err 31 $ - "EPUB subdirectory name '" ++ s ++ "' contains illegal characters" + "EPUB subdirectory name '" <> s <> "' contains illegal characters" PandocMacroLoop s -> err 91 $ - "Loop encountered in expanding macro " ++ s + "Loop encountered in expanding macro " <> s PandocUTF8DecodingError f offset w -> err 92 $ - "UTF-8 decoding error in " ++ f ++ " at byte offset " ++ show offset ++ - " (" ++ printf "%2x" w ++ ").\n" ++ + "UTF-8 decoding error in " <> f <> " at byte offset " <> tshow offset <> + " (" <> T.pack (printf "%2x" w) <> ").\n" <> "The input must be a UTF-8 encoded text." PandocIpynbDecodingError w -> err 93 $ - "ipynb decoding error: " ++ w + "ipynb decoding error: " <> w PandocUnknownReaderError r -> err 21 $ - "Unknown input format " ++ r ++ + "Unknown input format " <> r <> case r of - "doc" -> "\nPandoc can convert from DOCX, but not from DOC." ++ - "\nTry using Word to save your DOC file as DOCX," ++ + "doc" -> "\nPandoc can convert from DOCX, but not from DOC." <> + "\nTry using Word to save your DOC file as DOCX," <> " and convert that with pandoc." "pdf" -> "\nPandoc can convert to PDF, but not from PDF." _ -> "" PandocUnknownWriterError w -> err 22 $ - "Unknown output format " ++ w ++ + "Unknown output format " <> w <> case w of - "pdf" -> "To create a pdf using pandoc, use" ++ - " -t latex|beamer|context|ms|html5" ++ - "\nand specify an output file with " ++ + "pdf" -> "To create a pdf using pandoc, use" <> + " -t latex|beamer|context|ms|html5" <> + "\nand specify an output file with " <> ".pdf extension (-o filename.pdf)." "doc" -> "\nPandoc can convert to DOCX, but not from DOC." _ -> "" PandocUnsupportedExtensionError ext f -> err 23 $ - "The extension " ++ ext ++ " is not supported " ++ - "for " ++ f + "The extension " <> ext <> " is not supported " <> + "for " <> f -err :: Int -> String -> IO a +err :: Int -> Text -> IO a err exitCode msg = do - UTF8.hPutStrLn stderr msg + UTF8.hPutStrLn stderr (T.unpack msg) exitWith $ ExitFailure exitCode return undefined + +tshow :: Show a => a -> Text +tshow = T.pack . show diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index cdf4f159d..f079a9432 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -4,6 +4,7 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Extensions Copyright : Copyright (C) 2012-2019 John MacFarlane @@ -35,6 +36,7 @@ where import Prelude import Data.Bits (clearBit, setBit, testBit, (.|.)) import Data.Data (Data) +import qualified Data.Text as T import Data.Typeable (Typeable) import GHC.Generics (Generic) import Safe (readMay) @@ -304,7 +306,7 @@ strictExtensions = extensionsFromList ] -- | Default extensions from format-describing string. -getDefaultExtensions :: String -> Extensions +getDefaultExtensions :: T.Text -> Extensions getDefaultExtensions "markdown_strict" = strictExtensions getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions getDefaultExtensions "markdown_mmd" = multimarkdownExtensions @@ -402,7 +404,7 @@ allMarkdownExtensions = -- | Get all valid extensions for a format. This is used -- mainly in checking format specifications for validity. -getAllExtensions :: String -> Extensions +getAllExtensions :: T.Text -> Extensions getAllExtensions f = universalExtensions <> getAll f where autoIdExtensions = extensionsFromList @@ -507,14 +509,14 @@ getAllExtensions f = universalExtensions <> getAll f -- | Parse a format-specifying string into a markup format, -- a set of extensions to enable, and a set of extensions to disable. -parseFormatSpec :: String - -> Either ParseError (String, [Extension], [Extension]) +parseFormatSpec :: T.Text + -> Either ParseError (T.Text, [Extension], [Extension]) parseFormatSpec = parse formatSpec "" where formatSpec = do name <- formatName (extsToEnable, extsToDisable) <- foldl (flip ($)) ([],[]) <$> many extMod - return (name, reverse extsToEnable, reverse extsToDisable) + return (T.pack name, reverse extsToEnable, reverse extsToDisable) formatName = many1 $ noneOf "-+" extMod = do polarity <- oneOf "-+" diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs index c1cbf91a9..e8e737499 100644 --- a/src/Text/Pandoc/Filter/JSON.hs +++ b/src/Text/Pandoc/Filter/JSON.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Filter Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -18,6 +19,7 @@ import Control.Monad.Trans (MonadIO (liftIO)) import Data.Aeson (eitherDecode', encode) import Data.Char (toLower) import Data.Maybe (isNothing) +import qualified Data.Text as T import System.Directory (executable, doesFileExist, findExecutable, getPermissions) import System.Environment (getEnvironment) @@ -28,7 +30,7 @@ import Text.Pandoc.Error (PandocError (PandocFilterError)) import Text.Pandoc.Definition (Pandoc) import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Process (pipeProcess) -import Text.Pandoc.Shared (pandocVersion) +import Text.Pandoc.Shared (pandocVersion, tshow) import qualified Control.Exception as E import qualified Text.Pandoc.UTF8 as UTF8 @@ -61,18 +63,20 @@ externalFilter ropts f args' d = liftIO $ do unless (exists && isExecutable) $ do mbExe <- findExecutable f' when (isNothing mbExe) $ - E.throwIO $ PandocFilterError f ("Could not find executable " ++ f') + E.throwIO $ PandocFilterError fText (T.pack $ "Could not find executable " <> f') env <- getEnvironment let env' = Just - ( ("PANDOC_VERSION", pandocVersion) + ( ("PANDOC_VERSION", T.unpack pandocVersion) : ("PANDOC_READER_OPTIONS", UTF8.toStringLazy (encode ropts)) : env ) (exitcode, outbs) <- E.handle filterException $ pipeProcess env' f' args'' $ encode d case exitcode of - ExitSuccess -> either (E.throwIO . PandocFilterError f) + ExitSuccess -> either (E.throwIO . PandocFilterError fText . T.pack) return $ eitherDecode' outbs - ExitFailure ec -> E.throwIO $ PandocFilterError f - ("Filter returned error status " ++ show ec) - where filterException :: E.SomeException -> IO a - filterException e = E.throwIO $ PandocFilterError f (show e) + ExitFailure ec -> E.throwIO $ PandocFilterError fText + ("Filter returned error status " <> tshow ec) + where fText = T.pack f + + filterException :: E.SomeException -> IO a + filterException e = E.throwIO $ PandocFilterError fText $ tshow e diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs index afe525ab1..87c51ac42 100644 --- a/src/Text/Pandoc/Filter/Lua.hs +++ b/src/Text/Pandoc/Filter/Lua.hs @@ -15,6 +15,7 @@ module Text.Pandoc.Filter.Lua (apply) where import Prelude import Control.Exception (throw) import Control.Monad ((>=>)) +import qualified Data.Text as T import Text.Pandoc.Class (PandocIO) import Text.Pandoc.Definition (Pandoc) import Text.Pandoc.Error (PandocError (PandocFilterError)) @@ -35,7 +36,7 @@ apply ropts args fp doc = do (x:_) -> x _ -> error "Format not supplied for Lua filter" runLua >=> forceResult fp $ do - setGlobals [ FORMAT format + setGlobals [ FORMAT $ T.pack format , PANDOC_READER_OPTIONS ropts , PANDOC_SCRIPT_FILE fp ] @@ -44,4 +45,4 @@ apply ropts args fp doc = do forceResult :: FilePath -> Either LuaException Pandoc -> PandocIO Pandoc forceResult fp eitherResult = case eitherResult of Right x -> return x - Left (LuaException s) -> throw (PandocFilterError fp s) + Left (LuaException s) -> throw (PandocFilterError (T.pack fp) s) diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 62aa5afc4..3e02355f7 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Highlighting Copyright : Copyright (C) 2008-2019 John MacFarlane @@ -35,7 +36,6 @@ module Text.Pandoc.Highlighting ( highlightingStyles ) where import Prelude import Control.Monad -import Data.Char (toLower) import qualified Data.Map as M import Data.Maybe (fromMaybe) import qualified Data.Text as T @@ -43,7 +43,7 @@ import Skylighting import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) -highlightingStyles :: [(String, Style)] +highlightingStyles :: [(T.Text, Style)] highlightingStyles = [("pygments", pygments), ("tango", tango), @@ -54,18 +54,18 @@ highlightingStyles = ("breezedark", breezeDark), ("haddock", haddock)] -languages :: [String] -languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap] +languages :: [T.Text] +languages = [T.toLower (sName s) | s <- M.elems defaultSyntaxMap] -languagesByExtension :: String -> [String] +languagesByExtension :: T.Text -> [T.Text] languagesByExtension ext = - [T.unpack (T.toLower (sName s)) | s <- syntaxesByExtension defaultSyntaxMap ext] + [T.toLower (sName s) | s <- syntaxesByExtension defaultSyntaxMap (T.unpack ext)] highlight :: SyntaxMap -> (FormatOptions -> [SourceLine] -> a) -- ^ Formatter -> Attr -- ^ Attributes of the CodeBlock - -> String -- ^ Raw contents of the CodeBlock - -> Either String a + -> T.Text -- ^ Raw contents of the CodeBlock + -> Either T.Text a highlight syntaxmap formatter (ident, classes, keyvals) rawCode = let firstNum = fromMaybe 1 (safeRead (fromMaybe "1" $ lookup "startFrom" keyvals)) fmtOpts = defaultFormatOpts{ @@ -74,38 +74,36 @@ highlight syntaxmap formatter (ident, classes, keyvals) rawCode = ["line-anchors", "lineAnchors"]) classes, numberLines = any (`elem` ["number","numberLines", "number-lines"]) classes, - lineIdPrefix = if null ident + lineIdPrefix = if T.null ident then mempty - else T.pack (ident ++ "-") } + else ident <> "-" } tokenizeOpts = TokenizerConfig{ syntaxMap = syntaxmap , traceOutput = False } - classes' = map T.pack classes - rawCode' = T.pack rawCode - in case msum (map (`lookupSyntax` syntaxmap) classes') of + in case msum (map (`lookupSyntax` syntaxmap) classes) of Nothing | numberLines fmtOpts -> Right $ formatter fmtOpts{ codeClasses = [], - containerClasses = classes' } + containerClasses = classes } $ map (\ln -> [(NormalTok, ln)]) - $ T.lines rawCode' + $ T.lines rawCode | otherwise -> Left "" - Just syntax -> + Just syntax -> either (Left . T.pack) Right $ formatter fmtOpts{ codeClasses = [T.toLower (sShortname syntax)], - containerClasses = classes' } <$> - tokenize tokenizeOpts syntax rawCode' + containerClasses = classes } <$> + tokenize tokenizeOpts syntax rawCode -- Functions for correlating latex listings package's language names -- with skylighting language names: -langToListingsMap :: M.Map String String +langToListingsMap :: M.Map T.Text T.Text langToListingsMap = M.fromList langsList -listingsToLangMap :: M.Map String String +listingsToLangMap :: M.Map T.Text T.Text listingsToLangMap = M.fromList $ map switch langsList where switch (a,b) = (b,a) -langsList :: [(String, String)] +langsList :: [(T.Text, T.Text)] langsList = [("abap","ABAP"), ("acm","ACM"), @@ -212,9 +210,9 @@ langsList = ("xslt","XSLT")] -- | Determine listings language name from skylighting language name. -toListingsLanguage :: String -> Maybe String -toListingsLanguage lang = M.lookup (map toLower lang) langToListingsMap +toListingsLanguage :: T.Text -> Maybe T.Text +toListingsLanguage lang = M.lookup (T.toLower lang) langToListingsMap -- | Determine skylighting language name from listings language name. -fromListingsLanguage :: String -> Maybe String +fromListingsLanguage :: T.Text -> Maybe T.Text fromListingsLanguage lang = M.lookup lang listingsToLangMap diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index afbba9b8b..d9ded22be 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-} +{-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-type-defaults #-} {- | Module : Text.Pandoc.ImageSize @@ -49,6 +50,8 @@ import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.XML.Light as Xml import qualified Data.Map as M +import qualified Data.Text as T +import qualified Data.Text.Encoding as TE import Control.Monad.Except import Control.Applicative import Data.Maybe (fromMaybe) @@ -72,12 +75,12 @@ data Dimension = Pixel Integer deriving Eq instance Show Dimension where - show (Pixel a) = show a ++ "px" - show (Centimeter a) = showFl a ++ "cm" - show (Millimeter a) = showFl a ++ "mm" - show (Inch a) = showFl a ++ "in" - show (Percent a) = show a ++ "%" - show (Em a) = showFl a ++ "em" + show (Pixel a) = show a ++ "px" + show (Centimeter a) = T.unpack (showFl a) ++ "cm" + show (Millimeter a) = T.unpack (showFl a) ++ "mm" + show (Inch a) = T.unpack (showFl a) ++ "in" + show (Percent a) = show a ++ "%" + show (Em a) = T.unpack (showFl a) ++ "em" data ImageSize = ImageSize{ pxX :: Integer @@ -88,14 +91,13 @@ data ImageSize = ImageSize{ instance Default ImageSize where def = ImageSize 300 200 72 72 -showFl :: (RealFloat a) => a -> String -showFl a = removeExtra0s $ showFFloat (Just 5) a "" +showFl :: (RealFloat a) => a -> T.Text +showFl a = removeExtra0s $ T.pack $ showFFloat (Just 5) a "" -removeExtra0s :: String -> String -removeExtra0s s = - case dropWhile (=='0') $ reverse s of - '.':xs -> reverse xs - xs -> reverse xs +removeExtra0s :: T.Text -> T.Text +removeExtra0s s = case T.dropWhileEnd (=='0') s of + (T.unsnoc -> Just (xs, '.')) -> xs + xs -> xs imageType :: ByteString -> Maybe ImageType imageType img = case B.take 4 img of @@ -119,7 +121,7 @@ imageType img = case B.take 4 img of findSvgTag :: ByteString -> Bool findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img -imageSize :: WriterOptions -> ByteString -> Either String ImageSize +imageSize :: WriterOptions -> ByteString -> Either T.Text ImageSize imageSize opts img = case imageType img of Just Png -> mbToEither "could not determine PNG size" $ pngSize img @@ -194,22 +196,22 @@ inPixel opts dim = where dpi = fromIntegral $ writerDpi opts --- | Convert a Dimension to a String denoting its equivalent in inches, for example "2.00000". +-- | Convert a Dimension to Text denoting its equivalent in inches, for example "2.00000". -- Note: Dimensions in percentages are converted to the empty string. -showInInch :: WriterOptions -> Dimension -> String +showInInch :: WriterOptions -> Dimension -> T.Text showInInch _ (Percent _) = "" showInInch opts dim = showFl $ inInch opts dim --- | Convert a Dimension to a String denoting its equivalent in pixels, for example "600". +-- | Convert a Dimension to Text denoting its equivalent in pixels, for example "600". -- Note: Dimensions in percentages are converted to the empty string. -showInPixel :: WriterOptions -> Dimension -> String +showInPixel :: WriterOptions -> Dimension -> T.Text showInPixel _ (Percent _) = "" -showInPixel opts dim = show $ inPixel opts dim +showInPixel opts dim = T.pack $ show $ inPixel opts dim -- | Maybe split a string into a leading number and trailing unit, e.g. "3cm" to Just (3.0, "cm") -numUnit :: String -> Maybe (Double, String) +numUnit :: T.Text -> Maybe (Double, T.Text) numUnit s = - let (nums, unit) = span (\c -> isDigit c || ('.'==c)) s + let (nums, unit) = T.span (\c -> isDigit c || ('.'==c)) s in case safeRead nums of Just n -> Just (n, unit) Nothing -> Nothing @@ -235,7 +237,7 @@ dimension dir (_, _, kvs) = where extractDim key = lookup key kvs >>= lengthToDim -lengthToDim :: String -> Maybe Dimension +lengthToDim :: T.Text -> Maybe Dimension lengthToDim s = numUnit s >>= uncurry toDim where toDim a "cm" = Just $ Centimeter a @@ -258,8 +260,8 @@ epsSize img = do [] -> mzero (x:_) -> case B.words x of [_, _, _, ux, uy] -> do - ux' <- safeRead $ B.unpack ux - uy' <- safeRead $ B.unpack uy + ux' <- safeRead $ TE.decodeUtf8 ux + uy' <- safeRead $ TE.decodeUtf8 uy return ImageSize{ pxX = ux' , pxY = uy' @@ -284,7 +286,7 @@ pPdfSize = do [x1,y1,x2,y2] <- A.count 4 $ do A.skipSpace raw <- A.many1 $ A.satisfy (\c -> isDigit c || c == '.') - case safeRead raw of + case safeRead $ T.pack raw of Just (r :: Double) -> return $ floor r Nothing -> mzero A.skipSpace @@ -345,7 +347,7 @@ svgSize opts img = do doc <- Xml.parseXMLDoc $ UTF8.toString img let dpi = fromIntegral $ writerDpi opts let dirToInt dir = do - dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim + dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack return $ inPixel opts dim w <- dirToInt "width" h <- dirToInt "height" @@ -388,7 +390,7 @@ emfSize img = Right (_, _, size) -> Just size -jpegSize :: ByteString -> Either String ImageSize +jpegSize :: ByteString -> Either T.Text ImageSize jpegSize img = let (hdr, rest) = B.splitAt 4 img in if B.length rest < 14 @@ -398,7 +400,7 @@ jpegSize img = "\xff\xd8\xff\xe1" -> exifSize rest _ -> Left "unable to determine JPEG size" -jfifSize :: ByteString -> Either String ImageSize +jfifSize :: ByteString -> Either T.Text ImageSize jfifSize rest = case map fromIntegral $ unpack $ B.take 5 $ B.drop 9 rest of [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] -> @@ -416,7 +418,7 @@ jfifSize rest = , dpiY = dpiy } _ -> Left "unable to determine JFIF size" -findJfifSize :: ByteString -> Either String (Integer,Integer) +findJfifSize :: ByteString -> Either T.Text (Integer,Integer) findJfifSize bs = let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs in case B.uncons bs' of @@ -433,19 +435,18 @@ findJfifSize bs = _ -> Left "JFIF parse error" Nothing -> Left "Did not find JFIF length record" -runGet' :: Get (Either String a) -> BL.ByteString -> Either String a +runGet' :: Get (Either T.Text a) -> BL.ByteString -> Either T.Text a runGet' p bl = #if MIN_VERSION_binary(0,7,0) case runGetOrFail p bl of - Left (_,_,msg) -> Left msg + Left (_,_,msg) -> Left $ T.pack msg Right (_,_,x) -> x #else runGet p bl #endif - -exifSize :: ByteString -> Either String ImageSize -exifSize bs =runGet' header bl +exifSize :: ByteString -> Either T.Text ImageSize +exifSize bs = runGet' header bl where bl = BL.fromChunks [bs] header = runExceptT $ exifHeader bl -- NOTE: It would be nicer to do @@ -454,7 +455,7 @@ exifSize bs =runGet' header bl -- be parsed. But we only get an Alternative instance for Get in binary 0.6, -- and binary 0.5 ships with ghc 7.6. -exifHeader :: BL.ByteString -> ExceptT String Get ImageSize +exifHeader :: BL.ByteString -> ExceptT T.Text Get ImageSize exifHeader hdr = do _app1DataSize <- lift getWord16be exifHdr <- lift getWord32be @@ -479,7 +480,7 @@ exifHeader hdr = do ifdOffset <- lift getWord32 lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF numentries <- lift getWord16 - let ifdEntry :: ExceptT String Get (TagType, DataFormat) + let ifdEntry :: ExceptT T.Text Get (TagType, DataFormat) ifdEntry = do tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable <$> lift getWord16 @@ -502,7 +503,7 @@ exifHeader hdr = do 10 -> return (SignedRational <$> getRational, 8) 11 -> return (SingleFloat <$> getWord32 {- TODO -}, 4) 12 -> return (DoubleFloat <$> getWord64 {- TODO -}, 8) - _ -> throwError $ "Unknown data format " ++ show dataFormat + _ -> throwError $ "Unknown data format " <> T.pack (show dataFormat) let totalBytes = fromIntegral $ numComponents * bytesPerComponent payload <- if totalBytes <= 4 -- data is right here then lift $ fmt <* skip (4 - totalBytes) diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 74b8e1bb2..f13139fa2 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -31,7 +31,6 @@ import Data.Aeson.Encode.Pretty (Config (..), defConfig, encodePretty', keyOrder) import qualified Data.ByteString.Lazy as BL import Data.Data (Data, toConstr) -import Data.List (isSuffixOf, intercalate) import qualified Data.Text as Text import Data.Typeable (Typeable) import GHC.Generics (Generic) @@ -62,43 +61,43 @@ instance FromYAML Verbosity where _ -> mzero data LogMessage = - SkippedContent String SourcePos - | IgnoredElement String - | CouldNotParseYamlMetadata String SourcePos - | DuplicateLinkReference String SourcePos - | DuplicateNoteReference String SourcePos - | NoteDefinedButNotUsed String SourcePos - | DuplicateIdentifier String SourcePos - | ReferenceNotFound String SourcePos - | CircularReference String SourcePos - | UndefinedToggle String SourcePos - | ParsingUnescaped String SourcePos - | CouldNotLoadIncludeFile String SourcePos - | MacroAlreadyDefined String SourcePos + SkippedContent Text.Text SourcePos + | IgnoredElement Text.Text + | CouldNotParseYamlMetadata Text.Text SourcePos + | DuplicateLinkReference Text.Text SourcePos + | DuplicateNoteReference Text.Text SourcePos + | NoteDefinedButNotUsed Text.Text SourcePos + | DuplicateIdentifier Text.Text SourcePos + | ReferenceNotFound Text.Text SourcePos + | CircularReference Text.Text SourcePos + | UndefinedToggle Text.Text SourcePos + | ParsingUnescaped Text.Text SourcePos + | CouldNotLoadIncludeFile Text.Text SourcePos + | MacroAlreadyDefined Text.Text SourcePos | InlineNotRendered Inline | BlockNotRendered Block - | DocxParserWarning String - | IgnoredIOError String - | CouldNotFetchResource String String - | CouldNotDetermineImageSize String String - | CouldNotConvertImage String String - | CouldNotDetermineMimeType String - | CouldNotConvertTeXMath String String - | CouldNotParseCSS String - | Fetching String - | Extracting String - | NoTitleElement String + | DocxParserWarning Text.Text + | IgnoredIOError Text.Text + | CouldNotFetchResource Text.Text Text.Text + | CouldNotDetermineImageSize Text.Text Text.Text + | CouldNotConvertImage Text.Text Text.Text + | CouldNotDetermineMimeType Text.Text + | CouldNotConvertTeXMath Text.Text Text.Text + | CouldNotParseCSS Text.Text + | Fetching Text.Text + | Extracting Text.Text + | NoTitleElement Text.Text | NoLangSpecified - | InvalidLang String - | CouldNotHighlight String - | MissingCharacter String - | Deprecated String String - | NoTranslation String - | CouldNotLoadTranslations String String - | UnusualConversion String - | UnexpectedXmlElement String String - | UnknownOrgExportOption String - | CouldNotDeduceFormat [String] String + | InvalidLang Text.Text + | CouldNotHighlight Text.Text + | MissingCharacter Text.Text + | Deprecated Text.Text Text.Text + | NoTranslation Text.Text + | CouldNotLoadTranslations Text.Text Text.Text + | UnusualConversion Text.Text + | UnexpectedXmlElement Text.Text Text.Text + | UnknownOrgExportOption Text.Text + | CouldNotDeduceFormat [Text.Text] Text.Text deriving (Show, Eq, Data, Ord, Typeable, Generic) instance ToJSON LogMessage where @@ -107,65 +106,65 @@ instance ToJSON LogMessage where "type" .= toJSON (show $ toConstr x) : case x of SkippedContent s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= sourceLine pos, "column" .= sourceColumn pos] IgnoredElement s -> - ["contents" .= Text.pack s] + ["contents" .= s] CouldNotParseYamlMetadata s pos -> - ["message" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["message" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] DuplicateLinkReference s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] NoteDefinedButNotUsed s pos -> - ["key" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["key" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] DuplicateNoteReference s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] DuplicateIdentifier s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] ReferenceNotFound s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] CircularReference s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] UndefinedToggle s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] ParsingUnescaped s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] CouldNotLoadIncludeFile fp pos -> - ["path" .= Text.pack fp, - "source" .= Text.pack (sourceName pos), + ["path" .= fp, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] MacroAlreadyDefined name pos -> - ["name" .= Text.pack name, - "source" .= Text.pack (sourceName pos), + ["name" .= name, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] InlineNotRendered il -> @@ -173,60 +172,60 @@ instance ToJSON LogMessage where BlockNotRendered bl -> ["contents" .= toJSON bl] DocxParserWarning s -> - ["contents" .= Text.pack s] + ["contents" .= s] IgnoredIOError s -> - ["contents" .= Text.pack s] + ["contents" .= s] CouldNotFetchResource fp s -> - ["path" .= Text.pack fp, - "message" .= Text.pack s] + ["path" .= fp, + "message" .= s] CouldNotDetermineImageSize fp s -> - ["path" .= Text.pack fp, - "message" .= Text.pack s] + ["path" .= fp, + "message" .= s] CouldNotConvertImage fp s -> - ["path" .= Text.pack fp, - "message" .= Text.pack s] + ["path" .= fp, + "message" .= s] CouldNotDetermineMimeType fp -> - ["path" .= Text.pack fp] + ["path" .= fp] CouldNotConvertTeXMath s msg -> - ["contents" .= Text.pack s, - "message" .= Text.pack msg] + ["contents" .= s, + "message" .= msg] CouldNotParseCSS msg -> - ["message" .= Text.pack msg] + ["message" .= msg] Fetching fp -> - ["path" .= Text.pack fp] + ["path" .= fp] Extracting fp -> - ["path" .= Text.pack fp] + ["path" .= fp] NoTitleElement fallback -> - ["fallback" .= Text.pack fallback] + ["fallback" .= fallback] NoLangSpecified -> [] InvalidLang s -> - ["lang" .= Text.pack s] + ["lang" .= s] CouldNotHighlight msg -> - ["message" .= Text.pack msg] + ["message" .= msg] MissingCharacter msg -> - ["message" .= Text.pack msg] + ["message" .= msg] Deprecated thing msg -> - ["thing" .= Text.pack thing, - "message" .= Text.pack msg] + ["thing" .= thing, + "message" .= msg] NoTranslation term -> - ["term" .= Text.pack term] + ["term" .= term] CouldNotLoadTranslations lang msg -> - ["lang" .= Text.pack lang, - "message" .= Text.pack msg] + ["lang" .= lang, + "message" .= msg] UnusualConversion msg -> - ["message" .= Text.pack msg] + ["message" .= msg] UnexpectedXmlElement element parent -> - ["element" .= Text.pack element, - "parent" .= Text.pack parent] + ["element" .= element, + "parent" .= parent] UnknownOrgExportOption option -> - ["option" .= Text.pack option] + ["option" .= option] CouldNotDeduceFormat exts format -> - ["extensions" .= map Text.pack exts - ,"format" .= Text.pack format] + ["extensions" .= exts + ,"format" .= format] -showPos :: SourcePos -> String -showPos pos = sn ++ "line " ++ +showPos :: SourcePos -> Text.Text +showPos pos = Text.pack $ sn ++ "line " ++ show (sourceLine pos) ++ " column " ++ show (sourceColumn pos) where sn = if sourceName pos == "source" || sourceName pos == "" then "" @@ -238,140 +237,140 @@ encodeLogMessages ms = keyOrder [ "type", "verbosity", "contents", "message", "path", "source", "line", "column" ] } ms -showLogMessage :: LogMessage -> String +showLogMessage :: LogMessage -> Text.Text showLogMessage msg = case msg of SkippedContent s pos -> - "Skipped '" ++ s ++ "' at " ++ showPos pos + "Skipped '" <> s <> "' at " <> showPos pos IgnoredElement s -> - "Ignored element " ++ s + "Ignored element " <> s CouldNotParseYamlMetadata s pos -> - "Could not parse YAML metadata at " ++ showPos pos ++ - if null s then "" else ": " ++ s + "Could not parse YAML metadata at " <> showPos pos <> + if Text.null s then "" else ": " <> s DuplicateLinkReference s pos -> - "Duplicate link reference '" ++ s ++ "' at " ++ showPos pos + "Duplicate link reference '" <> s <> "' at " <> showPos pos DuplicateNoteReference s pos -> - "Duplicate note reference '" ++ s ++ "' at " ++ showPos pos + "Duplicate note reference '" <> s <> "' at " <> showPos pos NoteDefinedButNotUsed s pos -> - "Note with key '" ++ s ++ "' defined at " ++ showPos pos ++ + "Note with key '" <> s <> "' defined at " <> showPos pos <> " but not used." DuplicateIdentifier s pos -> - "Duplicate identifier '" ++ s ++ "' at " ++ showPos pos + "Duplicate identifier '" <> s <> "' at " <> showPos pos ReferenceNotFound s pos -> - "Reference not found for '" ++ s ++ "' at " ++ showPos pos + "Reference not found for '" <> s <> "' at " <> showPos pos CircularReference s pos -> - "Circular reference '" ++ s ++ "' at " ++ showPos pos + "Circular reference '" <> s <> "' at " <> showPos pos UndefinedToggle s pos -> - "Undefined toggle '" ++ s ++ "' at " ++ showPos pos + "Undefined toggle '" <> s <> "' at " <> showPos pos ParsingUnescaped s pos -> - "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos + "Parsing unescaped '" <> s <> "' at " <> showPos pos CouldNotLoadIncludeFile fp pos -> - "Could not load include file '" ++ fp ++ "' at " ++ showPos pos + "Could not load include file '" <> fp <> "' at " <> showPos pos MacroAlreadyDefined name pos -> - "Macro '" ++ name ++ "' already defined, ignoring at " ++ showPos pos + "Macro '" <> name <> "' already defined, ignoring at " <> showPos pos InlineNotRendered il -> - "Not rendering " ++ show il + "Not rendering " <> Text.pack (show il) BlockNotRendered bl -> - "Not rendering " ++ show bl + "Not rendering " <> Text.pack (show bl) DocxParserWarning s -> - "Docx parser warning: " ++ s + "Docx parser warning: " <> s IgnoredIOError s -> - "IO Error (ignored): " ++ s + "IO Error (ignored): " <> s CouldNotFetchResource fp s -> - "Could not fetch resource '" ++ fp ++ "'" ++ - if null s then "" else ": " ++ s + "Could not fetch resource '" <> fp <> "'" <> + if Text.null s then "" else ": " <> s CouldNotDetermineImageSize fp s -> - "Could not determine image size for '" ++ fp ++ "'" ++ - if null s then "" else ": " ++ s + "Could not determine image size for '" <> fp <> "'" <> + if Text.null s then "" else ": " <> s CouldNotConvertImage fp s -> - "Could not convert image '" ++ fp ++ "'" ++ - if null s then "" else ": " ++ s + "Could not convert image '" <> fp <> "'" <> + if Text.null s then "" else ": " <> s CouldNotDetermineMimeType fp -> - "Could not determine mime type for '" ++ fp ++ "'" + "Could not determine mime type for '" <> fp <> "'" CouldNotConvertTeXMath s m -> - "Could not convert TeX math '" ++ s ++ "', rendering as TeX" ++ - if null m then "" else ':' : '\n' : m + "Could not convert TeX math '" <> s <> "', rendering as TeX" <> + if Text.null m then "" else ":\n" <> m CouldNotParseCSS m -> - "Could not parse CSS" ++ if null m then "" else ':' : '\n' : m + "Could not parse CSS" <> if Text.null m then "" else ":\n" <> m Fetching fp -> - "Fetching " ++ fp ++ "..." + "Fetching " <> fp <> "..." Extracting fp -> - "Extracting " ++ fp ++ "..." + "Extracting " <> fp <> "..." NoTitleElement fallback -> - "This document format requires a nonempty <title> element.\n" ++ - "Defaulting to '" ++ fallback ++ "' as the title.\n" ++ - "To specify a title, use 'title' in metadata or " ++ + "This document format requires a nonempty <title> element.\n" <> + "Defaulting to '" <> fallback <> "' as the title.\n" <> + "To specify a title, use 'title' in metadata or " <> "--metadata title=\"...\"." NoLangSpecified -> - "No value for 'lang' was specified in the metadata.\n" ++ + "No value for 'lang' was specified in the metadata.\n" <> "It is recommended that lang be specified for this format." InvalidLang s -> - "Invalid 'lang' value '" ++ s ++ "'.\n" ++ + "Invalid 'lang' value '" <> s <> "'.\n" <> "Use an IETF language tag like 'en-US'." CouldNotHighlight m -> - "Could not highlight code block:\n" ++ m + "Could not highlight code block:\n" <> m MissingCharacter m -> - "Missing character: " ++ m + "Missing character: " <> m Deprecated t m -> - "Deprecated: " ++ t ++ - if null m + "Deprecated: " <> t <> + if Text.null m then "" - else ". " ++ m + else ". " <> m NoTranslation t -> - "The term " ++ t ++ " has no translation defined." + "The term " <> t <> " has no translation defined." CouldNotLoadTranslations lang m -> - "Could not load translations for " ++ lang ++ - if null m then "" else '\n' : m + "Could not load translations for " <> lang <> + if Text.null m then "" else "\n" <> m UnusualConversion m -> - "Unusual conversion: " ++ m + "Unusual conversion: " <> m UnexpectedXmlElement element parent -> - "Unexpected XML element " ++ element ++ " in " ++ parent + "Unexpected XML element " <> element <> " in " <> parent UnknownOrgExportOption option -> - "Ignoring unknown Org export option: " ++ option + "Ignoring unknown Org export option: " <> option CouldNotDeduceFormat exts format -> - "Could not deduce format from file extension " ++ - intercalate " or " exts ++ "\n" ++ - "Defaulting to " ++ format + "Could not deduce format from file extension " <> + Text.intercalate " or " exts <> "\n" <> + "Defaulting to " <> format -messageVerbosity:: LogMessage -> Verbosity +messageVerbosity :: LogMessage -> Verbosity messageVerbosity msg = case msg of - SkippedContent{} -> INFO - IgnoredElement{} -> INFO - CouldNotParseYamlMetadata{} -> WARNING - DuplicateLinkReference{} -> WARNING - DuplicateNoteReference{} -> WARNING - NoteDefinedButNotUsed{} -> WARNING - DuplicateIdentifier{} -> WARNING - ReferenceNotFound{} -> WARNING - CircularReference{} -> WARNING - UndefinedToggle{} -> WARNING + SkippedContent{} -> INFO + IgnoredElement{} -> INFO + CouldNotParseYamlMetadata{} -> WARNING + DuplicateLinkReference{} -> WARNING + DuplicateNoteReference{} -> WARNING + NoteDefinedButNotUsed{} -> WARNING + DuplicateIdentifier{} -> WARNING + ReferenceNotFound{} -> WARNING + CircularReference{} -> WARNING + UndefinedToggle{} -> WARNING CouldNotLoadIncludeFile f _ - | ".sty" `isSuffixOf` f -> INFO - | otherwise -> WARNING - MacroAlreadyDefined{} -> WARNING - ParsingUnescaped{} -> INFO - InlineNotRendered{} -> INFO - BlockNotRendered{} -> INFO - DocxParserWarning{} -> INFO - IgnoredIOError{} -> WARNING - CouldNotFetchResource{} -> WARNING - CouldNotDetermineImageSize{} -> WARNING - CouldNotConvertImage{} -> WARNING - CouldNotDetermineMimeType{} -> WARNING - CouldNotConvertTeXMath{} -> WARNING - CouldNotParseCSS{} -> WARNING - Fetching{} -> INFO - Extracting{} -> INFO - NoTitleElement{} -> WARNING - NoLangSpecified -> INFO - InvalidLang{} -> WARNING - CouldNotHighlight{} -> WARNING - MissingCharacter{} -> WARNING - Deprecated{} -> WARNING - NoTranslation{} -> WARNING - CouldNotLoadTranslations{} -> WARNING - UnusualConversion {} -> WARNING - UnexpectedXmlElement {} -> WARNING - UnknownOrgExportOption {} -> WARNING - CouldNotDeduceFormat{} -> WARNING + | ".sty" `Text.isSuffixOf` f -> INFO + | otherwise -> WARNING + MacroAlreadyDefined{} -> WARNING + ParsingUnescaped{} -> INFO + InlineNotRendered{} -> INFO + BlockNotRendered{} -> INFO + DocxParserWarning{} -> INFO + IgnoredIOError{} -> WARNING + CouldNotFetchResource{} -> WARNING + CouldNotDetermineImageSize{} -> WARNING + CouldNotConvertImage{} -> WARNING + CouldNotDetermineMimeType{} -> WARNING + CouldNotConvertTeXMath{} -> WARNING + CouldNotParseCSS{} -> WARNING + Fetching{} -> INFO + Extracting{} -> INFO + NoTitleElement{} -> WARNING + NoLangSpecified -> INFO + InvalidLang{} -> WARNING + CouldNotHighlight{} -> WARNING + MissingCharacter{} -> WARNING + Deprecated{} -> WARNING + NoTranslation{} -> WARNING + CouldNotLoadTranslations{} -> WARNING + UnusualConversion {} -> WARNING + UnexpectedXmlElement {} -> WARNING + UnknownOrgExportOption {} -> WARNING + CouldNotDeduceFormat{} -> WARNING diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index 9416bf41f..74c7058f3 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} {- | Module : Text.Pandoc.Lua.Filter @@ -180,7 +180,7 @@ constructorsFor :: DataType -> [String] constructorsFor x = map show (dataTypeConstrs x) inlineElementNames :: [String] -inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str [])) +inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str mempty)) blockElementNames :: [String] blockElementNames = "Block" : constructorsFor (dataTypeOf (Para [])) diff --git a/src/Text/Pandoc/Lua/Global.hs b/src/Text/Pandoc/Lua/Global.hs index b9b6c9cd9..20963f831 100644 --- a/src/Text/Pandoc/Lua/Global.hs +++ b/src/Text/Pandoc/Lua/Global.hs @@ -27,11 +27,12 @@ import Text.Pandoc.Lua.Marshaling () import Text.Pandoc.Lua.Util (addFunction) import Text.Pandoc.Options (ReaderOptions) +import qualified Data.Text as Text import qualified Foreign.Lua as Lua -- | Permissible global Lua variables. data Global = - FORMAT String + FORMAT Text.Text | PANDOC_API_VERSION | PANDOC_DOCUMENT Pandoc | PANDOC_READER_OPTIONS ReaderOptions diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs index f1cab7e82..cf6c71231 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/src/Text/Pandoc/Lua/Init.hs @@ -28,13 +28,14 @@ import Text.Pandoc.Lua.Packages (LuaPackageParams (..), installPandocPackageSearcher) import Text.Pandoc.Lua.Util (loadScriptFromDataDir) +import qualified Data.Text as Text import qualified Foreign.Lua as Lua import qualified Foreign.Lua.Module.Text as Lua import qualified Text.Pandoc.Definition as Pandoc import qualified Text.Pandoc.Lua.Module.Pandoc as ModulePandoc -- | Lua error message -newtype LuaException = LuaException String deriving (Show) +newtype LuaException = LuaException Text.Text deriving (Show) -- | Run the lua interpreter, using pandoc's default way of environment -- initialization. @@ -56,7 +57,7 @@ runLua luaOp = do return (opResult, st) liftIO $ setForeignEncoding enc case res of - Left (Lua.Exception msg) -> return $ Left (LuaException msg) + Left (Lua.Exception msg) -> return $ Left (LuaException $ Text.pack msg) Right (x, newState) -> do putCommonState newState return $ Right x diff --git a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs index eed1500ec..b65396f68 100644 --- a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs +++ b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs @@ -1,6 +1,7 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Marshaling.CommonState Copyright : © 2012-2019 John MacFarlane @@ -23,6 +24,7 @@ import Text.Pandoc.Logging (LogMessage, showLogMessage) import Text.Pandoc.Lua.Marshaling.AnyValue (AnyValue (..)) import qualified Data.Map as Map +import qualified Data.Text as Text import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil @@ -46,7 +48,7 @@ indexCommonState st (AnyValue idx) = Lua.ltype idx >>= \case Lua.TypeString -> 1 <$ (Lua.peek idx >>= pushField) _ -> 1 <$ Lua.pushnil where - pushField :: String -> Lua () + pushField :: Text.Text -> Lua () pushField name = case lookup name commonStateFields of Just pushValue -> pushValue st Nothing -> Lua.pushnil @@ -71,7 +73,7 @@ pairsCommonState st = do (nextKey, pushValue):_ -> 2 <$ (Lua.push nextKey *> pushValue st) _ -> 2 <$ (Lua.pushnil *> Lua.pushnil) -commonStateFields :: [(String, CommonState -> Lua ())] +commonStateFields :: [(Text.Text, CommonState -> Lua ())] commonStateFields = [ ("input_files", Lua.push . stInputFiles) , ("output_file", Lua.push . Lua.Optional . stOutputFile) @@ -98,5 +100,5 @@ instance Pushable LogMessage where pushLogMessageMetatable = ensureUserdataMetatable logMessageTypeName $ LuaUtil.addFunction "__tostring" tostringLogMessage -tostringLogMessage :: LogMessage -> Lua String +tostringLogMessage :: LogMessage -> Lua Text.Text tostringLogMessage = return . showLogMessage diff --git a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs index 5395f6fc8..226fe2e71 100644 --- a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs +++ b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs @@ -25,6 +25,7 @@ import Text.Pandoc.Lua.Marshaling.CommonState () import Text.Pandoc.Options (ReaderOptions (..), TrackChanges) import qualified Data.Set as Set +import qualified Data.Text as Text import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil @@ -44,9 +45,9 @@ instance Pushable ReaderOptions where (standalone :: Bool) (columns :: Int) (tabStop :: Int) - (indentedCodeClasses :: [String]) - (abbreviations :: Set.Set String) - (defaultImageExtension :: String) + (indentedCodeClasses :: [Text.Text]) + (abbreviations :: Set.Set Text.Text) + (defaultImageExtension :: Text.Text) (trackChanges :: TrackChanges) (stripComments :: Bool) = ro diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index 261785665..951571ddd 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -15,7 +15,6 @@ module Text.Pandoc.Lua.Module.MediaBag import Prelude import Control.Monad (zipWithM_) -import Data.Maybe (fromMaybe) import Foreign.Lua (Lua, NumResults, Optional, liftIO) import Text.Pandoc.Class (CommonState (..), fetchItem, putCommonState, runIOorExplode, setMediaBag) @@ -25,6 +24,7 @@ import Text.Pandoc.Lua.Util (addFunction) import Text.Pandoc.MIME (MimeType) import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T import qualified Foreign.Lua as Lua import qualified Text.Pandoc.MediaBag as MB @@ -113,7 +113,7 @@ mediaDirectoryFn = do Lua.push "length" *> Lua.push contentLength *> Lua.rawset (-3) Lua.rawseti (-2) idx -fetch :: String +fetch :: T.Text -> Lua NumResults fetch src = do commonState <- getCommonState @@ -122,6 +122,6 @@ fetch src = do putCommonState commonState setMediaBag mediaBag fetchItem src - Lua.push $ fromMaybe "" mimeType + Lua.push $ maybe "" T.unpack mimeType Lua.push bs return 2 -- returns 2 values: contents, mimetype diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index 182008da7..36d6f4009 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Module.Pandoc Copyright : Copyright © 2017-2019 Albert Krewinkel @@ -19,7 +20,6 @@ import Control.Monad (when) import Control.Monad.Except (throwError) import Data.Default (Default (..)) import Data.Maybe (fromMaybe) -import Data.Text (pack) import Foreign.Lua (Lua, NumResults, Optional, Peekable, Pushable) import System.Exit (ExitCode (..)) import Text.Pandoc.Class (runIO) @@ -33,6 +33,7 @@ import Text.Pandoc.Readers (Reader (..), getReader) import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BSL +import qualified Data.Text as T import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil import Text.Pandoc.Error @@ -59,22 +60,22 @@ walkInline = walkElement walkBlock :: Block -> LuaFilter -> Lua Block walkBlock = walkElement -readDoc :: String -> Optional String -> Lua NumResults +readDoc :: T.Text -> Optional T.Text -> Lua NumResults readDoc content formatSpecOrNil = do let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil) res <- Lua.liftIO . runIO $ getReader formatSpec >>= \(rdr,es) -> case rdr of TextReader r -> - r def{ readerExtensions = es } (pack content) + r def{ readerExtensions = es } content _ -> throwError $ PandocSomeError $ "Only textual formats are supported" case res of Right pd -> (1 :: NumResults) <$ Lua.push pd -- success, push Pandoc Left (PandocUnknownReaderError f) -> Lua.raiseError $ - "Unknown reader: " ++ f + "Unknown reader: " <> f Left (PandocUnsupportedExtensionError e f) -> Lua.raiseError $ - "Extension " ++ e ++ " not supported for " ++ f + "Extension " <> e <> " not supported for " <> f Left e -> Lua.raiseError $ show e -- | Pipes input through a command. @@ -86,10 +87,10 @@ pipeFn command args input = do (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input case ec of ExitSuccess -> 1 <$ Lua.push output - ExitFailure n -> Lua.raiseError (PipeError command n output) + ExitFailure n -> Lua.raiseError (PipeError (T.pack command) n output) data PipeError = PipeError - { pipeErrorCommand :: String + { pipeErrorCommand :: T.Text , pipeErrorCode :: Int , pipeErrorOutput :: BL.ByteString } @@ -118,7 +119,7 @@ instance Pushable PipeError where pipeErrorMessage :: PipeError -> Lua BL.ByteString pipeErrorMessage (PipeError cmd errorCode output) = return $ mconcat [ BSL.pack "Error running " - , BSL.pack cmd + , BSL.pack $ T.unpack cmd , BSL.pack " (error code " , BSL.pack $ show errorCode , BSL.pack "): " diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs index 057e6580b..7d6dd0fab 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -15,7 +15,6 @@ module Text.Pandoc.Lua.Module.Utils import Prelude import Control.Applicative ((<|>)) -import Data.Char (toLower) import Data.Default (def) import Data.Version (Version) import Foreign.Lua (Peekable, Lua, NumResults) @@ -27,6 +26,7 @@ import Text.Pandoc.Lua.Util (addFunction) import qualified Data.Digest.Pure.SHA as SHA import qualified Data.ByteString.Lazy as BSL +import qualified Data.Text as T import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Filter.JSON as JSONFilter @@ -64,7 +64,7 @@ makeSections number baselevel = -- limit years to the range 1601-9999 (ISO 8601 accepts greater than -- or equal to 1583, but MS Word only accepts dates starting 1601). -- Returns nil instead of a string if the conversion failed. -normalizeDate :: String -> Lua (Lua.Optional String) +normalizeDate :: T.Text -> Lua (Lua.Optional T.Text) normalizeDate = return . Lua.Optional . Shared.normalizeDate -- | Run a JSON filter on the given document. @@ -88,13 +88,13 @@ runJSONFilter mbDatadir doc filterFile optArgs = do -- | Calculate the hash of the given contents. sha1 :: BSL.ByteString - -> Lua String -sha1 = return . SHA.showDigest . SHA.sha1 + -> Lua T.Text +sha1 = return . T.pack . SHA.showDigest . SHA.sha1 -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). -stringify :: AstElement -> Lua String +stringify :: AstElement -> Lua T.Text stringify el = return $ case el of PandocElement pd -> Shared.stringify pd InlineElement i -> Shared.stringify i @@ -102,11 +102,11 @@ stringify el = return $ case el of MetaElement m -> Shared.stringify m CitationElement c -> Shared.stringify c MetaValueElement m -> stringifyMetaValue m - _ -> "" + _ -> mempty -stringifyMetaValue :: MetaValue -> String +stringifyMetaValue :: MetaValue -> T.Text stringifyMetaValue mv = case mv of - MetaBool b -> map toLower (show b) + MetaBool b -> T.toLower $ T.pack (show b) MetaString s -> s _ -> Shared.stringify mv @@ -139,5 +139,5 @@ instance Peekable AstElement where "Expected an AST element, but could not parse value as such." -- | Convert a number < 4000 to uppercase roman numeral. -toRomanNumeral :: Lua.Integer -> Lua String +toRomanNumeral :: Lua.Integer -> Lua T.Text toRomanNumeral = return . Shared.toRomanNumeral . fromIntegral diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index ee0fe3efb..77f4c4b96 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.MIME Copyright : Copyright (C) 2011-2019 John MacFarlane @@ -13,14 +14,13 @@ Mime type lookup for ODT writer. module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType, mediaCategory ) where import Prelude -import Data.Char (toLower) import Data.List (isPrefixOf, isSuffixOf) -import Data.List.Split (splitOn) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe (fromMaybe, listToMaybe) import System.FilePath -type MimeType = String +type MimeType = T.Text -- | Determine mime type appropriate for file path. getMimeType :: FilePath -> Maybe MimeType @@ -31,34 +31,34 @@ getMimeType fp | "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp = Just "application/vnd.oasis.opendocument.formula" -- generic - | otherwise = M.lookup (map toLower $ drop 1 $ takeExtension fp) mimeTypes + | otherwise = M.lookup (T.toLower $ T.drop 1 $ T.pack $ takeExtension fp) mimeTypes -- | Determime mime type appropriate for file path, defaulting to -- “application/octet-stream” if nothing else fits. getMimeTypeDef :: FilePath -> MimeType getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType -extensionFromMimeType :: MimeType -> Maybe String +extensionFromMimeType :: MimeType -> Maybe T.Text extensionFromMimeType mimetype = - M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes + M.lookup (T.takeWhile (/=';') mimetype) reverseMimeTypes -- note: we just look up the basic mime type, dropping the content-encoding etc. -- | Determine general media category for file path, e.g. -- -- prop> mediaCategory "foo.jpg" = Just "image" -mediaCategory :: FilePath -> Maybe String -mediaCategory fp = getMimeType fp >>= listToMaybe . splitOn "/" +mediaCategory :: FilePath -> Maybe T.Text +mediaCategory fp = getMimeType fp >>= listToMaybe . T.splitOn "/" -reverseMimeTypes :: M.Map MimeType String +reverseMimeTypes :: M.Map MimeType T.Text reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList -mimeTypes :: M.Map String MimeType +mimeTypes :: M.Map T.Text MimeType mimeTypes = M.fromList mimeTypesList -- | Collection of common mime types. -- Except for first entry, list borrowed from -- <https://github.com/Happstack/happstack-server/blob/master/src/Happstack/Server/FileServe/BuildingBlocks.hs happstack-server> -mimeTypesList :: [(String, MimeType)] +mimeTypesList :: [(T.Text, MimeType)] mimeTypesList = [("cpt","image/x-corelphotopaint") ,("gz","application/x-gzip") diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index bb6fc88ac..87af5c7f8 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -36,7 +36,7 @@ import Text.Pandoc.MIME (MimeType, getMimeTypeDef) -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map [FilePath] (MimeType, BL.ByteString)) deriving (Semigroup, Monoid, Data, Typeable) instance Show MediaBag where @@ -72,12 +72,12 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. -mediaDirectory :: MediaBag -> [(String, MimeType, Int)] +mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldrWithKey (\fp (mime,contents) -> ((Posix.joinPath fp, mime, fromIntegral $ BL.length contents):)) [] mediamap -mediaItems :: MediaBag -> [(String, MimeType, BL.ByteString)] +mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = M.foldrWithKey (\fp (mime,contents) -> ((Posix.joinPath fp, mime, contents):)) [] mediamap diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 66193ef60..0fe80be4e 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -36,9 +36,10 @@ module Text.Pandoc.Options ( module Text.Pandoc.Extensions import Prelude import Control.Applicative ((<|>)) import Data.Char (toLower) +import Data.Maybe (fromMaybe) import Data.Data (Data) import Data.Default -import Data.Text (Text, unpack) +import Data.Text (Text) import Text.DocTemplates (Context(..)) import qualified Data.Set as Set import Data.Typeable (Typeable) @@ -46,7 +47,7 @@ import GHC.Generics (Generic) import Skylighting (SyntaxMap, defaultSyntaxMap) import Text.Pandoc.Extensions import Text.Pandoc.Highlighting (Style, pygments) -import Text.Pandoc.Shared (camelCaseToHyphenated) +import Text.Pandoc.Shared (camelCaseStrToHyphenated) import Text.DocTemplates (Template) import Data.Aeson.TH (deriveJSON, defaultOptions, Options(..), SumEncoding(..)) @@ -60,10 +61,10 @@ data ReaderOptions = ReaderOptions{ , readerStandalone :: Bool -- ^ Standalone document with header , readerColumns :: Int -- ^ Number of columns in terminal , readerTabStop :: Int -- ^ Tab stop - , readerIndentedCodeClasses :: [String] -- ^ Default classes for + , readerIndentedCodeClasses :: [Text] -- ^ Default classes for -- indented code blocks - , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations - , readerDefaultImageExtension :: String -- ^ Default extension for images + , readerAbbreviations :: Set.Set Text -- ^ Strings to treat as abbreviations + , readerDefaultImageExtension :: Text -- ^ Default extension for images , readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx , readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML } deriving (Show, Read, Data, Typeable, Generic) @@ -84,7 +85,7 @@ instance Default ReaderOptions , readerStripComments = False } -defaultAbbrevs :: Set.Set String +defaultAbbrevs :: Set.Set Text defaultAbbrevs = Set.fromList [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.", @@ -99,11 +100,11 @@ defaultAbbrevs = Set.fromList data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable, Generic) data HTMLMathMethod = PlainMath - | WebTeX String -- url of TeX->image script. + | WebTeX Text -- url of TeX->image script. | GladTeX | MathML - | MathJax String -- url of MathJax.js - | KaTeX String -- url of KaTeX files + | MathJax Text -- url of MathJax.js + | KaTeX Text -- url of KaTeX files deriving (Show, Read, Eq, Data, Typeable, Generic) instance FromYAML HTMLMathMethod where @@ -111,18 +112,18 @@ instance FromYAML HTMLMathMethod where (withMap "HTMLMathMethod" $ \m -> do method <- m .: "method" mburl <- m .:? "url" - case unpack method of + case method :: Text of "plain" -> return PlainMath - "webtex" -> return $ WebTeX $ maybe "" unpack mburl + "webtex" -> return $ WebTeX $ fromMaybe "" mburl "gladtex" -> return GladTeX "mathml" -> return MathML "mathjax" -> return $ MathJax $ - maybe defaultMathJaxURL unpack mburl + fromMaybe defaultMathJaxURL mburl "katex" -> return $ KaTeX $ - maybe defaultKaTeXURL unpack mburl + fromMaybe defaultKaTeXURL mburl _ -> fail $ "Unknown HTML math method " ++ show method) node <|> (withStr "HTMLMathMethod" $ \method -> - case unpack method of + case method of "plain" -> return PlainMath "webtex" -> return $ WebTeX "" "gladtex" -> return GladTeX @@ -246,7 +247,7 @@ data WriterOptions = WriterOptions , writerWrapText :: WrapOption -- ^ Option for wrapping text , writerColumns :: Int -- ^ Characters in a line (for text wrapping) , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails - , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML + , writerIdentifierPrefix :: Text -- ^ Prefix for section & note ids in HTML -- and for footnote marks in markdown , writerCiteMethod :: CiteMethod -- ^ How to print cites , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML @@ -256,8 +257,8 @@ data WriterOptions = WriterOptions , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting -- (Nothing = no highlighting) , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown - , writerEpubSubdirectory :: String -- ^ Subdir for epub in OCF - , writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB + , writerEpubSubdirectory :: Text -- ^ Subdir for epub in OCF + , writerEpubMetadata :: Maybe Text -- ^ Metadata to include in EPUB , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) , writerTOCDepth :: Int -- ^ Number of levels to include in TOC @@ -309,10 +310,10 @@ instance HasSyntaxExtensions WriterOptions where isEnabled :: HasSyntaxExtensions a => Extension -> a -> Bool isEnabled ext opts = ext `extensionEnabled` getExtensions opts -defaultMathJaxURL :: String +defaultMathJaxURL :: Text defaultMathJaxURL = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/" -defaultKaTeXURL :: String +defaultKaTeXURL :: Text defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/" $(deriveJSON defaultOptions ''ReaderOptions) @@ -325,7 +326,7 @@ $(deriveJSON defaultOptions{ } ''HTMLMathMethod) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated + camelCaseStrToHyphenated } ''CiteMethod) $(deriveJSON defaultOptions{ constructorTagModifier = @@ -339,17 +340,17 @@ $(deriveJSON defaultOptions{ constructorTagModifier = $(deriveJSON defaultOptions ''HTMLSlideVariant) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated + camelCaseStrToHyphenated } ''TrackChanges) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated + camelCaseStrToHyphenated } ''WrapOption) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated . drop 8 + camelCaseStrToHyphenated . drop 8 } ''TopLevelDivision) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated + camelCaseStrToHyphenated } ''ReferenceLocation) diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index d7e61109f..1d307cdd4 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -46,7 +46,7 @@ import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..)) import Text.Pandoc.Process (pipeProcess) import System.Process (readProcessWithExitCode) -import Text.Pandoc.Shared (inDirectory, stringify) +import Text.Pandoc.Shared (inDirectory, stringify, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk (walkM) import Text.Pandoc.Writers.Shared (getField, metaToContext) @@ -141,7 +141,7 @@ makeWithWkhtmltopdf program pdfargs writer opts doc@(Pandoc meta _) = do (return . literal . stringify) (return . literal . stringify) meta - let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd + let toArgs (f, mbd) = maybe [] (\d -> ["--" <> f, T.unpack d]) mbd let args = pdfargs ++ mathArgs ++ concatMap toArgs [("page-size", getField "papersize" meta') ,("title", getField "title" meta') @@ -173,19 +173,19 @@ handleImages opts tmpdir doc = convertImages :: WriterOptions -> FilePath -> Inline -> PandocIO Inline convertImages opts tmpdir (Image attr ils (src, tit)) = do - img <- liftIO $ convertImage opts tmpdir src + img <- liftIO $ convertImage opts tmpdir $ T.unpack src newPath <- case img of Left e -> do report $ CouldNotConvertImage src e return src - Right fp -> return fp + Right fp -> return $ T.pack fp return (Image attr ils (newPath, tit)) convertImages _ _ x = return x -- Convert formats which do not work well in pdf to png convertImage :: WriterOptions -> FilePath -> FilePath - -> IO (Either String FilePath) + -> IO (Either Text FilePath) convertImage opts tmpdir fname = do let dpi = show $ writerDpi opts case mime of @@ -202,14 +202,14 @@ convertImage opts tmpdir fname = do then return $ Right pdfOut else return $ Left "conversion from SVG failed") (\(e :: E.SomeException) -> return $ Left $ - "check that rsvg-convert is in path.\n" ++ - show e) + "check that rsvg-convert is in path.\n" <> + tshow e) _ -> JP.readImage fname >>= \res -> case res of - Left e -> return $ Left e + Left e -> return $ Left $ T.pack e Right img -> E.catch (Right pngOut <$ JP.savePngImage pngOut img) $ - \(e :: E.SomeException) -> return (Left (show e)) + \(e :: E.SomeException) -> return (Left (tshow e)) where pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir @@ -262,12 +262,11 @@ missingCharacterWarnings :: Verbosity -> ByteString -> PandocIO () missingCharacterWarnings verbosity log' = do let ls = BC.lines log' let isMissingCharacterWarning = BC.isPrefixOf "Missing character: " - let addCodePoint [] = [] - addCodePoint (c:cs) - | isAscii c = c : addCodePoint cs - | otherwise = c : " (U+" ++ printf "%04X" (ord c) ++ ")" ++ - addCodePoint cs - let warnings = [ addCodePoint (utf8ToString (BC.drop 19 l)) + let toCodePoint c + | isAscii c = T.singleton c + | otherwise = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")" + let addCodePoint = T.concatMap toCodePoint + let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l)) | l <- ls , isMissingCharacterWarning l ] @@ -513,7 +512,7 @@ showVerboseInfo mbTmpDir program programArgs env source = do handlePDFProgramNotFound :: String -> IE.IOError -> IO a handlePDFProgramNotFound program e | IE.isDoesNotExistError e = - E.throwIO $ PandocPDFProgramNotFoundError program + E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program | otherwise = E.throwIO e utf8ToString :: ByteString -> String diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 68e900004..f56b13b66 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE CPP #-} {-# LANGUAGE ExplicitForAll #-} {-# LANGUAGE FlexibleContexts #-} @@ -7,6 +7,8 @@ {-# LANGUAGE IncoherentInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Parsing Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -18,13 +20,21 @@ A utility library with parsers used in pandoc readers. -} + module Text.Pandoc.Parsing ( takeWhileP, takeP, + countChar, + textStr, anyLine, anyLineNewline, indentWith, + manyChar, + many1Char, + manyTillChar, + many1TillChar, many1Till, manyUntil, + manyUntilChar, sepBy1', notFollowedBy', oneOfStrings, @@ -183,12 +193,14 @@ import Control.Monad.Reader import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isPunctuation, isSpace, ord, toLower, toUpper) import Data.Default -import Data.List (intercalate, isSuffixOf, transpose) +import Data.Functor (($>)) +import Data.List (intercalate, transpose) import qualified Data.Map as M import Data.Maybe (mapMaybe, fromMaybe) import qualified Data.Set as Set import Data.String import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines) @@ -240,45 +252,56 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where mempty = return mempty mappend = (<>) +-- | Like @count@, but packs its result +countChar :: (Stream s m Char, Monad m) + => Int + -> ParsecT s st m Char + -> ParsecT s st m Text +countChar n = fmap T.pack . count n + +-- | Like @string@, but uses @Text@. +textStr :: Stream s m Char => Text -> ParsecT s u m Text +textStr t = string (T.unpack t) $> t + -- | Parse characters while a predicate is true. takeWhileP :: Monad m => (Char -> Bool) - -> ParserT [Char] st m [Char] + -> ParserT Text st m Text takeWhileP f = do -- faster than 'many (satisfy f)' inp <- getInput pos <- getPosition - let (xs, rest) = span f inp + let (xs, rest) = T.span f inp -- needed to persuade parsec that this won't match an empty string: anyChar setInput rest - setPosition $ updatePosString pos xs + setPosition $ updatePosString pos $ T.unpack xs return xs -- Parse n characters of input (or the rest of the input if -- there aren't n characters). -takeP :: Monad m => Int -> ParserT [Char] st m [Char] +takeP :: Monad m => Int -> ParserT Text st m Text takeP n = do guard (n > 0) -- faster than 'count n anyChar' inp <- getInput pos <- getPosition - let (xs, rest) = splitAt n inp + let (xs, rest) = T.splitAt n inp -- needed to persuade parsec that this won't match an empty string: anyChar setInput rest - setPosition $ updatePosString pos xs + setPosition $ updatePosString pos $ T.unpack xs return xs -- | Parse any line of text -anyLine :: Monad m => ParserT [Char] st m [Char] +anyLine :: Monad m => ParserT Text st m Text anyLine = do -- This is much faster than: -- manyTill anyChar newline inp <- getInput pos <- getPosition - case break (=='\n') inp of - (this, '\n':rest) -> do + case T.break (=='\n') inp of + (this, T.uncons -> Just ('\n', rest)) -> do -- needed to persuade parsec that this won't match an empty string: anyChar setInput rest @@ -287,20 +310,39 @@ anyLine = do _ -> mzero -- | Parse any line, include the final newline in the output -anyLineNewline :: Monad m => ParserT [Char] st m [Char] -anyLineNewline = (++ "\n") <$> anyLine +anyLineNewline :: Monad m => ParserT Text st m Text +anyLineNewline = (<> "\n") <$> anyLine -- | Parse indent by specified number of spaces (or equiv. tabs) indentWith :: Stream s m Char => HasReaderOptions st - => Int -> ParserT s st m [Char] + => Int -> ParserT s st m Text indentWith num = do tabStop <- getOption readerTabStop if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) + then countChar num (char ' ') + else choice [ try (countChar num (char ' ')) , try (char '\t' >> indentWith (num - tabStop)) ] +-- | Like @many@, but packs its result. +manyChar :: Stream s m t + => ParserT s st m Char + -> ParserT s st m Text +manyChar = fmap T.pack . many + +-- | Like @many1@, but packs its result. +many1Char :: Stream s m t + => ParserT s st m Char + -> ParserT s st m Text +many1Char = fmap T.pack . many1 + +-- | Like @manyTill@, but packs its result. +manyTillChar :: Stream s m t + => ParserT s st m Char + -> ParserT s st m a + -> ParserT s st m Text +manyTillChar p = fmap T.pack . manyTill p + -- | Like @manyTill@, but reads at least one item. many1Till :: (Show end, Stream s m t) => ParserT s st m a @@ -312,6 +354,13 @@ many1Till p end = do rest <- manyTill p end return (first:rest) +-- | Like @many1Till@, but packs its result +many1TillChar :: (Show end, Stream s m t) + => ParserT s st m Char + -> ParserT s st m end + -> ParserT s st m Text +many1TillChar p = fmap T.pack . many1Till p + -- | Like @manyTill@, but also returns the result of end parser. manyUntil :: ParserT s u m a -> ParserT s u m b @@ -325,6 +374,14 @@ manyUntil p end = scan (xs, e) <- scan return (x:xs, e)) +-- | Like @manyUntil@, but also packs its result. +manyUntilChar :: ParserT s u m Char + -> ParserT s u m b + -> ParserT s u m (Text, b) +manyUntilChar p = fmap go . manyUntil p + where + go (x, y) = (T.pack x, y) + -- | Like @sepBy1@ from Parsec, -- but does not fail if it @sep@ succeeds and @p@ fails. sepBy1' :: ParsecT s u m a @@ -342,14 +399,18 @@ notFollowedBy' p = try $ join $ do a <- try p return (return ()) -- (This version due to Andrew Pimlott on the Haskell mailing list.) -oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String -oneOfStrings' _ [] = Prelude.fail "no strings" -oneOfStrings' matches strs = try $ do +oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text +oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack + +-- TODO: This should be re-implemented in a Text-aware way +oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String +oneOfStrings'' _ [] = Prelude.fail "no strings" +oneOfStrings'' matches strs = try $ do c <- anyChar let strs' = [xs | (x:xs) <- strs, x `matches` c] case strs' of [] -> Prelude.fail "not found" - _ -> (c:) <$> oneOfStrings' matches strs' + _ -> (c:) <$> oneOfStrings'' matches strs' <|> if "" `elem` strs' then return [c] else Prelude.fail "not found" @@ -357,11 +418,14 @@ oneOfStrings' matches strs = try $ do -- | Parses one of a list of strings. If the list contains -- two strings one of which is a prefix of the other, the longer -- string will be matched if possible. -oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String +oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text oneOfStrings = oneOfStrings' (==) -- | Parses one of a list of strings (tried in order), case insensitive. -oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String + +-- TODO: This will not be accurate with general Unicode (neither +-- Text.toLower nor Text.toCaseFold can be implemented with a map) +oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text oneOfStringsCI = oneOfStrings' ciMatch where ciMatch x y = toLower' x == toLower' y -- this optimizes toLower by checking common ASCII case @@ -388,13 +452,13 @@ blankline :: Stream s m Char => ParserT s st m Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. -blanklines :: Stream s m Char => ParserT s st m [Char] -blanklines = many1 blankline +blanklines :: Stream s m Char => ParserT s st m Text +blanklines = T.pack <$> many1 blankline -- | Gobble n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. gobbleSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT [Char] st m () + => Int -> ParserT Text st m () gobbleSpaces 0 = return () gobbleSpaces n | n < 0 = error "gobbleSpaces called with negative number" @@ -402,18 +466,18 @@ gobbleSpaces n char ' ' <|> eatOneSpaceOfTab gobbleSpaces (n - 1) -eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Char +eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char eatOneSpaceOfTab = do char '\t' tabstop <- getOption readerTabStop inp <- getInput - setInput $ replicate (tabstop - 1) ' ' ++ inp + setInput $ T.replicate (tabstop - 1) " " <> inp return ' ' -- | Gobble up to n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. gobbleAtMostSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT [Char] st m Int + => Int -> ParserT Text st m Int gobbleAtMostSpaces 0 = return 0 gobbleAtMostSpaces n | n < 0 = error "gobbleAtMostSpaces called with negative number" @@ -430,23 +494,26 @@ enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. -stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String -stringAnyCase [] = string "" -stringAnyCase (x:xs) = do +stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text +stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack + +stringAnyCase' :: Stream s m Char => String -> ParserT s st m String +stringAnyCase' [] = string "" +stringAnyCase' (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) - rest <- stringAnyCase xs + rest <- stringAnyCase' xs return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. parseFromString :: (Stream s m Char, IsString s) => ParserT s st m r - -> String + -> Text -> ParserT s st m r parseFromString parser str = do oldPos <- getPosition setPosition $ initialPos "chunk" oldInput <- getInput - setInput $ fromString str + setInput $ fromString $ T.unpack str result <- parser spaces eof @@ -458,7 +525,7 @@ parseFromString parser str = do -- This resets 'stateLastStrPos', which is almost always what we want. parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u) => ParserT s u m a - -> String + -> Text -> ParserT s u m a parseFromString' parser str = do oldLastStrPos <- getLastStrPos <$> getState @@ -468,9 +535,9 @@ parseFromString' parser str = do return res -- | Parse raw line block up to and including blank lines. -lineClump :: Monad m => ParserT [Char] st m String +lineClump :: Monad m => ParserT Text st m Text lineClump = blanklines - <|> (unlines <$> many1 (notFollowedBy blankline >> anyLine)) + <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine)) -- | Parse a string of characters between an open character -- and a close character, including text between balanced @@ -478,15 +545,15 @@ lineClump = blanklines -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char - -> ParserT s st m String + -> ParserT s st m Text charsInBalanced open close parser = try $ do char open let isDelim c = c == open || c == close - raw <- many $ many1 (notFollowedBy (satisfy isDelim) >> parser) + raw <- many $ T.pack <$> many1 (notFollowedBy (satisfy isDelim) >> parser) <|> (do res <- charsInBalanced open close parser - return $ [open] ++ res ++ [close]) + return $ T.singleton open <> res <> T.singleton close) char close - return $ concat raw + return $ T.concat raw -- old charsInBalanced would be: -- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline) @@ -532,10 +599,10 @@ romanNumeral upperCase = do -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. -emailAddress :: Stream s m Char => ParserT s st m (String, String) +emailAddress :: Stream s m Char => ParserT s st m (Text, Text) emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) - where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom - in (full, escapeURI $ "mailto:" ++ full) + where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom + in (full, escapeURI $ "mailto:" <> full) mailbox = intercalate "." <$> (emailWord `sepBy1'` dot) domain = intercalate "." <$> (subdomain `sepBy1'` dot) dot = char '.' @@ -553,14 +620,14 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) xs <- many (satisfy isEmailChar) return (x:xs) isEmailChar c = isAlphaNum c || isEmailPunct c - isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;" + isEmailPunct c = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;" -uriScheme :: Stream s m Char => ParserT s st m String +uriScheme :: Stream s m Char => ParserT s st m Text uriScheme = oneOfStringsCI (Set.toList schemes) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: Stream s m Char => ParserT s st m (String, String) +uri :: Stream s m Char => ParserT s st m (Text, Text) uri = try $ do scheme <- uriScheme char ':' @@ -571,12 +638,12 @@ uri = try $ do -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) -- as a URL, while NOT picking up the closing paren in -- (http://wikipedia.org). So we include balanced parens in the URL. - str <- concat <$> many1 (uriChunkBetween '(' ')' - <|> uriChunkBetween '{' '}' - <|> uriChunkBetween '[' ']' - <|> uriChunk) - str' <- option str $ char '/' >> return (str ++ "/") - let uri' = scheme ++ ":" ++ fromEntities str' + str <- T.concat <$> many1 (uriChunkBetween '(' ')' + <|> uriChunkBetween '{' '}' + <|> uriChunkBetween '[' ']' + <|> T.pack <$> uriChunk) + str' <- option str $ char '/' >> return (str <> "/") + let uri' = scheme <> ":" <> fromEntities str' return (uri', escapeURI uri') where wordChar = alphaNum <|> oneOf "#$%+/@\\_-&=" @@ -588,51 +655,54 @@ uri = try $ do <|> entity <|> try (punct <* lookAhead (void wordChar <|> void percentEscaped)) uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk - return ([l] ++ chunk ++ [r]) + return (T.pack $ [l] ++ chunk ++ [r]) -mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String +mathInlineWith :: Stream s m Char => Text -> Text -> ParserT s st m Text mathInlineWith op cl = try $ do - string op + textStr op when (op == "$") $ notFollowedBy space - words' <- many1Till (count 1 (noneOf " \t\n\\") + words' <- many1Till (countChar 1 (noneOf " \t\n\\") <|> (char '\\' >> -- This next clause is needed because \text{..} can -- contain $, \(\), etc. (try (string "text" >> - (("\\text" ++) <$> inBalancedBraces 0 "")) - <|> (\c -> ['\\',c]) <$> anyChar)) + (("\\text" <>) <$> inBalancedBraces 0 "")) + <|> (\c -> T.pack ['\\',c]) <$> anyChar)) <|> do (blankline <* notFollowedBy' blankline) <|> (oneOf " \t" <* skipMany (oneOf " \t")) notFollowedBy (char '$') return " " - ) (try $ string cl) + ) (try $ textStr cl) notFollowedBy digit -- to prevent capture of $5 - return $ trimMath $ concat words' + return $ trimMath $ T.concat words' where - inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String - inBalancedBraces 0 "" = do + inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text + inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack + + inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String + inBalancedBraces' 0 "" = do c <- anyChar if c == '{' - then inBalancedBraces 1 "{" + then inBalancedBraces' 1 "{" else mzero - inBalancedBraces 0 s = return $ reverse s - inBalancedBraces numOpen ('\\':xs) = do + inBalancedBraces' 0 s = return $ reverse s + inBalancedBraces' numOpen ('\\':xs) = do c <- anyChar - inBalancedBraces numOpen (c:'\\':xs) - inBalancedBraces numOpen xs = do + inBalancedBraces' numOpen (c:'\\':xs) + inBalancedBraces' numOpen xs = do c <- anyChar case c of - '}' -> inBalancedBraces (numOpen - 1) (c:xs) - '{' -> inBalancedBraces (numOpen + 1) (c:xs) - _ -> inBalancedBraces numOpen (c:xs) + '}' -> inBalancedBraces' (numOpen - 1) (c:xs) + '{' -> inBalancedBraces' (numOpen + 1) (c:xs) + _ -> inBalancedBraces' numOpen (c:xs) -mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String -mathDisplayWith op cl = try $ do - string op - many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl) +mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text +mathDisplayWith op cl = try $ fmap T.pack $ do + textStr op + many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ textStr cl) mathDisplay :: (HasReaderOptions st, Stream s m Char) - => ParserT s st m String + => ParserT s st m Text mathDisplay = (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -641,7 +711,7 @@ mathDisplay = mathDisplayWith "\\\\[" "\\\\]") mathInline :: (HasReaderOptions st , Stream s m Char) - => ParserT s st m String + => ParserT s st m Text mathInline = (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -665,8 +735,8 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. withRaw :: Monad m - => ParsecT [Char] st m a - -> ParsecT [Char] st m (a, [Char]) + => ParsecT Text st m a + -> ParsecT Text st m (a, Text) withRaw parser = do pos1 <- getPosition inp <- getInput @@ -674,11 +744,11 @@ withRaw parser = do pos2 <- getPosition let (l1,c1) = (sourceLine pos1, sourceColumn pos1) let (l2,c2) = (sourceLine pos2, sourceColumn pos2) - let inplines = take ((l2 - l1) + 1) $ lines inp + let inplines = take ((l2 - l1) + 1) $ T.lines inp let raw = case inplines of [] -> "" - [l] -> take (c2 - c1) l - ls -> unlines (init ls) ++ take (c2 - 1) (last ls) + [l] -> T.take (c2 - c1) l + ls -> T.unlines (init ls) <> T.take (c2 - 1) (last ls) return (result, raw) -- | Parses backslash, then applies character parser. @@ -716,7 +786,7 @@ lowerRoman = do decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) decimal = do num <- many1 digit - return (Decimal, fromMaybe 1 $ safeRead num) + return (Decimal, fromMaybe 1 $ safeRead $ T.pack num) -- | Parses a '@' and optional label and -- returns (DefaultStyle, [next example number]). The next @@ -726,10 +796,10 @@ exampleNum :: Stream s m Char => ParserT s ParserState m (ListNumberStyle, Int) exampleNum = do char '@' - lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) + lab <- T.pack <$> many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) st <- getState let num = stateNextExample st - let newlabels = if null lab + let newlabels = if T.null lab then stateExamples st else M.insert lab num $ stateExamples st updateState $ \s -> s{ stateNextExample = num + 1 @@ -825,25 +895,25 @@ orderedListMarker style delim = do charRef :: Stream s m Char => ParserT s st m Inline charRef = do c <- characterReference - return $ Str [c] + return $ Str $ T.singleton c -lineBlockLine :: Monad m => ParserT [Char] st m String +lineBlockLine :: Monad m => ParserT Text st m Text lineBlockLine = try $ do char '|' char ' ' - white <- many (spaceChar >> return '\160') + white <- T.pack <$> many (spaceChar >> return '\160') notFollowedBy newline line <- anyLine continuations <- many (try $ char ' ' >> anyLine) - return $ white ++ unwords (line : continuations) + return $ white <> T.unwords (line : continuations) blankLineBlockLine :: Stream s m Char => ParserT s st m Char blankLineBlockLine = try (char '|' >> blankline) -- | Parses an RST-style line block and returns a list of strings. -lineBlockLines :: Monad m => ParserT [Char] st m [String] +lineBlockLines :: Monad m => ParserT Text st m [Text] lineBlockLines = try $ do - lines' <- many1 (lineBlockLine <|> ((:[]) <$> blankLineBlockLine)) + lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine)) skipMany blankline return lines' @@ -927,9 +997,9 @@ gridTableWith' blocks headless = tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter -gridTableSplitLine :: [Int] -> String -> [String] +gridTableSplitLine :: [Int] -> Text -> [Text] gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitStringByIndices (init indices) $ trimr line + splitTextByIndices (init indices) $ trimr line gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment) gridPart ch = do @@ -949,9 +1019,10 @@ gridPart ch = do gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline -removeFinalBar :: String -> String -removeFinalBar = - reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse +removeFinalBar :: Text -> Text +removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|') + where + go c = T.any (== c) " \t" -- | Separator between rows of grid table. gridTableSep :: Stream s m Char => Char -> ParserT s st m Char @@ -969,7 +1040,7 @@ gridTableHeader headless blocks = try $ do then return $ repeat "" else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> - many1Till anyChar newline) + T.pack <$> many1Till anyChar newline) underDashes <- if headless then return dashes else gridDashedLines '=' @@ -979,16 +1050,16 @@ gridTableHeader headless blocks = try $ do let aligns = map snd underDashes let rawHeads = if headless then replicate (length underDashes) "" - else map (unlines . map trim) $ transpose + else map (T.unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads return (heads, aligns, indices) -gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String] +gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline - return (gridTableSplitLine indices line) + return (gridTableSplitLine indices $ T.pack line) -- | Parse row of grid table. gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st) @@ -997,7 +1068,7 @@ gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st) -> ParserT s st m (mf [Blocks]) gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ + let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $ transpose colLines compactifyCell bs = case compactify [bs] of [] -> mempty @@ -1005,40 +1076,41 @@ gridTableRow blocks indices = do cells <- sequence <$> mapM (parseFromString' blocks) cols return $ fmap (map compactifyCell) cells -removeOneLeadingSpace :: [String] -> [String] +removeOneLeadingSpace :: [Text] -> [Text] removeOneLeadingSpace xs = if all startsWithSpace xs - then map (drop 1) xs + then map (T.drop 1) xs else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' + where startsWithSpace t = case T.uncons t of + Nothing -> True + Just (c, _) -> c == ' ' -- | Parse footer for a grid table. -gridTableFooter :: Stream s m Char => ParserT s st m [Char] +gridTableFooter :: Stream s m Char => ParserT s st m Text gridTableFooter = blanklines --- -- | Removes the ParsecT layer from the monad transformer stack -readWithM :: (Stream s m Char, ToString s) +readWithM :: (Stream s m Char, ToText s) => ParserT s st m a -- ^ parser -> st -- ^ initial state -> s -- ^ input -> m (Either PandocError a) readWithM parser state input = - mapLeft (PandocParsecError $ toString input) `liftM` runParserT parser state "source" input + mapLeft (PandocParsecError $ toText input) `liftM` runParserT parser state "source" input -- | Parse a string with a given parser and state -readWith :: Parser [Char] st a +readWith :: Parser Text st a -> st - -> String + -> Text -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp -- | Parse a string with @parser@ (for testing). testStringWith :: Show a - => ParserT [Char] ParserState Identity a - -> [Char] + => ParserT Text ParserState Identity a + -> Text -> IO () testStringWith parser str = UTF8.putStrLn $ show $ readWith parser defaultParserState str @@ -1057,23 +1129,23 @@ data ParserState = ParserState stateSubstitutions :: SubstTable, -- ^ List of substitution references stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) - stateNoteRefs :: Set.Set String, -- ^ List of note references used + stateNoteRefs :: Set.Set Text, -- ^ List of note references used stateMeta :: Meta, -- ^ Document metadata stateMeta' :: F Meta, -- ^ Document metadata - stateCitations :: M.Map String String, -- ^ RST-style citations + stateCitations :: M.Map Text Text, -- ^ RST-style citations stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateIdentifiers :: Set.Set String, -- ^ Header identifiers used + stateIdentifiers :: Set.Set Text, -- ^ Header identifiers used stateNextExample :: Int, -- ^ Number of next example - stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers + stateExamples :: M.Map Text Int, -- ^ Map from example labels to numbers stateMacros :: M.Map Text Macro, -- ^ Table of macros defined so far - stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role - stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles + stateRstDefaultRole :: Text, -- ^ Current rST default interpreted text role + stateRstCustomRoles :: M.Map Text (Text, Maybe Text, Attr), -- ^ Current rST custom text roles -- Triple represents: 1) Base role, 2) Optional format (only for :raw: -- roles), 3) Additional classes (rest of Attr is unused)). stateCaption :: Maybe Inlines, -- ^ Caption in current environment - stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed + stateInHtmlBlock :: Maybe Text, -- ^ Tag type of HTML block being parsed stateFencedDivLevel :: Int, -- ^ Depth of fenced div - stateContainers :: [String], -- ^ parent include files + stateContainers :: [Text], -- ^ parent include files stateLogMessages :: [LogMessage], -- ^ log messages stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context } @@ -1112,8 +1184,8 @@ instance Monad m => HasQuoteContext ParserState m where return result class HasIdentifierList st where - extractIdentifierList :: st -> Set.Set String - updateIdentifierList :: (Set.Set String -> Set.Set String) -> st -> st + extractIdentifierList :: st -> Set.Set Text + updateIdentifierList :: (Set.Set Text -> Set.Set Text) -> st -> st instance HasIdentifierList ParserState where extractIdentifierList = stateIdentifiers @@ -1144,8 +1216,8 @@ instance HasLogMessages ParserState where getLogMessages st = reverse $ stateLogMessages st class HasIncludeFiles st where - getIncludeFiles :: st -> [String] - addIncludeFile :: String -> st -> st + getIncludeFiles :: st -> [Text] + addIncludeFile :: Text -> st -> st dropLatestIncludeFile :: st -> st instance HasIncludeFiles ParserState where @@ -1232,17 +1304,21 @@ data QuoteContext | NoQuote -- ^ Used when not parsing inside quotes deriving (Eq, Show) -type NoteTable = [(String, String)] +type NoteTable = [(Text, Text)] -type NoteTable' = M.Map String (SourcePos, F Blocks) +type NoteTable' = M.Map Text (SourcePos, F Blocks) -- used in markdown reader -newtype Key = Key String deriving (Show, Read, Eq, Ord) +newtype Key = Key Text deriving (Show, Read, Eq, Ord) -toKey :: String -> Key -toKey = Key . map toLower . unwords . words . unbracket - where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs - unbracket xs = xs +toKey :: Text -> Key +toKey = Key . T.toLower . T.unwords . T.words . unbracket + where unbracket t + | Just ('[', t') <- T.uncons t + , Just (t'', ']') <- T.unsnoc t' + = t'' + | otherwise + = t type KeyTable = M.Map Key (Target, Attr) @@ -1261,17 +1337,17 @@ registerHeader :: (Stream s m a, HasReaderOptions st, registerHeader (ident,classes,kvs) header' = do ids <- extractIdentifierList <$> getState exts <- getOption readerExtensions - if null ident && Ext_auto_identifiers `extensionEnabled` exts + if T.null ident && Ext_auto_identifiers `extensionEnabled` exts then do let id' = uniqueIdent exts (B.toList header') ids let id'' = if Ext_ascii_identifiers `extensionEnabled` exts - then mapMaybe toAsciiChar id' + then T.pack $ mapMaybe toAsciiChar $ T.unpack id' else id' updateState $ updateIdentifierList $ Set.insert id' updateState $ updateIdentifierList $ Set.insert id'' return (id'',classes,kvs) else do - unless (null ident) $ do + unless (T.null ident) $ do when (ident `Set.member` ids) $ do pos <- getPosition logMessage $ DuplicateIdentifier ident pos @@ -1314,7 +1390,7 @@ failIfInQuoteContext context = do context' <- getQuoteContext when (context' == context) $ Prelude.fail "already inside quotes" -charOrRef :: Stream s m Char => String -> ParserT s st m Char +charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char charOrRef cs = oneOf cs <|> try (do c <- characterReference guard (c `elem` cs) @@ -1379,7 +1455,7 @@ nested p = do return res citeKey :: (Stream s m Char, HasLastStrPosition st) - => ParserT s st m (Bool, String) + => ParserT s st m (Bool, Text) citeKey = try $ do guard =<< notAfterString suppress_author <- option False (True <$ char '-') @@ -1390,15 +1466,15 @@ citeKey = try $ do rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|> try (oneOf ":/" <* lookAhead (char '/')) let key = firstChar:rest - return (suppress_author, key) + return (suppress_author, T.pack key) token :: (Stream s m t) - => (t -> String) + => (t -> Text) -> (t -> SourcePos) -> (t -> Maybe a) -> ParsecT s st m a -token pp pos match = tokenPrim pp (\_ t _ -> pos t) match +token pp pos match = tokenPrim (T.unpack . pp) (\_ t _ -> pos t) match infixr 5 <+?> (<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a @@ -1409,27 +1485,27 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs') where ident' = fromMaybe ident (lookup "id" kvs) cls' = case lookup "class" kvs of - Just cl -> words cl + Just cl -> T.words cl Nothing -> cls kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st) - => ParserT [a] st m (mf Blocks) - -> (String -> [a]) + => ParserT a st m (mf Blocks) + -> (Text -> a) -> [FilePath] -> FilePath - -> ParserT [a] st m (mf Blocks) + -> ParserT a st m (mf Blocks) insertIncludedFile' blocks totoks dirs f = do oldPos <- getPosition oldInput <- getInput containers <- getIncludeFiles <$> getState - when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show oldPos - updateState $ addIncludeFile f + when (T.pack f `elem` containers) $ + throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show oldPos + updateState $ addIncludeFile $ T.pack f mbcontents <- readFileFromDirs dirs f contents <- case mbcontents of Just s -> return s Nothing -> do - report $ CouldNotLoadIncludeFile f oldPos + report $ CouldNotLoadIncludeFile (T.pack f) oldPos return "" setPosition $ newPos f 1 1 setInput $ totoks contents @@ -1443,7 +1519,7 @@ insertIncludedFile' blocks totoks dirs f = do -- @PandocParseError@. insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) => ParserT [a] st m Blocks - -> (String -> [a]) + -> (Text -> [a]) -> [FilePath] -> FilePath -> ParserT [a] st m Blocks insertIncludedFile blocks totoks dirs f = @@ -1452,7 +1528,7 @@ insertIncludedFile blocks totoks dirs f = -- | Parse content of include file as future blocks. Circular includes result in -- an @PandocParseError@. insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) - => ParserT String st m (Future st Blocks) + => ParserT Text st m (Future st Blocks) -> [FilePath] -> FilePath - -> ParserT String st m (Future st Blocks) + -> ParserT Text st m (Future st Blocks) insertIncludedFileF p = insertIncludedFile' p id diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 3ad479287..461f7f4d9 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -1,6 +1,7 @@ {-# LANGUAGE MonoLocalBinds #-} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -59,8 +60,8 @@ import Control.Monad (unless) import Control.Monad.Except (throwError) import Data.Aeson import qualified Data.ByteString.Lazy as BL -import Data.List (intercalate) import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Error @@ -99,7 +100,7 @@ data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc) | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc) -- | Association list of formats and readers. -readers :: PandocMonad m => [(String, Reader m)] +readers :: PandocMonad m => [(Text, Reader m)] readers = [ ("native" , TextReader readNative) ,("json" , TextReader readJSON) ,("markdown" , TextReader readMarkdown) @@ -135,11 +136,11 @@ readers = [ ("native" , TextReader readNative) ] -- | Retrieve reader, extensions based on formatSpec (format+extensions). -getReader :: PandocMonad m => String -> m (Reader m, Extensions) +getReader :: PandocMonad m => Text -> m (Reader m, Extensions) getReader s = case parseFormatSpec s of Left e -> throwError $ PandocAppError - $ intercalate "\n" [m | Message m <- errorMessages e] + $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e] Right (readerName, extsToEnable, extsToDisable) -> case lookup readerName readers of Nothing -> throwError $ PandocUnknownReaderError @@ -154,7 +155,7 @@ getReader s = unless (extensionEnabled ext allExts) $ throwError $ PandocUnsupportedExtensionError - (drop 4 $ show ext) readerName) + (T.drop 4 $ T.pack $ show ext) readerName) (extsToEnable ++ extsToDisable) return (r, exts) diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 368c86d4f..40b6f77c9 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.CommonMark Copyright : Copyright (C) 2015-2019 John MacFarlane @@ -18,9 +20,9 @@ where import Prelude import CMarkGFM import Control.Monad.State -import Data.List (groupBy) import qualified Data.Set as Set -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojiToInline) @@ -40,24 +42,24 @@ readCommonMark opts s = return $ [ extTable | isEnabled Ext_pipe_tables opts ] ++ [ extAutolink | isEnabled Ext_autolink_bare_uris opts ] -convertEmojis :: String -> [Inline] -convertEmojis s@(':':xs) = - case break (==':') xs of - (ys,':':zs) -> +convertEmojis :: Text -> [Inline] +convertEmojis s@(T.uncons -> Just (':',xs)) = + case T.break (==':') xs of + (ys, T.uncons -> Just (':',zs)) -> case emojiToInline ys of Just em -> em : convertEmojis zs - Nothing -> Str (':' : ys) : convertEmojis (':':zs) + Nothing -> Str (":" <> ys) : convertEmojis (":" <> zs) _ -> [Str s] convertEmojis s = - case break (==':') s of + case T.break (==':') s of ("","") -> [] (_,"") -> [Str s] - (xs,ys) -> Str xs:convertEmojis ys + (xs,ys) -> Str xs : convertEmojis ys addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty -addHeaderId :: ReaderOptions -> Block -> State (Set.Set String) Block +addHeaderId :: ReaderOptions -> Block -> State (Set.Set Text) Block addHeaderId opts (Header lev (_,classes,kvs) ils) = do ids <- get let ident = uniqueIdent (readerExtensions opts) ils ids @@ -82,14 +84,14 @@ addBlock _ (Node _ THEMATIC_BREAK _) = addBlock opts (Node _ BLOCK_QUOTE nodes) = (BlockQuote (addBlocks opts nodes) :) addBlock opts (Node _ (HTML_BLOCK t) _) - | isEnabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :) + | isEnabled Ext_raw_html opts = (RawBlock (Format "html") t :) | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = id addBlock _ (Node _ (CODE_BLOCK info t) _) = - (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) + (CodeBlock ("", take 1 (T.words info), []) t :) addBlock opts (Node _ (HEADING lev) nodes) = (Header lev ("",[],[]) (addInlines opts nodes) :) addBlock opts (Node _ (LIST listAttrs) nodes) = @@ -176,29 +178,28 @@ addInlines opts = foldr (addInline opts) [] addInline :: ReaderOptions -> Node -> [Inline] -> [Inline] addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++) - where raw = unpack t - clumps = groupBy samekind raw + where clumps = T.groupBy samekind t samekind ' ' ' ' = True samekind ' ' _ = False samekind _ ' ' = False samekind _ _ = True - toinl (' ':_) = [Space] - toinl xs = if isEnabled Ext_emoji opts - then convertEmojis xs - else [Str xs] + toinl (T.uncons -> Just (' ', _)) = [Space] + toinl xs = if isEnabled Ext_emoji opts + then convertEmojis xs + else [Str xs] addInline _ (Node _ LINEBREAK _) = (LineBreak :) addInline opts (Node _ SOFTBREAK _) | isEnabled Ext_hard_line_breaks opts = (LineBreak :) | otherwise = (SoftBreak :) addInline opts (Node _ (HTML_INLINE t) _) - | isEnabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :) + | isEnabled Ext_raw_html opts = (RawInline (Format "html") t :) | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = id addInline _ (Node _ (CODE t) _) = - (Code ("",[],[]) (unpack t) :) + (Code ("",[],[]) t :) addInline opts (Node _ EMPH nodes) = (Emph (addInlines opts nodes) :) addInline opts (Node _ STRONG nodes) = @@ -206,7 +207,7 @@ addInline opts (Node _ STRONG nodes) = addInline opts (Node _ STRIKETHROUGH nodes) = (Strikeout (addInlines opts nodes) :) addInline opts (Node _ (LINK url title) nodes) = - (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :) + (Link nullAttr (addInlines opts nodes) (url, title) :) addInline opts (Node _ (IMAGE url title) nodes) = - (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :) + (Image nullAttr (addInlines opts nodes) (url, title) :) addInline _ _ = id diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index ceb63ac84..1aa1dfaa4 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -19,6 +19,7 @@ import Control.Monad.Except (guard, liftM2, throwError) import qualified Data.Foldable as F import Data.Maybe (fromMaybe) import Data.Text (Text) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.Definition @@ -70,7 +71,6 @@ parseCreole = do eof return $ B.doc bs - -- -- block parsers -- @@ -92,9 +92,9 @@ nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart >> manyTill content nowikiEnd) where content = brackets <|> line - brackets = try $ option "" ((:[]) <$> newline) - <+> (char ' ' >> (many (char ' ') <+> string "}}}") <* eol) - line = option "" ((:[]) <$> newline) <+> manyTill anyChar eol + brackets = try $ option "" (T.singleton <$> newline) + <+> (char ' ' >> (manyChar (char ' ') <+> textStr "}}}") <* eol) + line = option "" (T.singleton <$> newline) <+> manyTillChar anyChar eol eol = lookAhead $ try $ nowikiEnd <|> newline nowikiStart = optional newline >> string "{{{" >> skipMany spaceChar >> newline nowikiEnd = try $ linebreak >> string "}}}" >> skipMany spaceChar >> newline @@ -106,7 +106,7 @@ header = try $ do fmap length (many1 (char '=')) guard $ level <= 6 skipSpaces - content <- B.str <$> manyTill (noneOf "\n") headerEnd + content <- B.str <$> manyTillChar (noneOf "\n") headerEnd return $ B.header level content where headerEnd = try $ skipSpaces >> many (char '=') >> skipSpaces >> newline @@ -204,7 +204,7 @@ inline = choice [ whitespace escapedChar :: PandocMonad m => CRLParser m B.Inlines escapedChar = - fmap (B.str . (:[])) (try $ char '~' >> noneOf "\t\n ") + fmap (B.str . T.singleton) (try $ char '~' >> noneOf "\t\n ") escapedLink :: PandocMonad m => CRLParser m B.Inlines escapedLink = try $ do @@ -217,8 +217,8 @@ image = try $ do (orig, src) <- wikiImg return $ B.image src "" (B.str orig) where - linkSrc = many $ noneOf "|}\n\r\t" - linkDsc = char '|' >> many (noneOf "}\n\r\t") + linkSrc = manyChar $ noneOf "|}\n\r\t" + linkDsc = char '|' >> manyChar (noneOf "}\n\r\t") wikiImg = try $ do string "{{" src <- linkSrc @@ -231,11 +231,11 @@ link = try $ do (orig, src) <- uriLink <|> wikiLink return $ B.link src "" orig where - linkSrc = many $ noneOf "|]\n\r\t" - linkDsc :: PandocMonad m => String -> CRLParser m B.Inlines + linkSrc = manyChar $ noneOf "|]\n\r\t" + linkDsc :: PandocMonad m => Text -> CRLParser m B.Inlines linkDsc otxt = B.str <$> try (option otxt - (char '|' >> many (noneOf "]\n\r\t"))) + (char '|' >> manyChar (noneOf "]\n\r\t"))) linkImg = try $ char '|' >> image wikiLink = try $ do string "[[" @@ -248,7 +248,7 @@ link = try $ do return (B.str orig, src) inlineNowiki :: PandocMonad m => CRLParser m B.Inlines -inlineNowiki = B.code <$> (start >> manyTill (noneOf "\n\r") end) +inlineNowiki = B.code <$> (start >> manyTillChar (noneOf "\n\r") end) where start = try $ string "{{{" end = try $ string "}}}" >> lookAhead (noneOf "}") @@ -271,11 +271,11 @@ linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) innerNewline = return B.space symbol :: PandocMonad m => CRLParser m B.Inlines -symbol = fmap (B.str . (:[])) (oneOf specialChars) +symbol = fmap (B.str . T.singleton) (oneOf specialChars) str :: PandocMonad m => CRLParser m B.Inlines str = let strChar = noneOf ("\t\n " ++ specialChars) in - fmap B.str (many1 strChar) + fmap B.str (many1Char strChar) bold :: PandocMonad m => CRLParser m B.Inlines bold = B.strong . mconcat <$> diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 3f44f83f8..ade9d27a3 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.DocBook Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -536,20 +537,22 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions - $ T.unpack $ crFilter inp + let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. -- Other xml instructions are simply removed from the input stream. -handleInstructions :: String -> String -handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs -handleInstructions xs = case break (=='<') xs of +handleInstructions :: Text -> Text +handleInstructions = T.pack . handleInstructions' . T.unpack + +handleInstructions' :: String -> String +handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs +handleInstructions' xs = case break (=='<') xs of (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions zs - (ys, zs) -> ys ++ handleInstructions zs + ([], '<':zs) -> '<' : handleInstructions' zs + (ys, zs) -> ys ++ handleInstructions' zs getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do @@ -580,13 +583,13 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr elt = - fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- convenience function -named :: String -> Element -> Bool -named s e = qName (elName e) == s +named :: Text -> Element -> Bool +named s e = qName (elName e) == T.unpack s -- @@ -611,7 +614,7 @@ addMetadataFromElement e = do [z] -> getInlines z >>= addMeta fieldname zs -> mapM getInlines zs >>= addMeta fieldname -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m () addMeta field val = modify (setMeta field val) instance HasMeta DBState where @@ -638,10 +641,8 @@ admonitionTags :: [String] admonitionTags = ["important","caution","note","tip","warning"] -- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs +trimNl :: Text -> Text +trimNl = T.dropAround (== '\n') -- meld text into beginning of first paragraph of Blocks. -- assumes Blocks start with a Para; if not, does nothing. @@ -668,7 +669,7 @@ getMediaobject e = do h = case atVal "depth" of "" -> [] d -> [("height", d)] - atr = (atVal "id", words $ atVal "role", w ++ h) + atr = (atVal "id", T.words $ atVal "role", w ++ h) in return (atVal "fileref", atr) let getCaption el = case filterChild (\x -> named "caption" x || named "textobject" x @@ -691,8 +692,8 @@ parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x + else return $ plain $ trimInlines $ text $ T.pack s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x parseBlock (Elem e) = case qName (elName e) of "toc" -> skip -- skip TOC, since in pandoc it's autogenerated @@ -740,7 +741,7 @@ parseBlock (Elem e) = "refsect2" -> sect 2 "refsect3" -> sect 3 "refsection" -> gets dbSectionLevel >>= sect . (+1) - l@_ | l `elem` admonitionTags -> parseAdmonition l + l@_ | l `elem` admonitionTags -> parseAdmonition $ T.pack l "area" -> skip "areaset" -> skip "areaspec" -> skip @@ -800,7 +801,7 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - lift $ report $ IgnoredElement $ qName (elName e) + lift $ report $ IgnoredElement $ T.pack $ qName (elName e) return mempty parseMixed container conts = do @@ -818,7 +819,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e + $ trimNl $ T.pack $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -871,9 +872,9 @@ parseBlock (Elem e) = _ -> AlignDefault let toWidth c = case findAttr (unqual "colwidth") c of Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> + $ safeRead $ "0" <> T.filter (\x -> (x >= '0' && x <= '9') - || x == '.') w + || x == '.') (T.pack w) Nothing -> 0 :: Double let numrows = case bodyrows of [] -> 0 @@ -938,9 +939,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> DB m Inlines -parseInline (Text (CData _ s _)) = return $ text s +parseInline (Text (CData _ s _)) = return $ text $ T.pack s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) text $ lookupEntity ref + return $ maybe (text $ T.toUpper $ T.pack ref) (text . T.pack) $ lookupEntity ref parseInline (Elem e) = case qName (elName e) of "equation" -> equation e displayMath @@ -980,7 +981,7 @@ parseInline (Elem e) = "constant" -> codeWithLang "userinput" -> codeWithLang "varargs" -> return $ code "(...)" - "keycap" -> return (str $ strContent e) + "keycap" -> return (str $ T.pack $ strContent e) "keycombo" -> keycombo <$> mapM parseInline (elContent e) "menuchoice" -> menuchoice <$> @@ -992,20 +993,20 @@ parseInline (Elem e) = let title = case attrValue "endterm" e of "" -> maybe "???" xrefTitleByElem (findElementById linkend content) - endterm -> maybe "???" strContent + endterm -> maybe "???" (T.pack . strContent) (findElementById endterm content) - return $ link ('#' : linkend) "" (text title) - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e + return $ link ("#" <> linkend) "" (text title) + "email" -> return $ link ("mailto:" <> T.pack (strContent e)) "" + $ str $ T.pack $ strContent e + "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e "ulink" -> link (attrValue "url" e) "" <$> innerInlines "link" -> do ils <- innerInlines let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> '#' : attrValue "linkend" e + Just h -> T.pack h + _ -> "#" <> attrValue "linkend" e let ils' = if ils == mempty then str href else ils - let attr = (attrValue "id" e, words $ attrValue "role" e, []) + let attr = (attrValue "id" e, T.words $ attrValue "role" e, []) return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of @@ -1023,7 +1024,7 @@ parseInline (Elem e) = "br" -> return linebreak _ -> skip >> innerInlines where skip = do - lift $ report $ IgnoredElement $ qName (elName e) + lift $ report $ IgnoredElement $ T.pack $ qName (elName e) return mempty innerInlines = (trimInlines . mconcat) <$> @@ -1032,7 +1033,7 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines (filterChildren (named "member") e) segmentedList = do @@ -1062,8 +1063,8 @@ parseInline (Elem e) = -- if there's no such attribute, employ some heuristics based on what -- docbook-xsl does. xrefTitleByElem el - | not (null xrefLabel) = xrefLabel - | otherwise = case qName (elName el) of + | not (T.null xrefLabel) = xrefLabel + | otherwise = case qName (elName el) of "chapter" -> descendantContent "title" el "section" -> descendantContent "title" el "sect1" -> descendantContent "title" el @@ -1073,10 +1074,10 @@ parseInline (Elem e) = "sect5" -> descendantContent "title" el "cmdsynopsis" -> descendantContent "command" el "funcsynopsis" -> descendantContent "function" el - _ -> qName (elName el) ++ "_title" + _ -> T.pack $ qName (elName el) ++ "_title" where xrefLabel = attrValue "xreflabel" el - descendantContent name = maybe "???" strContent + descendantContent name = maybe "???" (T.pack . strContent) . filterElementName (\n -> qName n == name) -- | Extract a math equation from an element @@ -1088,20 +1089,20 @@ equation :: Monad m => Element -- ^ The element from which to extract a mathematical equation - -> (String -> Inlines) + -> (Text -> Inlines) -- ^ A constructor for some Inlines, taking the TeX code as input -> m Inlines equation e constructor = - return $ mconcat $ map constructor $ mathMLEquations ++ latexEquations + return $ mconcat $ map constructor $ mathMLEquations <> latexEquations where - mathMLEquations :: [String] + mathMLEquations :: [Text] mathMLEquations = map writeTeX $ rights $ readMath (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml") - (readMathML . showElement) + (readMathML . T.pack . showElement) - latexEquations :: [String] + latexEquations :: [Text] latexEquations = readMath (\x -> qName (elName x) == "mathphrase") - (concat . fmap showVerbatimCData . elContent) + (T.concat . fmap showVerbatimCData . elContent) readMath :: (Element -> Bool) -> (Element -> b) -> [b] readMath childPredicate fromElement = @@ -1111,9 +1112,10 @@ equation e constructor = -- | Get the actual text stored in a CData block. 'showContent' -- returns the text still surrounded by the [[CDATA]] tags. -showVerbatimCData :: Content -> String -showVerbatimCData (Text (CData _ d _)) = d -showVerbatimCData c = showContent c +showVerbatimCData :: Content -> Text +showVerbatimCData (Text (CData _ d _)) = T.pack d +showVerbatimCData c = T.pack $ showContent c + -- | Set the prefix of a name to 'Nothing' removePrefix :: QName -> QName diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 387c3c7e2..cd4ff01db 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -4,6 +4,7 @@ {-# LANGUAGE PatternGuards #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Docx Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -68,12 +69,12 @@ import Data.Default (Default) import Data.List (delete, intersect) import Data.Char (isSpace) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe (isJust, fromMaybe) import Data.Sequence (ViewL (..), viewl) import qualified Data.Sequence as Seq import qualified Data.Set as Set import Text.Pandoc.Builder --- import Text.Pandoc.Definition import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Options import Text.Pandoc.Readers.Docx.Combine @@ -101,14 +102,14 @@ readDocx opts bytes readDocx _ _ = throwError $ PandocSomeError "couldn't parse docx file" -data DState = DState { docxAnchorMap :: M.Map String String - , docxAnchorSet :: Set.Set String - , docxImmedPrevAnchor :: Maybe String +data DState = DState { docxAnchorMap :: M.Map T.Text T.Text + , docxAnchorSet :: Set.Set T.Text + , docxImmedPrevAnchor :: Maybe T.Text , docxMediaBag :: MediaBag , docxDropCap :: Inlines -- keep track of (numId, lvl) values for -- restarting - , docxListState :: M.Map (String, String) Integer + , docxListState :: M.Map (T.Text, T.Text) Integer , docxPrevPara :: Inlines } @@ -142,7 +143,7 @@ spansToKeep = [] divsToKeep :: [ParaStyleName] divsToKeep = ["Definition", "Definition Term"] -metaStyles :: M.Map ParaStyleName String +metaStyles :: M.Map ParaStyleName T.Text metaStyles = M.fromList [ ("Title", "title") , ("Subtitle", "subtitle") , ("Author", "author") @@ -167,7 +168,7 @@ isEmptyPar (Paragraph _ parParts) = isEmptyElem _ = True isEmptyPar _ = False -bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue) +bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue) bodyPartsToMeta' [] = return M.empty bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp @@ -232,22 +233,22 @@ runElemToInlines Tab = space runElemToInlines SoftHyphen = text "\xad" runElemToInlines NoBreakHyphen = text "\x2011" -runElemToString :: RunElem -> String -runElemToString (TextRun s) = s -runElemToString LnBrk = ['\n'] -runElemToString Tab = ['\t'] -runElemToString SoftHyphen = ['\xad'] -runElemToString NoBreakHyphen = ['\x2011'] +runElemToText :: RunElem -> T.Text +runElemToText (TextRun s) = s +runElemToText LnBrk = T.singleton '\n' +runElemToText Tab = T.singleton '\t' +runElemToText SoftHyphen = T.singleton '\xad' +runElemToText NoBreakHyphen = T.singleton '\x2011' -runToString :: Run -> String -runToString (Run _ runElems) = concatMap runElemToString runElems -runToString _ = "" +runToText :: Run -> T.Text +runToText (Run _ runElems) = T.concat $ map runElemToText runElems +runToText _ = "" -parPartToString :: ParPart -> String -parPartToString (PlainRun run) = runToString run -parPartToString (InternalHyperLink _ runs) = concatMap runToString runs -parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs -parPartToString _ = "" +parPartToText :: ParPart -> T.Text +parPartToText (PlainRun run) = runToText run +parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs +parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs +parPartToText _ = "" blacklistedCharStyles :: [CharStyleName] blacklistedCharStyles = ["Hyperlink"] @@ -310,7 +311,7 @@ runToInlines :: PandocMonad m => Run -> DocxContext m Inlines runToInlines (Run rs runElems) | maybe False isCodeCharStyle $ rParentStyle rs = do rPr <- resolveDependentRunStyle rs - let codeString = code $ concatMap runElemToString runElems + let codeString = code $ T.concat $ map runElemToText runElems return $ case rVertAlign rPr of Just SupScrpt -> superscript codeString Just SubScrpt -> subscript codeString @@ -328,17 +329,17 @@ runToInlines (Endnote bps) = do return $ note blksList runToInlines (InlineDrawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt + return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" extentToAttr :: Extent -> Attr extentToAttr (Just (w, h)) = ("", [], [("width", showDim w), ("height", showDim h)] ) where - showDim d = show (d / 914400) ++ "in" + showDim d = tshow (d / 914400) <> "in" extentToAttr _ = nullAttr -blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines +blocksToInlinesWarn :: PandocMonad m => T.Text -> Blocks -> DocxContext m Inlines blocksToInlinesWarn cmtId blks = do let blkList = toList blks notParaOrPlain :: Block -> Bool @@ -347,7 +348,7 @@ blocksToInlinesWarn cmtId blks = do notParaOrPlain _ = True unless ( not (any notParaOrPlain blkList)) $ lift $ P.report $ DocxParserWarning $ - "Docx comment " ++ cmtId ++ " will not retain formatting" + "Docx comment " <> cmtId <> " will not retain formatting" return $ blocksToInlines' blkList -- The majority of work in this function is done in the primed @@ -440,12 +441,12 @@ parPartToInlines' (BookMark _ anchor) = return $ spanWith (newAnchor, ["anchor"], []) mempty parPartToInlines' (Drawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt + return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt parPartToInlines' Chart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" parPartToInlines' (InternalHyperLink anchor runs) = do ils <- smushInlines <$> mapM runToInlines runs - return $ link ('#' : anchor) "" ils + return $ link ("#" <> anchor) "" ils parPartToInlines' (ExternalHyperLink target runs) = do ils <- smushInlines <$> mapM runToInlines runs return $ link target "" ils @@ -463,7 +464,7 @@ isAnchorSpan (Span (_, classes, kvs) _) = null kvs isAnchorSpan _ = False -dummyAnchors :: [String] +dummyAnchors :: [T.Text] dummyAnchors = ["_GoBack"] makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks @@ -477,7 +478,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) , (Span (anchIdent, ["anchor"], _) cIls) <- c = do hdrIDMap <- gets docxAnchorMap exts <- readerExtensions <$> asks docxOptions - let newIdent = if null ident + let newIdent = if T.null ident then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident newIls = concatMap f ils where f il | il == c = cIls @@ -490,7 +491,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap exts <- readerExtensions <$> asks docxOptions - let newIdent = if null ident + let newIdent = if T.null ident then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} @@ -558,8 +559,8 @@ parStyleToTransform pPr else transform parStyleToTransform _ = return id -normalizeToClassName :: (FromStyleName a) => a -> String -normalizeToClassName = map go . fromStyleName +normalizeToClassName :: (FromStyleName a) => a -> T.Text +normalizeToClassName = T.map go . fromStyleName where go c | isSpace c = '-' | otherwise = c @@ -574,7 +575,8 @@ bodyPartToBlocks (Paragraph pPr parparts) return $ transform $ codeBlock $ - concatMap parPartToString parparts + T.concat $ + map parPartToText parparts | Just (style, n) <- pHeading pPr = do ils <-local (\s-> s{docxInHeaderBlock=True}) (smushInlines <$> mapM parPartToInlines parparts) @@ -646,7 +648,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do , ("num-id", numId) , ("format", fmt) , ("text", txt) - , ("start", show start) + , ("start", tshow start) ] modify $ \st -> st{ docxListState = -- expire all the continuation data for lists of level > this one: @@ -705,12 +707,12 @@ bodyPartToBlocks (OMathPara e) = -- replace targets with generated anchors. rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline -rewriteLink' l@(Link attr ils ('#':target, title)) = do +rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do anchorMap <- gets docxAnchorMap case M.lookup target anchorMap of Just newTarget -> do modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)} - return $ Link attr ils ('#':newTarget, title) + return $ Link attr ils ("#" <> newTarget, title) Nothing -> do modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)} return l diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index da40a80ea..82791d669 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Combine Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>, diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index e7a916f1c..05d9dd697 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Fields Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -16,16 +17,18 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..) ) where import Prelude +import Data.Functor (($>)) +import qualified Data.Text as T import Text.Parsec -import Text.Parsec.String (Parser) +import Text.Parsec.Text (Parser) -type URL = String +type URL = T.Text data FieldInfo = HyperlinkField URL | UnknownField deriving (Show) -parseFieldInfo :: String -> Either ParseError FieldInfo +parseFieldInfo :: T.Text -> Either ParseError FieldInfo parseFieldInfo = parse fieldInfo "" fieldInfo :: Parser FieldInfo @@ -34,31 +37,31 @@ fieldInfo = <|> return UnknownField -escapedQuote :: Parser String -escapedQuote = string "\\\"" +escapedQuote :: Parser T.Text +escapedQuote = string "\\\"" $> "\\\"" -inQuotes :: Parser String +inQuotes :: Parser T.Text inQuotes = - (try escapedQuote) <|> (anyChar >>= (\c -> return [c])) + (try escapedQuote) <|> (anyChar >>= (\c -> return $ T.singleton c)) -quotedString :: Parser String +quotedString :: Parser T.Text quotedString = do char '"' - concat <$> manyTill inQuotes (try (char '"')) + T.concat <$> manyTill inQuotes (try (char '"')) -unquotedString :: Parser String -unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof) +unquotedString :: Parser T.Text +unquotedString = T.pack <$> manyTill anyChar (try $ lookAhead space *> return () <|> eof) -fieldArgument :: Parser String +fieldArgument :: Parser T.Text fieldArgument = quotedString <|> unquotedString -- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25 -hyperlinkSwitch :: Parser (String, String) +hyperlinkSwitch :: Parser (T.Text, T.Text) hyperlinkSwitch = do sw <- string "\\l" spaces farg <- fieldArgument - return (sw, farg) + return (T.pack sw, farg) hyperlink :: Parser URL hyperlink = do @@ -68,6 +71,6 @@ hyperlink = do farg <- fieldArgument switches <- spaces *> many hyperlinkSwitch let url = case switches of - ("\\l", s) : _ -> farg ++ ('#': s) + ("\\l", s) : _ -> farg <> "#" <> s _ -> farg return url diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index eb24640c5..b7b7a3835 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -22,6 +22,7 @@ import Prelude import Data.List import Data.Maybe import Data.String (fromString) +import qualified Data.Text as T import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.JSON import Text.Pandoc.Readers.Docx.Parse (ParaStyleName) @@ -45,20 +46,20 @@ getNumId _ = Nothing getNumIdN :: Block -> Integer getNumIdN b = fromMaybe (-1) (getNumId b) -getText :: Block -> Maybe String +getText :: Block -> Maybe T.Text getText (Div (_, _, kvs) _) = lookup "text" kvs getText _ = Nothing data ListType = Itemized | Enumerated ListAttributes -listStyleMap :: [(String, ListNumberStyle)] +listStyleMap :: [(T.Text, ListNumberStyle)] listStyleMap = [("upperLetter", UpperAlpha), ("lowerLetter", LowerAlpha), ("upperRoman", UpperRoman), ("lowerRoman", LowerRoman), ("decimal", Decimal)] -listDelimMap :: [(String, ListNumberDelim)] +listDelimMap :: [(T.Text, ListNumberDelim)] listDelimMap = [("%1)", OneParen), ("(%1)", TwoParens), ("%1.", Period)] @@ -82,11 +83,11 @@ getListType b@(Div (_, _, kvs) _) | isListItem b = _ -> Nothing getListType _ = Nothing -listParagraphDivs :: [String] +listParagraphDivs :: [T.Text] listParagraphDivs = ["list-paragraph"] listParagraphStyles :: [ParaStyleName] -listParagraphStyles = map fromString listParagraphDivs +listParagraphStyles = map (fromString . T.unpack) listParagraphDivs -- This is a first stab at going through and attaching meaning to list -- paragraphs, without an item marker, following a list item. We diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 889bd80fc..8598ada6f 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -63,6 +64,7 @@ import qualified Data.ByteString.Lazy as B import Data.Char (chr, ord, readLitChar) import Data.List import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import System.FilePath import Text.Pandoc.Readers.Docx.Util @@ -71,7 +73,7 @@ import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead) import qualified Text.Pandoc.UTF8 as UTF8 import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) -import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont) +import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.XML.Light import qualified Text.XML.Light.Cursor as XMLC @@ -88,7 +90,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show -data ReaderState = ReaderState { stateWarnings :: [String] +data ReaderState = ReaderState { stateWarnings :: [T.Text] , stateFldCharState :: FldCharState } deriving Show @@ -119,7 +121,6 @@ eitherToD (Left _) = throwError DocxError concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] concatMapM f xs = liftM concat (mapM f xs) - -- This is similar to `mapMaybe`: it maps a function returning the D -- monad over a list, and only keeps the non-erroring return values. mapD :: (a -> D b) -> [a] -> D [b] @@ -178,18 +179,18 @@ type ParStyleMap = M.Map ParaStyleId ParStyle data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show -data Numb = Numb String String [LevelOverride] +data Numb = Numb T.Text T.Text [LevelOverride] deriving Show -- ilvl startOverride lvl -data LevelOverride = LevelOverride String (Maybe Integer) (Maybe Level) +data LevelOverride = LevelOverride T.Text (Maybe Integer) (Maybe Level) deriving Show -data AbstractNumb = AbstractNumb String [Level] +data AbstractNumb = AbstractNumb T.Text [Level] deriving Show -- ilvl format string start -data Level = Level String String String (Maybe Integer) +data Level = Level T.Text T.Text T.Text (Maybe Integer) deriving Show data DocumentLocation = InDocument | InFootnote | InEndnote @@ -199,11 +200,11 @@ data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces - (Maybe (M.Map String Element)) - (Maybe (M.Map String Element)) + (Maybe (M.Map T.Text Element)) + (Maybe (M.Map T.Text Element)) deriving Show -data Comments = Comments NameSpaces (M.Map String Element) +data Comments = Comments NameSpaces (M.Map T.Text Element) deriving Show data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer @@ -238,8 +239,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String (Maybe Level) [ParPart] - | Tbl String TblGrid TblLook [Row] + | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart] + | Tbl T.Text TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -279,7 +280,7 @@ data ParPart = PlainRun Run | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] - | Drawing FilePath String String B.ByteString Extent -- title, alt + | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | Chart -- placeholder for now | PlainOMath [Exp] | Field FieldInfo [Run] @@ -290,28 +291,28 @@ data ParPart = PlainRun Run data Run = Run RunStyle [RunElem] | Footnote [BodyPart] | Endnote [BodyPart] - | InlineDrawing FilePath String String B.ByteString Extent -- title, alt + | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | InlineChart -- placeholder deriving Show -data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen +data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show -type Target = String -type Anchor = String -type URL = String -type BookMarkId = String -type RelId = String -type ChangeId = String -type CommentId = String -type Author = String -type ChangeDate = String -type CommentDate = String +type Target = T.Text +type Anchor = T.Text +type URL = T.Text +type BookMarkId = T.Text +type RelId = T.Text +type ChangeId = T.Text +type CommentId = T.Text +type Author = T.Text +type ChangeDate = T.Text +type CommentDate = T.Text archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive -archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) +archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [T.Text]) archiveToDocxWithWarnings archive = do docXmlPath <- case getDocumentXmlPath archive of Just fp -> Right fp @@ -341,7 +342,7 @@ archiveToDocxWithWarnings archive = do Right doc -> Right (Docx doc, stateWarnings st) Left e -> Left e -getDocumentXmlPath :: Archive -> Maybe String +getDocumentXmlPath :: Archive -> Maybe FilePath getDocumentXmlPath zf = do entry <- findEntryByPath "_rels/.rels" zf relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry @@ -394,7 +395,7 @@ constructBogusParStyleData stName = ParStyle , numInfo = Nothing , psParentStyle = Nothing , pStyleName = stName - , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName + , pStyleId = ParaStyleId . T.filter (/=' ') . fromStyleName $ stName } archiveToNotes :: Archive -> Notes @@ -441,8 +442,8 @@ filePathToRelType path docXmlPath = relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship relElemToRelationship relType element | qName (elName element) == "Relationship" = do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element + relId <- findAttrText (QName "Id" Nothing Nothing) element + target <- findAttrText (QName "Target" Nothing Nothing) element return $ Relationship relType relId target relElemToRelationship _ _ = Nothing @@ -464,7 +465,7 @@ filePathIsMedia fp = in (dir == "word/media/") -lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do (absNumId, ovrrides) <- lookup numId $ map (\(Numb nid absnumid ovrRides) -> (nid, (absnumid, ovrRides))) numbs @@ -483,7 +484,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride loElemToLevelOverride ns element | isElem ns "w" "lvlOverride" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element let startOverride = findChildByName ns "w" "startOverride" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -495,9 +496,9 @@ loElemToLevelOverride _ _ = Nothing numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | isElem ns "w" "num" element = do - numId <- findAttrByName ns "w" "numId" element + numId <- findAttrTextByName ns "w" "numId" element absNumId <- findChildByName ns "w" "abstractNumId" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let lvlOverrides = mapMaybe (loElemToLevelOverride ns) (findChildrenByName ns "w" "lvlOverride" element) @@ -507,7 +508,7 @@ numElemToNum _ _ = Nothing absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb absNumElemToAbsNum ns element | isElem ns "w" "abstractNum" element = do - absNumId <- findAttrByName ns "w" "abstractNumId" element + absNumId <- findAttrTextByName ns "w" "abstractNumId" element let levelElems = findChildrenByName ns "w" "lvl" element levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels @@ -516,11 +517,11 @@ absNumElemToAbsNum _ _ = Nothing levelElemToLevel :: NameSpaces -> Element -> Maybe Level levelElemToLevel ns element | isElem ns "w" "lvl" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element fmt <- findChildByName ns "w" "numFmt" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" txt <- findChildByName ns "w" "lvlText" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let start = findChildByName ns "w" "start" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -544,11 +545,11 @@ archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) -elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element) +elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element - | isElem ns "w" (notetype ++ "s") element = + | isElem ns "w" (notetype <> "s") element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" notetype element) in @@ -556,11 +557,11 @@ elemToNotes ns notetype element M.fromList pairs elemToNotes _ _ _ = Nothing -elemToComments :: NameSpaces -> Element -> M.Map String Element +elemToComments :: NameSpaces -> Element -> M.Map T.Text Element elemToComments ns element | isElem ns "w" "comments" element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" "comment" element) in @@ -632,7 +633,7 @@ testBitMask bitMaskS n = pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) pHeading = getParStyleField headingLev . pStyle -pNumInfo :: ParagraphStyle -> Maybe (String, String) +pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) pNumInfo = getParStyleField numInfo . pStyle elemToBodyPart :: NameSpaces -> Element -> D BodyPart @@ -640,7 +641,7 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do - expsLst <- eitherToD $ readOMML $ showElement c + expsLst <- eitherToD $ readOMML $ T.pack $ showElement c return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element @@ -664,7 +665,7 @@ elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChildByName ns "w" "tblPr" element >>= findChildByName ns "w" "tblCaption" - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" caption = fromMaybe "" caption' grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g @@ -687,10 +688,10 @@ lookupRelationship docLocation relid rels = where pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels -expandDrawingId :: String -> D (FilePath, B.ByteString) +expandDrawingId :: T.Text -> D (FilePath, B.ByteString) expandDrawingId s = do location <- asks envLocation - target <- asks (lookupRelationship location s . envRelationships) + target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -699,12 +700,12 @@ expandDrawingId s = do Nothing -> throwError DocxError Nothing -> throwError DocxError -getTitleAndAlt :: NameSpaces -> Element -> (String, String) +getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text) getTitleAndAlt ns element = let mbDocPr = findChildByName ns "wp" "inline" element >>= findChildByName ns "wp" "docPr" - title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title") - alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr") + title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title") + alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr") in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart @@ -716,7 +717,7 @@ elemToParPart ns element = let (title, alt) = getTitleAndAlt ns drawingElem a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrByName ns "r" "embed" + >>= findAttrTextByName ns "r" "embed" in case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) @@ -726,7 +727,7 @@ elemToParPart ns element | isElem ns "w" "r" element , Just _ <- findChildByName ns "w" "pict" element = let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrByName ns "r" "id" + >>= findAttrTextByName ns "r" "id" in case drawing of -- Todo: check out title and attr for deprecated format. @@ -795,7 +796,7 @@ elemToParPart ns element fldCharState <- gets stateFldCharState case fldCharState of FldCharOpen -> do - info <- eitherToD $ parseFieldInfo $ strContent instrText + info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} return NullParPart _ -> return NullParPart @@ -816,56 +817,56 @@ elemToParPart ns element return $ ChangedRuns change runs elemToParPart ns element | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttrByName ns "w" "id" element - , Just bmName <- findAttrByName ns "w" "name" element = + , Just bmId <- findAttrTextByName ns "w" "id" element + , Just bmName <- findAttrTextByName ns "w" "name" element = return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttrByName ns "r" "id" element = do + , Just relId <- findAttrTextByName ns "r" "id" element = do location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of Just target -> - case findAttrByName ns "w" "anchor" element of - Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs + case findAttrTextByName ns "w" "anchor" element of + Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs Nothing -> return $ ExternalHyperLink target runs Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttrByName ns "w" "anchor" element = do + , Just anchor <- findAttrTextByName ns "w" "anchor" element = do runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttrByName ns "w" "id" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttrByName ns "w" "id" element = + , Just cmtId <- findAttrTextByName ns "w" "id" element = return $ CommentEnd cmtId elemToParPart ns element | isElem ns "m" "oMath" element = - fmap PlainOMath (eitherToD $ readOMML $ showElement element) + fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element) elemToParPart _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element | isElem ns "w" "comment" element - , Just cmtId <- findAttrByName ns "w" "id" element - , Just cmtAuthor <- findAttrByName ns "w" "author" element - , Just cmtDate <- findAttrByName ns "w" "date" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element + , Just cmtAuthor <- findAttrTextByName ns "w" "author" element + , Just cmtDate <- findAttrTextByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem -lookupFootnote :: String -> Notes -> Maybe Element +lookupFootnote :: T.Text -> Notes -> Maybe Element lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s -lookupEndnote :: String -> Notes -> Maybe Element +lookupEndnote :: T.Text -> Notes -> Maybe Element lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s elemToExtent :: Element -> Extent @@ -876,7 +877,7 @@ elemToExtent drawingElem = where wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem - >>= findAttr (QName at Nothing Nothing) >>= safeRead + >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack childElemToRun :: NameSpaces -> Element -> D Run @@ -887,7 +888,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= @@ -900,7 +901,7 @@ childElemToRun ns element = return InlineChart childElemToRun ns element | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttrByName ns "w" "id" element = do + , Just fnId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -908,7 +909,7 @@ childElemToRun ns element Nothing -> return $ Footnote [] childElemToRun ns element | isElem ns "w" "endnoteReference" element - , Just enId <- findAttrByName ns "w" "id" element = do + , Just enId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -961,15 +962,15 @@ getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate) getTrackedChange ns element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate) getTrackedChange _ _ = Nothing @@ -978,7 +979,7 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (fmap ParaStyleId . findAttrByName ns "w" "val") + (fmap ParaStyleId . findAttrTextByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style @@ -1010,7 +1011,7 @@ elemToRunStyleD ns element charStyles <- asks envCharStyles let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrByName ns "w" "val" >>= + findAttrTextByName ns "w" "val" >>= flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle @@ -1020,12 +1021,12 @@ elemToRunElem ns element | isElem ns "w" "t" element || isElem ns "w" "delText" element || isElem ns "m" "t" element = do - let str = strContent element + let str = T.pack $ strContent element font <- asks envFont case font of Nothing -> return $ TextRun str Just f -> return . TextRun $ - map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str + T.map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab | isElem ns "w" "softHyphen" element = return SoftHyphen @@ -1043,11 +1044,11 @@ getSymChar ns element | Just s <- lowerFromPrivate <$> getCodepoint , Just font <- getFont = case readLitChar ("\\x" ++ s) of - [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char + [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char _ -> TextRun "" where getCodepoint = findAttrByName ns "w" "char" element - getFont = stringToFont =<< findAttrByName ns "w" "font" element + getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element lowerFromPrivate ('F':xs) = '0':xs lowerFromPrivate xs = xs getSymChar _ _ = TextRun "" @@ -1059,7 +1060,7 @@ elemToRunElems ns element let qualName = elemName ns "w" let font = do fontElem <- findElement (qualName "rFonts") element - stringToFont =<< + textToFont . T.pack =<< foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"] local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index ac2d6fa07..f81707e92 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -3,6 +3,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse.Styles Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -46,20 +47,19 @@ import Prelude import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except -import Data.Char (toLower) -import Data.List import Data.Function (on) import Data.String (IsString(..)) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light -newtype CharStyleId = CharStyleId String +newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) -newtype ParaStyleId = ParaStyleId String +newtype ParaStyleId = ParaStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) newtype CharStyleName = CharStyleName CIString @@ -68,25 +68,31 @@ newtype ParaStyleName = ParaStyleName CIString deriving (Show, Eq, Ord, IsString, FromStyleName) -- Case-insensitive comparisons -newtype CIString = CIString String deriving (Show, IsString, FromStyleName) +newtype CIString = CIString T.Text deriving (Show, IsString, FromStyleName) class FromStyleName a where - fromStyleName :: a -> String + fromStyleName :: a -> T.Text instance FromStyleName String where + fromStyleName = T.pack + +instance FromStyleName T.Text where fromStyleName = id class FromStyleId a where - fromStyleId :: a -> String + fromStyleId :: a -> T.Text instance FromStyleId String where + fromStyleId = T.pack + +instance FromStyleId T.Text where fromStyleId = id instance Eq CIString where - (==) = (==) `on` map toLower . coerce + (==) = (==) `on` T.toCaseFold . coerce instance Ord CIString where - compare = compare `on` map toLower . coerce + compare = compare `on` T.toCaseFold . coerce data VertAlign = BaseLn | SupScrpt | SubScrpt deriving Show @@ -108,7 +114,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool deriving Show data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) - , numInfo :: Maybe (String, String) + , numInfo :: Maybe (T.Text, T.Text) , psParentStyle :: Maybe ParStyle , pStyleName :: ParaStyleName , pStyleId :: ParaStyleId @@ -146,7 +152,7 @@ isBasedOnStyle ns element parentStyle , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element @@ -234,7 +240,7 @@ checkOnOff _ _ _ = Nothing elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) + = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element) <*> getElementStyleName ns element <*> (Just $ elemToRunStyle ns element parentStyle) @@ -267,32 +273,32 @@ elemToRunStyle _ _ _ = defaultRunStyle getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger =<< - (stripPrefix "heading " . map toLower $ + , Just n <- stringToInteger . T.unpack =<< + (T.stripPrefix "heading " . T.toLower $ fromStyleName styleName) , n > 0 = Just (styleName, fromInteger n) getHeaderLevel _ _ = Nothing -getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a +getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") - <|> findAttrByName ns "w" "styleId" el) + ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val") + <|> findAttrTextByName ns "w" "styleId" el) -getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text) getNumInfo ns element = do let numPr = findChildByName ns "w" "pPr" element >>= findChildByName ns "w" "numPr" lvl = fromMaybe "0" (numPr >>= findChildByName ns "w" "ilvl" >>= - findAttrByName ns "w" "val") + findAttrTextByName ns "w" "val") numId <- numPr >>= findChildByName ns "w" "numId" >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" return (numId, lvl) elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle elemToParStyleData ns element parentStyle - | Just styleId <- findAttrByName ns "w" "styleId" element + | Just styleId <- findAttrTextByName ns "w" "styleId" element , Just styleName <- getElementStyleName ns element = Just $ ParStyle { diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index f4855efd2..0de1114bd 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -19,11 +19,14 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces , findChildByName , findChildrenByName + , findAttrText , findAttrByName + , findAttrTextByName ) where import Prelude import Data.Maybe (mapMaybe) +import qualified Data.Text as T import Text.XML.Light type NameSpaces = [(String, String)] @@ -55,7 +58,13 @@ findChildrenByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChildren (elemName ns' pref name) el +findAttrText :: QName -> Element -> Maybe T.Text +findAttrText x = fmap T.pack . findAttr x + findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String findAttrByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findAttr (elemName ns' pref name) el + +findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text +findAttrTextByName a b c = fmap T.pack . findAttrByName a b c diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs index 60d406df1..3a92cfa19 100644 --- a/src/Text/Pandoc/Readers/DokuWiki.hs +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -2,6 +2,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.DokuWiki Copyright : Copyright (C) 2018-2019 Alexander Krotov @@ -20,8 +21,7 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isDigit) import qualified Data.Foldable as F -import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf) -import Data.List.Split (splitOn) +import Data.List (transpose) import Data.Maybe (fromMaybe, catMaybes) import Data.Text (Text) import qualified Data.Text as T @@ -31,7 +31,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, trim, underlineSpan) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow) -- | Read DokuWiki from an input string and return a Pandoc document. readDokuWiki :: PandocMonad m @@ -42,7 +42,7 @@ readDokuWiki opts s = do let input = crFilter s res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input case res of - Left e -> throwError $ PandocParsecError (T.unpack input) e + Left e -> throwError $ PandocParsecError input e Right d -> return d type DWParser = ParserT Text ParserState @@ -71,9 +71,9 @@ parseDokuWiki = B.doc . mconcat <$> many block <* spaces <* eof -- | Parse <code> and <file> attributes -codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)]) +codeLanguage :: PandocMonad m => DWParser m (Text, [Text], [(Text, Text)]) codeLanguage = try $ do - rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>'))) + rawLang <- option "-" (spaceChar *> manyTillChar anyChar (lookAhead (spaceChar <|> char '>'))) let attr = case rawLang of "-" -> [] l -> [l] @@ -81,16 +81,16 @@ codeLanguage = try $ do -- | Generic parser for <code> and <file> tags codeTag :: PandocMonad m - => ((String, [String], [(String, String)]) -> String -> a) - -> String + => ((Text, [Text], [(Text, Text)]) -> Text -> a) + -> Text -> DWParser m a codeTag f tag = try $ f <$ char '<' - <* string tag + <* textStr tag <*> codeLanguage <* manyTill anyChar (char '>') <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</" <* string tag <* char '>') + <*> manyTillChar anyChar (try $ string "</" <* textStr tag <* char '>') -- * Inline parsers @@ -167,19 +167,19 @@ underlined :: PandocMonad m => DWParser m B.Inlines underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines nowiki :: PandocMonad m => DWParser m B.Inlines -nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>") +nowiki = try $ B.text <$ string "<nowiki>" <*> manyTillChar anyChar (try $ string "</nowiki>") percent :: PandocMonad m => DWParser m B.Inlines -percent = try $ B.text <$> enclosed (string "%%") nestedString +percent = try $ B.text <$> enclosed (string "%%") nestedText -nestedString :: (Show a, PandocMonad m) - => DWParser m a -> DWParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar +nestedText :: (Show a, PandocMonad m) + => DWParser m a -> DWParser m Text +nestedText end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ many1Char spaceChar <* notFollowedBy end monospaced :: PandocMonad m => DWParser m B.Inlines -monospaced = try $ B.code <$> enclosed (string "''") nestedString +monospaced = try $ B.code <$> enclosed (string "''") nestedText subscript :: PandocMonad m => DWParser m B.Inlines subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines @@ -201,12 +201,12 @@ inlineFile :: PandocMonad m => DWParser m B.Inlines inlineFile = codeTag B.codeWith "file" inlineHtml :: PandocMonad m => DWParser m B.Inlines -inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>") +inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTillChar anyChar (try $ string "</html>") inlinePhp :: PandocMonad m => DWParser m B.Inlines -inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>") +inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTillChar anyChar (try $ string "</php>") -makeLink :: (String, String) -> B.Inlines +makeLink :: (Text, Text) -> B.Inlines makeLink (text, url) = B.link url "" $ B.str text autoEmail :: PandocMonad m => DWParser m B.Inlines @@ -220,7 +220,7 @@ autoLink = try $ do state <- getState guard $ stateAllowLinks state (text, url) <- uri - guard $ checkLink (last url) + guard $ checkLink (T.last url) return $ makeLink (text, url) where checkLink c @@ -234,10 +234,10 @@ nocache :: PandocMonad m => DWParser m B.Inlines nocache = try $ mempty <$ string "~~NOCACHE~~" str :: PandocMonad m => DWParser m B.Inlines -str = B.str <$> (many1 alphaNum <|> count 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) symbol :: PandocMonad m => DWParser m B.Inlines -symbol = B.str <$> count 1 nonspaceChar +symbol = B.str <$> countChar 1 nonspaceChar link :: PandocMonad m => DWParser m B.Inlines link = try $ do @@ -248,77 +248,78 @@ link = try $ do setState $ st{ stateAllowLinks = True } return l -isExternalLink :: String -> Bool -isExternalLink s = - case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of - (':':'/':'/':_) -> True - _ -> False - -isAbsolutePath :: String -> Bool -isAbsolutePath ('.':_) = False -isAbsolutePath s = ':' `elem` s - -normalizeDots :: String -> String -normalizeDots path@('.':_) = - case dropWhile (== '.') path of - ':':_ -> path - _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path -normalizeDots path = path +isExternalLink :: Text -> Bool +isExternalLink s = "://" `T.isPrefixOf` sSuff + where + sSuff = T.dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s + +isAbsolutePath :: Text -> Bool +isAbsolutePath (T.uncons -> Just ('.', _)) = False +isAbsolutePath s = T.any (== ':') s + +normalizeDots :: Text -> Text +normalizeDots path + | not (T.null pref) = case T.uncons suff of + Just (':', _) -> path + _ -> pref <> ":" <> suff + | otherwise = path + where + (pref, suff) = T.span (== '.') path -normalizeInternalPath :: String -> String +normalizeInternalPath :: Text -> Text normalizeInternalPath path = if isAbsolutePath path then ensureAbsolute normalizedPath else normalizedPath where - normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path - ensureAbsolute s@('/':_) = s - ensureAbsolute s = '/':s + normalizedPath = T.intercalate "/" $ dropWhile (== ".") $ T.splitOn ":" $ normalizeDots path + ensureAbsolute s@(T.uncons -> Just ('/', _)) = s + ensureAbsolute s = "/" <> s -normalizePath :: String -> String +normalizePath :: Text -> Text normalizePath path = if isExternalLink path then path else normalizeInternalPath path -urlToText :: String -> String +urlToText :: Text -> Text urlToText url = if isExternalLink url then url - else reverse $ takeWhile (/= ':') $ reverse url + else T.takeWhileEnd (/= ':') url -- Parse link or image parseLink :: PandocMonad m - => (String -> Maybe B.Inlines -> B.Inlines) - -> String - -> String + => (Text -> Maybe B.Inlines -> B.Inlines) + -> Text + -> Text -> DWParser m B.Inlines parseLink f l r = f - <$ string l - <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r))) - <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r))) - <* string r + <$ textStr l + <*> many1TillChar anyChar (lookAhead (void (char '|') <|> try (void $ textStr r))) + <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ textStr r))) + <* textStr r -- | Split Interwiki link into left and right part -- | Return Nothing if it is not Interwiki link -splitInterwiki :: String -> Maybe (String, String) +splitInterwiki :: Text -> Maybe (Text, Text) splitInterwiki path = - case span (\c -> isAlphaNum c || c == '.') path of - (l, '>':r) -> Just (l, r) + case T.span (\c -> isAlphaNum c || c == '.') path of + (l, T.uncons -> Just ('>', r)) -> Just (l, r) _ -> Nothing -interwikiToUrl :: String -> String -> String -interwikiToUrl "callto" page = "callto://" ++ page -interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page -interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page -interwikiToUrl "tel" page = "tel:" ++ page -interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page -interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page -interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky" +interwikiToUrl :: Text -> Text -> Text +interwikiToUrl "callto" page = "callto://" <> page +interwikiToUrl "doku" page = "https://www.dokuwiki.org/" <> page +interwikiToUrl "phpfn" page = "https://secure.php.net/" <> page +interwikiToUrl "tel" page = "tel:" <> page +interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" <> page +interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" <> page +interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page +interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page +interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page +interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page +interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky" linkText :: PandocMonad m => DWParser m B.Inlines linkText = parseLink fromRaw "[[" "]]" @@ -338,23 +339,23 @@ linkText = parseLink fromRaw "[[" "]]" Just (_, r) -> r -- Matches strings like "100x100" (width x height) and "50" (width) -isWidthHeightParameter :: String -> Bool +isWidthHeightParameter :: Text -> Bool isWidthHeightParameter s = - case s of - (x:xs) -> - isDigit x && case dropWhile isDigit xs of - ('x':ys@(_:_)) -> all isDigit ys - "" -> True + case T.uncons s of + Just (x, xs) -> + isDigit x && case T.uncons $ T.dropWhile isDigit xs of + Just ('x', ys) | not (T.null ys) -> T.all isDigit ys + Nothing -> True _ -> False _ -> False -parseWidthHeight :: String -> (Maybe String, Maybe String) +parseWidthHeight :: Text -> (Maybe Text, Maybe Text) parseWidthHeight s = (width, height) where - width = Just $ takeWhile isDigit s + width = Just $ T.takeWhile isDigit s height = - case dropWhile isDigit s of - ('x':xs) -> Just xs + case T.uncons $ T.dropWhile isDigit s of + Just ('x', xs) -> Just xs _ -> Nothing image :: PandocMonad m => DWParser m B.Inlines @@ -365,17 +366,17 @@ image = try $ parseLink fromRaw "{{" "}}" then B.link normalizedPath "" (fromMaybe defaultDescription description) else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description) where - (path', parameters) = span (/= '?') $ trim path + (path', parameters) = T.span (/= '?') $ trim path normalizedPath = normalizePath path' - leftPadding = " " `isPrefixOf` path - rightPadding = " " `isSuffixOf` path + leftPadding = " " `T.isPrefixOf` path + rightPadding = " " `T.isSuffixOf` path classes = case (leftPadding, rightPadding) of (False, False) -> [] (False, True) -> ["align-left"] (True, False) -> ["align-right"] (True, True) -> ["align-center"] - parameterList = splitOn "&" $ drop 1 parameters + parameterList = T.splitOn "&" $ T.drop 1 parameters linkOnly = "linkonly" `elem` parameterList (width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList) attributes = catMaybes [fmap ("width",) width, fmap ("height",) height] @@ -389,7 +390,7 @@ block = do <|> blockElements <|> para skipMany blankline - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blockElements :: PandocMonad m => DWParser m B.Blocks @@ -417,30 +418,30 @@ header = try $ do attr <- registerHeader nullAttr contents return $ B.headerWith attr (7 - lev) contents -list :: PandocMonad m => String -> DWParser m B.Blocks +list :: PandocMonad m => Text -> DWParser m B.Blocks list prefix = bulletList prefix <|> orderedList prefix -bulletList :: PandocMonad m => String -> DWParser m B.Blocks +bulletList :: PandocMonad m => Text -> DWParser m B.Blocks bulletList prefix = try $ B.bulletList <$> parseList prefix '*' -orderedList :: PandocMonad m => String -> DWParser m B.Blocks +orderedList :: PandocMonad m => Text -> DWParser m B.Blocks orderedList prefix = try $ B.orderedList <$> parseList prefix '-' parseList :: PandocMonad m - => String + => Text -> Char -> DWParser m [B.Blocks] parseList prefix marker = many1 ((<>) <$> item <*> fmap mconcat (many continuation)) where - continuation = try $ list (" " ++ prefix) - item = try $ string prefix *> char marker *> char ' ' *> itemContents + continuation = try $ list (" " <> prefix) + item = try $ textStr prefix *> char marker *> char ' ' *> itemContents itemContents = B.plain . mconcat <$> many1Till inline' eol indentedCode :: PandocMonad m => DWParser m B.Blocks -indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine +indentedCode = try $ B.codeBlock . T.unlines <$> many1 indentedLine where - indentedLine = try $ string " " *> manyTill anyChar eol + indentedLine = try $ string " " *> manyTillChar anyChar eol quote :: PandocMonad m => DWParser m B.Blocks quote = try $ nestedQuote 0 @@ -456,13 +457,13 @@ blockHtml :: PandocMonad m => DWParser m B.Blocks blockHtml = try $ B.rawBlock "html" <$ string "<HTML>" <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</HTML>") + <*> manyTillChar anyChar (try $ string "</HTML>") blockPhp :: PandocMonad m => DWParser m B.Blocks blockPhp = try $ B.codeBlockWith ("", ["php"], []) <$ string "<PHP>" <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</PHP>") + <*> manyTillChar anyChar (try $ string "</PHP>") table :: PandocMonad m => DWParser m B.Blocks table = do diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 8e9746090..93ddeb9ee 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.EPUB Copyright : Copyright (C) 2014-2019 Matthew Pickering @@ -24,7 +25,8 @@ import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) -import Data.List (isInfixOf, isPrefixOf) +import Data.List (isInfixOf) +import qualified Data.Text as T import qualified Data.Map as M (Map, elems, fromList, lookup) import Data.Maybe (fromMaybe, mapMaybe) import qualified Data.Text.Lazy as TL @@ -67,9 +69,9 @@ archiveToEPUB os archive = do -- No need to collapse here as the image path is from the manifest file let coverDoc = fromMaybe mempty (imageToPandoc <$> cover) spine <- parseSpine items content - let escapedSpine = map (escapeURI . takeFileName . fst) spine + let escapedSpine = map (escapeURI . T.pack . takeFileName . fst) spine Pandoc _ bs <- - foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine)) + foldM' (\a b -> ((a <>) . walk (prependHash $ escapedSpine)) `liftM` parseSpineElem root b) mempty spine let ast = coverDoc <> Pandoc meta bs fetchImages (M.elems items) root archive ast @@ -79,7 +81,7 @@ archiveToEPUB os archive = do parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do doc <- mimeToReader mime r path - let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty + let docSpan = B.doc $ B.para $ B.spanWith (T.pack $ takeFileName path, [], []) mempty return $ docSpan <> doc mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (unEscapeString -> root) @@ -108,18 +110,19 @@ fetchImages mimes root arc (query iq -> links) = <$> findEntryByPath abslink arc iq :: Inline -> [FilePath] -iq (Image _ _ (url, _)) = [url] +iq (Image _ _ (url, _)) = [T.unpack url] iq _ = [] -- Remove relative paths renameImages :: FilePath -> Inline -> Inline renameImages root img@(Image attr a (url, b)) - | "data:" `isPrefixOf` url = img - | otherwise = Image attr a (collapseFilePath (root </> url), b) + | "data:" `T.isPrefixOf` url = img + | otherwise = Image attr a ( T.pack $ collapseFilePath (root </> T.unpack url) + , b) renameImages _ x = x imageToPandoc :: FilePath -> Pandoc -imageToPandoc s = B.doc . B.para $ B.image s "" mempty +imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty imageMimes :: [MimeType] imageMimes = ["image/gif", "image/jpeg", "image/png"] @@ -144,7 +147,7 @@ parseManifest content coverId = do uid <- findAttrE (emptyName "id") e href <- findAttrE (emptyName "href") e mime <- findAttrE (emptyName "media-type") e - return (uid, (href, mime)) + return (uid, (href, T.pack mime)) parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do @@ -172,11 +175,11 @@ parseMeta content = do -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem parseMetaItem :: Element -> Meta -> Meta parseMetaItem e@(stripNamespace . elName -> field) meta = - addMetaField (renameMeta field) (B.str $ strContent e) meta + addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta -renameMeta :: String -> String +renameMeta :: String -> T.Text renameMeta "creator" = "author" -renameMeta s = s +renameMeta s = T.pack s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do @@ -197,26 +200,26 @@ getManifest archive = do fixInternalReferences :: FilePath -> Pandoc -> Pandoc fixInternalReferences pathToFile = walk (renameImages root) - . walk (fixBlockIRs filename) + . walk (fixBlockIRs filename) . walk (fixInlineIRs filename) where - (root, escapeURI -> filename) = splitFileName pathToFile + (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile fixInlineIRs :: String -> Inline -> Inline fixInlineIRs s (Span as v) = Span (fixAttrs s as) v fixInlineIRs s (Code as code) = Code (fixAttrs s as) code -fixInlineIRs s (Link as is ('#':url, tit)) = +fixInlineIRs s (Link as is (T.uncons -> Just ('#', url), tit)) = Link (fixAttrs s as) is (addHash s url, tit) fixInlineIRs s (Link as is t) = Link (fixAttrs s as) is t fixInlineIRs _ v = v -prependHash :: [String] -> Inline -> Inline +prependHash :: [T.Text] -> Inline -> Inline prependHash ps l@(Link attr is (url, tit)) - | or [s `isPrefixOf` url | s <- ps] = - Link attr is ('#':url, tit) + | or [s `T.isPrefixOf` url | s <- ps] = + Link attr is ("#" <> url, tit) | otherwise = l prependHash _ i = i @@ -230,17 +233,17 @@ fixBlockIRs s (CodeBlock as code) = fixBlockIRs _ b = b fixAttrs :: FilePath -> B.Attr -> B.Attr -fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs) +fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) -addHash :: String -> String -> String +addHash :: String -> T.Text -> T.Text addHash _ "" = "" -addHash s ident = takeFileName s ++ "#" ++ ident +addHash s ident = T.pack (takeFileName s) <> "#" <> ident -removeEPUBAttrs :: [(String, String)] -> [(String, String)] +removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs -isEPUBAttr :: (String, String) -> Bool -isEPUBAttr (k, _) = "epub:" `isPrefixOf` k +isEPUBAttr :: (T.Text, a) -> Bool +isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k -- Library @@ -291,4 +294,4 @@ findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x mkE :: PandocMonad m => String -> Maybe a -> m a -mkE s = maybe (throwError . PandocParseError $ s) return +mkE s = maybe (throwError . PandocParseError $ T.pack $ s) return diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index 0b25b9fed..6eed3c104 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -1,5 +1,6 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.FB2 Copyright : Copyright (C) 2018-2019 Alexander Krotov @@ -27,12 +28,11 @@ import Control.Monad.Except (throwError) import Control.Monad.State.Strict import Data.ByteString.Lazy.Char8 ( pack ) import Data.ByteString.Base64.Lazy -import Data.Char (isSpace, toUpper) import Data.Functor -import Data.List (dropWhileEnd, intersperse) -import Data.List.Split (splitOn) +import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) +import qualified Data.Text as T import Data.Default import Data.Maybe import Text.HTML.TagSoup.Entity (lookupEntity) @@ -48,8 +48,8 @@ type FB2 m = StateT FB2State m data FB2State = FB2State{ fb2SectionLevel :: Int , fb2Meta :: Meta - , fb2Authors :: [String] - , fb2Notes :: M.Map String Blocks + , fb2Authors :: [Text] + , fb2Notes :: M.Map Text Blocks } deriving Show instance Default FB2State where @@ -76,19 +76,20 @@ readFB2 _ inp = -- * Utility functions -trim :: String -> String -trim = dropWhileEnd isSpace . dropWhile isSpace +trim :: Text -> Text +trim = T.strip -removeHash :: String -> String -removeHash ('#':xs) = xs -removeHash xs = xs +removeHash :: Text -> Text +removeHash t = case T.uncons t of + Just ('#', xs) -> xs + _ -> t -convertEntity :: String -> String -convertEntity e = fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: String -> Text +convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e parseInline :: PandocMonad m => Content -> FB2 m Inlines parseInline (Elem e) = - case qName $ elName e of + case T.pack $ qName $ elName e of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -96,12 +97,12 @@ parseInline (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ strContent e + "code" -> pure $ code $ T.pack $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement name pure mempty -parseInline (Text x) = pure $ text $ cdData x +parseInline (Text x) = pure $ text $ T.pack $ cdData x parseInline (CRef r) = pure $ str $ convertEntity r parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks @@ -111,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel < parseRootElement :: PandocMonad m => Element -> FB2 m Blocks parseRootElement e = - case qName $ elName e of + case T.pack $ qName $ elName e of "FictionBook" -> do -- Parse notes before parsing the rest of the content. case filterChild isNotesBody e of @@ -144,7 +145,7 @@ parseNote e = Just sectionId -> do content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e) oldNotes <- gets fb2Notes - modify $ \s -> s { fb2Notes = M.insert ("#" ++ sectionId) content oldNotes } + modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes } pure () where isTitle x = qName (elName x) == "title" @@ -156,7 +157,7 @@ parseNote e = -- | Parse a child of @\<FictionBook>@ element. parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks parseFictionBookChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "stylesheet" -> pure mempty -- stylesheet is ignored "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e) "body" -> if isNotesBody e @@ -168,7 +169,7 @@ parseFictionBookChild e = -- | Parse a child of @\<description>@ element. parseDescriptionChild :: PandocMonad m => Element -> FB2 m () parseDescriptionChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title-info" -> mapM_ parseTitleInfoChild (elChildren e) "src-title-info" -> pure () -- ignore "document-info" -> pure () @@ -176,13 +177,13 @@ parseDescriptionChild e = "custom-info" -> pure () "output" -> pure () name -> do - report $ IgnoredElement $ name ++ " in description" + report $ IgnoredElement $ name <> " in description" pure mempty -- | Parse a child of @\<body>@ element. parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks parseBodyChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "image" -> parseImageElement e "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e) "epigraph" -> parseEpigraph e @@ -196,25 +197,25 @@ parseBinaryElement e = (Nothing, _) -> report $ IgnoredElement "binary without id attribute" (Just _, Nothing) -> report $ IgnoredElement "binary without content-type attribute" - (Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e))) + (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e))) -- * Type parsers -- | Parse @authorType@ -parseAuthor :: PandocMonad m => Element -> FB2 m String -parseAuthor e = unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) +parseAuthor :: PandocMonad m => Element -> FB2 m Text +parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) -parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe String) +parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text) parseAuthorChild e = - case qName $ elName e of - "first-name" -> pure $ Just $ strContent e - "middle-name" -> pure $ Just $ strContent e - "last-name" -> pure $ Just $ strContent e - "nickname" -> pure $ Just $ strContent e - "home-page" -> pure $ Just $ strContent e - "email" -> pure $ Just $ strContent e + case T.pack $ qName $ elName e of + "first-name" -> pure $ Just $ T.pack $ strContent e + "middle-name" -> pure $ Just $ T.pack $ strContent e + "last-name" -> pure $ Just $ T.pack $ strContent e + "nickname" -> pure $ Just $ T.pack $ strContent e + "home-page" -> pure $ Just $ T.pack $ strContent e + "email" -> pure $ Just $ T.pack $ strContent e name -> do - report $ IgnoredElement $ name ++ " in author" + report $ IgnoredElement $ name <> " in author" pure Nothing -- | Parse @titleType@ @@ -236,13 +237,13 @@ parseTitleContent _ = pure Nothing parseImageElement :: PandocMonad m => Element -> FB2 m Blocks parseImageElement e = case href of - Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt + Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt Nothing -> do report $ IgnoredElement " image without href" pure mempty - where alt = maybe mempty str $ findAttr (unqual "alt") e - title = fromMaybe "" $ findAttr (unqual "title") e - imgId = fromMaybe "" $ findAttr (unqual "id") e + where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e + title = maybe "" T.pack $ findAttr (unqual "title") e + imgId = maybe "" T.pack $ findAttr (unqual "id") e href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e -- | Parse @pType@ @@ -256,7 +257,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e) -- | Parse @citeType@ child parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks parseCiteChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "empty-line" -> pure horizontalRule @@ -271,13 +272,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e) parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks parsePoemChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "epigraph" -> parseEpigraph e "stanza" -> parseStanza e "text-author" -> para <$> parsePType e - "date" -> pure $ para $ text $ strContent e + "date" -> pure $ para $ text $ T.pack $ strContent e name -> report (UnexpectedXmlElement name "poem") $> mempty parseStanza :: PandocMonad m => Element -> FB2 m Blocks @@ -290,7 +291,7 @@ joinLineBlocks [] = [] parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks parseStanzaChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "v" -> lineBlock . (:[]) <$> parsePType e @@ -300,11 +301,11 @@ parseStanzaChild e = parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks parseEpigraph e = divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e) - where divId = fromMaybe "" $ findAttr (unqual "id") e + where divId = maybe "" T.pack $ findAttr (unqual "id") e parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks parseEpigraphChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -318,7 +319,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e) parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks parseAnnotationChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -332,14 +333,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks parseSection e = do n <- gets fb2SectionLevel modify $ \st -> st{ fb2SectionLevel = n + 1 } - let sectionId = fromMaybe "" $ findAttr (unqual "id") e + let sectionId = maybe "" T.pack $ findAttr (unqual "id") e bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e) modify $ \st -> st{ fb2SectionLevel = n } pure bs parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks parseSectionChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseBodyChild e "epigraph" -> parseEpigraph e "image" -> parseImageElement e @@ -361,16 +362,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e) parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines parseNamedStyle e = do content <- mconcat <$> mapM parseNamedStyleChild (elContent e) - let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e + let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e case findAttr (unqual "name") e of - Just name -> pure $ spanWith ("", [name], lang) content + Just name -> pure $ spanWith ("", [T.pack name], lang) content Nothing -> do report $ IgnoredElement "link without required name" pure mempty parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines parseNamedStyleChild (Elem e) = - case qName (elName e) of + case T.pack $ qName (elName e) of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -378,10 +379,10 @@ parseNamedStyleChild (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ strContent e + "code" -> pure $ code $ T.pack $ strContent e "image" -> parseInlineImageElement e name -> do - report $ IgnoredElement $ name ++ " in style" + report $ IgnoredElement $ name <> " in style" pure mempty parseNamedStyleChild x = parseInline x @@ -390,7 +391,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines parseLinkType e = do content <- mconcat <$> mapM parseStyleLinkType (elContent e) notes <- gets fb2Notes - case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of Just href -> case findAttr (QName "type" Nothing Nothing) e of Just "note" -> case M.lookup href notes of Nothing -> pure $ link href "" content @@ -417,19 +418,21 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet -- | Parse @title-infoType@ parseTitleInfoChild :: PandocMonad m => Element -> FB2 m () parseTitleInfoChild e = - case qName (elName e) of + case T.pack $ qName (elName e) of "genre" -> pure () "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st}) - "book-title" -> modify (setMeta "title" (text $ strContent e)) + "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e)) "annotation" -> parseAnnotation e >>= modify . setMeta "abstract" - "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e)) - "date" -> modify (setMeta "date" (text $ strContent e)) + "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn "," + $ T.pack + $ strContent e)) + "date" -> modify (setMeta "date" (text $ T.pack $ strContent e)) "coverpage" -> parseCoverPage e "lang" -> pure () "src-lang" -> pure () "translator" -> pure () "sequence" -> pure () - name -> report $ IgnoredElement $ name ++ " in title-info" + name -> report $ IgnoredElement $ name <> " in title-info" parseCoverPage :: PandocMonad m => Element -> FB2 m () parseCoverPage e = @@ -437,7 +440,7 @@ parseCoverPage e = Just img -> case href of Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src)) Nothing -> pure () - where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img + where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img Nothing -> pure () -- | Parse @inlineImageType@ element @@ -450,5 +453,5 @@ parseInlineImageElement e = Nothing -> do report $ IgnoredElement "inline image without href" pure mempty - where alt = maybe mempty str $ findAttr (unqual "alt") e - href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e + where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e + href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e03ac6a97..1c2892d6a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,5 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -35,8 +35,7 @@ import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT) import Data.Char (isAlphaNum, isLetter) import Data.Default (Default (..), def) import Data.Foldable (for_) -import Data.List (isPrefixOf) -import Data.List.Split (wordsBy, splitWhen) +import Data.List.Split (splitWhen) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid (First (..)) @@ -62,8 +61,8 @@ import Text.Pandoc.Options ( extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI, - extractSpaces, htmlSpanLikeElements, - onlySimpleTableCells, safeRead, underlineSpan) + extractSpaces, htmlSpanLikeElements, elemText, splitTextBy, + onlySimpleTableCells, safeRead, underlineSpan, tshow) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) @@ -93,14 +92,14 @@ readHtml opts inp = do "source" tags case result of Right doc -> return doc - Left err -> throwError $ PandocParseError $ getError err + Left err -> throwError $ PandocParseError $ T.pack $ getError err replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes bs = do st <- getState return $ walk (replaceNotes' (noteTable st)) bs -replaceNotes' :: [(String, Blocks)] -> Inline -> Inline +replaceNotes' :: [(Text, Blocks)] -> Inline -> Inline replaceNotes' noteTbl (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) $ lookup ref noteTbl replaceNotes' _ x = x @@ -108,9 +107,9 @@ replaceNotes' _ x = x data HTMLState = HTMLState { parserState :: ParserState, - noteTable :: [(String, Blocks)], + noteTable :: [(Text, Blocks)], baseHref :: Maybe URI, - identifiers :: Set.Set String, + identifiers :: Set.Set Text, logMessages :: [LogMessage], macros :: M.Map Text Macro } @@ -134,7 +133,7 @@ pHtml :: PandocMonad m => TagParser m Blocks pHtml = try $ do (TagOpen "html" attr) <- lookAhead pAny for_ (lookup "lang" attr) $ - updateState . B.setMeta "lang" . B.text . T.unpack + updateState . B.setMeta "lang" . B.text pInTags "html" block pBody :: PandocMonad m => TagParser m Blocks @@ -146,11 +145,11 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny) setTitle t = mempty <$ updateState (B.setMeta "title" t) pMetaTag = do mt <- pSatisfy (matchTagOpen "meta" []) - let name = T.unpack $ fromAttrib "name" mt - if null name + let name = fromAttrib "name" mt + if T.null name then return mempty else do - let content = T.unpack $ fromAttrib "content" mt + let content = fromAttrib "content" mt updateState $ \s -> let ps = parserState s in s{ parserState = ps{ @@ -187,13 +186,13 @@ block = do , pFigure , pRawHtmlBlock ] - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res -namespaces :: PandocMonad m => [(String, TagParser m Inlines)] +namespaces :: PandocMonad m => [(Text, TagParser m Inlines)] namespaces = [(mathMLNamespace, pMath True)] -mathMLNamespace :: String +mathMLNamespace :: Text mathMLNamespace = "http://www.w3.org/1998/Math/MathML" eSwitch :: (PandocMonad m, Monoid a) @@ -233,7 +232,7 @@ eFootnote = try $ do content <- pInTags tag block addNote ident content -addNote :: PandocMonad m => String -> Blocks -> TagParser m () +addNote :: PandocMonad m => Text -> Blocks -> TagParser m () addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s}) eNoteref :: PandocMonad m => TagParser m Inlines @@ -245,8 +244,8 @@ eNoteref = try $ do -> (lookup "type" as <|> lookup "epub:type" as) == Just "noteref" _ -> False) - ident <- case T.unpack <$> lookup "href" attr of - Just ('#':rest) -> return rest + ident <- case lookup "href" attr >>= T.uncons of + Just ('#', rest) -> return rest _ -> mzero _ <- manyTill pAny (pSatisfy (\case TagClose t -> t == tag @@ -287,7 +286,7 @@ pListItem nonItem = do maybe id addId (lookup "id" attr) <$> pInTags "li" block <* skipMany nonItem -parseListStyleType :: String -> ListNumberStyle +parseListStyleType :: Text -> ListNumberStyle parseListStyleType "lower-roman" = LowerRoman parseListStyleType "upper-roman" = UpperRoman parseListStyleType "lower-alpha" = LowerAlpha @@ -295,7 +294,7 @@ parseListStyleType "upper-alpha" = UpperAlpha parseListStyleType "decimal" = Decimal parseListStyleType _ = DefaultStyle -parseTypeAttr :: String -> ListNumberStyle +parseTypeAttr :: Text -> ListNumberStyle parseTypeAttr "i" = LowerRoman parseTypeAttr "I" = UpperRoman parseTypeAttr "a" = LowerAlpha @@ -404,20 +403,19 @@ pDiv = try $ do pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do - raw <- T.unpack <$> - (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea" - <|> pRawTag) + raw <- (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea" + <|> pRawTag) exts <- getOption readerExtensions - if extensionEnabled Ext_raw_html exts && not (null raw) + if extensionEnabled Ext_raw_html exts && not (T.null raw) then return $ B.rawBlock "html" raw else ignore raw -ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a +ignore :: (Monoid a, PandocMonad m) => Text -> TagParser m a ignore raw = do pos <- getPosition -- raw can be null for tags like <!DOCTYPE>; see paRawTag -- in this case we don't want a warning: - unless (null raw) $ + unless (T.null raw) $ logMessage $ SkippedContent raw pos return mempty @@ -438,7 +436,7 @@ eSection = try $ do headerLevel :: Text -> TagParser m Int headerLevel tagtype = - case safeRead (T.unpack (T.drop 1 tagtype)) of + case safeRead (T.drop 1 tagtype) of Just level -> -- try (do -- guardEnabled Ext_epub_html_exts @@ -468,7 +466,7 @@ pHeader = try $ do level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr - let classes = maybe [] words $ lookup "class" attr + let classes = maybe [] T.words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] attr'' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle @@ -529,14 +527,14 @@ pCol = try $ do optional $ pSatisfy (matchTagClose "col") skipMany pBlank let width = case lookup "width" attribs of - Nothing -> case lookup "style" attribs of - Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs -> - fromMaybe 0.0 $ safeRead (filter - (`notElem` (" \t\r\n%'\";" :: [Char])) xs) - _ -> 0.0 - Just x | not (null x) && last x == '%' -> - fromMaybe 0.0 $ safeRead (init x) - _ -> 0.0 + Nothing -> case lookup "style" attribs of + Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> + fromMaybe 0.0 $ safeRead (T.filter + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) + _ -> 0.0 + Just (T.unsnoc -> Just (xs, '%')) -> + fromMaybe 0.0 $ safeRead xs + _ -> 0.0 if width > 0.0 then return $ width / 100.0 else return 0.0 @@ -562,7 +560,7 @@ pCell celltype = try $ do let extractAlign' [] = "" extractAlign' ("text-align":x:_) = x extractAlign' (_:xs) = extractAlign' xs - let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':']) + let extractAlign = extractAlign' . splitTextBy (`elemText` " \t;:") let align = case maybeFromAttrib "align" tag `mplus` (extractAlign <$> maybeFromAttrib "style" tag) of Just "left" -> AlignLeft @@ -610,7 +608,7 @@ pFigure = try $ do let caption = fromMaybe mempty mbcap case B.toList <$> mbimg of Just [Image attr _ (url, tit)] -> - return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption + return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption _ -> mzero pCodeBlock :: PandocMonad m => TagParser m Blocks @@ -618,21 +616,21 @@ pCodeBlock = try $ do TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" []) let attr = toStringAttr attr' contents <- manyTill pAny (pCloses "pre" <|> eof) - let rawText = concatMap tagToString contents + let rawText = T.concat $ map tagToText contents -- drop leading newline if any - let result' = case rawText of - '\n':xs -> xs - _ -> rawText + let result' = case T.uncons rawText of + Just ('\n', xs) -> xs + _ -> rawText -- drop trailing newline if any - let result = case reverse result' of - '\n':_ -> init result' - _ -> result' + let result = case T.unsnoc result' of + Just (result'', '\n') -> result'' + _ -> result' return $ B.codeBlockWith (mkAttr attr) result -tagToString :: Tag Text -> String -tagToString (TagText s) = T.unpack s -tagToString (TagOpen "br" _) = "\n" -tagToString _ = "" +tagToText :: Tag Text -> Text +tagToText (TagText s) = s +tagToText (TagOpen "br" _) = "\n" +tagToText _ = "" inline :: PandocMonad m => TagParser m Inlines inline = choice @@ -667,7 +665,7 @@ pLocation = do pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSat f = do pos <- getPosition - token show (const pos) (\x -> if f x then Just x else Nothing) + token tshow (const pos) (\x -> if f x then Just x else Nothing) pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSatisfy f = try $ optional pLocation >> pSat f @@ -688,10 +686,10 @@ pQ = choice $ map try [citedQuote, normalQuote] where citedQuote = do tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst)) - url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag - let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + url <- canonicalizeUrl $ fromAttrib "cite" tag + let uid = fromMaybe (fromAttrib "name" tag) $ maybeFromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + let cls = T.words $ fromAttrib "class" tag makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)]) normalQuote = do @@ -729,7 +727,7 @@ pSpanLike = TagOpen _ attrs <- pSatisfy $ tagOpenLit tagName (const True) let (ids, cs, kvs) = mkAttr . toStringAttr $ attrs content <- mconcat <$> manyTill inline (pCloses tagName <|> eof) - return $ B.spanWith (ids, T.unpack tagName : cs, kvs) content + return $ B.spanWith (ids, tagName : cs, kvs) content pSmall :: PandocMonad m => TagParser m Inlines pSmall = pInlinesInTags "small" (B.spanWith ("",["small"],[])) @@ -753,19 +751,18 @@ pLineBreak = do -- Unlike fromAttrib from tagsoup, this distinguishes -- between a missing attribute and an attribute with empty content. -maybeFromAttrib :: String -> Tag Text -> Maybe String -maybeFromAttrib name (TagOpen _ attrs) = - T.unpack <$> lookup (T.pack name) attrs +maybeFromAttrib :: Text -> Tag Text -> Maybe Text +maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs maybeFromAttrib _ _ = Nothing pLink :: PandocMonad m => TagParser m Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) - let title = T.unpack $ fromAttrib "title" tag + let title = fromAttrib "title" tag -- take id from id attribute if present, otherwise name - let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + let uid = fromMaybe (fromAttrib "name" tag) $ maybeFromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + let cls = T.words $ fromAttrib "class" tag lab <- mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of @@ -778,34 +775,33 @@ pLink = try $ do pImage :: PandocMonad m => TagParser m Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") - url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag - let title = T.unpack $ fromAttrib "title" tag - let alt = T.unpack $ fromAttrib "alt" tag - let uid = T.unpack $ fromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + url <- canonicalizeUrl $ fromAttrib "src" tag + let title = fromAttrib "title" tag + let alt = fromAttrib "alt" tag + let uid = fromAttrib "id" tag + let cls = T.words $ fromAttrib "class" tag let getAtt k = case fromAttrib k tag of "" -> [] - v -> [(T.unpack k, T.unpack v)] + v -> [(k, v)] let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"] return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) -pCodeWithClass :: PandocMonad m => [(T.Text,String)] -> TagParser m Inlines -pCodeWithClass elemToClass = try $ do +pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines +pCodeWithClass elemToClass = try $ do let tagTest = flip elem . fmap fst $ elemToClass TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True) result <- manyTill pAny (pCloses open) let (ids,cs,kvs) = mkAttr . toStringAttr $ attr' cs' = maybe cs (:cs) . lookup open $ elemToClass return . B.codeWith (ids,cs',kvs) . - unwords . lines . T.unpack . innerText $ result + T.unwords . T.lines . innerText $ result pCode :: PandocMonad m => TagParser m Inlines pCode = try $ do (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) let attr = toStringAttr attr' result <- manyTill pAny (pCloses open) - return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $ - innerText result + return $ B.codeWith (mkAttr attr) $ T.unwords $ T.lines $ innerText result pSpan :: PandocMonad m => TagParser m Inlines pSpan = try $ do @@ -817,7 +813,7 @@ pSpan = try $ do where styleAttr = fromMaybe "" $ lookup "style" attr fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr classes = fromMaybe [] $ - words <$> lookup "class" attr + T.words <$> lookup "class" attr let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) return $ tag contents @@ -829,18 +825,17 @@ pRawHtmlInline = do then pSatisfy (not . isBlockTag) else pSatisfy isInlineTag exts <- getOption readerExtensions - let raw = T.unpack $ renderTags' [result] + let raw = renderTags' [result] if extensionEnabled Ext_raw_html exts then return $ B.rawInline "html" raw else ignore raw -mathMLToTeXMath :: String -> Either String String +mathMLToTeXMath :: Text -> Either Text Text mathMLToTeXMath s = writeTeX <$> readMathML s -toStringAttr :: [(Text, Text)] -> [(String, String)] +toStringAttr :: [(Text, Text)] -> [(Text, Text)] toStringAttr = map go - where go (x,y) = (T.unpack (fromMaybe x $ T.stripPrefix "data-" x), - T.unpack y) + where go (x,y) = (fromMaybe x $ T.stripPrefix "data-" x, y) pScriptMath :: PandocMonad m => TagParser m Inlines pScriptMath = try $ do @@ -849,8 +844,7 @@ pScriptMath = try $ do Just x | "math/tex" `T.isPrefixOf` x -> return $ "display" `T.isSuffixOf` x _ -> mzero - contents <- T.unpack . innerText <$> - manyTill pAny (pSatisfy (matchTagClose "script")) + contents <- innerText <$> manyTill pAny (pSatisfy (matchTagClose "script")) return $ (if isdisplay then B.displayMath else B.math) contents pMath :: PandocMonad m => Bool -> TagParser m Inlines @@ -862,11 +856,11 @@ pMath inCase = try $ do unless inCase $ guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr)) contents <- manyTill pAny (pSatisfy (matchTagClose "math")) - case mathMLToTeXMath (T.unpack $ renderTags $ + case mathMLToTeXMath (renderTags $ [open] <> contents <> [TagClose "math"]) of Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $ - T.unpack $ innerText contents - Right [] -> return mempty + innerText contents + Right "" -> return mempty Right x -> return $ case lookup "display" attr of Just "block" -> B.displayMath x _ -> B.math x @@ -925,7 +919,7 @@ pTagText = try $ do parsed <- lift $ lift $ flip runReaderT qu $ runParserT (many pTagContents) st "text" str case parsed of - Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'" + Left _ -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'" Right result -> return $ mconcat result pBlank :: PandocMonad m => TagParser m () @@ -954,11 +948,11 @@ pRawTeX = do guardEnabled Ext_raw_tex inp <- getInput st <- getState - res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp) + res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" inp case res of Left _ -> mzero Right (contents, raw) -> do - _ <- count (length raw) anyChar + _ <- count (T.length raw) anyChar return $ B.rawInline "tex" contents pStr :: PandocMonad m => InlinesParser m Inlines @@ -966,7 +960,7 @@ pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) updateLastStrPos - return $ B.str result + return $ B.str $ T.pack result isSpecial :: Char -> Bool isSpecial '"' = True @@ -982,7 +976,7 @@ isSpecial '\8221' = True isSpecial _ = False pSymbol :: PandocMonad m => InlinesParser m Inlines -pSymbol = satisfy isSpecial >>= return . B.str . (:[]) +pSymbol = satisfy isSpecial >>= return . B.str . T.singleton isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML @@ -1019,7 +1013,7 @@ pBad = do '\158' -> '\382' '\159' -> '\376' _ -> '?' - return $ B.str [c'] + return $ B.str $ T.singleton c' pSpace :: PandocMonad m => InlinesParser m Inlines pSpace = many1 (satisfy isSpace) >>= \xs -> @@ -1156,8 +1150,8 @@ _ `closes` _ = False -- | Matches a stretch of HTML in balanced tags. htmlInBalanced :: Monad m - => (Tag String -> Bool) - -> ParserT String st m String + => (Tag Text -> Bool) + -> ParserT Text st m Text htmlInBalanced f = try $ do lookAhead (char '<') inp <- getInput @@ -1174,21 +1168,21 @@ htmlInBalanced f = try $ do (TagClose _ : TagPosition er ec : _) -> do let ls = er - sr let cs = ec - sc - lscontents <- unlines <$> count ls anyLine + lscontents <- T.unlines <$> count ls anyLine cscontents <- count cs anyChar closetag <- do x <- many (satisfy (/='>')) char '>' return (x <> ">") - return (lscontents <> cscontents <> closetag) + return $ lscontents <> T.pack cscontents <> T.pack closetag _ -> mzero _ -> mzero -htmlInBalanced' :: String - -> [Tag String] - -> [Tag String] +htmlInBalanced' :: Text + -> [Tag Text] + -> [Tag Text] htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts - where go :: Int -> [Tag String] -> Maybe [Tag String] + where go :: Int -> [Tag Text] -> Maybe [Tag Text] go n (t@(TagOpen tn' _):rest) | tn' == tagname = (t :) <$> go (n + 1) rest go 1 (t@(TagClose tn'):_) | tn' == tagname = @@ -1204,8 +1198,8 @@ hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: (HasReaderOptions st, Monad m) - => (Tag String -> Bool) - -> ParserT [Char] st m (Tag String, String) + => (Tag Text -> Bool) + -> ParserT Text st m (Tag Text, Text) htmlTag f = try $ do lookAhead (char '<') startpos <- getPosition @@ -1213,7 +1207,7 @@ htmlTag f = try $ do let ts = canonicalizeTags $ parseTagsOptions parseOptions{ optTagWarning = False , optTagPosition = True } - (inp ++ " ") -- add space to ensure that + (inp <> " ") -- add space to ensure that -- we get a TagPosition after the tag (next, ln, col) <- case ts of (TagPosition{} : next : TagPosition ln col : _) @@ -1225,13 +1219,12 @@ htmlTag f = try $ do -- so we exclude . even though it's a valid character -- in XML element names let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_' - let isName s = case s of - [] -> False - (c:cs) -> isLetter c && all isNameChar cs - let isPI s = case s of - ('?':_) -> True -- processing instruction - _ -> False - + let isName s = case T.uncons s of + Nothing -> False + Just (c, cs) -> isLetter c && T.all isNameChar cs + let isPI s = case T.uncons s of + Just ('?', _) -> True -- processing instruction + _ -> False let endpos = if ln == 1 then setSourceColumn startpos (sourceColumn startpos + (col - 1)) @@ -1247,18 +1240,18 @@ htmlTag f = try $ do -- basic sanity check, since the parser is very forgiving -- and finds tags in stuff like x<y) guard $ isName tagname || isPI tagname - guard $ not $ null tagname + guard $ not $ T.null tagname -- <https://example.org> should NOT be a tag either. -- tagsoup will parse it as TagOpen "https:" [("example.org","")] - guard $ last tagname /= ':' + guard $ T.last tagname /= ':' char '<' rendered <- manyTill anyChar endAngle - return (next, "<" ++ rendered ++ ">") + return (next, T.pack $ "<" ++ rendered ++ ">") case next of TagComment s - | "<!--" `isPrefixOf` inp -> do + | "<!--" `T.isPrefixOf` inp -> do string "<!--" - count (length s) anyChar + count (T.length s) anyChar string "-->" stripComments <- getOption readerStripComments if stripComments @@ -1272,12 +1265,12 @@ htmlTag f = try $ do handleTag tagname _ -> mzero -mkAttr :: [(String, String)] -> Attr +mkAttr :: [(Text, Text)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) where attribsId = fromMaybe "" $ lookup "id" attr - attribsClasses = words (fromMaybe "" $ lookup "class" attr) <> epubTypes + attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr + epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr -- Strip namespace prefixes stripPrefixes :: [Tag Text] -> [Tag Text] @@ -1304,11 +1297,11 @@ isSpace _ = False -- Utilities -- | Adjusts a url according to the document's base URL. -canonicalizeUrl :: PandocMonad m => String -> TagParser m String +canonicalizeUrl :: PandocMonad m => Text -> TagParser m Text canonicalizeUrl url = do mbBaseHref <- baseHref <$> getState - return $ case (parseURIReference url, mbBaseHref) of - (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + return $ case (parseURIReference (T.unpack url), mbBaseHref) of + (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs) _ -> url diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 0a048b6e6..3fc2f9715 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Haddock Copyright : Copyright (C) 2013 David Lazar @@ -17,9 +18,10 @@ module Text.Pandoc.Readers.Haddock import Prelude import Control.Monad.Except (throwError) -import Data.List (intersperse, stripPrefix) +import Data.List (intersperse) import Data.Maybe (fromMaybe) import Data.Text (Text, unpack) +import qualified Data.Text as T import Documentation.Haddock.Parser import Documentation.Haddock.Types as H import Text.Pandoc.Builder (Blocks, Inlines) @@ -28,7 +30,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, splitBy, trim) +import Text.Pandoc.Shared (crFilter, splitTextBy, trim) -- | Parse Haddock markup and return a 'Pandoc' document. @@ -51,7 +53,7 @@ docHToBlocks d' = case d' of DocEmpty -> mempty DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) -> - B.headerWith (ident,[],[]) (headerLevel h) + B.headerWith (T.pack ident,[],[]) (headerLevel h) (docHToInlines False $ headerTitle h) DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) DocString _ -> inlineFallback @@ -73,12 +75,12 @@ docHToBlocks d' = DocDefList items -> B.definitionList (map (\(d,t) -> (docHToInlines False d, [consolidatePlains $ docHToBlocks t])) items) - DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s + DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) $ T.pack s DocCodeBlock d -> B.para $ docHToInlines True d DocHyperlink _ -> inlineFallback DocPic _ -> inlineFallback DocAName _ -> inlineFallback - DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s) + DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim $ T.pack s) DocExamples es -> mconcat $ map (\e -> makeExample ">>>" (exampleExpression e) (exampleResult e)) es DocTable H.Table{ tableHeaderRows = headerRows @@ -114,58 +116,58 @@ docHToInlines isCode d' = (docHToInlines isCode d2) DocString s | isCode -> mconcat $ intersperse B.linebreak - $ map B.code $ splitBy (=='\n') s - | otherwise -> B.text s + $ map B.code $ splitTextBy (=='\n') $ T.pack s + | otherwise -> B.text $ T.pack s DocParagraph _ -> mempty DocIdentifier ident -> case toRegular (DocIdentifier ident) of - DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) s + DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s _ -> mempty - DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s - DocModule s -> B.codeWith ("",["haskell","module"],[]) s + DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s + DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s DocWarning _ -> mempty -- TODO DocEmphasis d -> B.emph (docHToInlines isCode d) - DocMonospaced (DocString s) -> B.code s + DocMonospaced (DocString s) -> B.code $ T.pack s DocMonospaced d -> docHToInlines True d DocBold d -> B.strong (docHToInlines isCode d) - DocMathInline s -> B.math s - DocMathDisplay s -> B.displayMath s + DocMathInline s -> B.math $ T.pack s + DocMathDisplay s -> B.displayMath $ T.pack s DocHeader _ -> mempty DocUnorderedList _ -> mempty DocOrderedList _ -> mempty DocDefList _ -> mempty DocCodeBlock _ -> mempty - DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h) - (maybe (B.text $ hyperlinkUrl h) (docHToInlines isCode) + DocHyperlink h -> B.link (T.pack $ hyperlinkUrl h) (T.pack $ hyperlinkUrl h) + (maybe (B.text $ T.pack $ hyperlinkUrl h) (docHToInlines isCode) (hyperlinkLabel h)) - DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p) - (maybe mempty B.text $ pictureTitle p) - DocAName s -> B.spanWith (s,["anchor"],[]) mempty + DocPic p -> B.image (T.pack $ pictureUri p) (T.pack $ fromMaybe (pictureUri p) $ pictureTitle p) + (maybe mempty (B.text . T.pack) $ pictureTitle p) + DocAName s -> B.spanWith (T.pack s,["anchor"],[]) mempty DocProperty _ -> mempty DocExamples _ -> mempty DocTable _ -> mempty -- | Create an 'Example', stripping superfluous characters as appropriate -makeExample :: String -> String -> [String] -> Blocks +makeExample :: T.Text -> String -> [String] -> Blocks makeExample prompt expression result = B.para $ B.codeWith ("",["prompt"],[]) prompt <> B.space - <> B.codeWith ([], ["haskell","expr"], []) (trim expression) + <> B.codeWith ("", ["haskell","expr"], []) (trim $ T.pack expression) <> B.linebreak <> mconcat (intersperse B.linebreak $ map coder result') where -- 1. drop trailing whitespace from the prompt, remember the prefix - prefix = takeWhile (`elem` " \t") prompt + prefix = T.takeWhile (`elem` (" \t" :: String)) prompt -- 2. drop, if possible, the exact same sequence of whitespace -- characters from each result line -- -- 3. interpret lines that only contain the string "<BLANKLINE>" as an -- empty line - result' = map (substituteBlankLine . tryStripPrefix prefix) result + result' = map (substituteBlankLine . tryStripPrefix prefix . T.pack) result where - tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys + tryStripPrefix xs ys = fromMaybe ys $ T.stripPrefix xs ys substituteBlankLine "<BLANKLINE>" = "" substituteBlankLine line = line - coder = B.codeWith ([], ["result"], []) + coder = B.codeWith ("", ["result"], []) diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index dbca5a59f..8efc230cc 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -4,6 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Ipynb Copyright : Copyright (C) 2019 John MacFarlane @@ -19,7 +20,6 @@ module Text.Pandoc.Readers.Ipynb ( readIpynb ) where import Prelude import Data.Char (isDigit) -import Data.List (isPrefixOf) import Data.Maybe (fromMaybe) import Data.Digest.Pure.SHA (sha1, showDigest) import Text.Pandoc.Options @@ -30,6 +30,7 @@ import Text.Pandoc.Definition import Data.Ipynb as Ipynb import Text.Pandoc.Class import Text.Pandoc.MIME (extensionFromMimeType) +import Text.Pandoc.Shared (tshow) import Text.Pandoc.UTF8 import Text.Pandoc.Walk (walk) import Text.Pandoc.Error @@ -51,15 +52,15 @@ readIpynb opts t = do Left _ -> case eitherDecode src of Right (notebook3 :: Notebook NbV3) -> notebookToPandoc opts notebook3 - Left err -> throwError $ PandocIpynbDecodingError err + Left err -> throwError $ PandocIpynbDecodingError $ T.pack err notebookToPandoc :: PandocMonad m => ReaderOptions -> Notebook a -> m Pandoc notebookToPandoc opts notebook = do let cells = notebookCells notebook let (fmt,fmtminor) = notebookFormat notebook - let m = M.insert "nbformat" (MetaString $ show fmt) $ - M.insert "nbformat_minor" (MetaString $ show fmtminor) $ + let m = M.insert "nbformat" (MetaString $ tshow fmt) $ + M.insert "nbformat_minor" (MetaString $ tshow fmtminor) $ jsonMetaToMeta (notebookMetadata notebook) let lang = case M.lookup "kernelspec" m of Just (MetaMap ks) -> @@ -72,7 +73,7 @@ notebookToPandoc opts notebook = do return $ Pandoc (Meta $ M.insert "jupyter" (MetaMap m) mempty) blocks cellToBlocks :: PandocMonad m - => ReaderOptions -> String -> Cell a -> m B.Blocks + => ReaderOptions -> Text -> Cell a -> m B.Blocks cellToBlocks opts lang c = do let Source ts = cellSource c let source = mconcat ts @@ -100,19 +101,18 @@ cellToBlocks opts lang c = do "text/markdown" -> "markdown" "text/x-rsrt" -> "rst" _ -> format - return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' - $ T.unpack source + return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do outputBlocks <- mconcat <$> mapM outputToBlock outputs - let kvs' = maybe kvs (\x -> ("execution_count", show x):kvs) ec + let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec return $ B.divWith ("",["cell","code"],kvs') $ - B.codeBlockWith ("",[lang],[]) (T.unpack source) + B.codeBlockWith ("",[lang],[]) source <> outputBlocks -- Remove attachment: prefix from images... fixImage :: Inline -> Inline fixImage (Image attr lab (src,tit)) - | "attachment:" `isPrefixOf` src = Image attr lab (drop 11 src, tit) + | "attachment:" `T.isPrefixOf` src = Image attr lab (T.drop 11 src, tit) fixImage x = x addAttachment :: PandocMonad m => (Text, MimeBundle) -> m () @@ -120,19 +120,19 @@ addAttachment (fname, mimeBundle) = do let fp = T.unpack fname case M.toList (unMimeBundle mimeBundle) of (mimeType, BinaryData bs):_ -> - insertMedia fp (Just $ T.unpack mimeType) (BL.fromStrict bs) + insertMedia fp (Just mimeType) (BL.fromStrict bs) (mimeType, TextualData t):_ -> - insertMedia fp (Just $ T.unpack mimeType) + insertMedia fp (Just mimeType) (BL.fromStrict $ TE.encodeUtf8 t) (mimeType, JsonData v):_ -> - insertMedia fp (Just $ T.unpack mimeType) (encode v) - [] -> report $ CouldNotFetchResource fp "no attachment" + insertMedia fp (Just mimeType) (encode v) + [] -> report $ CouldNotFetchResource fname "no attachment" outputToBlock :: PandocMonad m => Output a -> m B.Blocks outputToBlock Stream{ streamName = sName, streamText = Source text } = do - return $ B.divWith ("",["output","stream",T.unpack sName],[]) - $ B.codeBlock $ T.unpack . mconcat $ text + return $ B.divWith ("",["output","stream",sName],[]) + $ B.codeBlock $ T.concat $ text outputToBlock DisplayData{ displayData = data', displayMetadata = metadata' } = B.divWith ("",["output", "display_data"],[]) <$> @@ -140,15 +140,15 @@ outputToBlock DisplayData{ displayData = data', outputToBlock ExecuteResult{ executeCount = ec, executeData = data', executeMetadata = metadata' } = - B.divWith ("",["output", "execute_result"],[("execution_count",show ec)]) + B.divWith ("",["output", "execute_result"],[("execution_count",tshow ec)]) <$> handleData metadata' data' outputToBlock Err{ errName = ename, errValue = evalue, errTraceback = traceback } = do return $ B.divWith ("",["output","error"], - [("ename",T.unpack ename), - ("evalue",T.unpack evalue)]) - $ B.codeBlock $ T.unpack . T.unlines $ traceback + [("ename",ename), + ("evalue",evalue)]) + $ B.codeBlock $ T.unlines $ traceback -- We want to display the richest output possible given -- the output format. @@ -174,54 +174,53 @@ handleData metadata (MimeBundle mb) = let metaPairs = jsonMetaToPairs meta let bl = BL.fromStrict bs -- SHA1 hash for filename - let mt' = T.unpack mt - let fname = showDigest (sha1 bl) ++ - case extensionFromMimeType mt' of + let fname = T.pack (showDigest (sha1 bl)) <> + case extensionFromMimeType mt of Nothing -> "" - Just ext -> '.':ext - insertMedia fname (Just mt') bl + Just ext -> "." <> ext + insertMedia (T.unpack fname) (Just mt) bl return $ B.para $ B.imageWith ("",[],metaPairs) fname "" mempty | otherwise = return mempty dataBlock ("text/html", TextualData t) - = return $ B.rawBlock "html" $ T.unpack t + = return $ B.rawBlock "html" $ t dataBlock ("text/latex", TextualData t) - = return $ B.rawBlock "latex" $ T.unpack t + = return $ B.rawBlock "latex" $ t dataBlock ("text/plain", TextualData t) = - return $ B.codeBlock $ T.unpack t + return $ B.codeBlock $ t dataBlock (_, JsonData v) = - return $ B.codeBlockWith ("",["json"],[]) $ toStringLazy $ encode v + return $ B.codeBlockWith ("",["json"],[]) $ T.pack $ toStringLazy $ encode v dataBlock _ = return mempty -jsonMetaToMeta :: JSONMeta -> M.Map String MetaValue -jsonMetaToMeta = M.mapKeys T.unpack . M.map valueToMetaValue +jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue +jsonMetaToMeta = M.map valueToMetaValue where valueToMetaValue :: Value -> MetaValue valueToMetaValue x@(Object{}) = case fromJSON x of - Error s -> MetaString s + Error s -> MetaString $ T.pack s Success jm' -> MetaMap $ jsonMetaToMeta jm' valueToMetaValue x@(Array{}) = case fromJSON x of - Error s -> MetaString s + Error s -> MetaString $ T.pack s Success xs -> MetaList $ map valueToMetaValue xs valueToMetaValue (Bool b) = MetaBool b - valueToMetaValue (String t) = MetaString (T.unpack t) + valueToMetaValue (String t) = MetaString t valueToMetaValue (Number n) - | Scientific.isInteger n = MetaString (show (floor n :: Integer)) - | otherwise = MetaString (show n) + | Scientific.isInteger n = MetaString (tshow (floor n :: Integer)) + | otherwise = MetaString (tshow n) valueToMetaValue Aeson.Null = MetaString "" -jsonMetaToPairs :: JSONMeta -> [(String, String)] -jsonMetaToPairs = M.toList . M.mapKeys T.unpack . M.map +jsonMetaToPairs :: JSONMeta -> [(Text, Text)] +jsonMetaToPairs = M.toList . M.map (\case String t | not (T.all isDigit t) , t /= "true" , t /= "false" - -> T.unpack t - x -> UTF8.toStringLazy $ Aeson.encode x) + -> t + x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index e074599eb..320b9c1dd 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.JATS Copyright : Copyright (C) 2017-2019 Hamish Mackenzie @@ -76,13 +77,13 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr = fromMaybe "" . maybeAttrValue attr -maybeAttrValue :: String -> Element -> Maybe String +maybeAttrValue :: String -> Element -> Maybe Text maybeAttrValue attr elt = - lookupAttrBy (\x -> qName x == attr) (elAttribs elt) + T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt) -- convenience function named :: String -> Element -> Bool @@ -90,7 +91,7 @@ named s e = qName (elName e) == s -- -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> JATS m () addMeta field val = modify (setMeta field val) instance HasMeta JATSState where @@ -126,15 +127,13 @@ isBlockElement (Elem e) = qName (elName e) `S.member` blocktags isBlockElement _ = False -- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs +trimNl :: Text -> Text +trimNl = T.dropAround (== '\n') -- function that is used by both graphic (in parseBlock) -- and inline-graphic (in parseInline) getGraphic :: PandocMonad m - => Maybe (Inlines, String) -> Element -> JATS m Inlines + => Maybe (Inlines, Text) -> Element -> JATS m Inlines getGraphic mbfigdata e = do let atVal a = attrValue a e (ident, title, caption) = @@ -142,7 +141,7 @@ getGraphic mbfigdata e = do Just (capt, i) -> (i, "fig:" <> atVal "title", capt) Nothing -> (atVal "id", atVal "title", text (atVal "alt-text")) - attr = (ident, words $ atVal "role", []) + attr = (ident, T.words $ atVal "role", []) imageUrl = atVal "href" return $ imageWith attr imageUrl title caption @@ -155,8 +154,8 @@ parseBlock :: PandocMonad m => Content -> JATS m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x + else return $ plain $ trimInlines $ text $ T.pack s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x parseBlock (Elem e) = case qName (elName e) of "p" -> parseMixed para (elContent e) @@ -167,7 +166,7 @@ parseBlock (Elem e) = "bullet" -> bulletList <$> listitems listType -> do let start = fromMaybe 1 $ - (strContent <$> (filterElement (named "list-item") e + (textContent <$> (filterElement (named "list-item") e >>= filterElement (named "label"))) >>= safeRead orderedListWith (start, parseListStyleType listType, DefaultDelim) @@ -204,7 +203,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e + $ trimNl $ textContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -267,9 +266,9 @@ parseBlock (Elem e) = Just "right" -> AlignRight Just "center" -> AlignCenter _ -> AlignDefault - let toWidth c = case findAttr (unqual "colwidth") c of + let toWidth c = case findAttrText (unqual "colwidth") c of Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> + $ safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w Nothing -> 0 :: Double let numrows = foldl' max 0 $ map length bodyrows @@ -363,7 +362,7 @@ parseRefList e = do return mempty parseRef :: PandocMonad m - => Element -> JATS m (Map.Map String MetaValue) + => Element -> JATS m (Map.Map Text MetaValue) parseRef e = do let refId = text $ attrValue "id" e let getInlineText n = maybe (return mempty) getInlines . filterChild (named n) @@ -396,7 +395,7 @@ parseRef e = do family <- maybe (return mempty) getInlines $ filterChild (named "surname") nm return $ toMetaValue $ Map.fromList [ - ("given", given) + ("given" :: Text, given) , ("family", family) ] personGroups <- mapM (\pg -> @@ -406,7 +405,7 @@ parseRef e = do toMetaValue names)) personGroups' return $ Map.fromList $ - [ ("id", toMetaValue refId) + [ ("id" :: Text, toMetaValue refId) , ("type", toMetaValue refType) , ("title", toMetaValue refTitle) , ("container-title", toMetaValue refContainerTitle) @@ -415,7 +414,7 @@ parseRef e = do , ("title", toMetaValue refTitle) , ("issued", toMetaValue $ Map.fromList [ - ("year", refYear) + ("year" :: Text, refYear) ]) , ("volume", toMetaValue refVolume) , ("page", toMetaValue refPages) @@ -424,6 +423,15 @@ parseRef e = do Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty -- TODO handle mixed-citation +findAttrText :: QName -> Element -> Maybe Text +findAttrText x = fmap T.pack . findAttr x + +textContent :: Element -> Text +textContent = T.pack . strContent + +textContentRecursive :: Element -> Text +textContentRecursive = T.pack . strContentRecursive + strContentRecursive :: Element -> String strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -433,9 +441,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> JATS m Inlines -parseInline (Text (CData _ s _)) = return $ text s +parseInline (Text (CData _ s _)) = return $ text $ T.pack s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) text $ lookupEntity ref + return $ maybe (text $ T.toUpper $ T.pack ref) text $ T.pack <$> lookupEntity ref parseInline (Elem e) = case qName (elName e) of "italic" -> emph <$> innerInlines @@ -464,7 +472,7 @@ parseInline (Elem e) = "xref" -> do ils <- innerInlines let rid = attrValue "rid" e - let rids = words rid + let rids = T.words rid let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e let attr = (attrValue "id" e, [], maybeToList refType) return $ if refType == Just ("ref-type","bibr") @@ -477,13 +485,13 @@ parseInline (Elem e) = , citationNoteNum = 0 , citationHash = 0}) rids) ils - else linkWith attr ('#' : rid) "" ils + else linkWith attr ("#" <> rid) "" ils "ext-link" -> do ils <- innerInlines - let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e + let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> '#' : attrValue "rid" e + Just h -> T.pack h + _ -> "#" <> attrValue "rid" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, [], []) return $ linkWith attr href title ils' @@ -491,23 +499,23 @@ parseInline (Elem e) = "disp-formula" -> formula displayMath "inline-formula" -> formula math "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e - "tex-math" -> return . math $ strContent e + "tex-math" -> return . math $ textContent e - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e + "email" -> return $ link ("mailto:" <> textContent e) "" + $ str $ textContent e + "uri" -> return $ link (textContent e) "" $ str $ textContent e "fn" -> (note . mconcat) <$> mapM parseBlock (elContent e) _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> mapM parseInline (elContent e) mathML x = - case readMathML . showElement $ everywhere (mkT removePrefix) x of + case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of Left _ -> mempty Right m -> writeTeX m formula constructor = do let whereToLook = fromMaybe e $ filterElement (named "alternatives") e - texMaths = map strContent $ + texMaths = map textContent $ filterChildren (named "tex-math") whereToLook mathMLs = map mathML $ filterChildren isMathML whereToLook @@ -520,4 +528,4 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index be19964a4..5c9a3e69c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,9 +1,10 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.LaTeX Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -29,9 +30,9 @@ import Prelude import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Data.Char (isDigit, isLetter, toLower, toUpper, chr) +import Data.Char (isDigit, isLetter, toUpper, chr) import Data.Default -import Data.List (intercalate, isPrefixOf) +import Data.List (intercalate) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set @@ -44,7 +45,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv, readFileFromDirs, report, setResourcePath, setTranslations, translateTerm, trace, fileExists) -import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError)) +import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging @@ -77,7 +78,7 @@ readLaTeX opts ltx = do (tokenize "source" (crFilter ltx)) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (T.unpack ltx) e + Left e -> throwError $ PandocParsecError ltx e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do @@ -99,13 +100,13 @@ parseLaTeX = do walk (resolveRefs (sLabels st)) doc' return $ Pandoc meta bs' -resolveRefs :: M.Map String [Inline] -> Inline -> Inline +resolveRefs :: M.Map Text [Inline] -> Inline -> Inline resolveRefs labels x@(Link (ident,classes,kvs) _ _) = case (lookup "reference-type" kvs, lookup "reference" kvs) of (Just "ref", Just lab) -> case M.lookup lab labels of - Just txt -> Link (ident,classes,kvs) txt ('#':lab, "") + Just txt -> Link (ident,classes,kvs) txt ("#" <> lab, "") Nothing -> x _ -> x resolveRefs _ x = x @@ -123,11 +124,11 @@ resolveRefs _ x = x rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m String + => ParserT Text s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks <|> (rawLaTeXParser toks True (do choice (map controlSeq @@ -151,14 +152,14 @@ beginOrEndCommand = try $ do (inlineEnvironments :: M.Map Text (LP PandocPure Inlines)) then mzero else return $ rawBlock "latex" - (T.unpack (txt <> untokenize rawargs)) + (txt <> untokenize rawargs) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m String + => ParserT Text s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp raw <- snd <$> ( rawLaTeXParser toks True (mempty <$ (controlSeq "input" >> skipMany opt >> braced)) @@ -167,23 +168,23 @@ rawLaTeXInline = do inlines ) finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439 - return $ raw <> finalbraces + return $ raw <> T.pack finalbraces -inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines +inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines -- inline elements: word :: PandocMonad m => LP m Inlines -word = (str . T.unpack . untoken) <$> satisfyTok isWordTok +word = (str . untoken) <$> satisfyTok isWordTok regularSymbol :: PandocMonad m => LP m Inlines -regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol +regularSymbol = (str . untoken) <$> satisfyTok isRegularSymbol where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t isRegularSymbol _ = False isSpecial c = c `Set.member` specialChars @@ -199,14 +200,14 @@ inlineGroup = do doLHSverb :: PandocMonad m => LP m Inlines doLHSverb = - (codeWith ("",["haskell"],[]) . T.unpack . untokenize) + (codeWith ("",["haskell"],[]) . untokenize) <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|') -mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines -mkImage options src = do +mkImage :: PandocMonad m => [(Text, Text)] -> Text -> LP m Inlines +mkImage options (T.unpack -> src) = do let replaceTextwidth (k,v) = case numUnit v of - Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + Just (num, "\\textwidth") -> (k, showFl (num * 100) <> "%") _ -> (k, v) let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options @@ -223,10 +224,10 @@ mkImage options src = do then return s' else findFile s es src' <- case takeExtension src of - "" | not (null defaultExt) -> return $ addExtension src defaultExt + "" | not (T.null defaultExt) -> return $ addExtension src $ T.unpack defaultExt | otherwise -> findFile src exts _ -> return src - return $ imageWith attr src' "" alt + return $ imageWith attr (T.pack src') "" alt doxspace :: PandocMonad m => LP m Inlines doxspace = @@ -435,7 +436,7 @@ siUnitMap = M.fromList , ("zetta", str "Z") ] -lit :: String -> LP m Inlines +lit :: Text -> LP m Inlines lit = pure . str removeDoubleQuotes :: Text -> Text @@ -471,7 +472,7 @@ quoted' :: PandocMonad m -> LP m () -> LP m Inlines quoted' f starter ender = do - startchs <- (T.unpack . untokenize) <$> starter + startchs <- untokenize <$> starter smart <- extensionEnabled Ext_smart <$> getOption readerExtensions if smart then do @@ -487,7 +488,7 @@ quoted' f starter ender = do enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines enquote starred mblang = do skipopts - let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let lang = mblang >>= babelLangToBCP47 let langspan = case lang of Nothing -> id Just l -> spanWith ("",[],[("lang", renderLang l)]) @@ -503,27 +504,27 @@ blockquote citations mblang = do cs <- cites NormalCitation False return $ para (cite cs mempty) else return mempty - let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let lang = mblang >>= babelLangToBCP47 let langdiv = case lang of Nothing -> id Just l -> divWith ("",[],[("lang", renderLang l)]) bs <- grouped block return $ blockQuote . langdiv $ (bs <> citePar) -doAcronym :: PandocMonad m => String -> LP m Inlines +doAcronym :: PandocMonad m => Text -> LP m Inlines doAcronym form = do acro <- braced - return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), - ("acronym-form", "singular+" ++ form)]) - $ str $ toksToString acro] + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "singular+" <> form)]) + $ str $ untokenize acro] -doAcronymPlural :: PandocMonad m => String -> LP m Inlines +doAcronymPlural :: PandocMonad m => Text -> LP m Inlines doAcronymPlural form = do acro <- braced plural <- lit "s" - return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), - ("acronym-form", "plural+" ++ form)]) $ - mconcat [str $ toksToString acro, plural]] + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "plural+" <> form)]) $ + mconcat [str $ untokenize acro, plural]] doverb :: PandocMonad m => LP m Inlines doverb = do @@ -532,7 +533,7 @@ doverb = do Just (c, ts) | T.null ts -> return c _ -> mzero withVerbatimMode $ - (code . T.unpack . untokenize) <$> + (code . untokenize) <$> manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker) verbTok :: PandocMonad m => Char -> LP m Tok @@ -547,7 +548,7 @@ verbTok stopchar = do : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp return $ Tok pos toktype t1 -listingsLanguage :: [(String, String)] -> Maybe String +listingsLanguage :: [(Text, Text)] -> Maybe Text listingsLanguage opts = case lookup "language" opts of Nothing -> Nothing @@ -562,10 +563,10 @@ dolstinline = do domintinline :: PandocMonad m => LP m Inlines domintinline = do skipopts - cls <- toksToString <$> braced + cls <- untokenize <$> braced doinlinecode [cls] -doinlinecode :: PandocMonad m => [String] -> LP m Inlines +doinlinecode :: PandocMonad m => [Text] -> LP m Inlines doinlinecode classes = do Tok _ Symbol t <- anySymbol marker <- case T.uncons t of @@ -573,14 +574,14 @@ doinlinecode classes = do _ -> mzero let stopchar = if marker == '{' then '}' else marker withVerbatimMode $ - (codeWith ("",classes,[]) . map nlToSpace . T.unpack . untokenize) <$> + (codeWith ("",classes,[]) . T.map nlToSpace . untokenize) <$> manyTill (verbTok stopchar) (symbol stopchar) nlToSpace :: Char -> Char nlToSpace '\n' = ' ' nlToSpace x = x -keyval :: PandocMonad m => LP m (String, String) +keyval :: PandocMonad m => LP m (Text, Text) keyval = try $ do Tok _ Word key <- satisfyTok isWordTok optional sp @@ -601,35 +602,34 @@ keyval = try $ do _ -> True)))))) optional (symbol ',') optional sp - return (T.unpack key, T.unpack $ T.strip val) + return (key, T.strip val) -keyvals :: PandocMonad m => LP m [(String, String)] +keyvals :: PandocMonad m => LP m [(Text, Text)] keyvals = try $ symbol '[' >> manyTill keyval (symbol ']') accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines accent combiningAccent fallBack = try $ do ils <- tok case toList ils of - (Str (x:xs) : ys) -> return $ fromList $ + (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ -- try to normalize to the combined character: - Str (T.unpack - (Normalize.normalize Normalize.NFC - (T.pack [x, combiningAccent])) ++ xs) : ys - [Space] -> return $ str [fromMaybe combiningAccent fallBack] - [] -> return $ str [fromMaybe combiningAccent fallBack] + Str (Normalize.normalize Normalize.NFC + (T.pack [x, combiningAccent]) <> xs) : ys + [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack + [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack _ -> return ils -mathDisplay :: String -> Inlines +mathDisplay :: Text -> Inlines mathDisplay = displayMath . trimMath -mathInline :: String -> Inlines +mathInline :: Text -> Inlines mathInline = math . trimMath dollarsMath :: PandocMonad m => LP m Inlines dollarsMath = do symbol '$' display <- option False (True <$ symbol '$') - (do contents <- try $ T.unpack . untokenize <$> pDollarsMath 0 + (do contents <- try $ untokenize <$> pDollarsMath 0 if display then (mathDisplay contents <$ symbol '$') else return $ mathInline contents) @@ -682,10 +682,10 @@ simpleCiteArgs = try $ do } return $ addPrefix pre $ addSuffix suf $ map conv keys -citationLabel :: PandocMonad m => LP m String +citationLabel :: PandocMonad m => LP m Text citationLabel = do optional spaces - toksToString <$> + untokenize <$> (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) <* optional spaces <* optional (symbol ',') @@ -729,10 +729,10 @@ cites mode multi = try $ do addMprenote _ _ = [] addMpostnote = addSuffix . mpostnote -citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines +citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines citation name mode multi = do (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" ++ name ++ toksToString raw) + return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw) handleCitationPart :: Inlines -> [Citation] handleCitationPart ils = @@ -756,7 +756,7 @@ complexNatbibCitation mode = try $ do case cs of [] -> mzero (c:cits) -> return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" ++ toksToString raw) + (rawInline "latex" $ "\\citetext" <> untokenize raw) inNote :: Inlines -> Inlines inNote ils = @@ -780,10 +780,10 @@ tok :: PandocMonad m => LP m Inlines tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' where singleChar' = do Tok _ _ t <- singleChar - return (str (T.unpack t)) + return $ str t opt :: PandocMonad m => LP m Inlines -opt = bracketed inline <|> (str . T.unpack <$> rawopt) +opt = bracketed inline <|> (str <$> rawopt) paropt :: PandocMonad m => LP m Inlines paropt = parenWrapped inline @@ -822,26 +822,31 @@ overlayTok = inBrackets :: Inlines -> Inlines inBrackets x = str "[" <> x <> str "]" -unescapeURL :: String -> String -unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) -unescapeURL (x:xs) = x:unescapeURL xs -unescapeURL [] = "" +unescapeURL :: Text -> Text +unescapeURL = T.concat . go . T.splitOn "\\" + where + isEscapable c = c `elemText` "#$%&~_^\\{}" + go (x:xs) = x : map unescapeInterior xs + go [] = [] + unescapeInterior t + | Just (c, _) <- T.uncons t + , isEscapable c = t + | otherwise = "\\" <> t mathEnvWith :: PandocMonad m => (Inlines -> a) -> Maybe Text -> Text -> LP m a mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name where inner x = case innerEnv of Nothing -> x - Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++ - "\\end{" ++ T.unpack y ++ "}" + Just y -> "\\begin{" <> y <> "}\n" <> x <> + "\\end{" <> y <> "}" -mathEnv :: PandocMonad m => Text -> LP m String +mathEnv :: PandocMonad m => Text -> LP m Text mathEnv name = do skipopts optional blankline res <- manyTill anyTok (end_ name) - return $ stripTrailingNewlines $ T.unpack $ untokenize res + return $ stripTrailingNewlines $ untokenize res inlineEnvironment :: PandocMonad m => LP m Inlines inlineEnvironment = try $ do @@ -914,9 +919,9 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok) , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok) , ("lettrine", optional opt >> extractSpaces (spanWith ("",["lettrine"],[])) <$> tok) - , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")")) - , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]")) - , ("ensuremath", mathInline . toksToString <$> braced) + , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . untokenize <$> braced) , ("texorpdfstring", const <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") @@ -1008,16 +1013,15 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("lstinline", dolstinline) , ("mintinline", domintinline) , ("Verb", doverb) - , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url -> + , ("url", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url -> pure (link url "" (str url))) - , ("nolinkurl", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url -> + , ("nolinkurl", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url -> pure (code url)) - , ("href", (unescapeURL . toksToString <$> + , ("href", (unescapeURL . untokenize <$> bracedUrl <* optional sp) >>= \url -> tok >>= \lab -> pure (link url "" lab)) , ("includegraphics", do options <- option [] keyvals - src <- unescapeURL . T.unpack . - removeDoubleQuotes . untokenize <$> braced + src <- unescapeURL . removeDoubleQuotes . untokenize <$> braced mkImage options src) , ("enquote*", enquote True Nothing) , ("enquote", enquote False Nothing) @@ -1172,22 +1176,21 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList ifdim :: PandocMonad m => LP m Inlines ifdim = do contents <- manyTill anyTok (controlSeq "fi") - return $ rawInline "latex" $ T.unpack $ - "\\ifdim" <> untokenize contents <> "\\fi" + return $ rawInline "latex" $ "\\ifdim" <> untokenize contents <> "\\fi" makeUppercase :: Inlines -> Inlines -makeUppercase = fromList . walk (alterStr (map toUpper)) . toList +makeUppercase = fromList . walk (alterStr T.toUpper) . toList makeLowercase :: Inlines -> Inlines -makeLowercase = fromList . walk (alterStr (map toLower)) . toList +makeLowercase = fromList . walk (alterStr T.toLower) . toList -alterStr :: (String -> String) -> Inline -> Inline +alterStr :: (Text -> Text) -> Inline -> Inline alterStr f (Str xs) = Str (f xs) alterStr _ x = x foreignlanguage :: PandocMonad m => LP m Inlines foreignlanguage = do - babelLang <- T.unpack . untokenize <$> braced + babelLang <- untokenize <$> braced case babelLangToBCP47 babelLang of Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok _ -> tok @@ -1196,24 +1199,24 @@ inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines) inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47 where mk (polyglossia, bcp47Func) = - ("text" <> T.pack polyglossia, inlineLanguage bcp47Func) + ("text" <> polyglossia, inlineLanguage bcp47Func) -inlineLanguage :: PandocMonad m => (String -> Lang) -> LP m Inlines +inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines inlineLanguage bcp47Func = do - o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') <$> rawopt let lang = renderLang $ bcp47Func o extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok hyperlink :: PandocMonad m => LP m Inlines hyperlink = try $ do - src <- toksToString <$> braced + src <- untokenize <$> braced lab <- tok - return $ link ('#':src) "" lab + return $ link ("#" <> src) "" lab hypertargetBlock :: PandocMonad m => LP m Blocks hypertargetBlock = try $ do - ref <- toksToString <$> braced + ref <- untokenize <$> braced bs <- grouped block case toList bs of [Header 1 (ident,_,_) _] | ident == ref -> return bs @@ -1221,7 +1224,7 @@ hypertargetBlock = try $ do hypertargetInline :: PandocMonad m => LP m Inlines hypertargetInline = try $ do - ref <- toksToString <$> braced + ref <- untokenize <$> braced ils <- grouped inline return $ spanWith (ref, [], []) ils @@ -1231,7 +1234,7 @@ romanNumeralUpper = romanNumeralLower :: (PandocMonad m) => LP m Inlines romanNumeralLower = - str . map toLower . toRomanNumeral <$> romanNumeralArg + str . T.toLower . toRomanNumeral <$> romanNumeralArg romanNumeralArg :: (PandocMonad m) => LP m Int romanNumeralArg = spaces *> (parser <|> inBraces) @@ -1248,18 +1251,18 @@ romanNumeralArg = spaces *> (parser <|> inBraces) let (digits, rest) = T.span isDigit s unless (T.null rest) $ Prelude.fail "Non-digits in argument to \\Rn or \\RN" - safeRead $ T.unpack digits + safeRead digits newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a newToggle name = do updateState $ \st -> - st{ sToggles = M.insert (toksToString name) False (sToggles st) } + st{ sToggles = M.insert (untokenize name) False (sToggles st) } return mempty setToggle :: (Monoid a, PandocMonad m) => Bool -> [Tok] -> LP m a setToggle on name = do updateState $ \st -> - st{ sToggles = M.adjust (const on) (toksToString name) (sToggles st) } + st{ sToggles = M.adjust (const on) (untokenize name) (sToggles st) } return mempty ifToggle :: PandocMonad m => LP m () @@ -1271,7 +1274,7 @@ ifToggle = do no <- braced toggles <- sToggles <$> getState inp <- getInput - let name' = toksToString name + let name' = untokenize name case M.lookup name' toggles of Just True -> setInput (yes ++ inp) Just False -> setInput (no ++ inp) @@ -1294,11 +1297,11 @@ ifstrequal = do else getInput >>= setInput . (ifnotequal ++) return mempty -coloredInline :: PandocMonad m => String -> LP m Inlines +coloredInline :: PandocMonad m => Text -> LP m Inlines coloredInline stylename = do skipopts color <- braced - spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok + spanWith ("",[],[("style",stylename <> ": " <> untokenize color)]) <$> tok ttfamily :: PandocMonad m => LP m Inlines ttfamily = (code . stringify . toList) <$> tok @@ -1313,12 +1316,12 @@ rawInlineOr name' fallback = do processHBox :: Inlines -> Inlines processHBox = walk convert where - convert Space = Str [chr 160] -- non-breakable space - convert SoftBreak = Str [chr 160] -- non-breakable space + convert Space = Str $ T.singleton $ chr 160 -- non-breakable space + convert SoftBreak = Str $ T.singleton $ chr 160 -- non-breakable space convert LineBreak = Str "" convert x = x -getRawCommand :: PandocMonad m => Text -> Text -> LP m String +getRawCommand :: PandocMonad m => Text -> Text -> LP m Text getRawCommand name txt = do (_, rawargs) <- withRaw $ case name of @@ -1336,7 +1339,7 @@ getRawCommand name txt = do skipopts option "" (try dimenarg) void $ many braced - return $ T.unpack (txt <> untokenize rawargs) + return $ txt <> untokenize rawargs isFontSizeCommand :: Text -> Bool isFontSizeCommand "tiny" = True @@ -1396,17 +1399,17 @@ treatAsInline = Set.fromList dolabel :: PandocMonad m => LP m Inlines dolabel = do v <- braced - let refstr = toksToString v + let refstr = untokenize v return $ spanWith (refstr,[],[("label", refstr)]) - $ inBrackets $ str $ toksToString v + $ inBrackets $ str $ untokenize v -doref :: PandocMonad m => String -> LP m Inlines +doref :: PandocMonad m => Text -> LP m Inlines doref cls = do v <- braced - let refstr = toksToString v + let refstr = untokenize v return $ linkWith ("",[],[ ("reference-type", cls) , ("reference", refstr)]) - ('#':refstr) + ("#" <> refstr) "" (inBrackets $ str refstr) @@ -1435,11 +1438,11 @@ inline = (mempty <$ comment) <|> (str "\160" <$ symbol '~') <|> dollarsMath <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) - <|> (str . (:[]) <$> primEscape) + <|> (str . T.singleton <$> primEscape) <|> regularSymbol <|> (do res <- symbolIn "#^'`\"[]&" pos <- getPosition - let s = T.unpack (untoken res) + let s = untoken res report $ ParsingUnescaped s pos return $ str s) @@ -1498,7 +1501,7 @@ include name = do -- note, we can have cc_by_4.0 for example... _ | name == "usepackage" -> addExtension f ".sty" | otherwise -> addExtension f ".tex" - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" mapM_ (insertIncluded dirs) (map addExt fs) return mempty @@ -1509,19 +1512,19 @@ insertIncluded :: PandocMonad m insertIncluded dirs f = do pos <- getPosition containers <- getIncludeFiles <$> getState - when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show pos - updateState $ addIncludeFile f + when (T.pack f `elem` containers) $ + throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos + updateState $ addIncludeFile $ T.pack f mbcontents <- readFileFromDirs dirs f contents <- case mbcontents of Just s -> return s Nothing -> do - report $ CouldNotLoadIncludeFile f pos + report $ CouldNotLoadIncludeFile (T.pack f) pos return "" - getInput >>= setInput . (tokenize f (T.pack contents) ++) + getInput >>= setInput . (tokenize f contents ++) updateState dropLatestIncludeFile -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () addMeta field val = updateState $ \st -> st{ sMeta = addMetaField field val $ sMeta st } @@ -1536,10 +1539,10 @@ authors = try $ do egroup addMeta "author" (map trimInlines auths) -macroDef :: (PandocMonad m, Monoid a) => (String -> a) -> LP m a +macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a macroDef constructor = do (_, s) <- withRaw (commandDef <|> environmentDef) - (constructor (T.unpack $ untokenize s) <$ + (constructor (untokenize s) <$ guardDisabled Ext_latex_macros) <|> return mempty where commandDef = do @@ -1632,7 +1635,7 @@ newcommand = do case M.lookup name macros of Just macro | mtype == "newcommand" -> do - report $ MacroAlreadyDefined (T.unpack txt) pos + report $ MacroAlreadyDefined txt pos return (name, macro) | mtype == "providecommand" -> return (name, macro) _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents) @@ -1658,7 +1661,7 @@ newenvironment = do case M.lookup name macros of Just _ | mtype == "newenvironment" -> do - report $ MacroAlreadyDefined (T.unpack name) pos + report $ MacroAlreadyDefined name pos return Nothing | mtype == "provideenvironment" -> do return Nothing @@ -1669,7 +1672,7 @@ newenvironment = do bracketedNum :: PandocMonad m => LP m Int bracketedNum = do ds <- untokenize <$> bracketedToks - case safeRead (T.unpack ds) of + case safeRead ds of Just i -> return i _ -> return 0 @@ -1709,7 +1712,7 @@ section (ident, classes, kvs) lvl = do contents <- grouped inline lab <- option ident $ try (spaces >> controlSeq "label" - >> spaces >> toksToString <$> braced) + >> spaces >> untokenize <$> braced) when (lvl == 0) $ updateState $ \st -> st{ sHasChapters = True } unless ("unnumbered" `elem` classes) $ do @@ -1836,9 +1839,9 @@ blockCommands = M.fromList , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) , ("caption", skipopts *> setCaption) , ("bibliography", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs . toksToString)) + addMeta "bibliography" . splitBibs . untokenize)) , ("addbibresource", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs . toksToString)) + addMeta "bibliography" . splitBibs . untokenize)) , ("endinput", mempty <$ skipMany tok) -- includes , ("lstinputlisting", inputListing) @@ -1941,18 +1944,18 @@ rawEnv name = do (bs, raw) <- withRaw $ env name blocks if parseRaw then return $ rawBlock "latex" - $ T.unpack $ beginCommand <> untokenize raw + $ beginCommand <> untokenize raw else do - report $ SkippedContent (T.unpack beginCommand) pos1 + report $ SkippedContent beginCommand pos1 pos2 <- getPosition - report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2 + report $ SkippedContent ("\\end{" <> name <> "}") pos2 return bs rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do pos <- getPosition (_, raw) <- withRaw $ verbEnv name - let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw + let raw' = "\\begin{" <> name <> "}" <> untokenize raw exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts if parseRaw @@ -1961,12 +1964,11 @@ rawVerbEnv name = do report $ SkippedContent raw' pos return mempty -verbEnv :: PandocMonad m => Text -> LP m String +verbEnv :: PandocMonad m => Text -> LP m Text verbEnv name = withVerbatimMode $ do optional blankline res <- manyTill anyTok (end_ name) - return $ T.unpack - $ stripTrailingNewline + return $ stripTrailingNewline $ untokenize $ res @@ -2010,11 +2012,11 @@ minted = do mintedAttr :: PandocMonad m => LP m Attr mintedAttr = do options <- option [] keyvals - lang <- toksToString <$> braced + lang <- untokenize <$> braced let kvs = [ (if k == "firstnumber" then "startFrom" else k, v) | (k,v) <- options ] - let classes = [ lang | not (null lang) ] ++ + let classes = [ lang | not (T.null lang) ] ++ [ "numberLines" | lookup "linenos" options == Just "true" ] return ("",classes,kvs) @@ -2023,14 +2025,14 @@ inputMinted :: PandocMonad m => LP m Blocks inputMinted = do pos <- getPosition attr <- mintedAttr - f <- filter (/='"') . toksToString <$> braced - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs f + f <- T.filter (/='"') . untokenize <$> braced + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs (T.unpack f) rawcode <- case mbCode of Just s -> return s Nothing -> do report $ CouldNotLoadIncludeFile f pos - return [] + return "" return $ B.codeBlockWith attr rawcode letterContents :: PandocMonad m => LP m Blocks @@ -2052,10 +2054,10 @@ figure = try $ do addImageCaption :: PandocMonad m => Blocks -> LP m Blocks addImageCaption = walkM go where go (Image attr@(_, cls, kvs) alt (src,tit)) - | not ("fig:" `isPrefixOf` tit) = do + | not ("fig:" `T.isPrefixOf` tit) = do (mbcapt, mblab) <- sCaption <$> getState let (alt', tit') = case mbcapt of - Just ils -> (toList ils, "fig:" ++ tit) + Just ils -> (toList ils, "fig:" <> tit) Nothing -> (alt, tit) attr' = case mblab of Just lab -> (lab, cls, kvs) @@ -2090,23 +2092,23 @@ addImageCaption = walkM go return $ Image attr' alt' (src, tit') go x = return x -coloredBlock :: PandocMonad m => String -> LP m Blocks +coloredBlock :: PandocMonad m => Text -> LP m Blocks coloredBlock stylename = try $ do skipopts color <- braced notFollowedBy (grouped inline) - let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) + let constructor = divWith ("",[],[("style",stylename <> ": " <> untokenize color)]) constructor <$> grouped block graphicsPath :: PandocMonad m => LP m Blocks graphicsPath = do - ps <- map toksToString <$> + ps <- map (T.unpack . untokenize) <$> (bgroup *> spaces *> manyTill (braced <* spaces) egroup) - getResourcePath >>= setResourcePath . (++ ps) + getResourcePath >>= setResourcePath . (<> ps) return mempty -splitBibs :: String -> [Inlines] -splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') +splitBibs :: Text -> [Inlines] +splitBibs = map (str . T.pack . flip replaceExtension "bib" . T.unpack . trim) . splitTextBy (==',') alltt :: Blocks -> Blocks alltt = walk strToCode @@ -2115,7 +2117,7 @@ alltt = walk strToCode strToCode SoftBreak = LineBreak strToCode x = x -parseListingsOptions :: [(String, String)] -> Attr +parseListingsOptions :: [(Text, Text)] -> Attr parseListingsOptions options = let kvs = [ (if k == "firstnumber" then "startFrom" @@ -2129,23 +2131,23 @@ inputListing :: PandocMonad m => LP m Blocks inputListing = do pos <- getPosition options <- option [] keyvals - f <- filter (/='"') . toksToString <$> braced - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs f + f <- T.filter (/='"') . untokenize <$> braced + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs (T.unpack f) codeLines <- case mbCode of - Just s -> return $ lines s + Just s -> return $ T.lines s Nothing -> do report $ CouldNotLoadIncludeFile f pos return [] let (ident,classes,kvs) = parseListingsOptions options let classes' = (case listingsLanguage options of - Nothing -> (take 1 (languagesByExtension (takeExtension f)) ++) + Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>) Just _ -> id) classes let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead let lastline = fromMaybe (length codeLines) $ lookup "lastline" options >>= safeRead - let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $ + let codeContents = T.intercalate "\n" $ take (1 + lastline - firstline) $ drop (firstline - 1) codeLines return $ codeBlockWith (ident,classes',kvs) codeContents @@ -2176,12 +2178,12 @@ orderedList' = try $ do spaces let markerSpec = do symbol '[' - ts <- toksToString <$> manyTill anyTok (symbol ']') + ts <- untokenize <$> manyTill anyTok (symbol ']') case runParser anyOrderedListMarker def "option" ts of Right r -> return r Left _ -> do pos <- getPosition - report $ SkippedContent ("[" ++ ts ++ "]") pos + report $ SkippedContent ("[" <> ts <> "]") pos return (1, DefaultStyle, DefaultDelim) (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec spaces @@ -2191,17 +2193,17 @@ orderedList' = try $ do spaces start <- option 1 $ try $ do pos <- getPosition controlSeq "setcounter" - ctr <- toksToString <$> braced - guard $ "enum" `isPrefixOf` ctr - guard $ all (`elem` ['i','v']) (drop 4 ctr) + ctr <- untokenize <$> braced + guard $ "enum" `T.isPrefixOf` ctr + guard $ T.all (`elem` ['i','v']) (T.drop 4 ctr) optional sp - num <- toksToString <$> braced + num <- untokenize <$> braced case safeRead num of Just i -> return (i + 1 :: Int) Nothing -> do report $ SkippedContent - ("\\setcounter{" ++ ctr ++ - "}{" ++ num ++ "}") pos + ("\\setcounter{" <> ctr <> + "}{" <> num <> "}") pos return 1 bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs @@ -2235,7 +2237,7 @@ splitWordTok = do inp <- getInput case inp of (Tok spos Word t : rest) -> - setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest _ -> return () parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))] @@ -2256,7 +2258,7 @@ parseAligns = try $ do let alignSuffix = symbol '<' >> braced let colWidth = try $ do symbol '{' - ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth") + ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") spaces symbol '}' case safeRead ds of @@ -2266,7 +2268,7 @@ parseAligns = try $ do pref <- option [] alignPrefix spaces al <- alignChar - width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced + width <- colWidth <|> option 0.0 (do s <- untokenize <$> braced pos <- getPosition report $ SkippedContent s pos return 0.0) @@ -2276,13 +2278,13 @@ parseAligns = try $ do let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro symbol '*' spaces - ds <- trim . toksToString <$> braced + ds <- trim . untokenize <$> braced spaces spec <- braced case safeRead ds of Just n -> getInput >>= setInput . (mconcat (replicate n spec) ++) - Nothing -> Prelude.fail $ "Could not parse " ++ ds ++ " as number" + Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" bgroup spaces maybeBar @@ -2379,7 +2381,7 @@ block = do <|> blockCommand <|> paragraph <|> grouped block - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blocks :: PandocMonad m => LP m Blocks @@ -2387,9 +2389,9 @@ blocks = mconcat <$> many block setDefaultLanguage :: PandocMonad m => LP m Blocks setDefaultLanguage = do - o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') <$> rawopt - polylang <- toksToString <$> braced + polylang <- untokenize <$> braced case M.lookup polylang polyglossiaLangToBCP47 of Nothing -> return mempty -- TODO mzero? warning? Just langFunc -> do diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index b21398f93..7ec432a4a 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.LaTeX.Lang Copyright : Copyright (C) 2018-2019 John MacFarlane @@ -18,11 +19,12 @@ module Text.Pandoc.Readers.LaTeX.Lang where import Prelude import qualified Data.Map as M +import qualified Data.Text as T import Text.Pandoc.BCP47 (Lang(..)) -polyglossiaLangToBCP47 :: M.Map String (String -> Lang) +polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang) polyglossiaLangToBCP47 = M.fromList - [ ("arabic", \o -> case filter (/=' ') o of + [ ("arabic", \o -> case T.filter (/=' ') o of "locale=algeria" -> Lang "ar" "" "DZ" [] "locale=mashriq" -> Lang "ar" "" "SY" [] "locale=libya" -> Lang "ar" "" "LY" [] @@ -30,7 +32,7 @@ polyglossiaLangToBCP47 = M.fromList "locale=mauritania" -> Lang "ar" "" "MR" [] "locale=tunisia" -> Lang "ar" "" "TN" [] _ -> Lang "ar" "" "" []) - , ("german", \o -> case filter (/=' ') o of + , ("german", \o -> case T.filter (/=' ') o of "spelling=old" -> Lang "de" "" "DE" ["1901"] "variant=austrian,spelling=old" -> Lang "de" "" "AT" ["1901"] @@ -40,11 +42,11 @@ polyglossiaLangToBCP47 = M.fromList "variant=swiss" -> Lang "de" "" "CH" [] _ -> Lang "de" "" "" []) , ("lsorbian", \_ -> Lang "dsb" "" "" []) - , ("greek", \o -> case filter (/=' ') o of + , ("greek", \o -> case T.filter (/=' ') o of "variant=poly" -> Lang "el" "" "polyton" [] "variant=ancient" -> Lang "grc" "" "" [] _ -> Lang "el" "" "" []) - , ("english", \o -> case filter (/=' ') o of + , ("english", \o -> case T.filter (/=' ') o of "variant=australian" -> Lang "en" "" "AU" [] "variant=canadian" -> Lang "en" "" "CA" [] "variant=british" -> Lang "en" "" "GB" [] @@ -52,7 +54,7 @@ polyglossiaLangToBCP47 = M.fromList "variant=american" -> Lang "en" "" "US" [] _ -> Lang "en" "" "" []) , ("usorbian", \_ -> Lang "hsb" "" "" []) - , ("latin", \o -> case filter (/=' ') o of + , ("latin", \o -> case T.filter (/=' ') o of "variant=classic" -> Lang "la" "" "" ["x-classic"] _ -> Lang "la" "" "" []) , ("slovenian", \_ -> Lang "sl" "" "" []) @@ -133,7 +135,7 @@ polyglossiaLangToBCP47 = M.fromList , ("vietnamese", \_ -> Lang "vi" "" "" []) ] -babelLangToBCP47 :: String -> Maybe Lang +babelLangToBCP47 :: T.Text -> Maybe Lang babelLangToBCP47 s = case s of "austrian" -> Just $ Lang "de" "" "AT" ["1901"] diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 14cb408b0..a01abda46 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -97,8 +97,8 @@ import Text.Parsec.Pos newtype DottedNum = DottedNum [Int] deriving (Show) -renderDottedNum :: DottedNum -> String -renderDottedNum (DottedNum xs) = +renderDottedNum :: DottedNum -> T.Text +renderDottedNum (DottedNum xs) = T.pack $ intercalate "." (map show xs) incrementDottedNum :: Int -> DottedNum -> DottedNum @@ -111,18 +111,18 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sMeta :: Meta , sQuoteContext :: QuoteContext , sMacros :: M.Map Text Macro - , sContainers :: [String] + , sContainers :: [Text] , sLogMessages :: [LogMessage] - , sIdentifiers :: Set.Set String + , sIdentifiers :: Set.Set Text , sVerbatimMode :: Bool - , sCaption :: (Maybe Inlines, Maybe String) + , sCaption :: (Maybe Inlines, Maybe Text) , sInListItem :: Bool , sInTableCell :: Bool , sLastHeaderNum :: DottedNum , sLastFigureNum :: DottedNum - , sLabels :: M.Map String [Inline] + , sLabels :: M.Map Text [Inline] , sHasChapters :: Bool - , sToggles :: M.Map String Bool + , sToggles :: M.Map Text Bool , sExpanded :: Bool } deriving Show @@ -202,7 +202,7 @@ withVerbatimMode parser = do rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) => [Tok] -> Bool -> LP m a -> LP m a - -> ParserT String s m (a, String) + -> ParserT Text s m (a, Text) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } @@ -233,16 +233,16 @@ rawLaTeXParser toks retokenize parser valParser = do , not (" " `T.isSuffixOf` result) -> result <> " " _ -> result - return (val, T.unpack result') + return (val, result') applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => String -> ParserT String s m String + => Text -> ParserT Text s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> - do let retokenize = toksToString <$> many (satisfyTok (const True)) + do let retokenize = untokenize <$> many (satisfyTok (const True)) pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate , sMacros = extractMacros pstate } - res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s)) + res <- runParserT retokenize lstate "math" (tokenize "math" s) case res of Left e -> Prelude.fail (show e) Right s' -> return s' @@ -307,7 +307,7 @@ totoks pos t = : totoks (incSourceColumn pos 2) rest' | c == '#' -> let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest - in case safeRead (T.unpack t1) of + in case safeRead t1 of Just i -> Tok pos (Arg i) ("#" <> t1) : totoks (incSourceColumn pos (1 + T.length t1)) t2 @@ -447,7 +447,7 @@ doMacros' n inp = do handleMacros n' spos name ts = do when (n' > 20) -- detect macro expansion loops - $ throwError $ PandocMacroLoop (T.unpack name) + $ throwError $ PandocMacroLoop name macros <- sMacros <$> getState case M.lookup name macros of Nothing -> mzero @@ -588,7 +588,7 @@ primEscape = do | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) | otherwise -> return (chr (ord c + 64)) Nothing -> Prelude.fail "Empty content of Esc1" - Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of + Esc2 -> case safeRead ("0x" <> T.drop 2 t) of Just x -> return (chr x) Nothing -> Prelude.fail $ "Could not read: " ++ T.unpack t _ -> Prelude.fail "Expected an Esc1 or Esc2 token" -- should not happen @@ -677,7 +677,7 @@ dimenarg = try $ do guard $ rest `elem` ["", "pt","pc","in","bp","cm","mm","dd","cc","sp"] return $ T.pack ['=' | ch] <> minus <> s -ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a +ignore :: (Monoid a, PandocMonad m) => Text -> ParserT s u m a ignore raw = do pos <- getPosition report $ SkippedContent raw pos diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index ddf469222..feacb8450 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -1,4 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Man Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane @@ -63,7 +65,7 @@ readWithMTokens :: PandocMonad m -> [RoffToken] -- ^ input -> m (Either PandocError a) readWithMTokens parser state input = - let leftF = PandocParsecError . intercalate "\n" $ show <$> input + let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input in mapLeft leftF `liftM` runParserT parser state "source" input parseMan :: PandocMonad m => ManParser m Pandoc @@ -141,7 +143,7 @@ parseTable = do isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','='] isHrule (_, [RoffTokens ss]) = case Foldable.toList ss of - [TextLine [RoffStr [c]]] -> c `elem` ['_','-','='] + [TextLine [RoffStr (T.unpack -> [c])]] -> c `elem` ['_','-','='] _ -> False isHrule _ = False @@ -191,7 +193,7 @@ memptyLine = msatisfy isEmptyLine where isEmptyLine EmptyLine = True isEmptyLine _ = False -mmacro :: PandocMonad m => String -> ManParser m RoffToken +mmacro :: PandocMonad m => T.Text -> ManParser m RoffToken mmacro mk = msatisfy isControlLine where isControlLine (ControlLine mk' _ _) | mk == mk' = True | otherwise = False @@ -284,7 +286,7 @@ parseInline = try $ do _ -> mzero handleInlineMacro :: PandocMonad m - => String -> [Arg] -> SourcePos -> ManParser m Inlines + => T.Text -> [Arg] -> SourcePos -> ManParser m Inlines handleInlineMacro mname args _pos = do case mname of "UR" -> parseLink args @@ -339,7 +341,7 @@ bareIP = msatisfy isBareIP where isBareIP (ControlLine "IP" [] _) = True isBareIP _ = False -endmacro :: PandocMonad m => String -> ManParser m () +endmacro :: PandocMonad m => T.Text -> ManParser m () endmacro name = void (mmacro name) <|> lookAhead (void newBlockMacro) <|> lookAhead eof @@ -356,7 +358,7 @@ parseCodeBlock = try $ do toks <- (mmacro "nf" *> manyTill codeline (endmacro "fi")) <|> (mmacro "EX" *> manyTill codeline (endmacro "EE")) optional (mmacro "in") - return $ codeBlock (intercalate "\n" $ catMaybes toks) + return $ codeBlock (T.intercalate "\n" $ catMaybes toks) where @@ -366,7 +368,7 @@ parseCodeBlock = try $ do ControlLine "PP" _ _ -> return $ Just "" -- .PP sometimes used for blank line ControlLine mname args pos -> do (Just . query getText <$> handleInlineMacro mname args pos) <|> - do report $ SkippedContent ('.':mname) pos + do report $ SkippedContent ("." <> mname) pos return Nothing Tbl _ _ pos -> do report $ SkippedContent "TABLE" pos @@ -375,12 +377,12 @@ parseCodeBlock = try $ do TextLine ss | not (null ss) , all isFontToken ss -> return Nothing - | otherwise -> return $ Just $ linePartsToString ss + | otherwise -> return $ Just $ linePartsToText ss isFontToken Font{} = True isFontToken _ = False - getText :: Inline -> String + getText :: Inline -> T.Text getText (Str s) = s getText Space = " " getText (Code _ s) = s @@ -416,8 +418,8 @@ listItem mbListType = try $ do (ControlLine _ args _) <- mmacro "IP" case args of (arg1 : _) -> do - let cs = linePartsToString arg1 - let cs' = if not ('.' `elem` cs || ')' `elem` cs) then cs ++ "." else cs + let cs = linePartsToText arg1 + let cs' = if not (T.any (== '.') cs || T.any (== ')') cs) then cs <> "." else cs let lt = case Parsec.runParser anyOrderedListMarker defaultParserState "list marker" cs' of Right (start, listtype, listdelim) @@ -467,7 +469,7 @@ parseLink args = do ControlLine _ endargs _ <- mmacro "UE" let url = case args of [] -> "" - (x:_) -> linePartsToString x + (x:_) -> linePartsToText x return $ link url "" contents <> case endargs of [] -> mempty @@ -479,7 +481,7 @@ parseEmailLink args = do ControlLine _ endargs _ <- mmacro "ME" let url = case args of [] -> "" - (x:_) -> "mailto:" ++ linePartsToString x + (x:_) -> "mailto:" <> linePartsToText x return $ link url "" contents <> case endargs of [] -> mempty @@ -490,6 +492,6 @@ skipUnknownMacro = do tok <- mmacroAny case tok of ControlLine mkind _ pos -> do - report $ SkippedContent ('.':mkind) pos + report $ SkippedContent ("." <> mkind) pos return mempty _ -> Prelude.fail "the impossible happened" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4807baada..f8349ea99 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,7 +1,9 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RelaxedPolyRec #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Markdown Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -19,14 +21,15 @@ import Prelude import Control.Monad import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BS -import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower) -import Data.List (intercalate, sortBy, transpose, elemIndex) +import Data.Char (isAlphaNum, isPunctuation, isSpace) +import Data.List (sortBy, transpose, elemIndex) import qualified Data.Map as M import Data.Maybe import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.YAML as YAML import qualified Data.YAML.Event as YE import System.FilePath (addExtension, takeExtension) @@ -47,7 +50,7 @@ import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) -type MarkdownParser m = ParserT [Char] ParserState m +type MarkdownParser m = ParserT Text ParserState m -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: PandocMonad m @@ -56,7 +59,7 @@ readMarkdown :: PandocMonad m -> m Pandoc readMarkdown opts s = do parsed <- readWithM parseMarkdown def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -77,7 +80,7 @@ isHruleChar '-' = True isHruleChar '_' = True isHruleChar _ = False -setextHChars :: String +setextHChars :: [Char] setextHChars = "=-" isBlank :: Char -> Bool @@ -96,30 +99,30 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: PandocMonad m => ParserT [Char] st m () +spnl :: PandocMonad m => ParserT Text st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -spnl' :: PandocMonad m => ParserT [Char] st m String +spnl' :: PandocMonad m => ParserT Text st m Text spnl' = try $ do xs <- many spaceChar ys <- option "" $ try $ (:) <$> newline <*> (many spaceChar <* notFollowedBy (char '\n')) - return (xs ++ ys) + return $ T.pack $ xs ++ ys -indentSpaces :: PandocMonad m => MarkdownParser m String +indentSpaces :: PandocMonad m => MarkdownParser m Text indentSpaces = try $ do tabStop <- getOption readerTabStop - count tabStop (char ' ') <|> - string "\t" <?> "indentation" + countChar tabStop (char ' ') <|> + textStr "\t" <?> "indentation" -nonindentSpaces :: PandocMonad m => MarkdownParser m String +nonindentSpaces :: PandocMonad m => MarkdownParser m Text nonindentSpaces = do n <- skipNonindentSpaces - return $ replicate n ' ' + return $ T.replicate n " " -- returns number of spaces parsed skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int @@ -139,8 +142,9 @@ inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) inlinesInBalancedBrackets = try $ char '[' >> withRaw (go 1) >>= parseFromString inlines . stripBracket . snd - where stripBracket [] = [] - stripBracket xs = if last xs == ']' then init xs else xs + where stripBracket t = case T.unsnoc t of + Just (t', ']') -> t' + _ -> t go :: PandocMonad m => Int -> MarkdownParser m () go 0 = return () go openBrackets = @@ -160,7 +164,7 @@ inlinesInBalancedBrackets = -- document structure -- -rawTitleBlockLine :: PandocMonad m => MarkdownParser m String +rawTitleBlockLine :: PandocMonad m => MarkdownParser m Text rawTitleBlockLine = do char '%' skipSpaces @@ -169,7 +173,7 @@ rawTitleBlockLine = do notFollowedBy blankline skipSpaces anyLine - return $ trim $ unlines (first:rest) + return $ trim $ T.unlines (first:rest) titleLine :: PandocMonad m => MarkdownParser m (F Inlines) titleLine = try $ do @@ -222,9 +226,9 @@ yamlMetaBlock = try $ do notFollowedBy blankline -- if --- is followed by a blank it's an HRULE rawYamlLines <- manyTill anyLine stopLine -- by including --- and ..., we allow yaml blocks with just comments: - let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) + let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml + newMetaF <- yamlBsToMeta $ UTF8.fromTextLazy $ TL.fromStrict rawYaml -- Since `<>` is left-biased, existing values are not touched: updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF } return mempty @@ -255,7 +259,7 @@ yamlBsToMeta bstr = do return . return $ mempty Left (_pos, err') -> do logMessage $ CouldNotParseYamlMetadata - err' pos + (T.pack err') pos return . return $ mempty nodeToKey :: PandocMonad m => YAML.Node YE.Pos -> m Text @@ -270,11 +274,11 @@ toMetaValue x = -- Note: a standard quoted or unquoted YAML value will -- not end in a newline, but a "block" set off with -- `|` or `>` will. - if (T.pack "\n") `T.isSuffixOf` x - then parseFromString' (asBlocks <$> parseBlocks) (xstring <> "\n") + if "\n" `T.isSuffixOf` x + then parseFromString' (asBlocks <$> parseBlocks) (x <> "\n") else parseFromString' ((asInlines <$> try pInlines) <|> (asBlocks <$> parseBlocks)) - xstring + x where pInlines = trimInlinesF . mconcat <$> manyTill inline eof asBlocks p = do p' <- p @@ -282,7 +286,6 @@ toMetaValue x = asInlines p = do p' <- p return $ MetaInlines (B.toList p') - xstring = T.unpack x checkBoolean :: Text -> Maybe Bool checkBoolean t = @@ -298,8 +301,8 @@ yamlToMetaValue (YAML.Scalar _ x) = case x of YAML.SStr t -> toMetaValue t YAML.SBool b -> return $ return $ MetaBool b - YAML.SFloat d -> return $ return $ MetaString (show d) - YAML.SInt i -> return $ return $ MetaString (show i) + YAML.SFloat d -> return $ return $ MetaString $ tshow d + YAML.SInt i -> return $ return $ MetaString $ tshow i YAML.SUnknown _ t -> case checkBoolean t of Just b -> return $ return $ MetaBool b @@ -315,7 +318,7 @@ yamlToMetaValue _ = return $ return $ MetaString "" yamlMap :: PandocMonad m => M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) - -> MarkdownParser m (F (M.Map String MetaValue)) + -> MarkdownParser m (F (M.Map Text MetaValue)) yamlMap o = do kvs <- forM (M.toList o) $ \(key, v) -> do k <- nodeToKey key @@ -323,12 +326,12 @@ yamlMap o = do let kvs' = filter (not . ignorable . fst) kvs (fmap M.fromList . sequence) <$> mapM toMeta kvs' where - ignorable t = (T.pack "_") `T.isSuffixOf` t + ignorable t = "_" `T.isSuffixOf` t toMeta (k, v) = do fv <- yamlToMetaValue v return $ do v' <- fv - return (T.unpack k, v') + return (k, v') stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () @@ -343,14 +346,14 @@ mmdTitleBlock = try $ do updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue) +kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue) kvPair allowEmpty = try $ do - key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - val <- trim <$> manyTill anyChar + key <- many1TillChar (alphaNum <|> oneOf "_- ") (char ':') + val <- trim <$> manyTillChar anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ allowEmpty || not (null val) - let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $B.text val + guard $ allowEmpty || not (T.null val) + let key' = T.concat $ T.words $ T.toLower key + let val' = MetaBlocks $ B.toList $ B.plain $ B.text val return (key',val') parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc @@ -380,13 +383,13 @@ referenceKey = try $ do (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL = fmap unwords $ many $ try $ do + let sourceURL = fmap T.unwords $ many $ try $ do skipMany spaceChar notFollowedBy' referenceTitle notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes notFollowedBy' (() <$ reference) - many1 $ notFollowedBy space >> litChar - let betweenAngles = try $ char '<' >> manyTill litChar (char '>') + many1Char $ notFollowedBy space >> litChar + let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle attr <- option nullAttr $ try $ @@ -411,20 +414,20 @@ referenceKey = try $ do updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty -referenceTitle :: PandocMonad m => MarkdownParser m String +referenceTitle :: PandocMonad m => MarkdownParser m Text referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar -- A link title in quotes -quotedTitle :: PandocMonad m => Char -> MarkdownParser m String +quotedTitle :: PandocMonad m => Char -> MarkdownParser m Text quotedTitle c = try $ do char c notFollowedBy spaces let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum) - let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar - let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c - unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder + let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> countChar 1 litChar + let nestedChunk = (\x -> T.singleton c <> x <> T.singleton c) <$> quotedTitle c + T.unwords . T.words . T.concat <$> manyTill (nestedChunk <|> regChunk) pEnder -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for @@ -440,21 +443,21 @@ abbrevKey = do blanklines return $ return mempty -noteMarker :: PandocMonad m => MarkdownParser m String -noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') +noteMarker :: PandocMonad m => MarkdownParser m Text +noteMarker = string "[^" >> many1TillChar (satisfy $ not . isBlank) (char ']') -rawLine :: PandocMonad m => MarkdownParser m String +rawLine :: PandocMonad m => MarkdownParser m Text rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: PandocMonad m => MarkdownParser m String +rawLines :: PandocMonad m => MarkdownParser m Text rawLines = do first <- anyLine rest <- many rawLine - return $ unlines (first:rest) + return $ T.unlines (first:rest) noteBlock :: PandocMonad m => MarkdownParser m (F Blocks) noteBlock = try $ do @@ -466,7 +469,7 @@ noteBlock = try $ do optional indentSpaces first <- rawLines rest <- many $ try $ blanklines >> indentSpaces >> rawLines - let raw = unlines (first:rest) ++ "\n" + let raw = T.unlines (first:rest) <> "\n" optional blanklines parsed <- parseFromString' parseBlocks raw oldnotes <- stateNotes' <$> getState @@ -510,7 +513,7 @@ block = do , para , plain ] <?> "block" - trace (take 60 $ show $ B.toList $ runF res defaultParserState) + trace (T.take 60 $ tshow $ B.toList $ runF res defaultParserState) return res -- @@ -570,7 +573,7 @@ mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr mmdHeaderIdentifier = do (_, raw) <- reference let raw' = trim $ stripFirstAndLast raw - let ident = concat $ words $ map toLower raw' + let ident = T.concat $ T.words $ T.toLower raw' let attr = (ident, [], []) guardDisabled Ext_implicit_header_references <|> registerImplicitHeader raw' attr @@ -600,20 +603,20 @@ setextHeader = try $ do <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m () +registerImplicitHeader :: PandocMonad m => Text -> Attr -> MarkdownParser m () registerImplicitHeader raw attr@(ident, _, _) - | null raw = return () + | T.null raw = return () | otherwise = do - let key = toKey $ "[" ++ raw ++ "]" + let key = toKey $ "[" <> raw <> "]" updateState $ \s -> - s { stateHeaderKeys = M.insert key (('#':ident,""), attr) + s { stateHeaderKeys = M.insert key (("#" <> ident,""), attr) (stateHeaderKeys s) } -- -- hrule block -- -hrule :: PandocMonad m => ParserT [Char] st m (F Blocks) +hrule :: PandocMonad m => ParserT Text st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -627,13 +630,13 @@ hrule = try $ do -- code blocks -- -indentedLine :: PandocMonad m => MarkdownParser m String +indentedLine :: PandocMonad m => MarkdownParser m Text indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) -> Maybe Int - -> ParserT [Char] ParserState m Int + -> ParserT Text ParserState m Int blockDelimiter f len = try $ do skipNonindentSpaces c <- lookAhead (satisfy f) @@ -652,11 +655,11 @@ attributes = try $ do attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr -identifier :: PandocMonad m => MarkdownParser m String +identifier :: PandocMonad m => MarkdownParser m Text identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." - return (first:rest) + return $ T.pack (first:rest) identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) identifierAttr = try $ do @@ -674,15 +677,15 @@ keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) keyValAttr = try $ do key <- identifier char '=' - val <- enclosed (char '"') (char '"') litChar - <|> enclosed (char '\'') (char '\'') litChar + val <- T.pack <$> enclosed (char '"') (char '"') litChar + <|> T.pack <$> enclosed (char '\'') (char '\'') litChar <|> ("" <$ try (string "\"\"")) <|> ("" <$ try (string "''")) - <|> many (escapedChar' <|> noneOf " \t\n\r}") + <|> manyChar (escapedChar' <|> noneOf " \t\n\r}") return $ \(id',cs,kvs) -> case key of "id" -> (val,cs,kvs) - "class" -> (id',cs ++ words val,kvs) + "class" -> (id',cs ++ T.words val,kvs) _ -> (id',cs,kvs ++ [(key,val)]) specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) @@ -690,12 +693,12 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute :: PandocMonad m => MarkdownParser m Text rawAttribute = do char '{' skipMany spaceChar char '=' - format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + format <- many1Char $ satisfy (\c -> isAlphaNum c || c `elem` ['-', '_']) skipMany spaceChar char '}' return format @@ -703,7 +706,7 @@ rawAttribute = do codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do indentchars <- nonindentSpaces - let indentLevel = length indentchars + let indentLevel = T.length indentchars c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing @@ -713,9 +716,9 @@ codeBlockFenced = try $ do <|> (Right <$> option ("",[],[]) (try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar))) blankline - contents <- intercalate "\n" <$> + contents <- T.intercalate "\n" <$> manyTill (gobbleAtMostSpaces indentLevel >> anyLine) (try $ do blockDelimiter (== c) (Just size) @@ -726,8 +729,8 @@ codeBlockFenced = try $ do Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers -toLanguageId :: String -> String -toLanguageId = map toLower . go +toLanguageId :: Text -> Text +toLanguageId = T.toLower . go where go "c++" = "cpp" go "objective-c" = "objectivec" go x = x @@ -737,11 +740,11 @@ codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines l <- indentedLine - return $ b ++ l)) + return $ b <> l)) optional blanklines classes <- getOption readerIndentedCodeClasses return $ return $ B.codeBlockWith ("", classes, []) $ - stripTrailingNewlines $ concat contents + stripTrailingNewlines $ T.concat contents lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks) lhsCodeBlock = do @@ -751,33 +754,33 @@ lhsCodeBlock = do <|> (return . B.codeBlockWith ("",["haskell"],[]) <$> lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String +lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m Text lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline - contents <- many1Till anyChar (try $ string "\\end{code}") + contents <- many1TillChar anyChar (try $ string "\\end{code}") blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String +lhsCodeBlockBird :: PandocMonad m => MarkdownParser m Text lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String +lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m Text lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String +lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m Text lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ Prelude.fail "Not in first column" lns <- many1 $ birdTrackLine c -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns + let lns' = if all (\ln -> T.null ln || T.take 1 ln == " ") lns + then map (T.drop 1) lns else lns blanklines - return $ intercalate "\n" lns' + return $ T.intercalate "\n" lns' -birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String +birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -791,12 +794,12 @@ birdTrackLine c = try $ do emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ') -emailBlockQuote :: PandocMonad m => MarkdownParser m [String] +emailBlockQuote :: PandocMonad m => MarkdownParser m [Text] emailBlockQuote = try $ do emailBlockQuoteStart - let emailLine = many $ nonEndline <|> try - (endline >> notFollowedBy emailBlockQuoteStart >> - return '\n') + let emailLine = manyChar $ nonEndline <|> try + (endline >> notFollowedBy emailBlockQuoteStart >> + return '\n') let emailSep = try (newline >> emailBlockQuoteStart) first <- emailLine rest <- many $ try $ emailSep >> emailLine @@ -809,7 +812,7 @@ blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ T.intercalate "\n" raw <> "\n\n" return $ B.blockQuote <$> contents -- @@ -833,7 +836,7 @@ orderedListStart mbstydelim = try $ do skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number (do guardDisabled Ext_fancy_lists - start <- many1 digit >>= safeRead + start <- many1Char digit >>= safeRead char '.' gobbleSpaces 1 <|> () <$ lookAhead newline optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) @@ -857,7 +860,7 @@ orderedListStart mbstydelim = try $ do listStart :: PandocMonad m => MarkdownParser m () listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing) -listLine :: PandocMonad m => Int -> MarkdownParser m String +listLine :: PandocMonad m => Int -> MarkdownParser m Text listLine continuationIndent = try $ do notFollowedBy' (do gobbleSpaces continuationIndent skipMany spaceChar @@ -867,19 +870,19 @@ listLine continuationIndent = try $ do optional (() <$ gobbleSpaces continuationIndent) listLineCommon -listLineCommon :: PandocMonad m => MarkdownParser m String -listLineCommon = concat <$> manyTill - ( many1 (satisfy $ \c -> c `notElem` ['\n', '<', '`']) +listLineCommon :: PandocMonad m => MarkdownParser m Text +listLineCommon = T.concat <$> manyTill + ( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`']) <|> fmap snd (withRaw code) <|> fmap snd (htmlTag isCommentTag) - <|> count 1 anyChar + <|> countChar 1 anyChar ) newline -- parse raw text for one list item, excluding start marker and continuations rawListItem :: PandocMonad m => Bool -- four space rule -> MarkdownParser m a - -> MarkdownParser m (String, Int) + -> MarkdownParser m (Text, Int) rawListItem fourSpaceRule start = try $ do pos1 <- getPosition start @@ -892,14 +895,14 @@ rawListItem fourSpaceRule start = try $ do notFollowedBy (() <$ codeBlockFenced) notFollowedBy blankline listLine continuationIndent) - blanks <- many blankline - let result = unlines (first:rest) ++ blanks + blanks <- manyChar blankline + let result = T.unlines (first:rest) <> blanks return (result, continuationIndent) -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: PandocMonad m => Int -> MarkdownParser m String +listContinuation :: PandocMonad m => Int -> MarkdownParser m Text listContinuation continuationIndent = try $ do x <- try $ do notFollowedBy blankline @@ -913,12 +916,12 @@ listContinuation continuationIndent = try $ do notFollowedByDivCloser gobbleSpaces continuationIndent <|> notFollowedBy' listStart anyLineNewline - blanks <- many blankline - return $ concat (x:xs) ++ blanks + blanks <- manyChar blankline + return $ T.concat (x:xs) <> blanks -- Variant of blanklines that doesn't require blank lines -- before a fence or eof. -blanklines' :: PandocMonad m => MarkdownParser m [Char] +blanklines' :: PandocMonad m => MarkdownParser m Text blanklines' = blanklines <|> try checkDivCloser where checkDivCloser = do guardEnabled Ext_fenced_divs @@ -954,7 +957,7 @@ listItem fourSpaceRule start = try $ do (first, continuationIndent) <- rawListItem fourSpaceRule start continuations <- many (listContinuation continuationIndent) -- parse the extracted block, which may contain various block elements: - let raw = concat (first:continuations) + let raw = T.concat (first:continuations) contents <- parseFromString' parseBlocks raw updateState (\st -> st {stateParserContext = oldContext}) exts <- getOption readerExtensions @@ -990,7 +993,7 @@ defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' tabStop <- getOption readerTabStop - let remaining = tabStop - (length sps + 1) + let remaining = tabStop - (T.length sps + 1) if remaining > 0 then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar else mzero @@ -1001,11 +1004,11 @@ definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact term <- parseFromString' (trimInlinesF <$> inlines) rawLine' - contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw + contents <- mapM (parseFromString' parseBlocks . (<> "\n")) raw optional blanklines return $ liftM2 (,) term (sequence contents) -defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String +defRawBlock :: PandocMonad m => Bool -> MarkdownParser m Text defRawBlock compact = try $ do hasBlank <- option False $ blankline >> return True defListMarker @@ -1020,13 +1023,13 @@ defRawBlock compact = try $ do <|> notFollowedBy defListMarker anyLine ) rawlines <- many dline - cont <- fmap concat $ many $ try $ do + cont <- fmap T.concat $ many $ try $ do trailing <- option "" blanklines ln <- indentSpaces >> notFollowedBy blankline >> anyLine lns <- many dline - return $ trailing ++ unlines (ln:lns) - return $ trimr (firstline ++ unlines rawlines ++ cont) ++ - if hasBlank || not (null cont) then "\n\n" else "" + return $ trailing <> T.unlines (ln:lns) + return $ trimr (firstline <> T.unlines rawlines <> cont) <> + if hasBlank || not (T.null cont) then "\n\n" else "" definitionList :: PandocMonad m => MarkdownParser m (F Blocks) definitionList = try $ do @@ -1063,7 +1066,7 @@ para = try $ do | not (null alt) -> -- the fig: at beginning of title indicates a figure return $ B.singleton - $ Image attr alt (src,'f':'i':'g':':':tit) + $ Image attr alt (src, "fig:" <> tit) _ -> return x' | otherwise = x result <- implicitFigures . trimInlinesF <$> inlines1 @@ -1082,7 +1085,7 @@ para = try $ do inHtmlBlock <- stateInHtmlBlock <$> getState case inHtmlBlock of Just "div" -> () <$ - lookAhead (htmlTag (~== TagClose "div")) + lookAhead (htmlTag (~== TagClose ("div" :: Text))) _ -> mzero <|> do guardEnabled Ext_fenced_divs divLevel <- stateFencedDivLevel <$> getState @@ -1098,7 +1101,7 @@ plain = fmap B.plain . trimInlinesF <$> inlines1 -- raw html -- -htmlElement :: PandocMonad m => MarkdownParser m String +htmlElement :: PandocMonad m => MarkdownParser m Text htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> fmap snd (htmlTag isBlockTag) @@ -1132,14 +1135,14 @@ htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ if null first + return $ if T.null first then mempty else return $ B.rawBlock "html" first -strictHtmlBlock :: PandocMonad m => MarkdownParser m String +strictHtmlBlock :: PandocMonad m => MarkdownParser m Text strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: PandocMonad m => MarkdownParser m String +rawVerbatimBlock :: PandocMonad m => MarkdownParser m Text rawVerbatimBlock = htmlInBalanced isVerbTag where isVerbTag (TagOpen "pre" _) = True isVerbTag (TagOpen "style" _) = True @@ -1150,13 +1153,13 @@ rawVerbatimBlock = htmlInBalanced isVerbTag rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "tex" . trim . concat <$> - many1 ((++) <$> rawConTeXtEnvironment <*> spnl')) - <|> (B.rawBlock "tex" . trim . concat <$> - many1 ((++) <$> rawLaTeXBlock <*> spnl')) + result <- (B.rawBlock "tex" . trim . T.concat <$> + many1 ((<>) <$> rawConTeXtEnvironment <*> spnl')) + <|> (B.rawBlock "tex" . trim . T.concat <$> + many1 ((<>) <$> rawLaTeXBlock <*> spnl')) return $ case B.toList result of [RawBlock _ cs] - | all (`elem` [' ','\t','\n']) cs -> return mempty + | T.all (`elem` [' ','\t','\n']) cs -> return mempty -- don't create a raw block for suppressed macro defs _ -> return result @@ -1186,7 +1189,7 @@ rawHtmlBlocks = do return result -- remove markdown="1" attribute -stripMarkdownAttribute :: String -> String +stripMarkdownAttribute :: Text -> Text stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s where filterAttrib (TagOpen t as) = TagOpen t [(k,v) | (k,v) <- as, k /= "markdown"] @@ -1211,7 +1214,7 @@ lineBlock = try $ do -- and the length including trailing space. dashedLine :: PandocMonad m => Char - -> ParserT [Char] st m (Int, Int) + -> ParserT Text st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1232,9 +1235,9 @@ simpleTableHeader headless = try $ do dashes <- many1 (dashedLine '-') newline let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' + let indices = scanl (+) (T.length initSp) lines' -- If no header, calculate alignment on basis of first row of text - rawHeads <- fmap (tail . splitStringByIndices (init indices)) $ + rawHeads <- fmap (tail . splitTextByIndices (init indices)) $ if headless then lookAhead anyLine else return rawContent @@ -1250,15 +1253,15 @@ simpleTableHeader headless = try $ do -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the -- dashed line under the rows. -alignType :: [String] +alignType :: [Text] -> Int -> Alignment alignType [] _ = AlignDefault alignType strLst len = - let nonempties = filter (not . null) $ map trimr strLst + let nonempties = filter (not . T.null) $ map trimr strLst (leftSpace, rightSpace) = - case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) + case sortBy (comparing T.length) nonempties of + (x:_) -> (T.head x `elem` [' ', 't'], T.length x < len) [] -> (False, False) in case (leftSpace, rightSpace) of (True, False) -> AlignRight @@ -1267,7 +1270,7 @@ alignType strLst len = (False, False) -> AlignDefault -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: PandocMonad m => MarkdownParser m String +tableFooter :: PandocMonad m => MarkdownParser m Text tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines' -- Parse a table separator - dashed line. @@ -1277,12 +1280,12 @@ tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. rawTableLine :: PandocMonad m => [Int] - -> MarkdownParser m [String] + -> MarkdownParser m [Text] rawTableLine indices = do notFollowedBy' (blanklines' <|> tableFooter) - line <- many1Till anyChar newline + line <- many1TillChar anyChar newline return $ map trim $ tail $ - splitStringByIndices (init indices) line + splitTextByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). tableLine :: PandocMonad m @@ -1297,7 +1300,7 @@ multilineRow :: PandocMonad m -> MarkdownParser m (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) - let cols = map unlines $ transpose colLines + let cols = map T.unlines $ transpose colLines fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' @@ -1344,7 +1347,7 @@ multilineTableHeader headless = try $ do dashes <- many1 (dashedLine '-') newline let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' + let indices = scanl (+) (T.length initSp) lines' -- compensate for the fact that intercolumn spaces are -- not included in the last index: let indices' = case reverse indices of @@ -1352,14 +1355,14 @@ multilineTableHeader headless = try $ do (x:xs) -> reverse (x+1:xs) rawHeadsList <- if headless then fmap (map (:[]) . tail . - splitStringByIndices (init indices')) $ lookAhead anyLine + splitTextByIndices (init indices')) $ lookAhead anyLine else return $ transpose $ map - (tail . splitStringByIndices (init indices')) + (tail . splitTextByIndices (init indices')) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" - else map (unlines . map trim) rawHeadsList + else map (T.unlines . map trim) rawHeadsList heads <- fmap sequence $ mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads return (heads, aligns, indices') @@ -1393,7 +1396,7 @@ pipeTable = try $ do lines' <- many pipeTableRow let lines'' = map (take (length aligns) <$>) lines' let maxlength = maximum $ - map (\x -> length . stringify $ runF x def) (heads' : lines'') + map (\x -> T.length . stringify $ runF x def) (heads' : lines'') numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> @@ -1430,7 +1433,7 @@ pipeTableCell = return $ B.plain <$> result) <|> return mempty -pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1446,12 +1449,12 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. -scanForPipe :: PandocMonad m => ParserT [Char] st m () +scanForPipe :: PandocMonad m => ParserT Text st m () scanForPipe = do inp <- getInput - case break (\c -> c == '\n' || c == '|') inp of - (_,'|':_) -> return () - _ -> mzero + case T.break (\c -> c == '\n' || c == '|') inp of + (_, T.uncons -> Just ('|', _)) -> return () + _ -> mzero -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in @@ -1561,7 +1564,7 @@ escapedChar = do result <- escapedChar' case result of ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space - _ -> return $ return $ B.str [result] + _ -> return $ return $ B.str $ T.singleton result ltSign :: PandocMonad m => MarkdownParser m (F Inlines) ltSign = do @@ -1574,12 +1577,12 @@ exampleRef :: PandocMonad m => MarkdownParser m (F Inlines) exampleRef = try $ do guardEnabled Ext_example_lists char '@' - lab <- many1 (alphaNum <|> oneOf "-_") + lab <- many1Char (alphaNum <|> oneOf "-_") return $ do st <- askF return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + Just n -> B.str $ tshow n + Nothing -> B.str $ "@" <> lab symbol :: PandocMonad m => MarkdownParser m (F Inlines) symbol = do @@ -1587,16 +1590,16 @@ symbol = do <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ return $ B.str [result] + return $ return $ B.str $ T.singleton result -- parses inline code, between n `s and n `s code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- (trim . concat) <$> + result <- (trim . T.concat) <$> manyTill (notFollowedBy (inList >> listStart) >> - (many1 (noneOf "`\n") <|> many1 (char '`') <|> + (many1Char (noneOf "`\n") <|> many1Char (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " "))) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) @@ -1627,10 +1630,10 @@ enclosure c = do guardDisabled Ext_intraword_underscores <|> guard (c == '*') <|> (guard =<< notAfterString) - cs <- many1 (char c) + cs <- many1Char (char c) (return (B.str cs) <>) <$> whitespace <|> - case length cs of + case T.length cs of 3 -> three c 2 -> two c mempty 1 -> one c mempty @@ -1653,7 +1656,7 @@ three c = do (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents)) <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents)) <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents)) - <|> return (return (B.str [c,c,c]) <> contents) + <|> return (return (B.str $ T.pack [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. @@ -1662,7 +1665,7 @@ two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) (ender c 2 >> updateLastStrPos >> return (B.strong <$> (prefix' <> contents))) - <|> return (return (B.str [c,c]) <> (prefix' <> contents)) + <|> return (return (B.str $ T.pack [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. @@ -1673,7 +1676,7 @@ one c prefix' = do notFollowedBy (ender c 1) >> two c mempty) ) (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents))) - <|> return (return (B.str [c]) <> (prefix' <> contents)) + <|> return (return (B.str $ T.singleton c) <> (prefix' <> contents)) strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) strongOrEmph = enclosure '*' <|> enclosure '_' @@ -1717,16 +1720,16 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: PandocMonad m => ParserT [Char] st m Char +nonEndline :: PandocMonad m => ParserT Text st m Char nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) str = do - result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) + result <- many1Char (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) updateLastStrPos (do guardEnabled Ext_smart abbrevs <- getOption readerAbbreviations - if not (null result) && last result == '.' && result `Set.member` abbrevs + if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs then try (do ils <- whitespace -- ?? lookAhead alphaNum -- replace space after with nonbreaking space @@ -1766,36 +1769,36 @@ endline = try $ do -- -- a reference label for a link -reference :: PandocMonad m => MarkdownParser m (F Inlines, String) +reference :: PandocMonad m => MarkdownParser m (F Inlines, Text) reference = do guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^") guardDisabled Ext_citations <|> notFollowedBy' (string "[@") withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -parenthesizedChars :: PandocMonad m => MarkdownParser m [Char] +parenthesizedChars :: PandocMonad m => MarkdownParser m Text parenthesizedChars = do result <- charsInBalanced '(' ')' litChar - return $ '(' : result ++ ")" + return $ "(" <> result <> ")" -- source for a link, with optional title -source :: PandocMonad m => MarkdownParser m (String, String) +source :: PandocMonad m => MarkdownParser m (Text, Text) source = do char '(' skipSpaces let urlChunk = try parenthesizedChars - <|> (notFollowedBy (oneOf " )") >> count 1 litChar) - <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')")) - let sourceURL = (unwords . words . concat) <$> many urlChunk + <|> (notFollowedBy (oneOf " )") >> countChar 1 litChar) + <|> try (many1Char spaceChar <* notFollowedBy (oneOf "\"')")) + let sourceURL = (T.unwords . T.words . T.concat) <$> many urlChunk let betweenAngles = try $ - char '<' >> manyTill litChar (char '>') + char '<' >> manyTillChar litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" $ try $ spnl >> linkTitle skipSpaces char ')' return (escapeURI $ trimr src, tit) -linkTitle :: PandocMonad m => MarkdownParser m String +linkTitle :: PandocMonad m => MarkdownParser m Text linkTitle = quotedTitle '"' <|> quotedTitle '\'' link :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1823,13 +1826,13 @@ isSmallCaps :: Attr -> Bool isSmallCaps ("",["smallcaps"],[]) = True isSmallCaps ("",[],kvs) = case lookup "style" kvs of - Just s -> map toLower (filter (`notElem` " \t;") s) == + Just s -> T.toLower (T.filter (`notElem` [' ', '\t', ';']) s) == "font-variant:small-caps" Nothing -> False isSmallCaps _ = False regLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) + => (Attr -> Text -> Text -> Inlines -> Inlines) -> F Inlines -> MarkdownParser m (F Inlines) regLink constructor lab = try $ do @@ -1840,8 +1843,8 @@ regLink constructor lab = try $ do -- a link like [this][ref] or [this][] or [this] referenceLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) - -> (F Inlines, String) + => (Attr -> Text -> Text -> Inlines -> Inlines) + -> (F Inlines, Text) -> MarkdownParser m (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1863,7 +1866,7 @@ referenceLink constructor (lab, raw) = do parsedRaw' <- parsedRaw fallback' <- fallback return $ B.str "[" <> fallback' <> B.str "]" <> - (if sp && not (null raw) then B.space else mempty) <> + (if sp && not (T.null raw) then B.space else mempty) <> parsedRaw' return $ do keys <- asksF stateKeys @@ -1878,19 +1881,19 @@ referenceLink constructor (lab, raw) = do else makeFallback Just ((src,tit), attr) -> constructor attr src tit <$> lab -dropBrackets :: String -> String -dropBrackets = reverse . dropRB . reverse . dropLB - where dropRB (']':xs) = xs - dropRB xs = xs - dropLB ('[':xs) = xs - dropLB xs = xs +dropBrackets :: Text -> Text +dropBrackets = dropRB . dropLB + where dropRB (T.unsnoc -> Just (xs,']')) = xs + dropRB xs = xs + dropLB (T.uncons -> Just ('[',xs)) = xs + dropLB xs = xs bareURL :: PandocMonad m => MarkdownParser m (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris getState >>= guard . stateAllowLinks (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress) - notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") + notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text)) return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig) autoLink :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1902,19 +1905,20 @@ autoLink = try $ do -- is finished, because the uri parser tries to avoid parsing -- final punctuation. for example: in `<http://hi---there>`, -- the URI parser will stop before the dashes. - extra <- fromEntities <$> manyTill nonspaceChar (char '>') + extra <- fromEntities <$> manyTillChar nonspaceChar (char '>') attr <- option ("", [cls], []) $ try $ guardEnabled Ext_link_attributes >> attributes - return $ return $ B.linkWith attr (src ++ escapeURI extra) "" - (B.str $ orig ++ extra) + return $ return $ B.linkWith attr (src <> escapeURI extra) "" + (B.str $ orig <> extra) image :: PandocMonad m => MarkdownParser m (F Inlines) image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor attr' src = case takeExtension src of - "" -> B.imageWith attr' (addExtension src defaultExt) + let constructor attr' src = case takeExtension (T.unpack src) of + "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src) + $ T.unpack defaultExt) _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) @@ -1926,7 +1930,7 @@ note = try $ do return $ do notes <- asksF stateNotes' case M.lookup ref notes of - Nothing -> return $ B.str $ "[^" ++ ref ++ "]" + Nothing -> return $ B.str $ "[^" <> ref <> "]" Just (_pos, contents) -> do st <- askF -- process the note in a context that doesn't resolve @@ -1949,29 +1953,29 @@ rawLaTeXInline' = try $ do s <- rawLaTeXInline return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String +rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) - <|> many1 letter - contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar) - (try $ string "\\stop" >> string completion) - return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion + <|> many1Char letter + contents <- manyTill (rawConTeXtEnvironment <|> countChar 1 anyChar) + (try $ string "\\stop" >> textStr completion) + return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion -inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String +inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text inBrackets parser = do char '[' - contents <- many parser + contents <- manyChar parser char ']' - return $ "[" ++ contents ++ "]" + return $ "[" <> contents <> "]" spanHtml :: PandocMonad m => MarkdownParser m (F Inlines) spanHtml = try $ do guardEnabled Ext_native_spans - (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) - contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) []) + contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text))) let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs + let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ if isSmallCaps (ident, classes, keyvals) then B.smallcaps <$> contents @@ -1980,20 +1984,20 @@ spanHtml = try $ do divHtml :: PandocMonad m => MarkdownParser m (F Blocks) divHtml = try $ do guardEnabled Ext_native_divs - (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) + (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) []) -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just "div" } bls <- option "" (blankline >> option "" blanklines) contents <- mconcat <$> - many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block) - closed <- option False (True <$ htmlTag (~== TagClose "div")) + many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block) + closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text))) if closed then do updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs + let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) <$> contents else -- avoid backtracing @@ -2005,7 +2009,7 @@ divFenced = try $ do string ":::" skipMany (char ':') skipMany spaceChar - attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar) + attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) skipMany spaceChar skipMany (char ':') blankline @@ -2047,7 +2051,7 @@ emoji :: PandocMonad m => MarkdownParser m (F Inlines) emoji = try $ do guardEnabled Ext_emoji char ':' - emojikey <- many1 (oneOf emojiChars) + emojikey <- many1Char (oneOf emojiChars) char ':' case emojiToInline emojikey of Just i -> return (return $ B.singleton i) @@ -2077,14 +2081,14 @@ textualCite = try $ do mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite case mbrest of Just (rest, raw) -> - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) + return $ (flip B.cite (B.text $ "@" <> key <> " " <> raw) . (first:)) <$> rest Nothing -> (do (cs, raw) <- withRaw $ bareloc first - let (spaces',raw') = span isSpace raw - spc | null spaces' = mempty - | otherwise = B.space + let (spaces',raw') = T.span isSpace raw + spc | T.null spaces' = mempty + | otherwise = B.space lab <- parseFromString' inlines $ dropBrackets raw' fallback <- referenceLink B.linkWith (lab,raw') return $ do @@ -2092,12 +2096,12 @@ textualCite = try $ do cs' <- cs return $ case B.toList fallback' of - Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback' - _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw)) + Link{}:_ -> B.cite [first] (B.str $ "@" <> key) <> spc <> fallback' + _ -> B.cite cs' (B.text $ "@" <> key <> " " <> raw)) <|> return (do st <- askF return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] $ B.str $ '@':key) + Just n -> B.str $ tshow n + _ -> B.cite [first] $ B.str $ "@" <> key) bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation]) bareloc c = try $ do diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 35bb8e3eb..07240e951 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE RelaxedPolyRec #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RelaxedPolyRec #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- | Module : Text.Pandoc.Readers.MediaWiki @@ -24,11 +25,12 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isDigit, isSpace) import qualified Data.Foldable as F -import Data.List (intercalate, intersperse, isPrefixOf) +import Data.List (intersperse) import Data.Maybe (fromMaybe, maybeToList) import Data.Sequence (ViewL (..), viewl, (<|)) import qualified Data.Set as Set -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B @@ -39,7 +41,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, - trim) + trim, splitTextBy, tshow) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) @@ -57,7 +59,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (unpack (crFilter s) ++ "\n") + (crFilter s <> "\n") case parsed of Right result -> return result Left e -> throwError e @@ -66,12 +68,12 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int , mwNextLinkNumber :: Int , mwCategoryLinks :: [Inlines] - , mwIdentifierList :: Set.Set String + , mwIdentifierList :: Set.Set Text , mwLogMessages :: [LogMessage] , mwInTT :: Bool } -type MWParser m = ParserT [Char] MWState m +type MWParser m = ParserT Text MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions @@ -105,58 +107,58 @@ specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" -sym :: PandocMonad m => String -> MWParser m () -sym s = () <$ try (string s) +sym :: PandocMonad m => Text -> MWParser m () +sym s = () <$ try (string $ T.unpack s) -newBlockTags :: [String] +newBlockTags :: [Text] newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"] -isBlockTag' :: Tag String -> Bool +isBlockTag' :: Tag Text -> Bool isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag = isBlockTag tag -isInlineTag' :: Tag String -> Bool +isInlineTag' :: Tag Text -> Bool isInlineTag' (TagComment _) = True isInlineTag' t = not (isBlockTag' t) -eitherBlockOrInline :: [String] +eitherBlockOrInline :: [Text] eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", "map", "area", "object"] htmlComment :: PandocMonad m => MWParser m () htmlComment = () <$ htmlTag isCommentTag -inlinesInTags :: PandocMonad m => String -> MWParser m Inlines +inlinesInTags :: PandocMonad m => Text -> MWParser m Inlines inlinesInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else trimInlines . mconcat <$> manyTill inline (htmlTag (~== TagClose tag)) -blocksInTags :: PandocMonad m => String -> MWParser m Blocks +blocksInTags :: PandocMonad m => Text -> MWParser m Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) let closer = if tag == "li" - then htmlTag (~== TagClose "li") + then htmlTag (~== TagClose ("li" :: Text)) <|> lookAhead ( - htmlTag (~== TagOpen "li" []) - <|> htmlTag (~== TagClose "ol") - <|> htmlTag (~== TagClose "ul")) + htmlTag (~== TagOpen ("li" :: Text) []) + <|> htmlTag (~== TagClose ("ol" :: Text)) + <|> htmlTag (~== TagClose ("ul" :: Text))) else htmlTag (~== TagClose tag) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else mconcat <$> manyTill block closer -charsInTags :: PandocMonad m => String -> MWParser m [Char] -charsInTags tag = try $ do +textInTags :: PandocMonad m => Text -> MWParser m Text +textInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return "" - else manyTill anyChar (htmlTag (~== TagClose tag)) + else T.pack <$> manyTill anyChar (htmlTag (~== TagClose tag)) -- -- main parser @@ -192,7 +194,7 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res para :: PandocMonad m => MWParser m Blocks @@ -234,16 +236,16 @@ table = do else (replicate cols mempty, hdr:rows') return $ B.table caption cellspecs headers rows -parseAttrs :: PandocMonad m => MWParser m [(String,String)] +parseAttrs :: PandocMonad m => MWParser m [(Text,Text)] parseAttrs = many1 parseAttr -parseAttr :: PandocMonad m => MWParser m (String, String) +parseAttr :: PandocMonad m => MWParser m (Text, Text) parseAttr = try $ do skipMany spaceChar - k <- many1 letter + k <- many1Char letter char '=' - v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) - <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|') + v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"')) + <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) tableStart :: PandocMonad m => MWParser m () @@ -293,8 +295,8 @@ tableCell = try $ do notFollowedBy (char '|') skipMany spaceChar pos' <- getPosition - ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> - ((snd <$> withRaw table) <|> count 1 anyChar)) + ls <- T.concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> + ((snd <$> withRaw table) <|> countChar 1 anyChar)) bs <- parseFromString (do setPosition pos' mconcat <$> many block) ls let align = case lookup "align" attrs of @@ -307,48 +309,49 @@ tableCell = try $ do Nothing -> 0.0 return ((align, width), bs) -parseWidth :: String -> Maybe Double +parseWidth :: Text -> Maybe Double parseWidth s = - case reverse s of - ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds) - _ -> Nothing + case T.unsnoc s of + Just (ds, '%') | T.all isDigit ds -> safeRead $ "0." <> ds + _ -> Nothing -template :: PandocMonad m => MWParser m String +template :: PandocMonad m => MWParser m Text template = try $ do string "{{" notFollowedBy (char '{') lookAhead $ letter <|> digit <|> char ':' - let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar + let chunk = template <|> variable <|> many1Char (noneOf "{}") <|> countChar 1 anyChar contents <- manyTill chunk (try $ string "}}") - return $ "{{" ++ concat contents ++ "}}" + return $ "{{" <> T.concat contents <> "}}" blockTag :: PandocMonad m => MWParser m Blocks blockTag = do (tag, _) <- lookAhead $ htmlTag isBlockTag' case tag of TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote" - TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre" + TagOpen "pre" _ -> B.codeBlock . trimCode <$> textInTags "pre" TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs TagOpen "source" attrs -> syntaxhighlight "source" attrs TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$> - charsInTags "haskell" + textInTags "haskell" TagOpen "gallery" _ -> blocksInTags "gallery" TagOpen "p" _ -> mempty <$ htmlTag (~== tag) TagClose "p" -> mempty <$ htmlTag (~== tag) _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag) -trimCode :: String -> String -trimCode ('\n':xs) = stripTrailingNewlines xs -trimCode xs = stripTrailingNewlines xs +trimCode :: Text -> Text +trimCode t = case T.uncons t of + Just ('\n', xs) -> stripTrailingNewlines xs + _ -> stripTrailingNewlines t -syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks +syntaxhighlight :: PandocMonad m => Text -> [Attribute Text] -> MWParser m Blocks syntaxhighlight tag attrs = try $ do let mblang = lookup "lang" attrs let mbstart = lookup "start" attrs let mbline = lookup "line" attrs let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart - contents <- charsInTags tag + contents <- textInTags tag return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents hrule :: PandocMonad m => MWParser m Blocks @@ -362,17 +365,17 @@ preformatted = try $ do guardColumnOne char ' ' let endline' = B.linebreak <$ try (newline <* char ' ') - let whitespace' = B.str <$> many1 ('\160' <$ spaceChar) + let whitespace' = B.str <$> many1Char ('\160' <$ spaceChar) let spToNbsp ' ' = '\160' spToNbsp x = x let nowiki' = mconcat . intersperse B.linebreak . map B.str . - lines . fromEntities . map spToNbsp <$> try - (htmlTag (~== TagOpen "nowiki" []) *> - manyTill anyChar (htmlTag (~== TagClose "nowiki"))) + T.lines . fromEntities . T.map spToNbsp <$> try + (htmlTag (~== TagOpen ("nowiki" :: Text) []) *> + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text)))) let inline' = whitespace' <|> endline' <|> nowiki' <|> try (notFollowedBy newline *> inline) contents <- mconcat <$> many1 inline' - let spacesStr (Str xs) = all isSpace xs + let spacesStr (Str xs) = T.all isSpace xs spacesStr _ = False if F.all spacesStr contents then return mempty @@ -385,7 +388,7 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode strToCode x = x normalizeCode [] = [] normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 = - normalizeCode $ Code a1 (x ++ y) : zs + normalizeCode $ Code a1 (x <> y) : zs normalizeCode (x:xs) = x : normalizeCode xs header :: PandocMonad m => MWParser m Blocks @@ -400,22 +403,22 @@ header = try $ do -- See #4731: modifyIdentifier :: Attr -> Attr modifyIdentifier (ident,cl,kv) = (ident',cl,kv) - where ident' = map (\c -> if c == '-' then '_' else c) ident + where ident' = T.map (\c -> if c == '-' then '_' else c) ident bulletList :: PandocMonad m => MWParser m Blocks bulletList = B.bulletList <$> ( many1 (listItem '*') - <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <* - optional (htmlTag (~== TagClose "ul"))) ) + <|> (htmlTag (~== TagOpen ("ul" :: Text) []) *> spaces *> many (listItem '*' <|> li) <* + optional (htmlTag (~== TagClose ("ul" :: Text)))) ) orderedList :: PandocMonad m => MWParser m Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) <|> try - (do (tag,_) <- htmlTag (~== TagOpen "ol" []) + (do (tag,_) <- htmlTag (~== TagOpen ("ol" :: Text) []) spaces items <- many (listItem '#' <|> li) - optional (htmlTag (~== TagClose "ol")) + optional (htmlTag (~== TagClose ("ol" :: Text))) let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) @@ -452,7 +455,7 @@ anyListStart :: PandocMonad m => MWParser m Char anyListStart = guardColumnOne >> oneOf "*#:;" li :: PandocMonad m => MWParser m Blocks -li = lookAhead (htmlTag (~== TagOpen "li" [])) *> +li = lookAhead (htmlTag (~== TagOpen ("li" :: Text) [])) *> (firstParaToPlain <$> blocksInTags "li") <* spaces listItem :: PandocMonad m => Char -> MWParser m Blocks @@ -464,13 +467,13 @@ listItem c = try $ do else do skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ string extras *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) contents <- parseFromString (do setPosition pos' many1 $ listItem' c) - (unlines (first : rest)) + (T.unlines (first : rest)) case c of '*' -> return $ B.bulletList contents '#' -> return $ B.orderedList contents @@ -484,20 +487,20 @@ listItem c = try $ do -- }} -- * next list item -- which seems to be valid mediawiki. -listChunk :: PandocMonad m => MWParser m String -listChunk = template <|> count 1 anyChar +listChunk :: PandocMonad m => MWParser m Text +listChunk = template <|> countChar 1 anyChar listItem' :: PandocMonad m => Char -> MWParser m Blocks listItem' c = try $ do listStart c skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ char c *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) parseFromString (do setPosition pos' firstParaToPlain . mconcat <$> many1 block) - $ unlines $ first : rest + $ T.unlines $ first : rest firstParaToPlain :: Blocks -> Blocks firstParaToPlain contents = @@ -528,23 +531,23 @@ inline = whitespace <|> special str :: PandocMonad m => MWParser m Inlines -str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) +str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) math :: PandocMonad m => MWParser m Inlines -math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) - <|> (B.math . trim <$> charsInTags "math") - <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) - <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) +math = (B.displayMath . trim <$> try (many1 (char ':') >> textInTags "math")) + <|> (B.math . trim <$> textInTags "math") + <|> (B.displayMath . trim <$> try (dmStart *> manyTillChar anyChar dmEnd)) + <|> (B.math . trim <$> try (mStart *> manyTillChar (satisfy (/='\n')) mEnd)) where dmStart = string "\\[" dmEnd = try (string "\\]") mStart = string "\\(" mEnd = try (string "\\)") -variable :: PandocMonad m => MWParser m String +variable :: PandocMonad m => MWParser m Text variable = try $ do string "{{{" - contents <- manyTill anyChar (try $ string "}}}") - return $ "{{{" ++ contents ++ "}}}" + contents <- manyTillChar anyChar (try $ string "}}}") + return $ "{{{" <> contents <> "}}}" inlineTag :: PandocMonad m => MWParser m Inlines inlineTag = do @@ -553,11 +556,11 @@ inlineTag = do TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref" TagOpen "nowiki" _ -> try $ do (_,raw) <- htmlTag (~== tag) - if '/' `elem` raw + if T.any (== '/') raw then return mempty else B.text . fromEntities <$> - manyTill anyChar (htmlTag (~== TagClose "nowiki")) - TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text))) + TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen ("br" :: Text) []) -- will get /> too *> optional blankline) TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike" TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" @@ -570,12 +573,12 @@ inlineTag = do result <- encode <$> inlinesInTags "tt" updateState $ \st -> st{ mwInTT = inTT } return result - TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" + TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> textInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) special :: PandocMonad m => MWParser m Inlines -special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *> - oneOf specialChars) +special = B.str <$> countChar 1 (notFollowedBy' (htmlTag isBlockTag') *> + oneOf specialChars) inlineHtml :: PandocMonad m => MWParser m Inlines inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' @@ -594,7 +597,7 @@ endline = () <$ try (newline <* notFollowedBy anyListStart) imageIdentifiers :: PandocMonad m => [MWParser m ()] -imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] +imageIdentifiers = [sym (identifier <> ":") | identifier <- identifiers] where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier", "Bild"] @@ -602,9 +605,9 @@ image :: PandocMonad m => MWParser m Inlines image = try $ do sym "[[" choice imageIdentifiers - fname <- addUnderscores <$> many1 (noneOf "|]") + fname <- addUnderscores <$> many1Char (noneOf "|]") _ <- many imageOption - dims <- try (char '|' *> sepBy (many digit) (char 'x') <* string "px") + dims <- try (char '|' *> sepBy (manyChar digit) (char 'x') <* string "px") <|> return [] _ <- many imageOption let kvs = case dims of @@ -614,9 +617,9 @@ image = try $ do let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption + return $ B.imageWith attr fname ("fig:" <> stringify caption) caption -imageOption :: PandocMonad m => MWParser m String +imageOption :: PandocMonad m => MWParser m Text imageOption = try $ char '|' *> opt where opt = try (oneOfStrings [ "border", "thumbnail", "frameless" @@ -624,30 +627,27 @@ imageOption = try $ char '|' *> opt , "center", "none", "baseline", "sub" , "super", "top", "text-top", "middle" , "bottom", "text-bottom" ]) - <|> try (string "frame") + <|> try (textStr "frame") <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) -collapseUnderscores :: String -> String -collapseUnderscores [] = [] -collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) -collapseUnderscores (x:xs) = x : collapseUnderscores xs - -addUnderscores :: String -> String -addUnderscores = collapseUnderscores . intercalate "_" . words +addUnderscores :: Text -> Text +addUnderscores = T.intercalate "_" . splitTextBy sep + where + sep c = isSpace c || c == '_' internalLink :: PandocMonad m => MWParser m Inlines internalLink = try $ do sym "[[" - pagename <- unwords . words <$> many (noneOf "|]") + pagename <- T.unwords . T.words <$> manyChar (noneOf "|]") label <- option (B.text pagename) $ char '|' *> ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) -- the "pipe trick" -- [[Help:Contents|] -> "Contents" - <|> return (B.text $ drop 1 $ dropWhile (/=':') pagename) ) + <|> return (B.text $ T.drop 1 $ T.dropWhile (/=':') pagename) ) sym "]]" - linktrail <- B.text <$> many letter + linktrail <- B.text <$> manyChar letter let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail) - if "Category:" `isPrefixOf` pagename + if "Category:" `T.isPrefixOf` pagename then do updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st } return mempty @@ -662,7 +662,7 @@ externalLink = try $ do <|> do char ']' num <- mwNextLinkNumber <$> getState updateState $ \st -> st{ mwNextLinkNumber = num + 1 } - return $ B.str $ show num + return $ B.str $ tshow num return $ B.link src "" lab url :: PandocMonad m => MWParser m Inlines diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index b8cbe2f26..4ade61294 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Muse Copyright : Copyright (C) 2017-2019 Alexander Krotov @@ -24,12 +25,12 @@ import Control.Monad.Reader import Control.Monad.Except (throwError) import Data.Bifunctor import Data.Default -import Data.List (intercalate, transpose, uncons) -import Data.List.Split (splitOn) +import Data.List (transpose, uncons) import qualified Data.Map as M import qualified Data.Set as Set import Data.Maybe (fromMaybe, isNothing, maybeToList) -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) @@ -38,7 +39,7 @@ import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (F) -import Text.Pandoc.Shared (crFilter, trimr, underlineSpan) +import Text.Pandoc.Shared (crFilter, trimr, underlineSpan, tshow) -- | Read Muse from an input string and return a Pandoc document. readMuse :: PandocMonad m @@ -49,18 +50,18 @@ readMuse opts s = do let input = crFilter s res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input case res of - Left e -> throwError $ PandocParsecError (unpack input) e + Left e -> throwError $ PandocParsecError input e Right d -> return d type F = Future MuseState data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata , museOptions :: ReaderOptions - , museIdentifierList :: Set.Set String + , museIdentifierList :: Set.Set Text , museLastSpacePos :: Maybe SourcePos -- ^ Position after last space or newline parsed , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , museLogMessages :: [LogMessage] - , museNotes :: M.Map String (SourcePos, F Blocks) + , museNotes :: M.Map Text (SourcePos, F Blocks) } instance Default MuseState where @@ -116,22 +117,27 @@ parseMuse = do -- * Utility functions -- | Trim up to one newline from the beginning of the string. -lchop :: String -> String -lchop ('\n':xs) = xs -lchop s = s +lchop :: Text -> Text +lchop s = case T.uncons s of + Just ('\n', xs) -> xs + _ -> s -- | Trim up to one newline from the end of the string. -rchop :: String -> String -rchop = reverse . lchop . reverse +rchop :: Text -> Text +rchop s = case T.unsnoc s of + Just (xs, '\n') -> xs + _ -> s -unindent :: String -> String -unindent = rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop +unindent :: Text -> Text +unindent = rchop . T.intercalate "\n" . dropSpacePrefix . T.splitOn "\n" . lchop -dropSpacePrefix :: [String] -> [String] -dropSpacePrefix lns = drop maxIndent <$> lns +dropSpacePrefix :: [Text] -> [Text] +dropSpacePrefix lns = T.drop maxIndent <$> lns where isSpaceChar c = c == ' ' || c == '\t' - maxIndent = length $ takeWhile (isSpaceChar . head) $ takeWhile same $ transpose lns - same = and . (zipWith (==) <*> drop 1) + maxIndent = length $ takeWhile (isSpaceChar . T.head) $ takeWhile same $ T.transpose lns + same t = case T.uncons t of + Just (c, cs) -> T.all (== c) cs + Nothing -> True atStart :: PandocMonad m => MuseParser m () atStart = do @@ -160,29 +166,29 @@ getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition -- ** HTML parsers -openTag :: PandocMonad m => String -> MuseParser m [(String, String)] +openTag :: PandocMonad m => Text -> MuseParser m [(Text, Text)] openTag tag = try $ - char '<' *> string tag *> manyTill attr (char '>') + char '<' *> textStr tag *> manyTill attr (char '>') where attr = try $ (,) <$ many1 spaceChar - <*> many1 (noneOf "=\n") + <*> many1Char (noneOf "=\n") <* string "=\"" - <*> manyTill (noneOf "\"") (char '"') + <*> manyTillChar (noneOf "\"") (char '"') -closeTag :: PandocMonad m => String -> MuseParser m () -closeTag tag = try $ string "</" *> string tag *> void (char '>') +closeTag :: PandocMonad m => Text -> MuseParser m () +closeTag tag = try $ string "</" *> textStr tag *> void (char '>') -- | Convert HTML attributes to Pandoc 'Attr' -htmlAttrToPandoc :: [(String, String)] -> Attr +htmlAttrToPandoc :: [(Text, Text)] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs + classes = maybe [] T.words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"] parseHtmlContent :: PandocMonad m - => String -- ^ Tag name + => Text -- ^ Tag name -> MuseParser m (Attr, F Blocks) parseHtmlContent tag = try $ getIndent >>= \indent -> (,) <$> fmap htmlAttrToPandoc (openTag tag) @@ -193,16 +199,16 @@ parseHtmlContent tag = try $ getIndent >>= \indent -> (,) -- ** Directive parsers -- While not documented, Emacs Muse allows "-" in directive name -parseDirectiveKey :: PandocMonad m => MuseParser m String -parseDirectiveKey = char '#' *> many (letter <|> char '-') +parseDirectiveKey :: PandocMonad m => MuseParser m Text +parseDirectiveKey = char '#' *> manyChar (letter <|> char '-') -parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseEmacsDirective :: PandocMonad m => MuseParser m (Text, F Inlines) parseEmacsDirective = (,) <$> parseDirectiveKey <* spaceChar <*> (trimInlinesF . mconcat <$> manyTill inline' eol) -parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseAmuseDirective :: PandocMonad m => MuseParser m (Text, F Inlines) parseAmuseDirective = (,) <$> parseDirectiveKey <* many1 spaceChar @@ -289,7 +295,7 @@ listItemContentsUntil col pre end = p parseBlock :: PandocMonad m => MuseParser m (F Blocks) parseBlock = do res <- blockElements <|> para - trace (take 60 $ show $ B.toList $ runF res def) + trace (T.take 60 $ tshow $ B.toList $ runF res def) return res where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements))) @@ -337,7 +343,7 @@ pagebreak = try $ pure (B.divWith ("", [], [("style", "page-break-before: always <* string "* * * * *" <* manyTill spaceChar eol -headingStart :: PandocMonad m => MuseParser m (String, Int) +headingStart :: PandocMonad m => MuseParser m (Text, Int) headingStart = try $ (,) <$> option "" (try (parseAnchor <* manyTill spaceChar eol)) <* firstColumn @@ -371,14 +377,14 @@ example :: PandocMonad m => MuseParser m (F Blocks) example = try $ pure . B.codeBlock <$ string "{{{" <* many spaceChar - <*> (unindent <$> manyTill anyChar (string "}}}")) + <*> (unindent <$> manyTillChar anyChar (string "}}}")) -- | Parse an @\<example>@ tag. exampleTag :: PandocMonad m => MuseParser m (F Blocks) exampleTag = try $ fmap pure $ B.codeBlockWith <$ many spaceChar <*> (htmlAttrToPandoc <$> openTag "example") - <*> (unindent <$> manyTill anyChar (closeTag "example")) + <*> (unindent <$> manyTillChar anyChar (closeTag "example")) <* manyTill spaceChar eol -- | Parse a @\<literal>@ tag as a raw block. @@ -388,7 +394,7 @@ literalTag = try $ fmap pure $ B.rawBlock <$ many spaceChar <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML <* manyTill spaceChar eol - <*> (unindent <$> manyTill anyChar (closeTag "literal")) + <*> (unindent <$> manyTillChar anyChar (closeTag "literal")) <* manyTill spaceChar eol -- | Parse @\<center>@ tag. @@ -428,7 +434,7 @@ playTag = do verseLine :: PandocMonad m => MuseParser m (F Inlines) verseLine = (<>) - <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' '))) + <$> fmap pure (option mempty (B.str <$> many1Char ('\160' <$ char ' '))) <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol) -- | Parse @\<verse>@ tag. @@ -466,17 +472,17 @@ paraUntil end = do noteMarker' :: PandocMonad m => Char -> Char - -> MuseParser m String -noteMarker' l r = try $ (\x y -> l:x:y ++ [r]) + -> MuseParser m Text +noteMarker' l r = try $ (\x y -> T.pack $ l:x:y ++ [r]) <$ char l <*> oneOf "123456789" <*> manyTill digit (char r) -noteMarker :: PandocMonad m => MuseParser m String +noteMarker :: PandocMonad m => MuseParser m Text noteMarker = noteMarker' '[' ']' <|> noteMarker' '{' '}' addNote :: PandocMonad m - => String + => Text -> SourcePos -> F Blocks -> MuseParser m () @@ -674,15 +680,15 @@ museGridTableRow :: PandocMonad m -> MuseParser m (F [Blocks]) museGridTableRow indent indices = try $ do lns <- many1 $ try (indentWith indent *> museGridTableRawLine indices) - let cols = map (unlines . map trimr) $ transpose lns + let cols = map (T.unlines . map trimr) $ transpose lns indentWith indent *> museGridTableHeader sequence <$> mapM (parseFromString' parseBlocks) cols museGridTableRawLine :: PandocMonad m => [Int] - -> MuseParser m [String] + -> MuseParser m [Text] museGridTableRawLine indices = - char '|' *> forM indices (\n -> count n anyChar <* char '|') <* manyTill spaceChar eol + char '|' *> forM indices (\n -> countChar n anyChar <* char '|') <* manyTill spaceChar eol museGridTable :: PandocMonad m => MuseParser m (F Blocks) museGridTable = try $ do @@ -767,12 +773,12 @@ inline = endline <|> inline' endline :: PandocMonad m => MuseParser m (F Inlines) endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline <* updateLastSpacePos -parseAnchor :: PandocMonad m => MuseParser m String -parseAnchor = try $ (:) +parseAnchor :: PandocMonad m => MuseParser m Text +parseAnchor = try $ T.cons <$ firstColumn <* char '#' <*> letter - <*> many (letter <|> digit <|> char '-') + <*> manyChar (letter <|> digit <|> char '-') anchor :: PandocMonad m => MuseParser m (F Inlines) anchor = try $ do @@ -813,7 +819,7 @@ emphasisBetween p = try $ trimInlinesF . mconcat -- | Parse an inline tag, such as @\<em>@ and @\<strong>@. inlineTag :: PandocMonad m - => String -- ^ Tag name + => Text -- ^ Tag name -> MuseParser m (F Inlines) inlineTag tag = try $ mconcat <$ openTag tag @@ -862,12 +868,12 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del" verbatimTag :: PandocMonad m => MuseParser m (F Inlines) verbatimTag = return . B.text <$ openTag "verbatim" - <*> manyTill anyChar (closeTag "verbatim") + <*> manyTillChar anyChar (closeTag "verbatim") -- | Parse @\<class>@ tag. classTag :: PandocMonad m => MuseParser m (F Inlines) classTag = do - classes <- maybe [] words . lookup "name" <$> openTag "class" + classes <- maybe [] T.words . lookup "name" <$> openTag "class" fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class") -- | Parse @\<\<\<RTL>>>@ text. @@ -886,43 +892,43 @@ nbsp = try $ pure (B.str "\160") <$ string "~~" -- | Parse code markup, indicated by @\'=\'@ characters. code :: PandocMonad m => MuseParser m (F Inlines) -code = try $ fmap pure $ B.code . uncurry (++) +code = try $ fmap pure $ B.code . uncurry (<>) <$ atStart <* char '=' <* notFollowedBy (spaceChar <|> newline) - <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=') + <*> manyUntilChar (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap T.singleton $ noneOf " \t\n\r=" <* char '=') <* notFollowedBy alphaNum -- | Parse @\<code>@ tag. codeTag :: PandocMonad m => MuseParser m (F Inlines) codeTag = fmap pure $ B.codeWith <$> (htmlAttrToPandoc <$> openTag "code") - <*> manyTill anyChar (closeTag "code") + <*> manyTillChar anyChar (closeTag "code") -- | Parse @\<math>@ tag. -- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@ mathTag :: PandocMonad m => MuseParser m (F Inlines) mathTag = return . B.math <$ openTag "math" - <*> manyTill anyChar (closeTag "math") + <*> manyTillChar anyChar (closeTag "math") -- | Parse inline @\<literal>@ tag as a raw inline. inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) inlineLiteralTag = try $ fmap pure $ B.rawInline <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML - <*> manyTill anyChar (closeTag "literal") + <*> manyTillChar anyChar (closeTag "literal") str :: PandocMonad m => MuseParser m (F Inlines) -str = return . B.str <$> many1 alphaNum <* updateLastStrPos +str = return . B.str <$> many1Char alphaNum <* updateLastStrPos -- | Consume asterisks that were not used as emphasis opening. -- This prevents series of asterisks from being split into -- literal asterisk and emphasis opening. asterisks :: PandocMonad m => MuseParser m (F Inlines) -asterisks = pure . B.str <$> many1 (char '*') +asterisks = pure . B.str <$> many1Char (char '*') symbol :: PandocMonad m => MuseParser m (F Inlines) -symbol = pure . B.str . pure <$> nonspaceChar +symbol = pure . B.str . T.singleton <$> nonspaceChar -- | Parse a link or image. linkOrImage :: PandocMonad m => MuseParser m (F Inlines) @@ -934,12 +940,12 @@ linkContent = trimInlinesF . mconcat <*> manyTill inline (char ']') -- | Parse a link starting with (possibly null) prefix -link :: PandocMonad m => String -> MuseParser m (F Inlines) +link :: PandocMonad m => Text -> MuseParser m (F Inlines) link prefix = try $ do inLink <- asks museInLink guard $ not inLink - string $ "[[" ++ prefix - url <- manyTill anyChar $ char ']' + textStr $ "[[" <> prefix + url <- manyTillChar anyChar $ char ']' content <- option (pure $ B.str url) (local (\s -> s { museInLink = True }) linkContent) char ']' return $ B.link url "" <$> content @@ -947,27 +953,27 @@ link prefix = try $ do image :: PandocMonad m => MuseParser m (F Inlines) image = try $ do string "[[" - (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']') + (url, (ext, width, align)) <- manyUntilChar (noneOf "]") (imageExtensionAndOptions <* char ']') content <- option mempty linkContent char ']' let widthAttr = case align of - Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")] - _ -> maybeToList (("width",) . (++ "%") <$> width) + Just 'f' -> [("width", fromMaybe "100" width <> "%"), ("height", "75%")] + _ -> maybeToList (("width",) . (<> "%") <$> width) let alignClass = case align of Just 'r' -> ["align-right"] Just 'l' -> ["align-left"] Just 'f' -> [] _ -> [] - return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content + return $ B.imageWith ("", alignClass, widthAttr) (url <> ext) mempty <$> content where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] - imageExtension = choice (try . string <$> imageExtensions) + imageExtension = choice (try . textStr <$> imageExtensions) imageExtensionAndOptions = do ext <- imageExtension (width, align) <- option (Nothing, Nothing) imageAttrs return (ext, width, align) imageAttrs = (,) <$ many1 spaceChar - <*> optionMaybe (many1 digit) + <*> optionMaybe (many1Char digit) <* many spaceChar <*> optionMaybe (oneOf "rlf") diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 9e3c118d8..34d3c5e8f 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Native Copyright : Copyright (C) 2011-2019 John MacFarlane @@ -19,7 +20,7 @@ import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Shared (safeRead) import Control.Monad.Except (throwError) -import Data.Text (Text, unpack) +import Data.Text (Text) import Text.Pandoc.Class import Text.Pandoc.Error @@ -38,18 +39,18 @@ readNative :: PandocMonad m -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of + case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of Right doc -> return doc Left _ -> throwError $ PandocParseError "couldn't read native" readBlocks :: Text -> Either PandocError [Block] -readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s)) +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) readBlock :: Text -> Either PandocError Block -readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s)) +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) readInlines :: Text -> Either PandocError [Inline] -readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s)) +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) readInline :: Text -> Either PandocError Inline -readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s)) +readInline s = maybe (Left . PandocParseError $ "Could not read: " <> s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 2c3b0367f..5330b0238 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.OPML Copyright : Copyright (C) 2013-2019 John MacFarlane @@ -18,7 +19,8 @@ import Data.Char (toUpper) import Data.Default import Data.Generics import Data.Maybe (fromMaybe) -import Data.Text (Text, pack, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad) @@ -50,7 +52,7 @@ readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML opts inp = do (bs, st') <- runStateT (mapM parseBlock $ normalizeTree $ - parseXML (unpack (crFilter inp))) def{ opmlOptions = opts } + parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts } return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ @@ -76,23 +78,26 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr elt = - fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + +textContent :: Element -> Text +textContent = T.pack . strContent -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a -- exceptT = either throwError return -asHtml :: PandocMonad m => String -> OPML m Inlines +asHtml :: PandocMonad m => Text -> OPML m Inlines asHtml s = do opts <- gets opmlOptions - Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } (pack s) + Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } s return $ blocksToInlines' bs -asMarkdown :: PandocMonad m => String -> OPML m Blocks +asMarkdown :: PandocMonad m => Text -> OPML m Blocks asMarkdown s = do opts <- gets opmlOptions - Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } (pack s) + Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } s return $ fromList bs getBlocks :: PandocMonad m => Element -> OPML m Blocks @@ -102,11 +107,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks parseBlock (Elem e) = case qName (elName e) of "ownerName" -> mempty <$ modify (\st -> - st{opmlDocAuthors = [text $ strContent e]}) + st{opmlDocAuthors = [text $ textContent e]}) "dateModified" -> mempty <$ modify (\st -> - st{opmlDocDate = text $ strContent e}) + st{opmlDocDate = text $ textContent e}) "title" -> mempty <$ modify (\st -> - st{opmlDocTitle = text $ strContent e}) + st{opmlDocTitle = text $ textContent e}) "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e @@ -115,7 +120,7 @@ parseBlock (Elem e) = modify $ \st -> st{ opmlSectionLevel = n } bs <- getBlocks e modify $ \st -> st{ opmlSectionLevel = n - 1 } - let headerText' = case map toUpper (attrValue "type" e) of + let headerText' = case T.toUpper (attrValue "type" e) of "LINK" -> link (attrValue "url" e) "" headerText _ -> headerText diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index dfa019932..f9b78d5bf 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Reader.Odt Copyright : Copyright (C) 2015 Martin Linnemann diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index d8e5ba272..ff8cdc5fa 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -1,11 +1,12 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE Arrows #-} -{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE Arrows #-} +{-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Odt.ContentReader Copyright : Copyright (C) 2015 Martin Linnemann @@ -29,8 +30,9 @@ import Control.Arrow import qualified Data.ByteString.Lazy as B import Data.Foldable (fold) -import Data.List (find, intercalate, stripPrefix) +import Data.List (find, stripPrefix) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import Data.Semigroup (First(..), Option(..)) @@ -59,7 +61,7 @@ import qualified Data.Set as Set -- State -------------------------------------------------------------------------------- -type Anchor = String +type Anchor = T.Text type Media = [(FilePath, B.ByteString)] data ReaderState @@ -204,21 +206,21 @@ updateMediaWithResource = keepingTheValue ( ) >>^ fst -lookupResource :: OdtReaderSafe String (FilePath, B.ByteString) +lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString) lookupResource = proc target -> do state <- getExtraState -< () case lookup target (getMediaEnv state) of Just bs -> returnV (target, bs) -<< () Nothing -> returnV ("", B.empty) -< () -type AnchorPrefix = String +type AnchorPrefix = T.Text -- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a -- unique identifier but without assuming that the id should be for a header. -- Second argument is a list of already used identifiers. uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor uniqueIdentFrom baseIdent usedIdents = - let numIdent n = baseIdent ++ "-" ++ show n + let numIdent n = baseIdent <> "-" <> T.pack (show n) in if baseIdent `elem` usedIdents then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of Just x -> numIdent x @@ -305,7 +307,7 @@ withNewStyle a = proc x -> do isCodeStyle _ = False inlineCode :: Inlines -> Inlines - inlineCode = code . intercalate "" . map stringify . toList + inlineCode = code . T.concat . map stringify . toList type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily) type InlineModifier = Inlines -> Inlines @@ -535,7 +537,6 @@ matchChildContent :: (Monoid result) -> OdtReaderSafe _x result matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback - -------------------------------------------- -- Matchers -------------------------------------------- @@ -556,8 +557,8 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover ) >>?% mappend -- - extractText :: XML.Content -> Fallible String - extractText (XML.Text cData) = succeedWith (XML.cdData cData) + extractText :: XML.Content -> Fallible T.Text + extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData) extractText _ = failEmpty read_text_seq :: InlineMatcher @@ -675,8 +676,8 @@ read_list_item = matchingElement NsText "list-item" read_link :: InlineMatcher read_link = matchingElement NsText "a" $ liftA3 link - ( findAttrWithDefault NsXLink "href" "" ) - ( findAttrWithDefault NsOffice "title" "" ) + ( findAttrTextWithDefault NsXLink "href" "" ) + ( findAttrTextWithDefault NsOffice "title" "" ) ( matchChildContent [ read_span , read_note , read_citation @@ -709,12 +710,12 @@ read_citation :: InlineMatcher read_citation = matchingElement NsText "bibliography-mark" $ liftA2 cite ( liftA2 makeCitation - ( findAttrWithDefault NsText "identifier" "" ) + ( findAttrTextWithDefault NsText "identifier" "" ) ( readAttrWithDefault NsText "number" 0 ) ) ( matchChildContent [] read_plain_text ) where - makeCitation :: String -> Int -> [Citation] + makeCitation :: T.Text -> Int -> [Citation] makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0] @@ -779,17 +780,17 @@ read_frame_img = let exts = extensionsFromList [Ext_auto_identifiers] resource <- lookupResource -< src' _ <- updateMediaWithResource -< resource - w <- findAttr' NsSVG "width" -< () - h <- findAttr' NsSVG "height" -< () + w <- findAttrText' NsSVG "width" -< () + h <- findAttrText' NsSVG "height" -< () titleNodes <- matchChildContent' [ read_frame_title ] -< () alt <- matchChildContent [] read_plain_text -< () arr (firstMatch . uncurry4 imageWith) -< - (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) + (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt) read_frame_title :: InlineMatcher read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) -image_attributes :: Maybe String -> Maybe String -> Attr +image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr image_attributes x y = ( "", [], (dim "width" x) ++ (dim "height" y)) where @@ -806,7 +807,7 @@ read_frame_mathml = src' -> do let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml" (_, mathml) <- lookupResource -< path - case readMathML (UTF8.toString $ B.toStrict mathml) of + case readMathML (UTF8.toText $ B.toStrict mathml) of Left _ -> returnV mempty -< () Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps @@ -817,9 +818,9 @@ read_frame_text_box = proc box -> do read_img_with_caption :: [Block] -> FirstMatch Inlines read_img_with_caption (Para [Image attr alt (src,title)] : _) = - firstMatch $ singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption + firstMatch $ singleton (Image attr alt (src, "fig:" <> title)) -- no text, default caption read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) = - firstMatch $ singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows + firstMatch $ singleton (Image attr txt (src, "fig:" <> title) ) -- override caption with the text that follows read_img_with_caption ( Para (_ : xs) : ys) = read_img_with_caption (Para xs : ys) read_img_with_caption _ = @@ -829,12 +830,12 @@ read_img_with_caption _ = -- Internal links ---------------------- -_ANCHOR_PREFIX_ :: String +_ANCHOR_PREFIX_ :: T.Text _ANCHOR_PREFIX_ = "anchor" -- readAnchorAttr :: OdtReader _x Anchor -readAnchorAttr = findAttr NsText "name" +readAnchorAttr = findAttrText NsText "name" -- | Beware: may fail findAnchorName :: OdtReader AnchorPrefix Anchor @@ -875,7 +876,7 @@ read_reference_start = matchingElement NsText "reference-mark-start" -- | Beware: may fail findAnchorRef :: OdtReader _x Anchor -findAnchorRef = ( findAttr NsText "ref-name" +findAnchorRef = ( findAttrText NsText "ref-name" >>?^ (_ANCHOR_PREFIX_,) ) >>?! getPrettyAnchor @@ -890,7 +891,7 @@ maybeInAnchorRef = proc inlines -> do Left _ -> returnA -< inlines where toAnchorRef :: Anchor -> Inlines -> Inlines - toAnchorRef anchor = link ('#':anchor) "" -- no title + toAnchorRef anchor = link ("#" <> anchor) "" -- no title -- read_bookmark_ref :: InlineMatcher diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index ccbaf6fc4..59d1b8abd 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -38,8 +38,11 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , lookupAttr' , lookupDefaultingAttr , findAttr' +, findAttrText' , findAttr +, findAttrText , findAttrWithDefault +, findAttrTextWithDefault , readAttr , readAttr' , readAttrWithDefault @@ -59,6 +62,7 @@ import Control.Arrow import Data.Either ( rights ) import qualified Data.Map as M +import qualified Data.Text as T import Data.Default import Data.Maybe @@ -79,6 +83,7 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible type ElementName = String type AttributeName = String type AttributeValue = String +type TextAttributeValue = T.Text -- type NameSpacePrefix = String @@ -466,6 +471,16 @@ findAttr' nsID attrName = qualifyName nsID attrName &&& getCurrentElement >>% XML.findAttr +-- | Return value as a (Maybe Text) +findAttrText' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe TextAttributeValue) +findAttrText' nsID attrName + = qualifyName nsID attrName + &&& getCurrentElement + >>% XML.findAttr + >>^ fmap T.pack + -- | Return value as string or fail findAttr :: (NameSpaceID nsID) => nsID -> AttributeName @@ -473,6 +488,15 @@ findAttr :: (NameSpaceID nsID) findAttr nsID attrName = findAttr' nsID attrName >>> maybeToChoice +-- | Return value as text or fail +findAttrText :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x TextAttributeValue +findAttrText nsID attrName + = findAttr' nsID attrName + >>^ fmap T.pack + >>> maybeToChoice + -- | Return value as string or return provided default value findAttrWithDefault :: (NameSpaceID nsID) => nsID -> AttributeName @@ -482,6 +506,15 @@ findAttrWithDefault nsID attrName deflt = findAttr' nsID attrName >>^ fromMaybe deflt +-- | Return value as string or return provided default value +findAttrTextWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> TextAttributeValue + -> XMLConverter nsID extraState x TextAttributeValue +findAttrTextWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ maybe deflt T.pack + -- | Read and return value or fail readAttr :: (NameSpaceID nsID, Read attrValue) => nsID -> AttributeName diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 79e8d7aea..99fa05880 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -548,11 +548,11 @@ readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) readListLevelStyle levelType = readAttr NsText "level" >>?! keepingTheValue ( liftA5 toListLevelStyle - ( returnV levelType ) - ( findAttr' NsStyle "num-prefix" ) - ( findAttr' NsStyle "num-suffix" ) - ( getAttr NsStyle "num-format" ) - ( findAttr' NsText "start-value" ) + ( returnV levelType ) + ( findAttr' NsStyle "num-prefix" ) + ( findAttr' NsStyle "num-suffix" ) + ( getAttr NsStyle "num-format" ) + ( findAttrText' NsText "start-value" ) ) where toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2c88c7776..99ece152c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -26,7 +27,6 @@ import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) import Data.Text (Text) -import qualified Data.Text as T -- | Parse org-mode string and return a Pandoc document. readOrg :: PandocMonad m @@ -36,7 +36,7 @@ readOrg :: PandocMonad m readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left _ -> throwError $ PandocParseError "problem parsing org" diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index 58db4f46c..b4f3cc0d8 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.BlockStarts Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -25,6 +26,8 @@ module Text.Pandoc.Readers.Org.BlockStarts import Prelude import Control.Monad (void) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Readers.Org.Parsing -- | Horizontal Line (five -- dashes or more) @@ -49,15 +52,15 @@ gridTableStart :: Monad m => OrgParser m () gridTableStart = try $ skipSpaces <* char '+' <* char '-' -latexEnvStart :: Monad m => OrgParser m String +latexEnvStart :: Monad m => OrgParser m Text latexEnvStart = try $ skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" <* blankline where - latexEnvName :: Monad m => OrgParser m String - latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*") + latexEnvName :: Monad m => OrgParser m Text + latexEnvName = try $ mappend <$> many1Char alphaNum <*> option "" (textStr "*") bulletListStart :: Monad m => OrgParser m Int bulletListStart = try $ do @@ -68,7 +71,7 @@ bulletListStart = try $ do return (ind + 1) genericListStart :: Monad m - => OrgParser m String + => OrgParser m Text -> OrgParser m Int genericListStart listMarker = try $ do ind <- length <$> many spaceChar @@ -82,11 +85,11 @@ eol = void (char '\n') orderedListStart :: Monad m => OrgParser m Int orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode - where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + where orderedListMarker = T.snoc <$> many1Char digit <*> oneOf ".)" -drawerStart :: Monad m => OrgParser m String +drawerStart :: Monad m => OrgParser m Text drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline - where drawerName = char ':' *> manyTill nonspaceChar (char ':') + where drawerName = char ':' *> manyTillChar nonspaceChar (char ':') metaLineStart :: Monad m => OrgParser m () metaLineStart = try $ skipSpaces <* string "#+" @@ -99,12 +102,12 @@ commentLineStart = try $ exampleLineStart :: Monad m => OrgParser m () exampleLineStart = () <$ try (skipSpaces *> string ": ") -noteMarker :: Monad m => OrgParser m String +noteMarker :: Monad m => OrgParser m Text noteMarker = try $ do char '[' - choice [ many1Till digit (char ']') - , (++) <$> string "fn:" - <*> many1Till (noneOf "\n\r\t ") (char ']') + choice [ many1TillChar digit (char ']') + , (<>) <$> textStr "fn:" + <*> many1TillChar (noneOf "\n\r\t ") (char ']') ] -- | Succeeds if the parser is at the end of a block. diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index cba876f06..de51dec3d 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -23,7 +24,7 @@ import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Blocks, Inlines) @@ -33,11 +34,13 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (compactify, compactifyDL, safeRead) import Control.Monad (foldM, guard, mzero, void) -import Data.Char (isSpace, toLower, toUpper) +import Data.Char (isSpace) import Data.Default (Default) -import Data.List (foldl', isPrefixOf) +import Data.List (foldl') import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Text (Text) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Walk as Walk @@ -90,10 +93,10 @@ horizontalRule = return B.horizontalRule <$ try hline -- | Attributes that may be added to figures (like a name or caption). data BlockAttributes = BlockAttributes - { blockAttrName :: Maybe String - , blockAttrLabel :: Maybe String + { blockAttrName :: Maybe Text + , blockAttrLabel :: Maybe Text , blockAttrCaption :: Maybe (F Inlines) - , blockAttrKeyValues :: [(String, String)] + , blockAttrKeyValues :: [(Text, Text)] } -- | Convert BlockAttributes into pandoc Attr @@ -103,14 +106,14 @@ attrFromBlockAttributes BlockAttributes{..} = ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of Nothing -> [] - Just clsStr -> words clsStr + Just clsStr -> T.words clsStr kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute :: Monad m => OrgParser m (Text, Text) stringyMetaAttribute = try $ do metaLineStart - attrName <- map toUpper <$> many1Till nonspaceChar (char ':') + attrName <- T.toUpper <$> many1TillChar nonspaceChar (char ':') skipSpaces attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) @@ -129,8 +132,8 @@ blockAttributes = try $ do let label = lookup "LABEL" kv caption' <- case caption of Nothing -> return Nothing - Just s -> Just <$> parseFromString inlines (s ++ "\n") - kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs + Just s -> Just <$> parseFromString inlines (s <> "\n") + kvAttrs' <- parseFromString keyValues . (<> "\n") $ fromMaybe mempty kvAttrs return BlockAttributes { blockAttrName = name , blockAttrLabel = label @@ -138,31 +141,31 @@ blockAttributes = try $ do , blockAttrKeyValues = kvAttrs' } where - isBlockAttr :: String -> Bool + isBlockAttr :: Text -> Bool isBlockAttr = flip elem [ "NAME", "LABEL", "CAPTION" , "ATTR_HTML", "ATTR_LATEX" , "RESULTS" ] - appendValues :: String -> Maybe String -> (String, String) -> Maybe String + appendValues :: Text -> Maybe Text -> (Text, Text) -> Maybe Text appendValues attrName accValue (key, value) = if key /= attrName then accValue else case accValue of - Just acc -> Just $ acc ++ ' ':value + Just acc -> Just $ acc <> " " <> value Nothing -> Just value -- | Parse key-value pairs for HTML attributes -keyValues :: Monad m => OrgParser m [(String, String)] +keyValues :: Monad m => OrgParser m [(Text, Text)] keyValues = try $ manyTill ((,) <$> key <*> value) newline where - key :: Monad m => OrgParser m String - key = try $ skipSpaces *> char ':' *> many1 nonspaceChar + key :: Monad m => OrgParser m Text + key = try $ skipSpaces *> char ':' *> many1Char nonspaceChar - value :: Monad m => OrgParser m String - value = skipSpaces *> manyTill anyChar endOfValue + value :: Monad m => OrgParser m Text + value = skipSpaces *> manyTillChar anyChar endOfValue endOfValue :: Monad m => OrgParser m () endOfValue = @@ -180,7 +183,7 @@ orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case map toLower blkType of + case T.toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -194,13 +197,13 @@ orgBlock = try $ do let (ident, classes, kv) = attrFromBlockAttributes blockAttrs in fmap $ B.divWith (ident, classes ++ [blkType], kv) where - blockHeaderStart :: Monad m => OrgParser m String + blockHeaderStart :: Monad m => OrgParser m Text blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord - lowercase :: String -> String - lowercase = map toLower + lowercase :: Text -> Text + lowercase = T.toLower -exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +exampleBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) exampleBlock blockAttrs _label = do skipSpaces (classes, kv) <- switchesAsAttributes @@ -210,54 +213,54 @@ exampleBlock blockAttrs _label = do let codeBlck = B.codeBlockWith (id', "example":classes, kv) content return . return $ codeBlck -rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) +rawBlockLines :: Monad m => (Text -> F Blocks) -> Text -> OrgParser m (F Blocks) rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) -parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) +parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> Text -> OrgParser m (F Blocks) parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do raw <- rawBlockContent blockType - parseFromString blocks (raw ++ "\n") + parseFromString blocks (raw <> "\n") -- | Read the raw string content of a block -rawBlockContent :: Monad m => String -> OrgParser m String +rawBlockContent :: Monad m => Text -> OrgParser m Text rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop trimP <- orgStateTrimLeadBlkIndent <$> getState - let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs - (unlines + let stripIndent strs = if trimP then map (T.drop (shortestIndent strs)) strs else strs + (T.unlines . stripIndent . map (tabsToSpaces tabLen . commaEscaped) $ blkLines) <$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True }) where - rawLine :: Monad m => OrgParser m String + rawLine :: Monad m => OrgParser m Text rawLine = try $ ("" <$ blankline) <|> anyLine blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) - shortestIndent :: [String] -> Int - shortestIndent = foldr (min . length . takeWhile isSpace) maxBound - . filter (not . null) - - tabsToSpaces :: Int -> String -> String - tabsToSpaces _ [] = [] - tabsToSpaces tabLen cs'@(c:cs) = - case c of - ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs - _ -> cs' - - commaEscaped :: String -> String - commaEscaped (',':cs@('*':_)) = cs - commaEscaped (',':cs@('#':'+':_)) = cs - commaEscaped (' ':cs) = ' ':commaEscaped cs - commaEscaped ('\t':cs) = '\t':commaEscaped cs - commaEscaped cs = cs + shortestIndent :: [Text] -> Int + shortestIndent = foldr (min . T.length . T.takeWhile isSpace) maxBound + . filter (not . T.null) + + tabsToSpaces :: Int -> Text -> Text + tabsToSpaces tabStop t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + tabNum = T.length $ T.filter (== '\n') ind + spaceNum = T.length ind - tabNum + in T.replicate (spaceNum + tabStop * tabNum) " " <> suff + + commaEscaped t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + in case T.uncons suff of + Just (',', cs) + | "*" <- T.take 1 cs -> ind <> cs + | "#+" <- T.take 2 cs -> ind <> cs + _ -> t -- | Read but ignore all remaining block headers. ignHeaders :: Monad m => OrgParser m () @@ -265,34 +268,34 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine) -- | Read a block containing code intended for export in specific backends -- only. -exportBlock :: Monad m => String -> OrgParser m (F Blocks) +exportBlock :: Monad m => Text -> OrgParser m (F Blocks) exportBlock blockType = try $ do exportType <- skipSpaces *> orgArgWord <* ignHeaders contents <- rawBlockContent blockType - returnF (B.rawBlock (map toLower exportType) contents) + returnF (B.rawBlock (T.toLower exportType) contents) -verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) +verseBlock :: PandocMonad m => Text -> OrgParser m (F Blocks) verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType fmap B.lineBlock . sequence - <$> mapM parseVerseLine (lines content) + <$> mapM parseVerseLine (T.lines content) where -- replace initial spaces with nonbreaking spaces to preserve -- indentation, parse the rest as normal inline - parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) + parseVerseLine :: PandocMonad m => Text -> OrgParser m (F Inlines) parseVerseLine cs = do - let (initialSpaces, indentedLine) = span isSpace cs - let nbspIndent = if null initialSpaces + let (initialSpaces, indentedLine) = T.span isSpace cs + let nbspIndent = if T.null initialSpaces then mempty - else B.str $ map (const '\160') initialSpaces - line <- parseFromString inlines (indentedLine ++ "\n") + else B.str $ T.map (const '\160') initialSpaces + line <- parseFromString inlines (indentedLine <> "\n") return (trimInlinesF $ pure nbspIndent <> line) -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" -- argument in the block header. -codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -314,7 +317,7 @@ codeBlock blockAttrs blockType = do labelledBlock :: F Inlines -> F Blocks labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) - exportsResults :: [(String, String)] -> Bool + exportsResults :: [(Text, Text)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -- | Parse the result of an evaluated babel code block. @@ -329,7 +332,7 @@ babelResultsBlock = try $ do resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments -codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) +codeHeaderArgs :: Monad m => OrgParser m ([Text], [(Text, Text)]) codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord (switchClasses, switchKv) <- switchesAsAttributes @@ -338,14 +341,14 @@ codeHeaderArgs = try $ do , originalLang language <> switchKv <> parameters ) -switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) +switchesAsAttributes :: Monad m => OrgParser m ([Text], [(Text, Text)]) switchesAsAttributes = try $ do switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) return $ foldr addToAttr ([], []) switches where - addToAttr :: (Char, Maybe String, SwitchPolarity) - -> ([String], [(String, String)]) - -> ([String], [(String, String)]) + addToAttr :: (Char, Maybe Text, SwitchPolarity) + -> ([Text], [(Text, Text)]) + -> ([Text], [(Text, Text)]) addToAttr ('n', lineNum, pol) (cls, kv) = let kv' = case lineNum of Just num -> ("startFrom", num):kv @@ -365,15 +368,15 @@ switchPolarity :: Monad m => OrgParser m SwitchPolarity switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+') -- | Parses a source block switch option. -switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +switch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) switch = try $ lineNumberSwitch <|> labelSwitch <|> whitespaceSwitch <|> simpleSwitch where simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter labelSwitch = genericSwitch 'l' $ - char '"' *> many1Till nonspaceChar (char '"') + char '"' *> many1TillChar nonspaceChar (char '"') -whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) whitespaceSwitch = do string "-i" updateState $ \s -> s { orgStateTrimLeadBlkIndent = False } @@ -382,8 +385,8 @@ whitespaceSwitch = do -- | Generic source block switch-option parser. genericSwitch :: Monad m => Char - -> OrgParser m String - -> OrgParser m (Char, Maybe String, SwitchPolarity) + -> OrgParser m Text + -> OrgParser m (Char, Maybe Text, SwitchPolarity) genericSwitch c p = try $ do polarity <- switchPolarity <* char c <* skipSpaces arg <- optionMaybe p @@ -391,17 +394,17 @@ genericSwitch c p = try $ do -- | Reads a line number switch option. The line number switch can be used with -- example and source blocks. -lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) -lineNumberSwitch = genericSwitch 'n' (many digit) +lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) +lineNumberSwitch = genericSwitch 'n' (manyChar digit) -blockOption :: Monad m => OrgParser m (String, String) +blockOption :: Monad m => OrgParser m (Text, Text) blockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgParamValue return (argKey, paramValue) -orgParamValue :: Monad m => OrgParser m String -orgParamValue = try $ +orgParamValue :: Monad m => OrgParser m Text +orgParamValue = try $ fmap T.pack $ skipSpaces *> notFollowedBy orgArgKey *> noneOf "\n\r" `many1Till` endOfValue @@ -420,7 +423,7 @@ orgParamValue = try $ -- export setting. genericDrawer :: PandocMonad m => OrgParser m (F Blocks) genericDrawer = try $ do - name <- map toUpper <$> drawerStart + name <- T.toUpper <$> drawerStart content <- manyTill drawerLine (try drawerEnd) state <- getState -- Include drawer if it is explicitly included in or not explicitly excluded @@ -432,16 +435,16 @@ genericDrawer = try $ do Right names | name `notElem` names -> return mempty _ -> drawerDiv name <$> parseLines content where - parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) - parseLines = parseFromString blocks . (++ "\n") . unlines + parseLines :: PandocMonad m => [Text] -> OrgParser m (F Blocks) + parseLines = parseFromString blocks . (<> "\n") . T.unlines - drawerDiv :: String -> F Blocks -> F Blocks + drawerDiv :: Text -> F Blocks -> F Blocks drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) -drawerLine :: Monad m => OrgParser m String +drawerLine :: Monad m => OrgParser m Text drawerLine = anyLine -drawerEnd :: Monad m => OrgParser m String +drawerEnd :: Monad m => OrgParser m Text drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline @@ -456,17 +459,17 @@ figure :: PandocMonad m => OrgParser m (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph - case cleanLinkString src of + case cleanLinkText src of Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where - selfTarget :: PandocMonad m => OrgParser m String + selfTarget :: PandocMonad m => OrgParser m Text selfTarget = try $ char '[' *> linkTarget <* char ']' - imageBlock :: Bool -> BlockAttributes -> String -> F Blocks + imageBlock :: Bool -> BlockAttributes -> Text -> F Blocks imageBlock isFigure figAttrs imgSrc = let figName = fromMaybe mempty $ blockAttrName figAttrs @@ -478,11 +481,11 @@ figure = try $ do in B.para . B.imageWith attr imgSrc figTitle <$> figCaption - withFigPrefix :: String -> String + withFigPrefix :: Text -> Text withFigPrefix cs = - if "fig:" `isPrefixOf` cs + if "fig:" `T.isPrefixOf` cs then cs - else "fig:" ++ cs + else "fig:" <> cs -- | Succeeds if looking at the end of the current paragraph endOfParagraph :: Monad m => OrgParser m () @@ -495,12 +498,12 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- | Example code marked up by a leading colon. example :: Monad m => OrgParser m (F Blocks) -example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine +example = try $ returnF . exampleCode =<< T.unlines <$> many1 exampleLine where - exampleLine :: Monad m => OrgParser m String + exampleLine :: Monad m => OrgParser m Text exampleLine = try $ exampleLineStart *> anyLine -exampleCode :: String -> Blocks +exampleCode :: Text -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) @@ -516,7 +519,7 @@ include :: PandocMonad m => OrgParser m (F Blocks) include = try $ do metaLineStart <* stringAnyCase "include:" <* skipSpaces filename <- includeTarget - includeArgs <- many (try $ skipSpaces *> many1 alphaNum) + includeArgs <- many (try $ skipSpaces *> many1Char alphaNum) params <- keyValues blocksParser <- case includeArgs of ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw @@ -535,10 +538,10 @@ include = try $ do char '"' manyTill (noneOf "\n\r\t") (char '"') - parseRaw :: PandocMonad m => OrgParser m String - parseRaw = many anyChar + parseRaw :: PandocMonad m => OrgParser m Text + parseRaw = manyChar anyChar - blockFilter :: [(String, String)] -> [Block] -> [Block] + blockFilter :: [(Text, Text)] -> [Block] -> [Block] blockFilter params blks = let minlvl = lookup "minlevel" params in case (minlvl >>= safeRead :: Maybe Int) of @@ -660,7 +663,7 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info" <$> (skipSpaces *> char '<' *> optionMaybe tableAlignFromChar) - <*> (optionMaybe (many1 digit >>= safeRead) + <*> (optionMaybe (many1Char digit >>= safeRead) <* char '>' <* emptyCell) @@ -739,10 +742,10 @@ latexFragment = try $ do , "\\end{", e, "}\n" ] -latexEnd :: Monad m => String -> OrgParser m () +latexEnd :: Monad m => Text -> OrgParser m () latexEnd envName = try $ () <$ skipSpaces - <* string ("\\end{" ++ envName ++ "}") + <* textStr ("\\end{" <> envName <> "}") <* blankline @@ -813,12 +816,12 @@ definitionListItem :: PandocMonad m -> OrgParser m (F (Inlines, [Blocks])) definitionListItem parseIndentedMarker = try $ do markerLength <- parseIndentedMarker - term <- manyTill (noneOf "\n\r") (try definitionMarker) + term <- manyTillChar (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) - cont <- concat <$> many (listContinuation markerLength) + cont <- T.concat <$> many (listContinuation markerLength) term' <- parseFromString inlines term - contents' <- parseFromString blocks $ line1 ++ blank ++ cont + contents' <- parseFromString blocks $ line1 <> blank <> cont return $ (,) <$> term' <*> fmap (:[]) contents' where definitionMarker = @@ -832,16 +835,16 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do markerLength <- try parseIndentedMarker firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString blocks $ firstLine ++ blank ++ rest + rest <- T.concat <$> many (listContinuation markerLength) + parseFromString blocks $ firstLine <> blank <> rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. -listContinuation :: PandocMonad m => Int -> OrgParser m String +listContinuation :: PandocMonad m => Int -> OrgParser m Text listContinuation markerLength = try $ do notFollowedBy' blankline - mappend <$> (concat <$> many1 (listContinuation' markerLength)) - <*> many blankline + mappend <$> (T.concat <$> many1 (listContinuation' markerLength)) + <*> manyChar blankline where listContinuation' indentation = blockLines indentation <|> listLine indentation @@ -853,6 +856,6 @@ listContinuation markerLength = try $ do >> blockAttributes >>= (\blockAttrs -> case attrFromBlockAttributes blockAttrs of - ("", [], []) -> count 1 anyChar + ("", [], []) -> countChar 1 anyChar _ -> indentWith indentation)) >> (snd <$> withRaw orgBlock) diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index c96087be7..09a501b68 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.DocumentTree Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -17,9 +18,9 @@ module Text.Pandoc.Readers.Org.DocumentTree import Prelude import Control.Arrow ((***)) import Control.Monad (guard, void) -import Data.Char (toLower, toUpper) import Data.List (intersperse) import Data.Maybe (mapMaybe) +import Data.Text (Text) import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition @@ -28,6 +29,7 @@ import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import qualified Data.Set as Set +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B -- @@ -59,28 +61,28 @@ documentTree blocks inline = do } -- | Create a tag containing the given string. -toTag :: String -> Tag +toTag :: Text -> Tag toTag = Tag -- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } +newtype PropertyKey = PropertyKey { fromKey :: Text } deriving (Show, Eq, Ord) -- | Create a property key containing the given string. Org mode keys are -- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower +toPropertyKey :: Text -> PropertyKey +toPropertyKey = PropertyKey . T.toLower -- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } +newtype PropertyValue = PropertyValue { fromValue :: Text } -- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue +toPropertyValue :: Text -> PropertyValue toPropertyValue = PropertyValue -- | Check whether the property value is non-nil (i.e. truish). isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] +isNonNil p = T.toLower (fromValue p) `notElem` ["()", "{}", "nil"] -- | Key/value pairs from a PROPERTIES drawer type Properties = [(PropertyKey, PropertyValue)] @@ -273,7 +275,7 @@ headlineToHeader hdln = do todoKeyword :: Monad m => OrgParser m TodoMarker todoKeyword = try $ do taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) + let kwParser tdm = try (tdm <$ textStr (todoMarkerName tdm) <* spaceChar <* updateLastPreCharPos) choice (map kwParser taskStates) @@ -281,26 +283,26 @@ todoKeyword = try $ do todoKeywordToInlines :: TodoMarker -> Inlines todoKeywordToInlines tdm = let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm + todoState = T.toLower . T.pack . show $ todoMarkerState tdm classes = [todoState, todoText] in B.spanWith (mempty, classes, mempty) (B.str todoText) propertiesToAttr :: Properties -> Attr propertiesToAttr properties = let - toStringPair = fromKey *** fromValue + toTextPair = fromKey *** fromValue customIdKey = toPropertyKey "custom_id" classKey = toPropertyKey "class" unnumberedKey = toPropertyKey "unnumbered" specialProperties = [customIdKey, classKey, unnumberedKey] id' = maybe mempty fromValue . lookup customIdKey $ properties cls = maybe mempty fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) + kvs' = map toTextPair . filter ((`notElem` specialProperties) . fst) $ properties isUnnumbered = maybe False isNonNil . lookup unnumberedKey $ properties in - (id', words cls ++ ["unnumbered" | isUnnumbered], kvs') + (id', T.words cls ++ ["unnumbered" | isUnnumbered], kvs') tagsToInlines :: [Tag] -> Inlines tagsToInlines [] = mempty @@ -336,15 +338,15 @@ planningToBlock planning = do <> B.emph (B.str time) -- | An Org timestamp, including repetition marks. TODO: improve -type Timestamp = String +type Timestamp = Text timestamp :: Monad m => OrgParser m Timestamp timestamp = try $ do openChar <- oneOf "<[" let isActive = openChar == '<' let closeChar = if isActive then '>' else ']' - content <- many1Till anyChar (char closeChar) - return (openChar : content ++ [closeChar]) + content <- many1TillChar anyChar (char closeChar) + return $ T.cons openChar $ content `T.snoc` closeChar -- | Planning information for a subtree/headline. data PlanningInfo = PlanningInfo @@ -374,7 +376,7 @@ planningInfo = try $ do propertiesDrawer :: Monad m => OrgParser m Properties propertiesDrawer = try $ do drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" + guard $ T.toUpper drawerType == "PROPERTIES" manyTill property (try endOfDrawer) where property :: Monad m => OrgParser m (PropertyKey, PropertyValue) @@ -382,12 +384,12 @@ propertiesDrawer = try $ do key :: Monad m => OrgParser m PropertyKey key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + skipSpaces *> char ':' *> many1TillChar nonspaceChar (char ':') value :: Monad m => OrgParser m PropertyValue value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + skipSpaces *> manyTillChar anyChar (try $ skipSpaces *> newline) - endOfDrawer :: Monad m => OrgParser m String + endOfDrawer :: Monad m => OrgParser m Text endOfDrawer = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index f783eaa0f..f1f089273 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ExportSettings Copyright : © 2016–2019 Albert Krewinkel @@ -21,6 +22,7 @@ import Text.Pandoc.Readers.Org.Parsing import Control.Monad (mzero, void) import Data.Char (toLower) import Data.Maybe (listToMaybe) +import Data.Text (Text) -- | Read and handle space separated org-mode export settings. exportSettings :: PandocMonad m => OrgParser m () @@ -70,11 +72,11 @@ exportSetting = choice genericExportSetting :: Monad m => OrgParser m a - -> String + -> Text -> ExportSettingSetter a -> OrgParser m () genericExportSetting optionParser settingIdentifier setter = try $ do - _ <- string settingIdentifier *> char ':' + _ <- textStr settingIdentifier *> char ':' value <- optionParser updateState $ modifyExportSettings value where @@ -82,11 +84,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do st { orgStateExportSettings = setter val . orgStateExportSettings $ st } -- | A boolean option, either nil (False) or non-nil (True). -booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m () +booleanSetting :: Monad m => Text -> ExportSettingSetter Bool -> OrgParser m () booleanSetting = genericExportSetting elispBoolean -- | An integer-valued option. -integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m () +integerSetting :: Monad m => Text -> ExportSettingSetter Int -> OrgParser m () integerSetting = genericExportSetting parseInt where parseInt = try $ @@ -95,7 +97,7 @@ integerSetting = genericExportSetting parseInt -- | Either the string "headline" or an elisp boolean and treated as an -- @ArchivedTreesOption@. archivedTreeSetting :: Monad m - => String + => Text -> ExportSettingSetter ArchivedTreesOption -> OrgParser m () archivedTreeSetting = @@ -115,42 +117,42 @@ archivedTreeSetting = -- | A list or a complement list (i.e. a list starting with `not`). complementableListSetting :: Monad m - => String - -> ExportSettingSetter (Either [String] [String]) + => Text + -> ExportSettingSetter (Either [Text] [Text]) -> OrgParser m () complementableListSetting = genericExportSetting $ choice - [ Left <$> complementStringList + [ Left <$> complementTextList , Right <$> stringList , (\b -> if b then Left [] else Right []) <$> elispBoolean ] where -- Read a plain list of strings. - stringList :: Monad m => OrgParser m [String] + stringList :: Monad m => OrgParser m [Text] stringList = try $ char '(' - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' -- Read an emacs lisp list specifying a complement set. - complementStringList :: Monad m => OrgParser m [String] - complementStringList = try $ + complementTextList :: Monad m => OrgParser m [Text] + complementTextList = try $ string "(not " - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' - elispString :: Monad m => OrgParser m String - elispString = try $ + elispText :: Monad m => OrgParser m Text + elispText = try $ char '"' - *> manyTill alphaNum (char '"') + *> manyTillChar alphaNum (char '"') -- | Read but ignore the export setting. -ignoredSetting :: Monad m => String -> OrgParser m () -ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) +ignoredSetting :: Monad m => Text -> OrgParser m () +ignoredSetting s = try (() <$ textStr s <* char ':' <* many1 nonspaceChar) -- | Read any setting string, but ignore it and emit a warning. ignoreAndWarn :: PandocMonad m => OrgParser m () ignoreAndWarn = try $ do - opt <- many1 nonspaceChar + opt <- many1Char nonspaceChar report (UnknownOrgExportOption opt) return () diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index cae590c5f..da638f717 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -20,7 +20,7 @@ import Prelude import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Inlines) @@ -38,12 +38,14 @@ import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) import Data.List (intersperse) import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T import Data.Maybe (fromMaybe) -- -- Functions acting on the parser state -- -recordAnchorId :: PandocMonad m => String -> OrgParser m () +recordAnchorId :: PandocMonad m => Text -> OrgParser m () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : orgStateAnchorIds s } @@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines) linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural @@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: PandocMonad m => OrgParser m String +orgRefCiteKey :: PandocMonad m => OrgParser m Text orgRefCiteKey = let citeKeySpecialChars = "-_:\\./," :: String isCiteKeySpecialChar c = c `elem` citeKeySpecialChars @@ -329,7 +331,7 @@ orgRefCiteKey = endOfCitation = try $ do many $ satisfy isCiteKeySpecialChar satisfy $ not . isCiteKeyChar - in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation -- | Supported citation types. Only a small subset of org-ref types is @@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote inlineNote :: PandocMonad m => OrgParser m (F Inlines) inlineNote = try $ do string "[fn:" - ref <- many alphaNum + ref <- manyChar alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - unless (null ref) $ - addToNotesTable ("fn:" ++ ref, note) + unless (T.null ref) $ + addToNotesTable ("fn:" <> ref, note) return $ B.note <$> note referencedNote :: PandocMonad m => OrgParser m (F Inlines) @@ -397,7 +399,7 @@ referencedNote = try $ do return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return . B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" <> ref <> "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } @@ -420,7 +422,7 @@ explicitOrImageLink = try $ do return $ do src <- srcF title <- titleF - case cleanLinkString descr of + case cleanLinkText descr of Just imgSrc | isImageFilename imgSrc -> return . B.link src "" $ B.image imgSrc mempty mempty _ -> @@ -429,10 +431,10 @@ explicitOrImageLink = try $ do selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) selflinkOrImage = try $ do target <- char '[' *> linkTarget <* char ']' - case cleanLinkString target of - Nothing -> case target of - '#':_ -> returnF $ B.link target "" (B.str target) - _ -> return $ internalLink target (B.str target) + case cleanLinkText target of + Nothing -> case T.uncons target of + Just ('#', _) -> returnF $ B.link target "" (B.str target) + _ -> return $ internalLink target (B.str target) Just nonDocTgt -> if isImageFilename nonDocTgt then returnF $ B.image nonDocTgt "" "" else returnF $ B.link nonDocTgt "" (B.str target) @@ -449,35 +451,35 @@ angleLink = try $ do char '>' return link -linkTarget :: PandocMonad m => OrgParser m String -linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]") +linkTarget :: PandocMonad m => OrgParser m Text +linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]") -possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String +possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser m (F String) +applyCustomLinkFormat :: Text -> OrgParser m (F Text) applyCustomLinkFormat link = do - let (linkType, rest) = break (== ':') link + let (linkType, rest) = T.break (== ':') link return $ do formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter + return $ maybe link ($ T.drop 1 rest) formatter -- | Take a link and return a function which produces new inlines when given -- description inlines. -linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF :: Text -> Inlines -> F Inlines linkToInlinesF linkStr = - case linkStr of - "" -> pure . B.link mempty "" -- wiki link (empty by convention) - ('#':_) -> pure . B.link linkStr "" -- document-local fraction - _ -> case cleanLinkString linkStr of - Just extTgt -> return . B.link extTgt "" - Nothing -> internalLink linkStr -- other internal link - -internalLink :: String -> Inlines -> F Inlines + case T.uncons linkStr of + Nothing -> pure . B.link mempty "" -- wiki link (empty by convention) + Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction + _ -> case cleanLinkText linkStr of + Just extTgt -> return . B.link extTgt "" + Nothing -> internalLink linkStr -- other internal link + +internalLink :: Text -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds if anchorB - then return $ B.link ('#':link) "" title + then return $ B.link ("#" <> link) "" title else return $ B.emph title -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with @@ -493,15 +495,15 @@ anchor = try $ do returnF $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") + *> many1Char (noneOf "\t\n\r<>\"' ") <* string ">>" <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors -- the org function @org-export-solidify-link-text@. -solidify :: String -> String -solidify = map replaceSpecialChar +solidify :: Text -> Text +solidify = T.map replaceSpecialChar where replaceSpecialChar c | isAlphaNum c = c | c `elem` ("_.-:" :: String) = c @@ -511,25 +513,25 @@ solidify = map replaceSpecialChar inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) inlineCodeBlock = try $ do string "src_" - lang <- many1 orgArgWordChar + lang <- many1Char orgArgWordChar opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption - inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r") + inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang] let attrKeyVal = originalLang lang <> opts let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode returnF $ if exportsCode opts then codeInlineBlck else mempty where - inlineBlockOption :: PandocMonad m => OrgParser m (String, String) + inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text) inlineBlockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgInlineParamValue return (argKey, paramValue) - orgInlineParamValue :: PandocMonad m => OrgParser m String + orgInlineParamValue :: PandocMonad m => OrgParser m Text orgInlineParamValue = try $ skipSpaces *> notFollowedBy (char ':') - *> many1 (noneOf "\t\n\r ]") + *> many1Char (noneOf "\t\n\r ]") <* skipSpaces @@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) math :: PandocMonad m => OrgParser m (F Inlines) math = return . B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' + , mathTextBetween '$' , rawMathBetween "\\(" "\\)" ] @@ -604,7 +606,7 @@ updatePositions c = do return c symbol :: PandocMonad m => OrgParser m (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions) emphasisBetween :: PandocMonad m => Char @@ -619,7 +621,7 @@ emphasisBetween c = try $ do verbatimBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text verbatimBetween c = try $ emphasisStart c *> many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) @@ -627,33 +629,33 @@ verbatimBetween c = try $ verbatimChar = noneOf "\n\r" >>= updatePositions -- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: PandocMonad m +mathTextBetween :: PandocMonad m => Char - -> OrgParser m String -mathStringBetween c = try $ do + -> OrgParser m Text +mathTextBetween c = try $ do mathStart c body <- many1TillNOrLessNewlines mathAllowedNewlines (noneOf (c:"\n\r")) (lookAhead $ mathEnd c) final <- mathEnd c - return $ body ++ [final] + return $ T.snoc body final -- | Parse a single character between @c@ using math rules math1CharBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars char c eof <|> () <$ lookAhead (oneOf mathPostChars) - return [res] + return $ T.singleton res rawMathBetween :: PandocMonad m - => String - -> String - -> OrgParser m String -rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) + => Text + -> Text + -> OrgParser m Text +rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e) -- | Parses the start (opening character) of emphasis emphasisStart :: PandocMonad m => Char -> OrgParser m Char @@ -702,10 +704,10 @@ enclosedInlines start end = try $ enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b - -> OrgParser m String + -> OrgParser m Text enclosedRaw start end = try $ start *> (onSingleLine <|> spanningTwoLines) - where onSingleLine = try $ many1Till (noneOf "\n\r") end + where onSingleLine = try $ many1TillChar (noneOf "\n\r") end spanningTwoLines = try $ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine @@ -714,7 +716,7 @@ enclosedRaw start end = try $ many1TillNOrLessNewlines :: PandocMonad m => Int -> OrgParser m Char -> OrgParser m a - -> OrgParser m String + -> OrgParser m Text many1TillNOrLessNewlines n p end = try $ nMoreLines (Just n) mempty >>= oneOrMore where @@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $ rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) finalLine = try $ manyTill p end minus1 k = k - 1 - oneOrMore cs = cs <$ guard (not $ null cs) + oneOrMore cs = T.pack cs <$ guard (not $ null cs) -- Org allows customization of the way it reads emphasis. We use the defaults -- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` @@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") - , simpleSubOrSuperString + , simpleSubOrSuperText ] >>= parseFromString (mconcat <$> many inline) - where enclosing (left, right) s = left : s ++ [right] + where enclosing (left, right) s = T.cons left $ T.snoc s right -simpleSubOrSuperString :: PandocMonad m => OrgParser m String -simpleSubOrSuperString = try $ do +simpleSubOrSuperText :: PandocMonad m => OrgParser m Text +simpleSubOrSuperText = try $ do state <- getState guard . exportSubSuperscripts . orgStateExportSettings $ state - choice [ string "*" - , mappend <$> option [] ((:[]) <$> oneOf "+-") - <*> many1 alphaNum + choice [ textStr "*" + , mappend <$> option "" (T.singleton <$> oneOf "+-") + <*> many1Char alphaNum ] inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) @@ -793,28 +795,28 @@ inlineLaTeX = try $ do maybe mzero returnF $ parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils where - parseAsMath :: String -> Maybe Inlines + parseAsMath :: Text -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs - parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) + parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines) parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs - parseAsMathMLSym :: String -> Maybe Inlines + parseAsMathMLSym :: Text -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) -- drop initial backslash and any trailing "{}" - where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 + where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1 state :: ParserState state = def{ stateOptions = def{ readerExtensions = enableExtension Ext_raw_tex (readerExtensions def) } } - texMathToPandoc :: String -> Maybe [Inline] + texMathToPandoc :: Text -> Maybe [Inline] texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just -inlineLaTeXCommand :: PandocMonad m => OrgParser m String +inlineLaTeXCommand :: PandocMonad m => OrgParser m Text inlineLaTeXCommand = try $ do rest <- getInput st <- getState @@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. - let cmdNoSpc = dropWhileEnd isSpace cs - let len = length cmdNoSpc + let cmdNoSpc = T.dropWhileEnd isSpace cs + let len = T.length cmdNoSpc count len anyChar return cmdNoSpc _ -> mzero --- Taken from Data.OldList. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] - exportSnippet :: PandocMonad m => OrgParser m (F Inlines) exportSnippet = try $ do string "@@" - format <- many1Till (alphaNum <|> char '-') (char ':') - snippet <- manyTill anyChar (try $ string "@@") + format <- many1TillChar (alphaNum <|> char '-') (char ':') + snippet <- manyTillChar anyChar (try $ string "@@") returnF $ B.rawInline format snippet macro :: PandocMonad m => OrgParser m (F Inlines) @@ -845,7 +843,7 @@ macro = try $ do recursionDepth <- orgStateMacroDepth <$> getState guard $ recursionDepth < 15 string "{{{" - name <- many alphaNum + name <- manyChar alphaNum args <- ([] <$ string "}}}") <|> char '(' *> argument `sepBy` char ',' <* eoa expander <- lookupMacro name <$> getState @@ -857,7 +855,7 @@ macro = try $ do updateState $ \s -> s { orgStateMacroDepth = recursionDepth } return res where - argument = many $ notFollowedBy eoa *> noneOf "," + argument = manyChar $ notFollowedBy eoa *> noneOf "," eoa = string ")}}}" smart :: PandocMonad m => OrgParser m (F Inlines) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 0a388403e..811a5b974 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Meta Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -30,11 +31,12 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) import Control.Monad (mzero, void, when) -import Data.Char (toLower) import Data.List (intersperse) import Data.Maybe (fromMaybe) import qualified Data.Map as M import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T import Network.HTTP (urlEncode) -- | Returns the current meta, respecting export options. @@ -47,7 +49,7 @@ metaExport = do . (if exportWithEmail settings then id else removeMeta "email") <$> orgStateMeta st -removeMeta :: String -> Meta -> Meta +removeMeta :: Text -> Meta -> Meta removeMeta key meta' = let metaMap = unMeta meta' in Meta $ M.delete key metaMap @@ -60,18 +62,18 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do - key <- map toLower <$> metaKey + key <- T.toLower <$> metaKey (key', value) <- metaValue key let addMetaValue st = st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } when (key' /= "results") $ updateState addMetaValue -metaKey :: Monad m => OrgParser m String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces +metaKey :: Monad m => OrgParser m Text +metaKey = T.toLower <$> many1Char (noneOf ": \n\r") + <* char ':' + <* skipSpaces -metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue) +metaValue :: PandocMonad m => Text -> OrgParser m (Text, F MetaValue) metaValue key = let inclKey = "header-includes" in case key of @@ -88,7 +90,7 @@ metaValue key = -- Org-mode expects class options to contain the surrounding brackets, -- pandoc does not. "latex_class_options" -> ("classoption",) <$> - metaModifiedString (filter (`notElem` "[]")) + metaModifiedString (T.filter (`notElem` ("[]" :: String))) "html_head" -> (inclKey,) <$> accumulatingList inclKey (metaExportSnippet "html") _ -> (key,) <$> metaString @@ -98,25 +100,25 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do - itemStrs <- many1 (noneOf ",\n") `sepBy1` char ',' + itemStrs <- many1Char (noneOf ",\n") `sepBy1` char ',' newline - items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs + items <- mapM (parseFromString inlinesTillNewline . (<> "\n")) itemStrs let toMetaInlines = MetaInlines . B.toList return $ MetaList . map toMetaInlines <$> sequence items metaString :: Monad m => OrgParser m (F MetaValue) metaString = metaModifiedString id -metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue) +metaModifiedString :: Monad m => (Text -> Text) -> OrgParser m (F MetaValue) metaModifiedString f = return . MetaString . f <$> anyLine -- | Read an format specific meta definition -metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue) +metaExportSnippet :: Monad m => Text -> OrgParser m (F MetaValue) metaExportSnippet format = return . MetaInlines . B.toList . B.rawInline format <$> anyLine -- | Accumulate the result of the @parser@ in a list under @key@. -accumulatingList :: Monad m => String +accumulatingList :: Monad m => Text -> OrgParser m (F MetaValue) -> OrgParser m (F MetaValue) accumulatingList key p = do @@ -147,33 +149,33 @@ optionLine = try $ do "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar _ -> mzero -addLinkFormat :: Monad m => String - -> (String -> String) +addLinkFormat :: Monad m => Text + -> (Text -> Text) -> OrgParser m () addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: Monad m => OrgParser m (String, String -> String) +parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text) parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat return (linkType, linkSubst) -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. -parseFormat :: Monad m => OrgParser m (String -> String) +parseFormat :: Monad m => OrgParser m (Text -> Text) parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend where -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) + replacePlain = try $ (\x -> T.concat . flip intersperse x) <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack) <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest + justAppend = try $ (<>) <$> rest - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + rest = manyTillChar anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTillChar (noneOf "\n\r") (try $ string ('%':c:"")) tagList :: Monad m => OrgParser m [Tag] tagList = do @@ -231,41 +233,41 @@ todoSequence = try $ do (x:xs) -> return $ keywordsToSequence (reverse xs) [x] where - todoKeywords :: Monad m => OrgParser m [String] + todoKeywords :: Monad m => OrgParser m [Text] todoKeywords = try $ - let keyword = many1 nonspaceChar <* skipSpaces + let keyword = many1Char nonspaceChar <* skipSpaces endOfKeywords = todoDoneSep <|> void newline in manyTill keyword (lookAhead endOfKeywords) todoDoneSep :: Monad m => OrgParser m () todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 - keywordsToSequence :: [String] -> [String] -> TodoSequence + keywordsToSequence :: [Text] -> [Text] -> TodoSequence keywordsToSequence todo done = let todoMarkers = map (TodoMarker Todo) todo doneMarkers = map (TodoMarker Done) done in todoMarkers ++ doneMarkers -macroDefinition :: Monad m => OrgParser m (String, [String] -> String) +macroDefinition :: Monad m => OrgParser m (Text, [Text] -> Text) macroDefinition = try $ do - macroName <- many1 nonspaceChar <* skipSpaces + macroName <- many1Char nonspaceChar <* skipSpaces firstPart <- expansionPart (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart) let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder return (macroName, expander) where placeholder :: Monad m => OrgParser m Int - placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1 digit + placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1Char digit - expansionPart :: Monad m => OrgParser m String - expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r") + expansionPart :: Monad m => OrgParser m Text + expansionPart = try $ manyChar (notFollowedBy placeholder *> noneOf "\n\r") alternate :: [a] -> [a] -> [a] alternate [] ys = ys alternate xs [] = xs alternate (x:xs) (y:ys) = x : y : alternate xs ys - reorder :: [Int] -> [String] -> [String] + reorder :: [Int] -> [Text] -> [Text] reorder perm xs = let element n = take 1 $ drop (n - 1) xs in concatMap element perm diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index d6dde8b22..cf5583b76 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ParserState Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -63,16 +64,16 @@ import Text.Pandoc.Readers.LaTeX.Types (Macro) type F = Future OrgParserState -- | An inline note / footnote containing the note key and its (inline) value. -type OrgNoteRecord = (String, F Blocks) +type OrgNoteRecord = (Text, F Blocks) -- | Table of footnotes type OrgNoteTable = [OrgNoteRecord] -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. -type OrgLinkFormatters = M.Map String (String -> String) +type OrgLinkFormatters = M.Map Text (Text -> Text) -- | Macro expander function -type MacroExpander = [String] -> String +type MacroExpander = [Text] -> Text -- | Tag -newtype Tag = Tag { fromTag :: String } +newtype Tag = Tag { fromTag :: Text } deriving (Show, Eq, Ord) -- | The states in which a todo item can be @@ -82,7 +83,7 @@ data TodoState = Todo | Done -- | A ToDo keyword like @TODO@ or @DONE@. data TodoMarker = TodoMarker { todoMarkerState :: TodoState - , todoMarkerName :: String + , todoMarkerName :: Text } deriving (Show, Eq) @@ -91,7 +92,7 @@ type TodoSequence = [TodoMarker] -- | Org-mode parser state data OrgParserState = OrgParserState - { orgStateAnchorIds :: [String] + { orgStateAnchorIds :: [Text] , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before -- emphasis; spaces and newlines are @@ -102,13 +103,13 @@ data OrgParserState = OrgParserState , orgStateExcludeTags :: Set.Set Tag , orgStateExcludeTagsChanged :: Bool , orgStateExportSettings :: ExportSettings - , orgStateIdentifiers :: Set.Set String - , orgStateIncludeFiles :: [String] + , orgStateIdentifiers :: Set.Set Text + , orgStateIncludeFiles :: [Text] , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMacros :: M.Map String MacroExpander + , orgStateMacros :: M.Map Text MacroExpander , orgStateMacroDepth :: Int , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable @@ -212,10 +213,10 @@ activeTodoSequences st = activeTodoMarkers :: OrgParserState -> TodoSequence activeTodoMarkers = concat . activeTodoSequences -lookupMacro :: String -> OrgParserState -> Maybe MacroExpander +lookupMacro :: Text -> OrgParserState -> Maybe MacroExpander lookupMacro macroName = M.lookup macroName . orgStateMacros -registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState +registerMacro :: (Text, MacroExpander) -> OrgParserState -> OrgParserState registerMacro (name, expander) st = let curMacros = orgStateMacros st in st{ orgStateMacros = M.insert name expander curMacros } @@ -236,7 +237,7 @@ data ArchivedTreesOption = -- These settings can be changed via OPTIONS statements. data ExportSettings = ExportSettings { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees - , exportDrawers :: Either [String] [String] + , exportDrawers :: Either [Text] [Text] -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 24aa0779d..718925120 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -32,7 +32,13 @@ module Text.Pandoc.Readers.Org.Parsing , orgTagWordChar -- * Re-exports from Text.Pandoc.Parser , ParserContext (..) + , textStr + , countChar + , manyChar + , many1Char + , manyTillChar , many1Till + , many1TillChar , notFollowedBy' , spaceChar , nonspaceChar @@ -98,6 +104,7 @@ module Text.Pandoc.Readers.Org.Parsing ) where import Prelude +import Data.Text (Text) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline, @@ -108,14 +115,14 @@ import Control.Monad (guard) import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m) +type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities -- -- | Parse any line of text -anyLine :: Monad m => OrgParser m String +anyLine :: Monad m => OrgParser m Text anyLine = P.anyLine <* updateLastPreCharPos @@ -123,7 +130,7 @@ anyLine = -- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character -- allowed before emphasised text. -parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a +parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a parseFromString parser str' = do updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } result <- P.parseFromString parser str' @@ -142,7 +149,7 @@ newline = <* updateLastForbiddenCharPos -- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: Monad m => OrgParser m [Char] +blanklines :: Monad m => OrgParser m Text blanklines = P.blanklines <* updateLastPreCharPos @@ -192,21 +199,21 @@ updateLastPreCharPos = getPosition >>= \p -> -- -- | Read the key of a plist style key-value list. -orgArgKey :: Monad m => OrgParser m String +orgArgKey :: Monad m => OrgParser m Text orgArgKey = try $ skipSpaces *> char ':' - *> many1 orgArgWordChar + *> many1Char orgArgWordChar -- | Read the value of a plist style key-value list. -orgArgWord :: Monad m => OrgParser m String -orgArgWord = many1 orgArgWordChar +orgArgWord :: Monad m => OrgParser m Text +orgArgWord = many1Char orgArgWordChar -- | Chars treated as part of a word in plists. orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar = alphaNum <|> oneOf "-_" -orgTagWord :: Monad m => OrgParser m String -orgTagWord = many1 orgTagWordChar +orgTagWord :: Monad m => OrgParser m Text +orgTagWord = many1Char orgTagWordChar orgTagWordChar :: Monad m => OrgParser m Char orgTagWordChar = alphaNum <|> oneOf "@%#_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 34f958373..be0a2068e 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -10,7 +10,7 @@ Utility functions used in other Pandoc Org modules. -} module Text.Pandoc.Readers.Org.Shared - ( cleanLinkString + ( cleanLinkText , isImageFilename , originalLang , translateLang @@ -19,44 +19,44 @@ module Text.Pandoc.Readers.Org.Shared import Prelude import Data.Char (isAlphaNum) -import Data.List (isPrefixOf) +import Data.Text (Text) +import qualified Data.Text as T import System.FilePath (isValid, takeExtension) - +import Text.Pandoc.Shared (elemText) -- | Check whether the given string looks like the path to of URL of an image. -isImageFilename :: String -> Bool -isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri) +isImageFilename :: Text -> Bool +isImageFilename fp = hasImageExtension && (isValid (T.unpack fp) || isKnownProtocolUri) where - hasImageExtension = takeExtension fp `elem` imageExtensions - isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols + hasImageExtension = takeExtension (T.unpack fp) `elem` imageExtensions + isKnownProtocolUri = any (\x -> (x <> "://") `T.isPrefixOf` fp) protocols imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ] protocols = [ "file", "http", "https" ] -- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if -- the string does not appear to be a link. -cleanLinkString :: String -> Maybe String -cleanLinkString s = - case s of - '/':_ -> Just $ "file://" ++ s -- absolute path - '.':'/':_ -> Just s -- relative path - '.':'.':'/':_ -> Just s -- relative path - -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s' - _ -> if isUrl s then Just s else Nothing - where - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) +cleanLinkText :: Text -> Maybe Text +cleanLinkText s + | Just _ <- T.stripPrefix "/" s = Just $ "file://" <> s -- absolute path + | Just _ <- T.stripPrefix "./" s = Just s -- relative path + | Just _ <- T.stripPrefix "../" s = Just s -- relative path + -- Relative path or URL (file schema) + | Just s' <- T.stripPrefix "file:" s = Just $ if "//" `T.isPrefixOf` s' then s else s' + | otherwise = if isUrl s then Just s else Nothing + where + isUrl :: Text -> Bool + isUrl cs = + let (scheme, path) = T.break (== ':') cs + in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme + && not (T.null path) -- | Creates an key-value pair marking the original language name specified for -- a piece of source code. -- | Creates an key-value attributes marking the original language name -- specified for a piece of source code. -originalLang :: String -> [(String, String)] +originalLang :: Text -> [(Text, Text)] originalLang lang = let transLang = translateLang lang in if transLang == lang @@ -66,7 +66,7 @@ originalLang lang = -- | Translate from Org-mode's programming language identifiers to those used -- by Pandoc. This is useful to allow for proper syntax highlighting in -- Pandoc output. -translateLang :: String -> String +translateLang :: Text -> Text translateLang cs = case cs of "C" -> "c" @@ -79,5 +79,5 @@ translateLang cs = "sqlite" -> "sql" _ -> cs -exportsCode :: [(String, String)] -> Bool +exportsCode :: [(Text, Text)] -> Bool exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7e29caf28..d2fba4449 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.RST Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -19,9 +20,8 @@ import Control.Arrow (second) import Control.Monad (forM_, guard, liftM, mplus, mzero, when) import Control.Monad.Except (throwError) import Control.Monad.Identity (Identity (..)) -import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum) -import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, - nub, sort, transpose) +import Data.Char (isHexDigit, isSpace, toUpper, isAlphaNum) +import Data.List (deleteFirstsBy, elemIndex, nub, sort, transpose) import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Sequence (ViewR (..), viewr) @@ -47,16 +47,16 @@ import Text.Printf (printf) -- | Parse reStructuredText string and return Pandoc document. readRST :: PandocMonad m => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ Text to parse (assuming @'\n'@ line endings) -> m Pandoc readRST opts s = do parsed <- readWithM parseRST def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e -type RSTParser m = ParserT [Char] ParserState m +type RSTParser m = ParserT Text ParserState m -- -- Constants and data structure definitions @@ -113,7 +113,7 @@ titleTransform (bs, meta) = metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta metaFromDefList ds meta = adjustAuthors $ foldr f meta ds - where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) + where f (k,v) = setMeta (T.toLower $ stringify k) (mconcat $ map fromList v) adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" @@ -136,13 +136,13 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds isSp LineBreak = True isSp _ = False splitOnSemi = splitBy (==Str ";") - factorSemi (Str []) = [] - factorSemi (Str s) = case break (==';') s of - (xs,[]) -> [Str xs] - (xs,';':ys) -> Str xs : Str ";" : - factorSemi (Str ys) - (xs,ys) -> Str xs : - factorSemi (Str ys) + factorSemi (Str "") = [] + factorSemi (Str s) = case T.break (==';') s of + (xs,"") -> [Str xs] + (xs,T.uncons -> Just (';',ys)) -> Str xs : Str ";" : + factorSemi (Str ys) + (xs,ys) -> Str xs : + factorSemi (Str ys) factorSemi x = [x] parseRST :: PandocMonad m => RSTParser m Pandoc @@ -151,7 +151,7 @@ parseRST = do startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- concat <$> + docMinusKeys <- T.concat <$> manyTill (referenceKey <|> anchorDef <|> noteBlock <|> citationBlock <|> (snd <$> withRaw comment) <|> @@ -180,7 +180,7 @@ parseRST = do return $ Pandoc meta' (blocks' ++ refBlock) parseCitation :: PandocMonad m - => (String, String) -> RSTParser m (Inlines, [Blocks]) + => (Text, Text) -> RSTParser m (Inlines, [Blocks]) parseCitation (ref, raw) = do contents <- parseFromString' parseBlocks raw return (B.spanWith (ref, ["citation-label"], []) (B.str ref), @@ -215,23 +215,23 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: Monad m => Int -> RSTParser m (String, String) +rawFieldListItem :: Monad m => Int -> RSTParser m (Text, Text) rawFieldListItem minIndent = try $ do indent <- length <$> many (char ' ') guard $ indent >= minIndent char ':' - name <- many1Till (noneOf "\n") (char ':') + name <- many1TillChar (noneOf "\n") (char ':') (() <$ lookAhead newline) <|> skipMany1 spaceChar first <- anyLine rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock - let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" + let raw = (if T.null first then "" else (first <> "\n")) <> rest <> "\n" return (name, raw) fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent - term <- parseInlineFromString name + term <- parseInlineFromText name contents <- parseFromString' parseBlocks raw optional blanklines return (term, [contents]) @@ -251,12 +251,12 @@ fieldList = try $ do lineBlock :: PandocMonad m => RSTParser m Blocks lineBlock = try $ do lines' <- lineBlockLines - lines'' <- mapM parseInlineFromString lines' + lines'' <- mapM parseInlineFromText lines' return $ B.lineBlock lines'' -lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks +lineBlockDirective :: PandocMonad m => Text -> RSTParser m Blocks lineBlockDirective body = do - lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body + lines' <- mapM parseInlineFromText $ T.lines $ stripTrailingNewlines body return $ B.lineBlock lines' -- @@ -271,9 +271,9 @@ para = try $ do newline blanklines case viewr (B.unMany result) of - ys :> Str xs | "::" `isSuffixOf` xs -> do + ys :> Str xs | "::" `T.isSuffixOf` xs -> do raw <- option mempty codeBlockBody - return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs)) + return $ B.para (B.Many ys <> B.str (T.take (T.length xs - 1) xs)) <> raw _ -> return (B.para result) @@ -349,7 +349,7 @@ singleHeader' = try $ do -- hrule block -- -hrule :: Monad m => ParserT [Char] st m Blocks +hrule :: Monad m => ParserT Text st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -364,7 +364,7 @@ hrule = try $ do -- read a line indented by a given string indentedLine :: (HasReaderOptions st, Monad m) - => Int -> ParserT [Char] st m [Char] + => Int -> ParserT Text st m Text indentedLine indents = try $ do lookAhead spaceChar gobbleAtMostSpaces indents @@ -373,29 +373,29 @@ indentedLine indents = try $ do -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. indentedBlock :: (HasReaderOptions st, Monad m) - => ParserT [Char] st m [Char] + => ParserT Text st m Text indentedBlock = try $ do indents <- length <$> lookAhead (many1 spaceChar) lns <- many1 $ try $ do b <- option "" blanklines l <- indentedLine indents - return (b ++ l) + return (b <> l) optional blanklines - return $ unlines lns + return $ T.unlines lns -quotedBlock :: Monad m => ParserT [Char] st m [Char] +quotedBlock :: Monad m => ParserT Text st m Text quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines - return $ unlines lns + return $ T.unlines lns -codeBlockStart :: Monad m => ParserT [Char] st m Char +codeBlockStart :: Monad m => ParserT Text st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks +codeBlock :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks +codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> (indentedBlock <|> quotedBlock) @@ -407,24 +407,24 @@ lhsCodeBlock = try $ do lns <- latexCodeBlock <|> birdCodeBlock blanklines return $ B.codeBlockWith ("", ["haskell","literate"], []) - $ intercalate "\n" lns + $ T.intercalate "\n" lns -latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]] +latexCodeBlock :: Monad m => ParserT Text st m [Text] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]] +birdCodeBlock :: Monad m => ParserT Text st m [Text] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it - if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns + if all (\ln -> T.null ln || T.take 1 ln == " ") lns + then map (T.drop 1) lns else lns -birdTrackLine :: Monad m => ParserT [Char] st m [Char] +birdTrackLine :: Monad m => ParserT Text st m Text birdTrackLine = char '>' >> anyLine -- @@ -435,7 +435,7 @@ blockQuote :: PandocMonad m => RSTParser m Blocks blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ raw <> "\n\n" return $ B.blockQuote contents {- @@ -445,12 +445,12 @@ encoding -} includeDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks includeDirective top fields body = do let f = trim top - guard $ not (null f) - guard $ null (trim body) + guard $ not (T.null f) + guard $ T.null (trim body) -- options let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead @@ -458,11 +458,11 @@ includeDirective top fields body = do oldInput <- getInput containers <- stateContainers <$> getState when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show oldPos + throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos updateState $ \s -> s{ stateContainers = f : stateContainers s } - mbContents <- readFileFromDirs ["."] f + mbContents <- readFileFromDirs ["."] $ T.unpack f contentLines <- case mbContents of - Just s -> return $ lines s + Just s -> return $ T.lines s Nothing -> do logMessage $ CouldNotLoadIncludeFile f oldPos return [] @@ -478,23 +478,23 @@ includeDirective top fields body = do let contentLines' = drop (startLine' - 1) $ take (endLine' - 1) contentLines let contentLines'' = (case trim <$> lookup "end-before" fields of - Just patt -> takeWhile (not . (patt `isInfixOf`)) + Just patt -> takeWhile (not . (patt `T.isInfixOf`)) Nothing -> id) . (case trim <$> lookup "start-after" fields of Just patt -> drop 1 . - dropWhile (not . (patt `isInfixOf`)) + dropWhile (not . (patt `T.isInfixOf`)) Nothing -> id) $ contentLines' - let contents' = unlines contentLines'' ++ "\n" + let contents' = T.unlines contentLines'' <> "\n" case lookup "code" fields of Just lang -> do let numberLines = lookup "number-lines" fields - let classes = maybe [] words (lookup "class" fields) + let classes = maybe [] T.words (lookup "class" fields) let ident = maybe "" trimr $ lookup "name" fields codeblock ident classes numberLines (trimr lang) contents' False Nothing -> case lookup "literal" fields of Just _ -> return $ B.rawBlock "rst" contents' Nothing -> do - setPosition $ newPos f 1 1 + setPosition $ newPos (T.unpack f) 1 1 setInput contents' bs <- optional blanklines >> (mconcat <$> many block) @@ -519,14 +519,14 @@ definitionListItem = try $ do term <- trimInlines . mconcat <$> many1Till inline endline raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ raw ++ "\n" + contents <- parseFromString' parseBlocks $ raw <> "\n" return (term, [contents]) definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Monad m => ParserT [Char] st m Int +bulletListStart :: Monad m => ParserT Text st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -543,7 +543,7 @@ orderedListStart style delim = try $ do return $ markerLen + length white -- parse a line of a list item -listLine :: Monad m => Int -> RSTParser m [Char] +listLine :: Monad m => Int -> RSTParser m Text listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -551,21 +551,21 @@ listLine markerLength = try $ do -- parse raw text for one list item, excluding start marker and continuations rawListItem :: Monad m => RSTParser m Int - -> RSTParser m (Int, [Char]) + -> RSTParser m (Int, Text) rawListItem start = try $ do markerLength <- start firstLine <- anyLineNewline restLines <- many (listLine markerLength) - return (markerLength, firstLine ++ concat restLines) + return (markerLength, firstLine <> T.concat restLines) -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Monad m => Int -> RSTParser m [Char] +listContinuation :: Monad m => Int -> RSTParser m Text listContinuation markerLength = try $ do - blanks <- many1 blankline + blanks <- many1Char blankline result <- many1 (listLine markerLength) - return $ blanks ++ concat result + return $ blanks <> T.concat result listItem :: PandocMonad m => RSTParser m Int @@ -581,7 +581,7 @@ listItem start = try $ do let oldContext = stateParserContext state setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n" + parsed <- parseFromString' parseBlocks $ T.concat (first:rest) <> "\n" updateState (\st -> st {stateParserContext = oldContext}) return $ case B.toList parsed of [Para xs] -> @@ -617,9 +617,9 @@ comment = try $ do optional indentedBlock return mempty -directiveLabel :: Monad m => RSTParser m String -directiveLabel = map toLower - <$> many1Till (letter <|> char '-') (try $ string "::") +directiveLabel :: Monad m => RSTParser m Text +directiveLabel = T.toLower + <$> many1TillChar (letter <|> char '-') (try $ string "::") directive :: PandocMonad m => RSTParser m Blocks directive = try $ do @@ -631,7 +631,7 @@ directive' = do skipMany1 spaceChar label <- directiveLabel skipMany spaceChar - top <- many $ satisfy (/='\n') + top <- manyChar $ satisfy (/='\n') <|> try (char '\n' <* notFollowedBy' (rawFieldListItem 1) <* many1 (char ' ') <* @@ -644,35 +644,33 @@ directive' = do else many $ rawFieldListItem fieldIndent body <- option "" $ try $ blanklines >> indentedBlock optional blanklines - let body' = body ++ "\n\n" + let body' = body <> "\n\n" name = trim $ fromMaybe "" (lookup "name" fields) - classes = words $ maybe "" trim (lookup "class" fields) + classes = T.words $ maybe "" trim (lookup "class" fields) keyvals = [(k, trim v) | (k, v) <- fields, k /= "name", k /= "class"] imgAttr cl = (name, classes ++ alignClasses, widthAttr ++ heightAttr) where - alignClasses = words $ maybe "" trim (lookup cl fields) ++ - maybe "" (\x -> "align-" ++ trim x) + alignClasses = T.words $ maybe "" trim (lookup cl fields) <> + maybe "" (\x -> "align-" <> trim x) (lookup "align" fields) scale = case trim <$> lookup "scale" fields of - Just v -> case reverse v of - '%':vv -> - case safeRead (reverse vv) of - Just (percent :: Double) - -> percent / 100.0 - Nothing -> 1.0 - _ -> - case safeRead v of - Just (s :: Double) -> s - Nothing -> 1.0 - Nothing -> 1.0 + Just v -> case T.unsnoc v of + Just (vv, '%') -> case safeRead vv of + Just (percent :: Double) + -> percent / 100.0 + Nothing -> 1.0 + _ -> case safeRead v of + Just (s :: Double) -> s + Nothing -> 1.0 + Nothing -> 1.0 widthAttr = maybe [] (\x -> [("width", - show $ scaleDimension scale x)]) + tshow $ scaleDimension scale x)]) $ lookup "width" fields >>= - (lengthToDim . filter (not . isSpace)) + (lengthToDim . T.filter (not . isSpace)) heightAttr = maybe [] (\x -> [("height", - show $ scaleDimension scale x)]) + tshow $ scaleDimension scale x)]) $ lookup "height" fields >>= - (lengthToDim . filter (not . isSpace)) + (lengthToDim . T.filter (not . isSpace)) case label of "include" -> includeDirective top fields body' "table" -> tableDirective top fields body' @@ -682,36 +680,37 @@ directive' = do "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "role" -> addNewRole top $ map (second trim) fields "container" -> B.divWith - (name, "container" : words top ++ classes, []) <$> + (name, "container" : T.words top ++ classes, []) <$> parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim top) + parseInlineFromText (trim top) "unicode" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim $ unicodeTransform top) + parseInlineFromText (trim $ unicodeTransform top) "compound" -> parseFromString' parseBlocks body' "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body' "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body' "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body' - "rubric" -> B.para . B.strong <$> parseInlineFromString top + "rubric" -> B.para . B.strong <$> parseInlineFromText top _ | label `elem` ["attention","caution","danger","error","hint", "important","note","tip","warning","admonition"] -> - do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + do bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body' let lab = case label of - "admonition" -> mempty - (l:ls) -> B.divWith ("",["title"],[]) - (B.para (B.str (toUpper l : ls))) - [] -> mempty + "admonition" -> mempty + (T.uncons -> Just (l, ls)) + -> B.divWith ("",["title"],[]) + (B.para (B.str $ T.cons (toUpper l) ls)) + _ -> mempty return $ B.divWith (name,label:classes,keyvals) (lab <> bod) "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields - tit <- B.para . B.strong <$> parseInlineFromString - (trim top ++ if null subtit + tit <- B.para . B.strong <$> parseInlineFromText + (trim top <> if T.null subtit then "" - else (": " ++ subtit)) + else (": " <> subtit)) bod <- parseFromString' parseBlocks body' return $ B.divWith (name,"sidebar":classes,keyvals) $ tit <> bod "topic" -> - do tit <- B.para . B.strong <$> parseInlineFromString top + do tit <- B.para . B.strong <$> parseInlineFromText top bod <- parseFromString' parseBlocks body' return $ B.divWith (name,"topic":classes,keyvals) $ tit <> bod "default-role" -> mempty <$ updateState (\s -> @@ -726,7 +725,7 @@ directive' = do let attribs = (name, ["aafig"], map (second trimr) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath - $ toChunks $ top ++ "\n\n" ++ body + $ toChunks $ top <> "\n\n" <> body "figure" -> do (caption, legend) <- parseFromString' extractCaption body' let src = escapeURI $ trim top @@ -742,7 +741,7 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = (name, words (trim top), map (second trimr) fields) + let attrs = (name, T.words (trim top), map (second trimr) fields) -- directive content or the first immediately following element children <- case body of "" -> block @@ -750,12 +749,12 @@ directive' = do return $ B.divWith attrs children other -> do pos <- getPosition - logMessage $ SkippedContent (".. " ++ other) pos - bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + logMessage $ SkippedContent (".. " <> other) pos + bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body' return $ B.divWith (name, other:classes, keyvals) bod tableDirective :: PandocMonad m - => String -> [(String, String)] -> String -> RSTParser m Blocks + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks tableDirective top fields body = do bs <- parseFromString' parseBlocks body case B.toList bs of @@ -770,7 +769,7 @@ tableDirective top fields body = do Just "grid" -> widths' Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) - $ splitBy (`elem` (" ," :: String)) specs + $ splitTextBy (`elem` (" ," :: String)) specs Nothing -> widths' -- align is not applicable since we can't represent whole table align return $ B.singleton $ Table (B.toList title) @@ -783,7 +782,7 @@ tableDirective top fields body = do -- since Pandoc doesn't support a table with multiple header rows. -- We don't need to parse :align: as it represents the whole table align. listTableDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks listTableDirective top fields body = do bs <- parseFromString' parseBlocks body @@ -799,7 +798,7 @@ listTableDirective top fields body = do widths = case trim <$> lookup "widths" fields of Just "auto" -> replicate numOfCols 0 Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $ - splitBy (`elem` (" ," :: String)) specs + splitTextBy (`elem` (" ," :: String)) specs _ -> replicate numOfCols 0 return $ B.table title (zip (replicate numOfCols AlignDefault) widths) @@ -812,7 +811,7 @@ listTableDirective top fields body = do normWidths ws = map (/ max 1 (sum ws)) ws csvTableDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks csvTableDirective top fields rawcsv = do let explicitHeader = trim <$> lookup "header" fields @@ -820,14 +819,17 @@ csvTableDirective top fields rawcsv = do csvDelim = case trim <$> lookup "delim" fields of Just "tab" -> '\t' Just "space" -> ' ' - Just [c] -> c + Just (T.unpack -> [c]) + -> c _ -> ',' , csvQuote = case trim <$> lookup "quote" fields of - Just [c] -> c - _ -> '"' + Just (T.unpack -> [c]) + -> c + _ -> '"' , csvEscape = case trim <$> lookup "escape" fields of - Just [c] -> Just c - _ -> Nothing + Just (T.unpack -> [c]) + -> Just c + _ -> Nothing , csvKeepSpace = case trim <$> lookup "keepspace" fields of Just "true" -> True _ -> False @@ -840,16 +842,16 @@ csvTableDirective top fields rawcsv = do lookup "file" fields `mplus` lookup "url" fields of Just u -> do (bs, _) <- fetchItem u - return $ UTF8.toString bs + return $ UTF8.toText bs Nothing -> return rawcsv - let res = parseCSV opts (T.pack $ case explicitHeader of - Just h -> h ++ "\n" ++ rawcsv' - Nothing -> rawcsv') + let res = parseCSV opts (case explicitHeader of + Just h -> h <> "\n" <> rawcsv' + Nothing -> rawcsv') case res of Left e -> throwError $ PandocParsecError "csv table" e Right rawrows -> do - let parseCell = parseFromString' (plain <|> return mempty) . T.unpack + let parseCell = parseFromString' (plain <|> return mempty) let parseRow = mapM parseCell rows <- mapM parseRow rawrows let (headerRow,bodyRows,numOfCols) = @@ -865,7 +867,7 @@ csvTableDirective top fields rawcsv = do Just "auto" -> replicate numOfCols 0 Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) - $ splitBy (`elem` (" ," :: String)) specs + $ splitTextBy (`elem` (" ," :: String)) specs _ -> replicate numOfCols 0 return $ B.table title (zip (replicate numOfCols AlignDefault) widths) @@ -876,10 +878,10 @@ csvTableDirective top fields rawcsv = do -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: PandocMonad m - => String -> [(String, String)] -> RSTParser m Blocks -addNewRole roleString fields = do + => Text -> [(Text, Text)] -> RSTParser m Blocks +addNewRole roleText fields = do pos <- getPosition - (role, parentRole) <- parseFromString' inheritedRole roleString + (role, parentRole) <- parseFromString' inheritedRole roleText customRoles <- stateRstCustomRoles <$> getState let getBaseRole (r, f, a) roles = case M.lookup r roles of @@ -888,7 +890,7 @@ addNewRole roleString fields = do (baseRole, baseFmt, baseAttr) = getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt - annotate :: [String] -> [String] + annotate :: [Text] -> [Text] annotate = maybe id (:) $ if baseRole == "code" then lookup "language" fields @@ -904,7 +906,7 @@ addNewRole roleString fields = do pos "format" -> when (baseRole /= "raw") $ logMessage $ SkippedContent ":format: [because parent of role is not :raw:]" pos - _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos + _ -> logMessage $ SkippedContent (":" <> key <> ":") pos when (parentRole == "raw" && countKeys "format" > 1) $ logMessage $ SkippedContent ":format: [after first in definition of role]" @@ -930,30 +932,29 @@ addNewRole roleString fields = do -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u -- or as XML-style hexadecimal character entities, e.g. ᨫ -- or text, which is used as-is. Comments start with .. -unicodeTransform :: String -> String -unicodeTransform t = - case t of - ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment - ('0':'x':xs) -> go "0x" xs - ('x':xs) -> go "x" xs - ('\\':'x':xs) -> go "\\x" xs - ('U':'+':xs) -> go "U+" xs - ('u':xs) -> go "u" xs - ('\\':'u':xs) -> go "\\u" xs - ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs) - -- drop semicolon - (\(c,s) -> c : unicodeTransform (drop 1 s)) - $ extractUnicodeChar xs - (x:xs) -> x : unicodeTransform xs - [] -> [] - where go pref zs = maybe (pref ++ unicodeTransform zs) - (\(c,s) -> c : unicodeTransform s) - $ extractUnicodeChar zs - -extractUnicodeChar :: String -> Maybe (Char, String) +unicodeTransform :: Text -> Text +unicodeTransform t + | Just xs <- T.stripPrefix ".." t = unicodeTransform $ T.dropWhile (/= '\n') xs -- comment + | Just xs <- T.stripPrefix "0x" t = go "0x" xs + | Just xs <- T.stripPrefix "x" t = go "x" xs + | Just xs <- T.stripPrefix "\\x" t = go "\\x" xs + | Just xs <- T.stripPrefix "U+" t = go "U+" xs + | Just xs <- T.stripPrefix "u" t = go "u" xs + | Just xs <- T.stripPrefix "\\u" t = go "\\u" xs + | Just xs <- T.stripPrefix "&#x" t = maybe ("&#x" <> unicodeTransform xs) + -- drop semicolon + (\(c,s) -> T.cons c $ unicodeTransform $ T.drop 1 s) + $ extractUnicodeChar xs + | Just (x, xs) <- T.uncons t = T.cons x $ unicodeTransform xs + | otherwise = "" + where go pref zs = maybe (pref <> unicodeTransform zs) + (\(c,s) -> T.cons c $ unicodeTransform s) + $ extractUnicodeChar zs + +extractUnicodeChar :: Text -> Maybe (Char, Text) extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc - where (ds,rest) = span isHexDigit s - mbc = safeRead ('\'':'\\':'x':ds ++ "'") + where (ds,rest) = T.span isHexDigit s + mbc = safeRead ("'\\x" <> ds <> "'") extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks) extractCaption = do @@ -963,16 +964,16 @@ extractCaption = do -- divide string by blanklines, and surround with -- \begin{aligned}...\end{aligned} if needed. -toChunks :: String -> [String] -toChunks = dropWhile null - . map (addAligned . trim . unlines) - . splitBy (all (`elem` (" \t" :: String))) . lines +toChunks :: Text -> [Text] +toChunks = dropWhile T.null + . map (addAligned . trim . T.unlines) + . splitBy (T.all (`elem` (" \t" :: String))) . T.lines -- we put this in an aligned environment if it contains \\, see #4254 - where addAligned s = if "\\\\" `isInfixOf` s - then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}" + where addAligned s = if "\\\\" `T.isInfixOf` s + then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}" else s -codeblock :: String -> [String] -> Maybe String -> String -> String -> Bool +codeblock :: Text -> [Text] -> Maybe Text -> Text -> Text -> Bool -> RSTParser m Blocks codeblock ident classes numberLines lang body rmTrailingNewlines = return $ B.codeBlockWith attribs $ stripTrailingNewlines' body @@ -984,7 +985,7 @@ codeblock ident classes numberLines lang body rmTrailingNewlines = : maybe [] (const ["numberLines"]) numberLines ++ classes kvs = maybe [] (\n -> case trimr n of - [] -> [] + "" -> [] xs -> [("startFrom", xs)]) numberLines @@ -992,25 +993,25 @@ codeblock ident classes numberLines lang body rmTrailingNewlines = --- note block --- -noteBlock :: Monad m => RSTParser m [Char] +noteBlock :: Monad m => RSTParser m Text noteBlock = try $ do (ref, raw, replacement) <- noteBlock' noteMarker updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s } -- return blanks so line count isn't affected return replacement -citationBlock :: Monad m => RSTParser m [Char] +citationBlock :: Monad m => RSTParser m Text citationBlock = try $ do (ref, raw, replacement) <- noteBlock' citationMarker updateState $ \s -> s { stateCitations = M.insert ref raw (stateCitations s), - stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[])) + stateKeys = M.insert (toKey ref) (("#" <> ref,""), ("",["citation"],[])) (stateKeys s) } -- return blanks so line count isn't affected return replacement noteBlock' :: Monad m - => RSTParser m String -> RSTParser m (String, String, String) + => RSTParser m Text -> RSTParser m (Text, Text, Text) noteBlock' marker = try $ do startPos <- getPosition string ".." @@ -1021,24 +1022,24 @@ noteBlock' marker = try $ do blanks <- option "" blanklines rest <- option "" indentedBlock endPos <- getPosition - let raw = first ++ "\n" ++ blanks ++ rest ++ "\n" - let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n' + let raw = first <> "\n" <> blanks <> rest <> "\n" + let replacement = T.replicate (sourceLine endPos - sourceLine startPos) "\n" return (ref, raw, replacement) -citationMarker :: Monad m => RSTParser m [Char] +citationMarker :: Monad m => RSTParser m Text citationMarker = do char '[' res <- simpleReferenceName char ']' return res -noteMarker :: Monad m => RSTParser m [Char] +noteMarker :: Monad m => RSTParser m Text noteMarker = do char '[' - res <- many1 digit + res <- many1Char digit <|> - try (char '#' >> liftM ('#':) simpleReferenceName) - <|> count 1 (oneOf "#*") + try (char '#' >> liftM ("#" <>) simpleReferenceName) + <|> countChar 1 (oneOf "#*") char ']' return res @@ -1046,47 +1047,48 @@ noteMarker = do -- reference key -- -quotedReferenceName :: PandocMonad m => RSTParser m String +quotedReferenceName :: PandocMonad m => RSTParser m Text quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - manyTill anyChar (char '`') + manyTillChar anyChar (char '`') -- Simple reference names are single words consisting of alphanumerics -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName :: Monad m => ParserT [Char] st m String +simpleReferenceName :: Monad m => ParserT Text st m Text simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum <|> try (oneOf "-_:+." <* lookAhead alphaNum) - return (x:xs) + return $ T.pack (x:xs) -referenceName :: PandocMonad m => RSTParser m String +referenceName :: PandocMonad m => RSTParser m Text referenceName = quotedReferenceName <|> simpleReferenceName -referenceKey :: PandocMonad m => RSTParser m [Char] +referenceKey :: PandocMonad m => RSTParser m Text referenceKey = do startPos <- getPosition choice [substKey, anonymousKey, regularKey] optional blanklines endPos <- getPosition -- return enough blanks to replace key - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -targetURI :: Monad m => ParserT [Char] st m [Char] +targetURI :: Monad m => ParserT Text st m Text targetURI = do skipSpaces optional $ try $ newline >> notFollowedBy blankline contents <- trim <$> - many1 (satisfy (/='\n') + many1Char (satisfy (/='\n') <|> try (newline >> many1 spaceChar >> noneOf " \t\n")) blanklines - case reverse contents of - -- strip backticks - '_':'`':xs -> return (dropWhile (=='`') (reverse xs) ++ "_") - '_':_ -> return contents - _ -> return (escapeURI contents) + return $ stripBackticks contents + where + stripBackticks t + | Just xs <- T.stripSuffix "`_" t = T.dropWhile (=='`') xs <> "_" + | Just _ <- T.stripSuffix "_" t = t + | otherwise = escapeURI t substKey :: PandocMonad m => RSTParser m () substKey = try $ do @@ -1112,21 +1114,21 @@ anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition - let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) + let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos)) updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -referenceNames :: PandocMonad m => RSTParser m [String] +referenceNames :: PandocMonad m => RSTParser m [Text] referenceNames = do let rn = try $ do string ".. _" ref <- quotedReferenceName - <|> many ( noneOf ":\n" - <|> try (char '\n' <* - string " " <* - notFollowedBy blankline) - <|> try (char ':' <* lookAhead alphaNum) - ) + <|> manyChar ( noneOf ":\n" + <|> try (char '\n' <* + string " " <* + notFollowedBy blankline) + <|> try (char ':' <* lookAhead alphaNum) + ) char ':' return ref first <- rn @@ -1140,18 +1142,18 @@ regularKey = try $ do -- .. _goodbye: url.com refs <- referenceNames src <- targetURI - guard $ not (null src) + guard $ not (T.null src) let keys = map toKey refs forM_ keys $ \key -> updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -anchorDef :: PandocMonad m => RSTParser m [Char] +anchorDef :: PandocMonad m => RSTParser m Text anchorDef = try $ do (refs, raw) <- withRaw $ try (referenceNames <* blanklines) forM_ refs $ \rawkey -> updateState $ \s -> s { stateKeys = - M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } + M.insert (toKey rawkey) (("#" <> rawkey,""), nullAttr) $ stateKeys s } -- keep this for 2nd round of parsing, where we'll add the divs (anchor) return raw @@ -1174,12 +1176,12 @@ anchor = try $ do -- because it hides them from promoteHeader, see #4240 _ -> return $ foldr addDiv b refs -headerBlock :: PandocMonad m => RSTParser m [Char] +headerBlock :: PandocMonad m => RSTParser m Text headerBlock = do ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader') (ident,_,_) <- registerHeader nullAttr txt let key = toKey (stringify txt) - updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr) + updateState $ \s -> s { stateKeys = M.insert key (("#" <> ident,""), nullAttr) $ stateKeys s } return raw @@ -1201,13 +1203,13 @@ headerBlock = do -- Grid tables TODO: -- - column spans -dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int) +dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator @@ -1215,17 +1217,17 @@ simpleTableSep :: Monad m => Char -> RSTParser m Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: Monad m => RSTParser m [Char] +simpleTableFooter :: Monad m => RSTParser m Text simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLine :: Monad m => [Int] -> RSTParser m [Text] simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine -simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [Text] simpleTableRawLineWithEmptyCell indices = try $ do cs <- simpleTableRawLine indices - let isEmptyCell = all (\c -> c == ' ' || c == '\t') + let isEmptyCell = T.all (\c -> c == ' ' || c == '\t') guard $ any isEmptyCell cs return cs @@ -1235,15 +1237,15 @@ simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices conLines <- many $ simpleTableRawLineWithEmptyCell indices - let cols = map unlines . transpose $ firstLine : conLines ++ - [replicate (length indices) "" - | not (null conLines)] + let cols = map T.unlines . transpose $ firstLine : conLines ++ + [replicate (length indices) "" + | not (null conLines)] mapM (parseFromString' parseBlocks) cols -simpleTableSplitLine :: [Int] -> String -> [String] +simpleTableSplitLine :: [Int] -> Text -> [Text] simpleTableSplitLine indices line = map trimr - $ tail $ splitByIndices (init indices) line + $ tail $ splitTextByIndices (init indices) line simpleTableHeader :: PandocMonad m => Bool -- ^ Headerless table @@ -1322,35 +1324,35 @@ inlineContent = choice [ whitespace , escapedChar , symbol ] <?> "inline content" -parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines -parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) +parseInlineFromText :: PandocMonad m => Text -> RSTParser m Inlines +parseInlineFromText = parseFromString' (trimInlines . mconcat <$> many inline) hyphens :: Monad m => RSTParser m Inlines hyphens = do - result <- many1 (char '-') + result <- many1Char (char '-') optional endline -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Monad m => ParserT [Char] st m Inlines +escapedChar :: Monad m => ParserT Text st m Inlines escapedChar = do c <- escaped anyChar return $ if c == ' ' || c == '\n' || c == '\r' -- '\ ' is null in RST then mempty - else B.str [c] + else B.str $ T.singleton c symbol :: Monad m => RSTParser m Inlines symbol = do result <- oneOf specialChars - return $ B.str [result] + return $ B.str $ T.singleton result -- parses inline code, between codeStart and codeEnd code :: Monad m => RSTParser m Inlines code = try $ do string "``" - result <- manyTill anyChar (try (string "``")) + result <- manyTillChar anyChar (try (string "``")) return $ B.code - $ trim $ unwords $ lines result + $ trim $ T.unwords $ T.lines result -- succeeds only if we're not right after a str (ie. in middle of word) atStart :: Monad m => RSTParser m a -> RSTParser m a @@ -1382,7 +1384,7 @@ interpretedRole = try $ do renderRole contents Nothing role nullAttr renderRole :: PandocMonad m - => String -> Maybe String -> String -> Attr -> RSTParser m Inlines + => Text -> Maybe Text -> Text -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of "sup" -> return $ B.superscript $ treatAsText contents "superscript" -> return $ B.superscript $ treatAsText contents @@ -1412,36 +1414,36 @@ renderRole contents fmt role attr = case role of contents where titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref - rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) - where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" - pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) - where padNo = replicate (4 - length pepNo) '0' ++ pepNo - pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + rfcLink rfcNo = B.link rfcUrl ("RFC " <> rfcNo) $ B.str ("RFC " <> rfcNo) + where rfcUrl = "http://www.faqs.org/rfcs/rfc" <> rfcNo <> ".html" + pepLink pepNo = B.link pepUrl ("PEP " <> pepNo) $ B.str ("PEP " <> pepNo) + where padNo = T.replicate (4 - T.length pepNo) "0" <> pepNo + pepUrl = "http://www.python.org/dev/peps/pep-" <> padNo <> "/" treatAsText = B.text . handleEscapes - handleEscapes [] = [] - handleEscapes ('\\':' ':cs) = handleEscapes cs - handleEscapes ('\\':c:cs) = c : handleEscapes cs - handleEscapes (c:cs) = c : handleEscapes cs + handleEscapes = T.concat . removeSpace . T.splitOn "\\" + where headSpace t = fromMaybe t $ T.stripPrefix " " t + removeSpace (x:xs) = x : map headSpace xs + removeSpace [] = [] -roleName :: PandocMonad m => RSTParser m String -roleName = many1 (letter <|> char '-') +roleName :: PandocMonad m => RSTParser m Text +roleName = many1Char (letter <|> char '-') -roleMarker :: PandocMonad m => RSTParser m String +roleMarker :: PandocMonad m => RSTParser m Text roleMarker = char ':' *> roleName <* char ':' -roleBefore :: PandocMonad m => RSTParser m (String,String) +roleBefore :: PandocMonad m => RSTParser m (Text,Text) roleBefore = try $ do role <- roleMarker contents <- unmarkedInterpretedText return (role,contents) -roleAfter :: PandocMonad m => RSTParser m (String,String) +roleAfter :: PandocMonad m => RSTParser m (Text,Text) roleAfter = try $ do contents <- unmarkedInterpretedText role <- roleMarker <|> (stateRstDefaultRole <$> getState) return (role,contents) -unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] +unmarkedInterpretedText :: PandocMonad m => RSTParser m Text unmarkedInterpretedText = try $ do atStart (char '`') contents <- mconcat <$> (many1 @@ -1453,7 +1455,7 @@ unmarkedInterpretedText = try $ do lookAhead (satisfy isAlphaNum)) )) char '`' - return contents + return $ T.pack contents whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" @@ -1461,7 +1463,7 @@ whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" str :: Monad m => RSTParser m Inlines str = do let strChar = noneOf ("\t\n " ++ specialChars) - result <- many1 strChar + result <- many1Char strChar updateLastStrPos return $ B.str result @@ -1489,7 +1491,7 @@ explicitLink = try $ do notFollowedBy (char '`') -- `` marks start of inline code label' <- trimInlines . mconcat <$> manyTill (notFollowedBy (char '`') >> inlineContent) (char '<') - src <- trim <$> manyTill (noneOf ">\n") (char '>') + src <- trim <$> manyTillChar (noneOf ">\n") (char '>') skipSpaces string "`_" optional $ char '_' -- anonymous form @@ -1501,22 +1503,22 @@ explicitLink = try $ do if isURI src then return ((src, ""), nullAttr) else - case reverse src of - '_':xs -> lookupKey [] (toKey (reverse xs)) - _ -> return ((src, ""), nullAttr) + case T.unsnoc src of + Just (xs, '_') -> lookupKey [] (toKey xs) + _ -> return ((src, ""), nullAttr) return $ B.linkWith attr (escapeURI src') tit label'' -citationName :: PandocMonad m => RSTParser m String +citationName :: PandocMonad m => RSTParser m Text citationName = do raw <- citationMarker - return $ "[" ++ raw ++ "]" + return $ "[" <> raw <> "]" referenceLink :: PandocMonad m => RSTParser m Inlines referenceLink = try $ do ref <- (referenceName <|> citationName) <* char '_' let label' = B.text ref - let isAnonKey (Key ('_':_)) = True - isAnonKey _ = False + let isAnonKey (Key (T.uncons -> Just ('_',_))) = True + isAnonKey _ = False state <- getState let keyTable = stateKeys state key <- option (toKey ref) $ @@ -1533,7 +1535,7 @@ referenceLink = try $ do -- We keep a list of oldkeys so we can detect lookup loops. lookupKey :: PandocMonad m - => [Key] -> Key -> RSTParser m ((String, String), Attr) + => [Key] -> Key -> RSTParser m ((Text, Text), Attr) lookupKey oldkeys key = do pos <- getPosition state <- getState @@ -1544,8 +1546,8 @@ lookupKey oldkeys key = do logMessage $ ReferenceNotFound key' pos return (("",""),nullAttr) -- check for keys of the form link_, which need to be resolved: - Just ((u@(c:_),""),_) | last u == '_', c /= '#' -> do - let rawkey = init u + Just ((u, ""),_) | T.length u > 1, T.last u == '_', T.head u /= '#' -> do + let rawkey = T.init u let newkey = toKey rawkey if newkey `elem` oldkeys then do @@ -1576,7 +1578,7 @@ subst = try $ do case M.lookup key substTable of Nothing -> do pos <- getPosition - logMessage $ ReferenceNotFound (show key) pos + logMessage $ ReferenceNotFound (tshow key) pos return mempty Just target -> return target diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 6519587c6..73122cc14 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -1,5 +1,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Roff Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane @@ -21,7 +23,7 @@ module Text.Pandoc.Readers.Roff , TableRow , RoffToken(..) , RoffTokens(..) - , linePartsToString + , linePartsToText , lexRoff ) where @@ -40,7 +42,7 @@ import qualified Data.Text as T import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Shared (safeRead, substitute) +import Text.Pandoc.Shared (safeRead) import Text.Parsec hiding (tokenPrim) import Text.Pandoc.RoffChar (characterCodes, combiningAccents) import qualified Data.Sequence as Seq @@ -60,28 +62,28 @@ data FontSpec = FontSpec{ fontBold :: Bool defaultFontSpec :: FontSpec defaultFontSpec = FontSpec False False False -data LinePart = RoffStr String +data LinePart = RoffStr T.Text | Font FontSpec | MacroArg Int deriving Show type Arg = [LinePart] -type TableOption = (String, String) +type TableOption = (T.Text, T.Text) data CellFormat = CellFormat { columnType :: Char , pipePrefix :: Bool , pipeSuffix :: Bool - , columnSuffixes :: [String] + , columnSuffixes :: [T.Text] } deriving (Show, Eq, Ord) type TableRow = ([CellFormat], [RoffTokens]) data RoffToken = TextLine [LinePart] | EmptyLine - | ControlLine String [Arg] SourcePos + | ControlLine T.Text [Arg] SourcePos | Tbl [TableOption] [TableRow] SourcePos deriving Show @@ -95,7 +97,7 @@ data RoffMode = NormalMode | CopyMode deriving Show -data RoffState = RoffState { customMacros :: M.Map String RoffTokens +data RoffState = RoffState { customMacros :: M.Map T.Text RoffTokens , prevFont :: FontSpec , currentFont :: FontSpec , tableTabChar :: Char @@ -121,10 +123,10 @@ instance Default RoffState where , afterConditional = False } -type RoffLexer m = ParserT [Char] RoffState m +type RoffLexer m = ParserT T.Text RoffState m -- --- Lexer: String -> RoffToken +-- Lexer: T.Text -> RoffToken -- eofline :: Stream s m Char => ParsecT s u m () @@ -133,11 +135,11 @@ eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}") spacetab :: Stream s m Char => ParsecT s u m Char spacetab = char ' ' <|> char '\t' -characterCodeMap :: M.Map String Char +characterCodeMap :: M.Map T.Text Char characterCodeMap = M.fromList $ map (\(x,y) -> (y,x)) characterCodes -combiningAccentsMap :: M.Map String Char +combiningAccentsMap :: M.Map T.Text Char combiningAccentsMap = M.fromList $ map (\(x,y) -> (y,x)) combiningAccents @@ -151,43 +153,40 @@ escapeGlyph = do c <- lookAhead (oneOf ['[','(']) escapeArg >>= resolveGlyph c -resolveGlyph :: PandocMonad m => Char -> String -> RoffLexer m [LinePart] +resolveGlyph :: PandocMonad m => Char -> T.Text -> RoffLexer m [LinePart] resolveGlyph delimChar glyph = do - let cs = substitute "_u" " u" glyph -- unicode glyphs separated by _ - (case words cs of + let cs = T.replace "_u" " u" glyph -- unicode glyphs separated by _ + (case T.words cs of [] -> mzero [s] -> case M.lookup s characterCodeMap `mplus` readUnicodeChar s of Nothing -> mzero - Just c -> return [RoffStr [c]] + Just c -> return [RoffStr $ T.singleton c] (s:ss) -> do basechar <- case M.lookup s characterCodeMap `mplus` readUnicodeChar s of Nothing -> - case s of + case T.unpack s of [ch] | isAscii ch && isAlphaNum ch -> return ch _ -> mzero Just c -> return c - let addAccents [] xs = return $ T.unpack . - Normalize.normalize Normalize.NFC . - T.pack $ reverse xs + let addAccents [] xs = return $ Normalize.normalize Normalize.NFC $ + T.reverse xs addAccents (a:as) xs = case M.lookup a combiningAccentsMap `mplus` readUnicodeChar a of - Just x -> addAccents as (x:xs) + Just x -> addAccents as $ T.cons x xs Nothing -> mzero - addAccents ss [basechar] >>= \xs -> return [RoffStr xs]) + addAccents ss (T.singleton basechar) >>= \xs -> return [RoffStr xs]) <|> case delimChar of - '[' -> escUnknown ("\\[" ++ glyph ++ "]") - '(' -> escUnknown ("\\(" ++ glyph) - '\'' -> escUnknown ("\\C'" ++ glyph ++ "'") + '[' -> escUnknown ("\\[" <> glyph <> "]") + '(' -> escUnknown ("\\(" <> glyph) + '\'' -> escUnknown ("\\C'" <> glyph <> "'") _ -> Prelude.fail "resolveGlyph: unknown glyph delimiter" -readUnicodeChar :: String -> Maybe Char -readUnicodeChar ('u':cs@(_:_:_:_:_)) = - case safeRead ('0':'x':cs) of - Just i -> Just (chr i) - Nothing -> Nothing -readUnicodeChar _ = Nothing +readUnicodeChar :: T.Text -> Maybe Char +readUnicodeChar t = case T.uncons t of + Just ('u', cs) | T.length cs > 3 -> chr <$> safeRead ("0x" <> cs) + _ -> Nothing escapeNormal :: PandocMonad m => RoffLexer m [LinePart] escapeNormal = do @@ -218,14 +217,14 @@ escapeNormal = do NormalMode -> return [RoffStr "\\"] 'H' -> escIgnore 'H' [quoteArg] 'L' -> escIgnore 'L' [quoteArg] - 'M' -> escIgnore 'M' [escapeArg, count 1 (satisfy (/='\n'))] + 'M' -> escIgnore 'M' [escapeArg, countChar 1 (satisfy (/='\n'))] 'N' -> escIgnore 'N' [quoteArg] - 'O' -> escIgnore 'O' [count 1 (oneOf ['0','1'])] + 'O' -> escIgnore 'O' [countChar 1 (oneOf ['0','1'])] 'R' -> escIgnore 'R' [quoteArg] 'S' -> escIgnore 'S' [quoteArg] - 'V' -> escIgnore 'V' [escapeArg, count 1 alphaNum] + 'V' -> escIgnore 'V' [escapeArg, countChar 1 alphaNum] 'X' -> escIgnore 'X' [quoteArg] - 'Y' -> escIgnore 'Y' [escapeArg, count 1 (satisfy (/='\n'))] + 'Y' -> escIgnore 'Y' [escapeArg, countChar 1 (satisfy (/='\n'))] 'Z' -> escIgnore 'Z' [quoteArg] '\'' -> return [RoffStr "`"] '\n' -> return mempty -- line continuation @@ -238,12 +237,12 @@ escapeNormal = do 'd' -> escIgnore 'd' [] -- forward down 1/2em 'e' -> return [RoffStr "\\"] 'f' -> escFont - 'g' -> escIgnore 'g' [escapeArg, count 1 (satisfy (/='\n'))] + 'g' -> escIgnore 'g' [escapeArg, countChar 1 (satisfy (/='\n'))] 'h' -> escIgnore 'h' [quoteArg] - 'k' -> escIgnore 'k' [escapeArg, count 1 (satisfy (/='\n'))] + 'k' -> escIgnore 'k' [escapeArg, countChar 1 (satisfy (/='\n'))] 'l' -> escIgnore 'l' [quoteArg] - 'm' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))] - 'n' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))] + 'm' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))] + 'n' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))] 'o' -> escIgnore 'o' [quoteArg] 'p' -> escIgnore 'p' [] 'r' -> escIgnore 'r' [] @@ -253,7 +252,7 @@ escapeNormal = do 'v' -> escIgnore 'v' [quoteArg] 'w' -> escIgnore 'w' [quoteArg] 'x' -> escIgnore 'x' [quoteArg] - 'z' -> escIgnore 'z' [count 1 anyChar] + 'z' -> escIgnore 'z' [countChar 1 anyChar] '|' -> return [RoffStr "\x2006"] --1/6 em space '~' -> return [RoffStr "\160"] -- nonbreaking space '\\' -> do @@ -262,40 +261,40 @@ escapeNormal = do CopyMode -> char '\\' NormalMode -> return '\\' return [RoffStr "\\"] - _ -> return [RoffStr [c]] + _ -> return [RoffStr $ T.singleton c] -- man 7 groff: "If a backslash is followed by a character that -- does not constitute a defined escape sequence, the backslash -- is silently ignored and the character maps to itself." escIgnore :: PandocMonad m => Char - -> [RoffLexer m String] + -> [RoffLexer m T.Text] -> RoffLexer m [LinePart] escIgnore c argparsers = do pos <- getPosition arg <- snd <$> withRaw (choice argparsers) <|> return "" - report $ SkippedContent ('\\':c:arg) pos + report $ SkippedContent ("\\" <> T.cons c arg) pos return mempty -escUnknown :: PandocMonad m => String -> RoffLexer m [LinePart] +escUnknown :: PandocMonad m => T.Text -> RoffLexer m [LinePart] escUnknown s = do pos <- getPosition report $ SkippedContent s pos return [RoffStr "\xFFFD"] -signedNumber :: PandocMonad m => RoffLexer m String +signedNumber :: PandocMonad m => RoffLexer m T.Text signedNumber = try $ do sign <- option "" ("-" <$ char '-' <|> "" <$ char '+') - ds <- many1 digit - return (sign ++ ds) + ds <- many1Char digit + return (sign <> ds) -- Parses: [..] or (.. -escapeArg :: PandocMonad m => RoffLexer m String +escapeArg :: PandocMonad m => RoffLexer m T.Text escapeArg = choice [ char '[' *> optional expandString *> - manyTill (noneOf ['\n',']']) (char ']') + manyTillChar (noneOf ['\n',']']) (char ']') , char '(' *> optional expandString *> - count 2 (satisfy (/='\n')) + countChar 2 (satisfy (/='\n')) ] expandString :: PandocMonad m => RoffLexer m () @@ -303,21 +302,21 @@ expandString = try $ do pos <- getPosition char '\\' char '*' - cs <- escapeArg <|> count 1 anyChar - s <- linePartsToString <$> resolveString cs pos - getInput >>= setInput . (s ++) + cs <- escapeArg <|> countChar 1 anyChar + s <- linePartsToText <$> resolveText cs pos + getInput >>= setInput . (s <>) return () -- Parses: '..' -quoteArg :: PandocMonad m => RoffLexer m String -quoteArg = char '\'' *> manyTill (noneOf ['\n','\'']) (char '\'') +quoteArg :: PandocMonad m => RoffLexer m T.Text +quoteArg = char '\'' *> manyTillChar (noneOf ['\n','\'']) (char '\'') escFont :: PandocMonad m => RoffLexer m [LinePart] escFont = do - font <- escapeArg <|> count 1 alphaNum - font' <- if null font || font == "P" + font <- escapeArg <|> countChar 1 alphaNum + font' <- if T.null font || font == "P" then prevFont <$> getState - else return $ foldr processFontLetter defaultFontSpec font + else return $ foldr processFontLetter defaultFontSpec $ T.unpack font modifyState $ \st -> st{ prevFont = currentFont st , currentFont = font' } return [Font font'] @@ -345,7 +344,7 @@ lexMacro = do guard $ sourceColumn pos == 1 || afterConditional st char '.' <|> char '\'' skipMany spacetab - macroName <- many (satisfy isAlphaNum) + macroName <- manyChar (satisfy isAlphaNum) case macroName of "nop" -> return mempty "ie" -> lexConditional "ie" @@ -374,8 +373,8 @@ lexTable pos = do spaces opts <- try tableOptions <|> [] <$ optional (char ';') case lookup "tab" opts of - Just (c:_) -> modifyState $ \st -> st{ tableTabChar = c } - _ -> modifyState $ \st -> st{ tableTabChar = '\t' } + Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c } + _ -> modifyState $ \st -> st{ tableTabChar = '\t' } spaces skipMany lexComment spaces @@ -388,7 +387,7 @@ lexTable pos = do string ".TE" skipMany spacetab eofline - return $ singleTok $ Tbl opts (rows ++ concat morerows) pos + return $ singleTok $ Tbl opts (rows <> concat morerows) pos lexTableRows :: PandocMonad m => RoffLexer m [TableRow] lexTableRows = do @@ -428,11 +427,11 @@ tableOptions = many1 tableOption <* spaces <* char ';' tableOption :: PandocMonad m => RoffLexer m TableOption tableOption = do - k <- many1 letter + k <- many1Char letter v <- option "" $ try $ do skipMany spacetab char '(' - manyTill anyChar (char ')') + manyTillChar anyChar (char ')') skipMany spacetab optional (char ',' >> skipMany spacetab) return (k,v) @@ -444,7 +443,7 @@ tableFormatSpec = do let speclines = first:rest spaces char '.' - return $ speclines ++ repeat (lastDef [] speclines) -- last line is default + return $ speclines <> repeat (lastDef [] speclines) -- last line is default tableFormatSpecLine :: PandocMonad m => RoffLexer m [CellFormat] tableFormatSpecLine = @@ -456,19 +455,19 @@ tableColFormat = do $ True <$ try (string "|" <* notFollowedBy spacetab) c <- oneOf ['a','A','c','C','l','L','n','N','r','R','s','S','^','_','-', '=','|'] - suffixes <- many $ try (skipMany spacetab *> count 1 digit) <|> + suffixes <- many $ try (skipMany spacetab *> countChar 1 digit) <|> (do x <- oneOf ['b','B','d','D','e','E','f','F','i','I','m','M', 'p','P','t','T','u','U','v','V','w','W','x','X', 'z','Z'] num <- case toLower x of 'w' -> many1 digit <|> (do char '(' xs <- manyTill anyChar (char ')') - return ("(" ++ xs ++ ")")) <|> + return ("(" <> xs <> ")")) <|> return "" 'f' -> count 1 alphaNum <* skipMany spacetab 'm' -> count 1 alphaNum <* skipMany spacetab _ -> return "" - return $ x : num) + return $ T.pack $ x : num) pipeSuffix' <- option False $ True <$ string "|" return $ CellFormat { columnType = c @@ -479,7 +478,7 @@ tableColFormat = do -- We don't fully handle the conditional. But we do -- include everything under '.ie n', which occurs commonly -- in man pages. -lexConditional :: PandocMonad m => String -> RoffLexer m RoffTokens +lexConditional :: PandocMonad m => T.Text -> RoffLexer m RoffTokens lexConditional mname = do pos <- getPosition skipMany spacetab @@ -498,7 +497,7 @@ lexConditional mname = do case mbtest of Nothing -> do putState st -- reset state, so we don't record macros in skipped section - report $ SkippedContent ('.':mname) pos + report $ SkippedContent (T.cons '.' mname) pos return mempty Just True -> return ifPart Just False -> do @@ -508,7 +507,7 @@ lexConditional mname = do expression :: PandocMonad m => RoffLexer m (Maybe Bool) expression = do raw <- charsInBalanced '(' ')' (satisfy (/= '\n')) - <|> many1 nonspaceChar + <|> many1Char nonspaceChar returnValue $ case raw of "1" -> Just True @@ -533,17 +532,17 @@ lexIncludeFile args = do pos <- getPosition case args of (f:_) -> do - let fp = linePartsToString f + let fp = linePartsToText f dirs <- getResourcePath - result <- readFileFromDirs dirs fp + result <- readFileFromDirs dirs $ T.unpack fp case result of Nothing -> report $ CouldNotLoadIncludeFile fp pos - Just s -> getInput >>= setInput . (s ++) + Just s -> getInput >>= setInput . (s <>) return mempty [] -> return mempty resolveMacro :: PandocMonad m - => String -> [Arg] -> SourcePos -> RoffLexer m RoffTokens + => T.Text -> [Arg] -> SourcePos -> RoffLexer m RoffTokens resolveMacro macroName args pos = do macros <- customMacros <$> getState case M.lookup macroName macros of @@ -552,7 +551,7 @@ resolveMacro macroName args pos = do let fillLP (MacroArg i) zs = case drop (i - 1) args of [] -> zs - (ys:_) -> ys ++ zs + (ys:_) -> ys <> zs fillLP z zs = z : zs let fillMacroArg (TextLine lineparts) = TextLine (foldr fillLP [] lineparts) @@ -565,7 +564,7 @@ lexStringDef args = do -- string definition [] -> Prelude.fail "No argument to .ds" (x:ys) -> do let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys) - let stringName = linePartsToString x + let stringName = linePartsToText x modifyState $ \st -> st{ customMacros = M.insert stringName ts (customMacros st) } return mempty @@ -575,14 +574,14 @@ lexMacroDef args = do -- macro definition modifyState $ \st -> st{ roffMode = CopyMode } (macroName, stopMacro) <- case args of - (x : y : _) -> return (linePartsToString x, linePartsToString y) + (x : y : _) -> return (linePartsToText x, linePartsToText y) -- optional second arg - (x:_) -> return (linePartsToString x, ".") + (x:_) -> return (linePartsToText x, ".") [] -> Prelude.fail "No argument to .de" let stop = try $ do char '.' <|> char '\'' skipMany spacetab - string stopMacro + textStr stopMacro _ <- lexArgs return () ts <- mconcat <$> manyTill manToken stop @@ -628,7 +627,7 @@ lexArgs = do char '"' return [RoffStr "\""] -checkDefined :: PandocMonad m => String -> RoffLexer m [LinePart] +checkDefined :: PandocMonad m => T.Text -> RoffLexer m [LinePart] checkDefined name = do macros <- customMacros <$> getState case M.lookup name macros of @@ -638,19 +637,19 @@ checkDefined name = do escString :: PandocMonad m => RoffLexer m [LinePart] escString = try $ do pos <- getPosition - (do cs <- escapeArg <|> count 1 anyChar - resolveString cs pos) + (do cs <- escapeArg <|> countChar 1 anyChar + resolveText cs pos) <|> mempty <$ char 'S' -- strings and macros share namespace -resolveString :: PandocMonad m - => String -> SourcePos -> RoffLexer m [LinePart] -resolveString stringname pos = do +resolveText :: PandocMonad m + => T.Text -> SourcePos -> RoffLexer m [LinePart] +resolveText stringname pos = do RoffTokens ts <- resolveMacro stringname [] pos case Foldable.toList ts of [TextLine xs] -> return xs _ -> do - report $ SkippedContent ("unknown string " ++ stringname) pos + report $ SkippedContent ("unknown string " <> stringname) pos return mempty lexLine :: PandocMonad m => RoffLexer m RoffTokens @@ -688,16 +687,16 @@ macroArg = try $ do pos <- getPosition backslash char '$' - x <- escapeArg <|> count 1 digit + x <- escapeArg <|> countChar 1 digit case safeRead x of Just i -> return [MacroArg i] Nothing -> do - report $ SkippedContent ("illegal macro argument " ++ x) pos + report $ SkippedContent ("illegal macro argument " <> x) pos return [] regularText :: PandocMonad m => RoffLexer m [LinePart] regularText = do - s <- many1 $ noneOf "\n\r\t \\\"" + s <- many1Char $ noneOf "\n\r\t \\\"" return [RoffStr s] quoteChar :: PandocMonad m => RoffLexer m [LinePart] @@ -708,7 +707,7 @@ quoteChar = do spaceTabChar :: PandocMonad m => RoffLexer m [LinePart] spaceTabChar = do c <- spacetab - return [RoffStr [c]] + return [RoffStr $ T.singleton c] lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens lexEmptyLine = newline >> return (singleTok EmptyLine) @@ -716,8 +715,8 @@ lexEmptyLine = newline >> return (singleTok EmptyLine) manToken :: PandocMonad m => RoffLexer m RoffTokens manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine -linePartsToString :: [LinePart] -> String -linePartsToString = mconcat . map go +linePartsToText :: [LinePart] -> T.Text +linePartsToText = mconcat . map go where go (RoffStr s) = s go _ = mempty @@ -726,7 +725,7 @@ linePartsToString = mconcat . map go lexRoff :: PandocMonad m => SourcePos -> T.Text -> m RoffTokens lexRoff pos txt = do eithertokens <- readWithM (do setPosition pos - mconcat <$> many manToken) def (T.unpack txt) + mconcat <$> many manToken) def txt case eithertokens of Left e -> throwError e Right tokenz -> return tokenz diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 9796de4b9..d587bc41b 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RelaxedPolyRec #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- | @@ -31,7 +32,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Text.Pandoc.Shared (crFilter) +import Text.Pandoc.Shared (crFilter, tshow) import Text.Pandoc.XML (fromEntities) -- | Read twiki from an input string and return a Pandoc document. @@ -41,19 +42,19 @@ readTWiki :: PandocMonad m -> m Pandoc readTWiki opts s = do res <- readWithM parseTWiki def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case res of Left e -> throwError e Right d -> return d -type TWParser = ParserT [Char] ParserState +type TWParser = ParserT Text ParserState -- -- utility functions -- -tryMsg :: String -> TWParser m a -> TWParser m a -tryMsg msg p = try p <?> msg +tryMsg :: Text -> TWParser m a -> TWParser m a +tryMsg msg p = try p <?> T.unpack msg nested :: PandocMonad m => TWParser m a -> TWParser m a nested p = do @@ -64,25 +65,25 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -htmlElement :: PandocMonad m => String -> TWParser m (Attr, String) +htmlElement :: PandocMonad m => Text -> TWParser m (Attr, Text) htmlElement tag = tryMsg tag $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) - content <- manyTill anyChar (endtag <|> endofinput) + content <- T.pack <$> manyTill anyChar (endtag <|> endofinput) return (htmlAttrToPandoc attr, trim content) where endtag = void $ htmlTag (~== TagClose tag) endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof - trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + trim = T.dropAround (=='\n') -htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc :: [Attribute Text] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs + classes = maybe [] T.words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] parseHtmlContentWithAttrs :: PandocMonad m - => String -> TWParser m a -> TWParser m (Attr, [a]) + => Text -> TWParser m a -> TWParser m (Attr, [a]) parseHtmlContentWithAttrs tag parser = do (attr, content) <- htmlElement tag parsedContent <- try $ parseContent content @@ -91,7 +92,13 @@ parseHtmlContentWithAttrs tag parser = do parseContent = parseFromString' $ nested $ manyTill parser endOfContent endOfContent = try $ skipMany blankline >> skipSpaces >> eof -parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] +parseCharHtmlContentWithAttrs :: PandocMonad m + => Text -> TWParser m Char -> TWParser m (Attr, Text) +parseCharHtmlContentWithAttrs tag = fmap go . parseHtmlContentWithAttrs tag + where + go (x, y) = (x, T.pack y) + +parseHtmlContent :: PandocMonad m => Text -> TWParser m a -> TWParser m [a] parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p -- @@ -113,7 +120,7 @@ block = do <|> blockElements <|> para skipMany blankline - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blockElements :: PandocMonad m => TWParser m B.Blocks @@ -150,38 +157,38 @@ literal = rawBlock <$> htmlElement "literal" format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) content -list :: PandocMonad m => String -> TWParser m B.Blocks +list :: PandocMonad m => Text -> TWParser m B.Blocks list prefix = choice [ bulletList prefix , orderedList prefix , definitionList prefix] -definitionList :: PandocMonad m => String -> TWParser m B.Blocks +definitionList :: PandocMonad m => Text -> TWParser m B.Blocks definitionList prefix = tryMsg "definitionList" $ do - indent <- lookAhead $ string prefix *> many1 (string " ") <* string "$ " - elements <- many $ parseDefinitionListItem (prefix ++ concat indent) + indent <- lookAhead $ textStr prefix *> many1 (textStr " ") <* textStr "$ " + elements <- many $ parseDefinitionListItem (prefix <> T.concat indent) return $ B.definitionList elements where parseDefinitionListItem :: PandocMonad m - => String -> TWParser m (B.Inlines, [B.Blocks]) + => Text -> TWParser m (B.Inlines, [B.Blocks]) parseDefinitionListItem indent = do - string (indent ++ "$ ") >> skipSpaces + textStr (indent <> "$ ") >> skipSpaces term <- many1Till inline $ string ": " line <- listItemLine indent $ string "$ " return (mconcat term, [line]) -bulletList :: PandocMonad m => String -> TWParser m B.Blocks +bulletList :: PandocMonad m => Text -> TWParser m B.Blocks bulletList prefix = tryMsg "bulletList" $ parseList prefix (char '*') (char ' ') -orderedList :: PandocMonad m => String -> TWParser m B.Blocks +orderedList :: PandocMonad m => Text -> TWParser m B.Blocks orderedList prefix = tryMsg "orderedList" $ parseList prefix (oneOf "1iIaA") (string ". ") parseList :: PandocMonad m - => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks + => Text -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks parseList prefix marker delim = do - (indent, style) <- lookAhead $ string prefix *> listStyle <* delim - blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) + (indent, style) <- lookAhead $ textStr prefix *> listStyle <* delim + blocks <- many $ parseListItem (prefix <> indent) (char style <* delim) return $ case style of '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks @@ -191,24 +198,24 @@ parseList prefix marker delim = do _ -> B.bulletList blocks where listStyle = do - indent <- many1 $ string " " + indent <- many1 $ textStr " " style <- marker - return (concat indent, style) + return (T.concat indent, style) parseListItem :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks -parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker + => Text -> TWParser m a -> TWParser m B.Blocks +parseListItem prefix marker = textStr prefix >> marker >> listItemLine prefix marker listItemLine :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks + => Text -> TWParser m a -> TWParser m B.Blocks listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent) where lineContent = do content <- anyLine continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ maybe "" (" " ++) continuation - filterSpaces = reverse . dropWhile (== ' ') . reverse - listContinuation = notFollowedBy (string prefix >> marker) >> + return $ filterSpaces content <> "\n" <> maybe "" (" " <>) continuation + filterSpaces = T.dropWhileEnd (== ' ') + listContinuation = notFollowedBy (textStr prefix >> marker) >> string " " >> lineContent parseContent = parseFromString' $ many1 $ nestedList <|> parseInline parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList) @@ -352,29 +359,29 @@ macroWithParameters = try $ do char '%' return $ buildSpan name kvs $ B.str content -buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines +buildSpan :: Text -> [(Text, Text)] -> B.Inlines -> B.Inlines buildSpan className kvs = B.spanWith attrs where attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses) - additionalClasses = maybe [] words $ lookup "class" kvs + additionalClasses = maybe [] T.words $ lookup "class" kvs kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] -macroName :: PandocMonad m => TWParser m String +macroName :: PandocMonad m => TWParser m Text macroName = do first <- letter rest <- many $ alphaNum <|> char '_' - return (first:rest) + return $ T.pack $ first:rest -attributes :: PandocMonad m => TWParser m (String, [(String, String)]) -attributes = foldr (either mkContent mkKvs) ([], []) +attributes :: PandocMonad m => TWParser m (Text, [(Text, Text)]) +attributes = foldr (either mkContent mkKvs) ("", []) <$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}') where spnl = skipMany (spaceChar <|> newline) - mkContent c ([], kvs) = (c, kvs) - mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) + mkContent c ("", kvs) = (c, kvs) + mkContent c (rest, kvs) = (c <> " " <> rest, kvs) mkKvs kv (cont, rest) = (cont, kv : rest) -attribute :: PandocMonad m => TWParser m (Either String (String, String)) +attribute :: PandocMonad m => TWParser m (Either Text (Text, Text)) attribute = withKey <|> withoutKey where withKey = try $ do @@ -383,10 +390,10 @@ attribute = withKey <|> withoutKey curry Right key <$> parseValue False withoutKey = try $ Left <$> parseValue True parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces) - withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) + withQuotes = between (char '"') (char '"') (\_ -> countChar 1 $ noneOf ['"']) withoutQuotes allowSpaces - | allowSpaces = many1 $ noneOf "}" - | otherwise = many1 $ noneOf " }" + | allowSpaces = many1Char $ noneOf "}" + | otherwise = many1Char $ noneOf " }" nestedInlines :: (Show a, PandocMonad m) => TWParser m a -> TWParser m B.Inlines @@ -413,10 +420,10 @@ emphHtml :: PandocMonad m => TWParser m B.Inlines emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) nestedString :: (Show a, PandocMonad m) - => TWParser m a -> TWParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar + => TWParser m a -> TWParser m Text +nestedString end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ many1Char spaceChar <* notFollowedBy end boldCode :: PandocMonad m => TWParser m B.Inlines boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString @@ -429,7 +436,7 @@ code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString codeHtml :: PandocMonad m => TWParser m B.Inlines codeHtml = do - (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + (attrs, content) <- parseCharHtmlContentWithAttrs "code" anyChar return $ B.codeWith attrs $ fromEntities content autoLink :: PandocMonad m => TWParser m B.Inlines @@ -437,7 +444,7 @@ autoLink = try $ do state <- getState guard $ stateAllowLinks state (text, url) <- parseLink - guard $ checkLink (last url) + guard $ checkLink (T.last url) return $ makeLink (text, url) where parseLink = notFollowedBy nop >> (uri <|> emailAddress) @@ -447,17 +454,17 @@ autoLink = try $ do | otherwise = isAlphaNum c str :: PandocMonad m => TWParser m B.Inlines -str = B.str <$> (many1 alphaNum <|> count 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) nop :: PandocMonad m => TWParser m B.Inlines nop = try $ (void exclamation <|> void nopTag) >> followContent where exclamation = char '!' nopTag = stringAnyCase "<nop>" - followContent = B.str . fromEntities <$> many1 nonspaceChar + followContent = B.str . fromEntities <$> many1Char nonspaceChar symbol :: PandocMonad m => TWParser m B.Inlines -symbol = B.str <$> count 1 nonspaceChar +symbol = B.str <$> countChar 1 nonspaceChar smart :: PandocMonad m => TWParser m B.Inlines smart = do @@ -491,13 +498,13 @@ link = try $ do setState $ st{ stateAllowLinks = True } return $ B.link url title content -linkText :: PandocMonad m => TWParser m (String, String, B.Inlines) +linkText :: PandocMonad m => TWParser m (Text, Text, B.Inlines) linkText = do string "[[" - url <- many1Till anyChar (char ']') + url <- T.pack <$> many1Till anyChar (char ']') content <- option (B.str url) (mconcat <$> linkContent) char ']' return (url, "", content) where - linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent + linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent . T.pack parseLinkContent = parseFromString' $ many1 inline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index a638fdf40..5e7aaf910 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Textile Copyright : Copyright (C) 2010-2012 Paul Rivier @@ -38,7 +40,7 @@ import Prelude import Control.Monad (guard, liftM) import Control.Monad.Except (throwError) import Data.Char (digitToInt, isUpper) -import Data.List (intercalate, intersperse, transpose) +import Data.List (intersperse, transpose) import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup (Tag (..), fromAttrib) @@ -52,7 +54,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (crFilter, trim, underlineSpan) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow) -- | Parse a Textile text and return a Pandoc document. readTextile :: PandocMonad m @@ -61,21 +63,21 @@ readTextile :: PandocMonad m -> m Pandoc readTextile opts s = do parsed <- readWithM parseTextile def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e -- | Generate a Pandoc ADT from a textile document -parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc +parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc parseTextile = do many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys/notes were... let firstPassParser = noteBlock <|> lineClump - manyTill firstPassParser eof >>= setInput . concat + manyTill firstPassParser eof >>= setInput . T.concat setPosition startPos st' <- getState let reversedNotes = stateNotes st' @@ -84,29 +86,29 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc nullMeta (B.toList blocks) -- FIXME -noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char] -noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') +noteMarker :: PandocMonad m => ParserT Text ParserState m Text +noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.') -noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char] +noteBlock :: PandocMonad m => ParserT Text ParserState m Text noteBlock = try $ do startPos <- getPosition ref <- noteMarker optional blankline - contents <- unlines <$> many1Till anyLine (blanklines <|> noteBlock) + contents <- T.unlines <$> many1Till anyLine (blanklines <|> noteBlock) endPos <- getPosition - let newnote = (ref, contents ++ "\n") + let newnote = (ref, contents <> "\n") st <- getState let oldnotes = stateNotes st updateState $ \s -> s { stateNotes = newnote : oldnotes } -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -- | Parse document blocks -parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks +parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks] +blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -121,22 +123,22 @@ blockParsers = [ codeBlock ] -- | Any block in the order of definition of blockParsers -block :: PandocMonad m => ParserT [Char] ParserState m Blocks +block :: PandocMonad m => ParserT Text ParserState m Blocks block = do res <- choice blockParsers <?> "block" - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res -commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines return mempty -codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlockBc :: PandocMonad m => ParserT Text ParserState m Blocks codeBlockBc = try $ do string "bc." extended <- option False (True <$ char '.') @@ -150,31 +152,31 @@ codeBlockBc = try $ do rest <- many (notFollowedBy ender *> anyLine) return (f:rest) else manyTill anyLine blanklines - return $ B.codeBlock (trimTrailingNewlines (unlines contents)) + return $ B.codeBlock (trimTrailingNewlines (T.unlines contents)) -trimTrailingNewlines :: String -> String -trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse +trimTrailingNewlines :: Text -> Text +trimTrailingNewlines = T.dropWhileEnd (=='\n') -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlockPre :: PandocMonad m => ParserT Text ParserState m Blocks codeBlockPre = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) - result' <- manyTill anyChar (htmlTag (tagClose (=="pre"))) + result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre"))) -- drop leading newline if any - let result'' = case result' of - '\n':xs -> xs - _ -> result' + let result'' = case T.uncons result' of + Just ('\n', xs) -> xs + _ -> result' -- drop trailing newline if any - let result''' = case reverse result'' of - '\n':_ -> init result'' - _ -> result'' - let classes = words $ fromAttrib "class" t + let result''' = case T.unsnoc result'' of + Just (xs, '\n') -> xs + _ -> result'' + let classes = T.words $ fromAttrib "class" t let ident = fromAttrib "id" t let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: PandocMonad m => ParserT [Char] ParserState m Blocks +header :: PandocMonad m => ParserT Text ParserState m Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -186,14 +188,14 @@ header = try $ do return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks +blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace B.blockQuote <$> para -- Horizontal rule -hrule :: PandocMonad m => ParserT [Char] st m Blocks +hrule :: PandocMonad m => ParserT Text st m Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -208,39 +210,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks +anyList :: PandocMonad m => ParserT Text ParserState m Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks +genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|> @@ -250,25 +252,25 @@ genericListItemAtDepth c depth = try $ do return $ contents <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks +definitionList :: PandocMonad m => ParserT Text ParserState m Blocks definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. -listStart :: PandocMonad m => ParserT [Char] ParserState m () +listStart :: PandocMonad m => ParserT Text ParserState m () listStart = genericListStart '*' <|> () <$ genericListStart '#' <|> () <$ definitionListStart -genericListStart :: PandocMonad m => Char -> ParserT [Char] st m () +genericListStart :: PandocMonad m => Char -> ParserT Text st m () genericListStart c = () <$ try (many1 (char c) >> whitespace) -basicDLStart :: PandocMonad m => ParserT [Char] ParserState m () +basicDLStart :: PandocMonad m => ParserT Text ParserState m () basicDLStart = do char '-' whitespace notFollowedBy newline -definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines +definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines definitionListStart = try $ do basicDLStart trimInlines . mconcat <$> @@ -281,26 +283,26 @@ definitionListStart = try $ do -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks]) definitionListItem = try $ do term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) return (term, def') - where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] + where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] inlineDef = liftM (\d -> [B.plain d]) $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline - multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] + multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] multilineDef = try $ do optional whitespace >> newline - s <- many1Till anyChar (try (string "=:" >> newline)) - -- this ++ "\n\n" does not look very good - ds <- parseFromString' parseBlocks (s ++ "\n\n") + s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline)) + -- this <> "\n\n" does not look very good + ds <- parseFromString' parseBlocks (s <> "\n\n") return [ds] -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks rawHtmlBlock = try $ do skipMany spaceChar (_,b) <- htmlTag isBlockTag @@ -308,14 +310,14 @@ rawHtmlBlock = try $ do return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks +rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: PandocMonad m => ParserT [Char] ParserState m Blocks +para :: PandocMonad m => ParserT Text ParserState m Blocks para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -326,7 +328,7 @@ toAlignment '>' = AlignRight toAlignment '=' = AlignCenter toAlignment _ = AlignDefault -cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment) +cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment) cellAttributes = try $ do isHeader <- option False (True <$ char '_') -- we just ignore colspan and rowspan markers: @@ -339,18 +341,18 @@ cellAttributes = try $ do return (isHeader, alignment) -- | A table cell spans until a pipe | -tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks) +tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks) tableCell = try $ do char '|' (isHeader, alignment) <- option (False, AlignDefault) cellAttributes notFollowedBy blankline - raw <- trim <$> + raw <- trim . T.pack <$> many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) content <- mconcat <$> parseFromString' (many inline) raw return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells -tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)] +tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)] tableRow = try $ do -- skip optional row attributes optional $ try $ do @@ -360,7 +362,7 @@ tableRow = try $ do many1 tableCell <* char '|' <* blankline -- | A table with an optional header. -table :: PandocMonad m => ParserT [Char] ParserState m Blocks +table :: PandocMonad m => ParserT Text ParserState m Blocks table = try $ do -- ignore table attributes caption <- option mempty $ try $ do @@ -384,7 +386,7 @@ table = try $ do (map (map snd) rows) -- | Ignore markers for cols, thead, tfoot. -ignorableRow :: PandocMonad m => ParserT [Char] ParserState m () +ignorableRow :: PandocMonad m => ParserT Text ParserState m () ignorableRow = try $ do char '|' oneOf ":^-~" @@ -393,9 +395,9 @@ ignorableRow = try $ do _ <- anyLine return () -explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m () +explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m () explicitBlockStart name = try $ do - string name + string (T.unpack name) attributes char '.' optional whitespace @@ -404,9 +406,9 @@ explicitBlockStart name = try $ do -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: PandocMonad m - => String -- ^ block tag name - -> ParserT [Char] ParserState m Blocks -- ^ implicit block - -> ParserT [Char] ParserState m Blocks + => Text -- ^ block tag name + -> ParserT Text ParserState m Blocks -- ^ implicit block + -> ParserT Text ParserState m Blocks maybeExplicitBlock name blk = try $ do optional $ explicitBlockStart name blk @@ -419,11 +421,11 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: PandocMonad m => ParserT [Char] ParserState m Inlines +inline :: PandocMonad m => ParserT Text ParserState m Inlines inline = choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines] +inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines] inlineParsers = [ str , whitespace , endline @@ -437,13 +439,13 @@ inlineParsers = [ str , link , image , mark - , (B.str . (:[])) <$> characterReference + , (B.str . T.singleton) <$> characterReference , smartPunctuation inline , symbol ] -- | Inline markups -inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines +inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (string "**") B.strong , simpleInline (string "__") B.emph @@ -457,33 +459,33 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) ] -- | Trademark, registered, copyright -mark :: PandocMonad m => ParserT [Char] st m Inlines +mark :: PandocMonad m => ParserT Text st m Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: PandocMonad m => ParserT [Char] st m Inlines +reg :: PandocMonad m => ParserT Text st m Inlines reg = do oneOf "Rr" char ')' return $ B.str "\174" -tm :: PandocMonad m => ParserT [Char] st m Inlines +tm :: PandocMonad m => ParserT Text st m Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' return $ B.str "\8482" -copy :: PandocMonad m => ParserT [Char] st m Inlines +copy :: PandocMonad m => ParserT Text st m Inlines copy = do oneOf "Cc" char ')' return $ B.str "\169" -note :: PandocMonad m => ParserT [Char] ParserState m Inlines +note :: PandocMonad m => ParserT Text ParserState m Inlines note = try $ do ref <- char '[' *> many1 digit <* char ']' notes <- stateNotes <$> getState - case lookup ref notes of + case lookup (T.pack ref) notes of Nothing -> Prelude.fail "note not found" Just raw -> B.note <$> parseFromString' parseBlocks raw @@ -500,42 +502,42 @@ stringBreakers :: [Char] stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]" wordBoundaries :: [Char] -wordBoundaries = markupChars ++ stringBreakers +wordBoundaries = markupChars <> stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String +hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text hyphenedWords = do x <- wordChunk xs <- many (try $ char '-' >> wordChunk) - return $ intercalate "-" (x:xs) + return $ T.intercalate "-" (x:xs) -wordChunk :: PandocMonad m => ParserT [Char] ParserState m String +wordChunk :: PandocMonad m => ParserT Text ParserState m Text wordChunk = try $ do hd <- noneOf wordBoundaries tl <- many ( noneOf wordBoundaries <|> try (notFollowedBy' note *> oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) ) - return $ hd:tl + return $ T.pack $ hd:tl -- | Any string -str :: PandocMonad m => ParserT [Char] ParserState m Inlines +str :: PandocMonad m => ParserT Text ParserState m Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediately -- followed by parens, parens content is unconditionally word acronym fullStr <- option baseStr $ try $ do - guard $ all isUpper baseStr - acro <- enclosed (char '(') (char ')') anyChar' - return $ concat [baseStr, " (", acro, ")"] + guard $ T.all isUpper baseStr + acro <- T.pack <$> enclosed (char '(') (char ')') anyChar' + return $ T.concat [baseStr, " (", acro, ")"] updateLastStrPos return $ B.str fullStr -- | Some number of space chars -whitespace :: PandocMonad m => ParserT [Char] st m Inlines +whitespace :: PandocMonad m => ParserT Text st m Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: PandocMonad m => ParserT [Char] ParserState m Inlines +endline :: PandocMonad m => ParserT Text ParserState m Inlines endline = try $ do newline notFollowedBy blankline @@ -543,18 +545,18 @@ endline = try $ do notFollowedBy rawHtmlBlock return B.linebreak -rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines +rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines +rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: PandocMonad m => ParserT [Char] ParserState m Inlines +link :: PandocMonad m => ParserT Text ParserState m Inlines link = try $ do bracketed <- (True <$ char '[') <|> return False char '"' *> notFollowedBy (oneOf " \t\n\r") @@ -567,121 +569,122 @@ link = try $ do else lookAhead $ space <|> eof' <|> try (oneOf "!.,;:" *> (space <|> newline <|> eof')) - url <- many1Till nonspaceChar stop + url <- T.pack <$> many1Till nonspaceChar stop let name' = if B.toList name == [Str "$"] then B.str url else name return $ if attr == nullAttr then B.link url "" name' else B.spanWith attr $ B.link url "" name' -- | image embedding -image :: PandocMonad m => ParserT [Char] ParserState m Inlines +image :: PandocMonad m => ParserT Text ParserState m Inlines image = try $ do char '!' >> notFollowedBy space (ident, cls, kvs) <- attributes let attr = case lookup "style" kvs of Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls) Nothing -> (ident, cls, kvs) - src <- many1 (noneOf " \t\n\r!(") - alt <- option "" $ try $ char '(' *> manyTill anyChar (char ')') + src <- T.pack <$> many1 (noneOf " \t\n\r!(") + alt <- fmap T.pack $ option "" $ try $ char '(' *> manyTill anyChar (char ')') char '!' return $ B.imageWith attr src alt (B.str alt) -escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines +escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedEqs = B.str <$> +escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines +escapedEqs = B.str . T.pack <$> try (string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedTag = B.str <$> +escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines +escapedTag = B.str . T.pack <$> try (string "<notextile>" *> manyTill anyChar' (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines -symbol = B.str . singleton <$> (notFollowedBy newline *> - notFollowedBy rawHtmlBlock *> - oneOf wordBoundaries) +symbol :: PandocMonad m => ParserT Text ParserState m Inlines +symbol = B.str . T.singleton <$> (notFollowedBy newline *> + notFollowedBy rawHtmlBlock *> + oneOf wordBoundaries) -- | Inline code -code :: PandocMonad m => ParserT [Char] ParserState m Inlines +code :: PandocMonad m => ParserT Text ParserState m Inlines code = code1 <|> code2 -- any character except a newline before a blank line -anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char +anyChar' :: PandocMonad m => ParserT Text ParserState m Char anyChar' = satisfy (/='\n') <|> try (char '\n' <* notFollowedBy blankline) -code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines -code1 = B.code <$> surrounded (char '@') anyChar' +code1 :: PandocMonad m => ParserT Text ParserState m Inlines +code1 = B.code . T.pack <$> surrounded (char '@') anyChar' -code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines +code2 :: PandocMonad m => ParserT Text ParserState m Inlines code2 = do htmlTag (tagOpen (=="tt") null) - B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) + B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: PandocMonad m => ParserT [Char] ParserState m Attr +attributes :: PandocMonad m => ParserT Text ParserState m Attr attributes = foldl (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) -specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) specialAttribute = do alignStr <- ("center" <$ char '=') <|> ("justify" <$ try (string "<>")) <|> ("right" <$ char '>') <|> ("left" <$ char '<') notFollowedBy spaceChar - return $ addStyle ("text-align:" ++ alignStr) + return $ addStyle $ T.pack $ "text-align:" ++ alignStr -attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) attribute = try $ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar -classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' - ws <- words `fmap` manyTill anyChar' (char ')') + ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')') case reverse ws of - [] -> return $ \(_,_,keyvals) -> ("",[],keyvals) - (('#':ident'):classes') -> return $ \(_,_,keyvals) -> - (ident',classes',keyvals) - classes' -> return $ \(_,_,keyvals) -> - ("",classes',keyvals) - -styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) + [] + -> return $ \(_,_,keyvals) -> ("",[],keyvals) + ((T.uncons -> Just ('#', ident')):classes') + -> return $ \(_,_,keyvals) -> (ident',classes',keyvals) + classes' + -> return $ \(_,_,keyvals) -> ("",classes',keyvals) + +styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' - return $ addStyle style + return $ addStyle $ T.pack style -addStyle :: String -> Attr -> Attr +addStyle :: Text -> Attr -> Attr addStyle style (id',classes,keyvals) = (id',classes,keyvals') where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] - style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals] + style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals] -langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) langAttr = do lang <- try $ enclosed (char '[') (char ']') alphaNum - return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) + return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals) -- | Parses material surrounded by a parser. surrounded :: (PandocMonad m, Show t) - => ParserT [Char] st m t -- ^ surrounding parser - -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) - -> ParserT [Char] st m [a] + => ParserT Text st m t -- ^ surrounding parser + -> ParserT Text st m a -- ^ content parser (to be used repeatedly) + -> ParserT Text st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) simpleInline :: PandocMonad m - => ParserT [Char] ParserState m t -- ^ surrounding parser + => ParserT Text ParserState m t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor - -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly) + -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly) simpleInline border construct = try $ do notAfterString border *> notFollowedBy (oneOf " \t\n\r") @@ -695,7 +698,7 @@ simpleInline border construct = try $ do then body else B.spanWith attr body -groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines +groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace @@ -704,9 +707,5 @@ groupedInlineMarkup = try $ do char ']' return $ sp1 <> result <> sp2 --- | Create a singleton list -singleton :: a -> [a] -singleton x = [x] - -eof' :: Monad m => ParserT [Char] s m Char +eof' :: Monad m => ParserT Text s m Char eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs index 5daf6b0bb..501c204f5 100644 --- a/src/Text/Pandoc/Readers/TikiWiki.hs +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -43,19 +43,19 @@ readTikiWiki :: PandocMonad m -> m Pandoc readTikiWiki opts s = do res <- readWithM parseTikiWiki def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case res of Left e -> throwError e Right d -> return d -type TikiWikiParser = ParserT [Char] ParserState +type TikiWikiParser = ParserT Text ParserState -- -- utility functions -- -tryMsg :: String -> TikiWikiParser m a -> TikiWikiParser m a -tryMsg msg p = try p <?> msg +tryMsg :: Text -> TikiWikiParser m a -> TikiWikiParser m a +tryMsg msg p = try p <?> (T.unpack msg) skip :: TikiWikiParser m a -> TikiWikiParser m () skip parser = Control.Monad.void parser @@ -89,7 +89,7 @@ block = do <|> para skipMany blankline when (verbosity >= INFO) $ - trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) + trace (T.pack $ printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) return res blockElements :: PandocMonad m => TikiWikiParser m B.Blocks @@ -133,7 +133,7 @@ tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks] tableRow = try $ do -- row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n")) -- return $ map (B.plain . mconcat) row - row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n")) + row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn . T.pack) (try $ string "|" <* notFollowedBy (oneOf "|\n")) return $ map B.plain row where parseColumn x = do @@ -342,15 +342,15 @@ listItemLine nest = lineContent >>= parseContent lineContent = do content <- anyLine continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ Data.Maybe.fromMaybe "" continuation - filterSpaces = reverse . dropWhile (== ' ') . reverse + return $ filterSpaces content <> "\n" <> Data.Maybe.fromMaybe "" continuation + filterSpaces = T.dropWhileEnd (== ' ') listContinuation = string (replicate nest '+') >> lineContent parseContent x = do parsed <- parseFromString (many1 inline) x return $ mconcat $ dropWhileEnd (== B.space) parsed -- Turn the CODE macro attributes into Pandoc code block attributes. -mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)]) +mungeAttrs :: [(Text, Text)] -> (Text, [Text], [(Text, Text)]) mungeAttrs rawAttrs = ("", classes, rawAttrs) where -- "colors" is TikiWiki CODE macro for "name of language to do @@ -370,7 +370,7 @@ codeMacro = try $ do string "{CODE(" rawAttrs <- macroAttrs string ")}" - body <- manyTill anyChar (try (string "{CODE}")) + body <- T.pack <$> manyTill anyChar (try (string "{CODE}")) newline if not (null rawAttrs) then @@ -428,9 +428,9 @@ nbsp = try $ do htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines htmlComment = try $ do string "~hc~" - inner <- many1 $ noneOf "~" + inner <- fmap T.pack $ many1 $ noneOf "~" string "~/hc~" - return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END " + return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " <> inner <> " ~/hc~ :END " linebreak :: PandocMonad m => TikiWikiParser m B.Inlines linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) @@ -469,15 +469,15 @@ image = try $ do let title = fromMaybe src $ lookup "desc" rawAttrs let alt = fromMaybe title $ lookup "alt" rawAttrs let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs - if not (null src) + if not (T.null src) then return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt) else - return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ printAttrs rawAttrs ++ "} :END " + return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " <> printAttrs rawAttrs <> "} :END " where - printAttrs attrs = unwords $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs + printAttrs attrs = T.unwords $ map (\(a, b) -> a <> "=\"" <> b <> "\"") attrs -imageAttr :: PandocMonad m => TikiWikiParser m (String, String) +imageAttr :: PandocMonad m => TikiWikiParser m (Text, Text) imageAttr = try $ do key <- many1 (noneOf "=} \t\n") char '=' @@ -485,7 +485,7 @@ imageAttr = try $ do value <- many1 (noneOf "}\"\n") optional $ char '"' optional $ char ',' - return (key, value) + return (T.pack key, T.pack value) -- __strong__ @@ -500,57 +500,57 @@ emph = try $ fmap B.emph (enclosed (string "''") nestedInlines) escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines escapedChar = try $ do string "~" - mNumber <- safeRead <$> many1 digit + mNumber <- safeRead . T.pack <$> many1 digit string "~" return $ B.str $ case mNumber of - Just number -> [toEnum (number :: Int) :: Char] - Nothing -> [] + Just number -> T.singleton $ toEnum (number :: Int) + Nothing -> "" -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this centered :: PandocMonad m => TikiWikiParser m B.Inlines centered = try $ do string "::" - inner <- many1 $ noneOf ":\n" + inner <- fmap T.pack $ many1 $ noneOf ":\n" string "::" - return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END " + return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" <> inner <> ":: :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this colored :: PandocMonad m => TikiWikiParser m B.Inlines colored = try $ do string "~~" - inner <- many1 $ noneOf "~\n" + inner <- fmap T.pack $ many1 $ noneOf "~\n" string "~~" - return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END " + return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" <> inner <> "~~ :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this underlined :: PandocMonad m => TikiWikiParser m B.Inlines underlined = try $ do string "===" - inner <- many1 $ noneOf "=\n" + inner <- fmap T.pack $ many1 $ noneOf "=\n" string "===" - return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END " + return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" <> inner <> "=== :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this boxed :: PandocMonad m => TikiWikiParser m B.Inlines boxed = try $ do string "^" - inner <- many1 $ noneOf "^\n" + inner <- fmap T.pack $ many1 $ noneOf "^\n" string "^" - return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END " + return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" <> inner <> "^ :END " -- --text-- strikeout :: PandocMonad m => TikiWikiParser m B.Inlines strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines) -nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar +nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m Text +nestedString end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ T.pack <$> many1 spaceChar <* notFollowedBy end breakChars :: PandocMonad m => TikiWikiParser m B.Inlines breakChars = try $ string "%%%" >> return B.linebreak @@ -564,7 +564,7 @@ superMacro = try $ do string "{SUP(" manyTill anyChar (string ")}") body <- manyTill anyChar (string "{SUP}") - return $ B.superscript $ B.text body + return $ B.superscript $ B.text $ T.pack body -- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux subTag :: PandocMonad m => TikiWikiParser m B.Inlines @@ -575,22 +575,22 @@ subMacro = try $ do string "{SUB(" manyTill anyChar (string ")}") body <- manyTill anyChar (string "{SUB}") - return $ B.subscript $ B.text body + return $ B.subscript $ B.text $ T.pack body -- -+text+- code :: PandocMonad m => TikiWikiParser m B.Inlines code = try $ fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString) -macroAttr :: PandocMonad m => TikiWikiParser m (String, String) +macroAttr :: PandocMonad m => TikiWikiParser m (Text, Text) macroAttr = try $ do key <- many1 (noneOf "=)") char '=' optional $ char '"' value <- many1 (noneOf " )\"") optional $ char '"' - return (key, value) + return (T.pack key, T.pack value) -macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)] +macroAttrs :: PandocMonad m => TikiWikiParser m [(Text, Text)] macroAttrs = try $ sepEndBy macroAttr spaces -- ~np~ __not bold__ ~/np~ @@ -598,13 +598,13 @@ noparse :: PandocMonad m => TikiWikiParser m B.Inlines noparse = try $ do string "~np~" body <- manyTill anyChar (string "~/np~") - return $ B.str body + return $ B.str $ T.pack body str :: PandocMonad m => TikiWikiParser m B.Inlines -str = fmap B.str (many1 alphaNum <|> count 1 characterReference) +str = fmap B.str (T.pack <$> many1 alphaNum <|> countChar 1 characterReference) symbol :: PandocMonad m => TikiWikiParser m B.Inlines -symbol = fmap B.str (count 1 nonspaceChar) +symbol = fmap B.str (countChar 1 nonspaceChar) -- [[not a link] notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines @@ -612,14 +612,14 @@ notExternalLink = try $ do start <- string "[[" body <- many (noneOf "\n[]") end <- string "]" - return $ B.text (start ++ body ++ end) + return $ B.text $ T.pack $ start ++ body ++ end -- [http://www.somesite.org url|Some Site title] -- ((internal link)) -- -- The ((...)) wiki links and [...] external links are handled -- exactly the same; this abstracts that out -makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines +makeLink :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m B.Inlines makeLink start middle end = try $ do st <- getState guard $ stateAllowLinks st @@ -627,15 +627,15 @@ makeLink start middle end = try $ do (url, title, anchor) <- wikiLinkText start middle end parsedTitle <- parseFromString (many1 inline) title setState $ st{ stateAllowLinks = True } - return $ B.link (url++anchor) "" $mconcat parsedTitle + return $ B.link (url <> anchor) "" $ mconcat parsedTitle -wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String) +wikiLinkText :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m (Text, Text, Text) wikiLinkText start middle end = do - string start - url <- many1 (noneOf $ middle ++ "\n") + string (T.unpack start) + url <- T.pack <$> many1 (noneOf $ T.unpack middle ++ "\n") seg1 <- option url linkContent seg2 <- option "" linkContent - string end + string (T.unpack end) if seg2 /= "" then return (url, seg2, seg1) @@ -644,7 +644,7 @@ wikiLinkText start middle end = do where linkContent = do char '|' - many (noneOf middle) + T.pack <$> many (noneOf $ T.unpack middle) externalLink :: PandocMonad m => TikiWikiParser m B.Inlines externalLink = makeLink "[" "]|" "]" diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 0af52e046..996a818fd 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Txt2Tags Copyright : Copyright (C) 2014 Matthew Pickering @@ -18,7 +19,6 @@ import Prelude import Control.Monad (guard, void, when) import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader (Reader, asks, runReader) -import Data.Char (toLower) import Data.Default import Data.List (intercalate, transpose) import Data.Maybe (fromMaybe) @@ -36,13 +36,13 @@ import Text.Pandoc.Parsing hiding (space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI, underlineSpan) -type T2T = ParserT String ParserState (Reader T2TMeta) +type T2T = ParserT Text ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file data T2TMeta = T2TMeta { - date :: String -- ^ Current date - , mtime :: String -- ^ Last modification time of infile + date :: Text -- ^ Current date + , mtime :: Text -- ^ Last modification time of infile , infile :: FilePath -- ^ Input file , outfile :: FilePath -- ^ Output file } deriving Show @@ -63,7 +63,7 @@ getT2TMeta = do _ -> catchError (maximum <$> mapM getModTime inps) (const (return "")) - return $ T2TMeta curDate curMtime (intercalate ", " inps) outp + return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document readTxt2Tags :: PandocMonad m @@ -74,14 +74,14 @@ readTxt2Tags opts s = do meta <- getT2TMeta let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) $ - T.unpack (crFilter s) ++ "\n\n" + crFilter s <> "\n\n" case parsed of Right result -> return result Left e -> throwError e -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document --- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc +-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -- readTxt2TagsNoMacros = readTxt2Tags parseT2T :: T2T Pandoc @@ -106,7 +106,7 @@ parseHeader = do header :: T2T () header = titleline >> authorline >> dateline -headerline :: B.ToMetaValue a => String -> T2T a -> T2T () +headerline :: B.ToMetaValue a => Text -> T2T a -> T2T () headerline field p = (() <$ try blankline) <|> (p >>= updateState . B.setMeta field) @@ -123,15 +123,15 @@ authorline = dateline :: T2T () dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline) -type Keyword = String -type Value = String +type Keyword = Text +type Value = Text setting :: T2T (Keyword, Value) setting = do string "%!" - keyword <- ignoreSpacesCap (many1 alphaNum) + keyword <- ignoreSpacesCap (many1Char alphaNum) char ':' - value <- ignoreSpacesCap (manyTill anyChar newline) + value <- ignoreSpacesCap (manyTillChar anyChar newline) return (keyword, value) -- Blocks @@ -163,10 +163,10 @@ balancedTitle c = try $ do spaces level <- length <$> many1 (char c) guard (level <= 5) -- Max header level 5 - heading <- manyTill (noneOf "\n\r") (count level (char c)) + heading <- manyTillChar (noneOf "\n\r") (count level (char c)) label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-")) many spaceChar *> newline - let attr = maybe nullAttr (\x -> (x, [], [])) label + let attr = maybe nullAttr (\x -> (T.pack x, [], [])) label return $ B.headerWith attr level (trimInlines $ B.text heading) para :: T2T Blocks @@ -192,7 +192,7 @@ quote :: T2T Blocks quote = try $ do lookAhead tab rawQuote <- many1 (tab *> optional spaces *> anyLine) - contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n") + contents <- parseFromString' parseBlocks (T.intercalate "\n" rawQuote <> "\n\n") return $ B.blockQuote contents commentLine :: T2T Inlines @@ -243,17 +243,17 @@ listItem start end = try $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString' end $ firstLine ++ blank ++ rest + rest <- T.concat <$> many (listContinuation markerLength) + parseFromString' end $ firstLine <> blank <> rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. listContinuation :: Int - -> T2T String + -> T2T Text listContinuation markerLength = try $ notFollowedBy' (blankline >> blankline) - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) + *> (mappend <$> (T.concat <$> many1 listLine) + <*> manyChar blankline) where listLine = try $ indentWith markerLength *> anyLineNewline -- Table @@ -327,16 +327,16 @@ taggedBlock = do -- Generic -genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s -blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks blockMarkupArea p f s = try (do - string s *> blankline - f . mconcat <$> manyTill p (eof <|> void (string s *> blankline))) + textStr s *> blankline + f . mconcat <$> manyTill p (eof <|> void (textStr s *> blankline))) -blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks -blockMarkupLine p f s = try (f <$> (string s *> space *> p)) +blockMarkupLine :: T2T a -> (a -> Blocks) -> Text -> T2T Blocks +blockMarkupLine p f s = try (f <$> (textStr s *> space *> p)) -- Can be in either block or inline position comment :: Monoid a => T2T a @@ -385,15 +385,15 @@ italic :: T2T Inlines italic = inlineMarkup inline B.emph '/' B.str code :: T2T Inlines -code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id +code = inlineMarkup (T.singleton <$> anyChar) B.code '`' id raw :: T2T Inlines -raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id +raw = inlineMarkup (T.singleton <$> anyChar) B.text '"' id tagged :: T2T Inlines tagged = do target <- getTarget - inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id + inlineMarkup (T.singleton <$> anyChar) (B.rawInline target) '\'' id -- Parser for markup indicated by a double character. -- Inline markup is greedy and glued @@ -404,33 +404,33 @@ inlineMarkup :: Monoid a => T2T a -- Content parser -> (a -> Inlines) -- Constructor -> Char -- Fence - -> (String -> a) -- Special Case to handle ****** + -> (Text -> a) -- Special Case to handle ****** -> T2T Inlines inlineMarkup p f c special = try $ do - start <- many1 (char c) - let l = length start + start <- many1Char (char c) + let l = T.length start guard (l >= 2) when (l == 2) (void $ notFollowedBy space) -- We must make sure that there is no space before the start of the -- closing tags - body <- optionMaybe (try $ manyTill (noneOf "\n\r") + body <- optionMaybe (try $ manyTillChar (noneOf "\n\r") (try $ lookAhead (noneOf " " >> string [c,c] ))) case body of Just middle -> do lastChar <- anyChar - end <- many1 (char c) + end <- many1Char (char c) let parser inp = parseFromString' (mconcat <$> many p) inp - let start' = case drop 2 start of + let start' = case T.drop 2 start of "" -> mempty xs -> special xs - body' <- parser (middle ++ [lastChar]) - let end' = case drop 2 end of + body' <- parser (middle <> T.singleton lastChar) + let end' = case T.drop 2 end of "" -> mempty xs -> special xs return $ f (start' `mappend` body' `mappend` end') Nothing -> do -- Either bad or case such as ***** guard (l >= 5) - let body' = replicate (l - 4) c + let body' = T.replicate (l - 4) $ T.singleton c return $ f (special body') link :: T2T Inlines @@ -441,12 +441,12 @@ titleLink :: T2T Inlines titleLink = try $ do char '[' notFollowedBy space - tokens <- sepBy1 (many $ noneOf " ]") space + tokens <- sepBy1 (manyChar $ noneOf " ]") space guard (length tokens >= 2) char ']' let link' = last tokens - guard $ not $ null link' - let tit = unwords (init tokens) + guard $ not $ T.null link' + let tit = T.unwords (init tokens) return $ B.link link' "" (B.text tit) -- Link with image @@ -455,7 +455,7 @@ imageLink = try $ do char '[' body <- image many1 space - l <- manyTill (noneOf "\n\r ") (char ']') + l <- manyTillChar (noneOf "\n\r ") (char ']') return (B.link l "" body) macro :: T2T Inlines @@ -466,7 +466,7 @@ macro = try $ do maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands) where commands = [ ("date", date), ("mtime", mtime) - , ("infile", infile), ("outfile", outfile)] + , ("infile", T.pack . infile), ("outfile", T.pack . outfile)] -- raw URLs in text are automatically linked url :: T2T Inlines @@ -474,7 +474,7 @@ url = try $ do (rawUrl, escapedUrl) <- try uri <|> emailAddress return $ B.link rawUrl "" (B.str escapedUrl) -uri :: T2T (String, String) +uri :: T2T (Text, Text) uri = try $ do address <- t2tURI return (address, escapeURI address) @@ -486,25 +486,25 @@ uri = try $ do --isT2TURI (parse t2tURI "" -> Right _) = True --isT2TURI _ = False -t2tURI :: T2T String +t2tURI :: T2T Text t2tURI = do - start <- try ((++) <$> proto <*> urlLogin) <|> guess - domain <- many1 chars - sep <- many (char '/') - form' <- option mempty ((:) <$> char '?' <*> many1 form) - anchor' <- option mempty ((:) <$> char '#' <*> many anchor) - return (start ++ domain ++ sep ++ form' ++ anchor') + start <- try ((<>) <$> proto <*> urlLogin) <|> guess + domain <- many1Char chars + sep <- manyChar (char '/') + form' <- option mempty (T.cons <$> char '?' <*> many1Char form) + anchor' <- option mempty (T.cons <$> char '#' <*> manyChar anchor) + return (start <> domain <> sep <> form' <> anchor') where protos = ["http", "https", "ftp", "telnet", "gopher", "wais"] - proto = (++) <$> oneOfStrings protos <*> string "://" - guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23")) - <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.') + proto = (<>) <$> oneOfStrings protos <*> textStr "://" + guess = (<>) <$> (((<>) <$> stringAnyCase "www" <*> option mempty (T.singleton <$> oneOf "23")) + <|> stringAnyCase "ftp") <*> (T.singleton <$> char '.') login = alphaNum <|> oneOf "_.-" - pass = many (noneOf " @") + pass = manyChar (noneOf " @") chars = alphaNum <|> oneOf "%._/~:,=$@&+-" anchor = alphaNum <|> oneOf "%._0" form = chars <|> oneOf ";*" - urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@') + urlLogin = option mempty $ try ((\x y z -> x <> y <> T.singleton z) <$> many1Char login <*> option mempty (T.cons <$> char ':' <*> pass) <*> char '@') image :: T2T Inlines @@ -512,12 +512,12 @@ image = try $ do -- List taken from txt2tags source let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"] char '[' - (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions) + (path, ext) <- manyUntilChar (noneOf "\n\t\r ") (oneOfStrings extensions) char ']' - return $ B.image (path ++ ext) "" mempty + return $ B.image (path <> ext) "" mempty -- Characters used in markup -specialChars :: String +specialChars :: [Char] specialChars = "%*-_/|:+;" tab :: T2T Char @@ -526,8 +526,8 @@ tab = char '\t' space :: T2T Char space = char ' ' -spaces :: T2T String -spaces = many space +spaces :: T2T Text +spaces = manyChar space endline :: T2T Inlines endline = try $ do @@ -544,17 +544,17 @@ endline = try $ do return B.softbreak str :: T2T Inlines -str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = try $ B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") whitespace :: T2T Inlines whitespace = try $ B.space <$ spaceChar symbol :: T2T Inlines -symbol = B.str . (:[]) <$> oneOf specialChars +symbol = B.str . T.singleton <$> oneOf specialChars -- Utility -getTarget :: T2T String +getTarget :: T2T Text getTarget = do mv <- lookupMeta "target" . stateMeta <$> getState return $ case mv of @@ -565,5 +565,5 @@ getTarget = do atStart :: T2T () atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) -ignoreSpacesCap :: T2T String -> T2T String -ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces) +ignoreSpacesCap :: T2T Text -> T2T Text +ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces) diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 27b7d7245..f7edabc48 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Readers.Vimwiki @@ -51,9 +52,10 @@ import Prelude import Control.Monad (guard) import Control.Monad.Except (throwError) import Data.Default -import Data.List (isInfixOf, isPrefixOf) +import Data.List (isInfixOf) import Data.Maybe import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines, fromList, toList, trimInlines) import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code, codeBlockWith, definitionList, @@ -73,12 +75,13 @@ import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress, many1Till, orderedListMarker, readWithM, registerHeader, spaceChar, stateMeta, - stateOptions, uri) -import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast, - isURI) + stateOptions, uri, manyTillChar, manyChar, textStr, + many1Char, countChar, many1TillChar) +import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast, + isURI, tshow) import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, spaces, string) -import Text.Parsec.Combinator (between, choice, count, eof, lookAhead, many1, +import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1, manyTill, notFollowedBy, option, skipMany1) import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) @@ -128,7 +131,7 @@ block = do , definitionList , para ] - trace (take 60 $ show $ toList res) + trace (T.take 60 $ tshow $ toList res) return res blockML :: PandocMonad m => VwParser m Blocks @@ -218,32 +221,32 @@ defMarkerM = string "::" >> spaceChar defMarkerE :: PandocMonad m => VwParser m Char defMarkerE = string "::" >> newline -hasDefMarkerM :: PandocMonad m => VwParser m String -hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) +hasDefMarkerM :: PandocMonad m => VwParser m Text +hasDefMarkerM = manyTillChar (noneOf "\n") (try defMarkerM) preformatted :: PandocMonad m => VwParser m Blocks preformatted = try $ do many spaceChar >> string "{{{" - attrText <- many (noneOf "\n") + attrText <- manyChar (noneOf "\n") lookAhead newline - contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" + contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}}" >> many spaceChar >> newline)) - if (contents /= "") && (head contents == '\n') - then return $ B.codeBlockWith (makeAttr attrText) (tail contents) + if (contents /= "") && (T.head contents == '\n') + then return $ B.codeBlockWith (makeAttr attrText) (T.tail contents) else return $ B.codeBlockWith (makeAttr attrText) contents -makeAttr :: String -> Attr +makeAttr :: Text -> Attr makeAttr s = - let xs = splitBy (`elem` " \t") s in + let xs = splitTextBy (`elem` (" \t" :: String)) s in ("", [], mapMaybe nameValue xs) -nameValue :: String -> Maybe (String, String) +nameValue :: Text -> Maybe (Text, Text) nameValue s = - let t = splitBy (== '=') s in + let t = splitTextBy (== '=') s in if length t /= 2 then Nothing else let (a, b) = (head t, last t) in - if (length b < 2) || ((head b, last b) /= ('"', '"')) + if (T.length b < 2) || ((T.head b, T.last b) /= ('"', '"')) then Nothing else Just (a, stripFirstAndLast b) @@ -253,16 +256,16 @@ displayMath = try $ do many spaceChar >> string "{{$" mathTag <- option "" mathTagParser many space - contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$" + contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}$" >> many spaceChar >> newline)) let contentsWithTags | mathTag == "" = contents - | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents - ++ "\n\\end{" ++ mathTag ++ "}" + | otherwise = "\\begin{" <> mathTag <> "}\n" <> contents + <> "\n\\end{" <> mathTag <> "}" return $ B.para $ B.displayMath contentsWithTags -mathTagLaTeX :: String -> String +mathTagLaTeX :: Text -> Text mathTagLaTeX s = case s of "equation" -> "" "equation*" -> "" @@ -360,17 +363,17 @@ combineList x [y] = case toList y of _ -> x:[y] combineList x xs = x:xs -listStart :: PandocMonad m => VwParser m (Int, String) +listStart :: PandocMonad m => VwParser m (Int, Text) listStart = try $ do s <- many spaceChar listType <- bulletListMarkers <|> orderedListMarkers spaceChar return (length s, listType) -bulletListMarkers :: PandocMonad m => VwParser m String +bulletListMarkers :: PandocMonad m => VwParser m Text bulletListMarkers = "ul" <$ (char '*' <|> char '-') -orderedListMarkers :: PandocMonad m => VwParser m String +orderedListMarkers :: PandocMonad m => VwParser m Text orderedListMarkers = ("ol" <$choice (orderedListMarker Decimal Period:(($OneParen) . orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) <|> ("ol" <$ char '#') @@ -421,9 +424,9 @@ placeholder :: PandocMonad m => VwParser m () placeholder = try $ choice (ph <$> ["title", "date"]) <|> noHtmlPh <|> templatePh -ph :: PandocMonad m => String -> VwParser m () +ph :: PandocMonad m => Text -> VwParser m () ph s = try $ do - many spaceChar >>string ('%':s) >> spaceChar + many spaceChar >> textStr (T.cons '%' s) >> spaceChar contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline) --use lookAhead because of placeholder in the whitespace parser let meta' = B.setMeta s contents nullMeta @@ -476,7 +479,7 @@ inlineML :: PandocMonad m => VwParser m Inlines inlineML = choice $ whitespace endlineML:inlineList str :: PandocMonad m => VwParser m Inlines -str = B.str <$>many1 (noneOf $ spaceChars ++ specialChars) +str = B.str <$> many1Char (noneOf $ spaceChars ++ specialChars) whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines whitespace endline = B.space <$ (skipMany1 spaceChar <|> @@ -487,7 +490,7 @@ whitespace' :: PandocMonad m => VwParser m Inlines whitespace' = B.space <$ skipMany1 spaceChar special :: PandocMonad m => VwParser m Inlines -special = B.str <$> count 1 (oneOf specialChars) +special = B.str <$> countChar 1 (oneOf specialChars) bareURL :: PandocMonad m => VwParser m Inlines bareURL = try $ do @@ -505,8 +508,8 @@ strong = try $ do return $ B.spanWith (makeId contents, [], []) mempty <> B.strong contents -makeId :: Inlines -> String -makeId i = concat (stringify <$> toList i) +makeId :: Inlines -> Text +makeId i = T.concat (stringify <$> toList i) emph :: PandocMonad m => VwParser m Inlines emph = try $ do @@ -527,7 +530,7 @@ strikeout = try $ do code :: PandocMonad m => VwParser m Inlines code = try $ do char '`' - contents <- many1Till (noneOf "\n") (char '`') + contents <- many1TillChar (noneOf "\n") (char '`') return $ B.code contents superscript :: PandocMonad m => VwParser m Inlines @@ -542,8 +545,8 @@ subscript = try $ link :: PandocMonad m => VwParser m Inlines link = try $ do string "[[" - contents <- lookAhead $ manyTill anyChar (string "]]") - case '|' `elem` contents of + contents <- lookAhead $ manyTillChar anyChar (string "]]") + case T.any (== '|') contents of False -> do manyTill anyChar (string "]]") -- not using try here because [[hell]o]] is not rendered as a link in vimwiki @@ -552,7 +555,7 @@ link = try $ do else "wikilink" return $ B.link (procLink contents) tit (B.str contents) True -> do - url <- manyTill anyChar $ char '|' + url <- manyTillChar anyChar $ char '|' lab <- mconcat <$> manyTill inline (string "]]") let tit = if isURI url then "" @@ -568,52 +571,52 @@ image = try $ do images :: PandocMonad m => Int -> VwParser m Inlines images k | k == 0 = do - imgurl <- manyTill anyChar (try $ string "}}") + imgurl <- manyTillChar anyChar (try $ string "}}") return $ B.image (procImgurl imgurl) "" (B.str "") | k == 1 = do - imgurl <- manyTill anyChar (char '|') + imgurl <- manyTillChar anyChar (char '|') alt <- mconcat <$> manyTill inline (try $ string "}}") return $ B.image (procImgurl imgurl) "" alt | k == 2 = do - imgurl <- manyTill anyChar (char '|') - alt <- mconcat <$>manyTill inline (char '|') - attrText <- manyTill anyChar (try $ string "}}") + imgurl <- manyTillChar anyChar (char '|') + alt <- mconcat <$> manyTill inline (char '|') + attrText <- manyTillChar anyChar (try $ string "}}") return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt | otherwise = do - imgurl <- manyTill anyChar (char '|') - alt <- mconcat <$>manyTill inline (char '|') - attrText <- manyTill anyChar (char '|') + imgurl <- manyTillChar anyChar (char '|') + alt <- mconcat <$> manyTill inline (char '|') + attrText <- manyTillChar anyChar (char '|') manyTill anyChar (try $ string "}}") return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt -procLink' :: String -> String +procLink' :: Text -> Text procLink' s - | take 6 s == "local:" = "file" ++ drop 5 s - | take 6 s == "diary:" = "diary/" ++ drop 6 s - | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + | T.take 6 s == "local:" = "file" <> T.drop 5 s + | T.take 6 s == "diary:" = "diary/" <> T.drop 6 s + | or ((`T.isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ]) = s | s == "" = "" - | last s == '/' = s + | T.last s == '/' = s | otherwise = s -procLink :: String -> String -procLink s = procLink' x ++ y - where (x, y) = break (=='#') s +procLink :: Text -> Text +procLink s = procLink' x <> y + where (x, y) = T.break (=='#') s -procImgurl :: String -> String -procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s +procImgurl :: Text -> Text +procImgurl s = if T.take 6 s == "local:" then "file" <> T.drop 5 s else s inlineMath :: PandocMonad m => VwParser m Inlines inlineMath = try $ - B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$') + B.math <$ char '$' <*> many1TillChar (noneOf "\n") (char '$') tag :: PandocMonad m => VwParser m Inlines tag = try $ do char ':' - s <- manyTill (noneOf spaceChars) (try (char ':' >> lookAhead space)) - guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") - let ss = splitBy (==':') s + s <- manyTillChar (noneOf spaceChars) (try (char ':' >> lookAhead space)) + guard $ not $ "::" `T.isInfixOf` (":" <> s <> ":") + let ss = splitTextBy (==':') s return $ mconcat $ makeTagSpan' (head ss):(makeTagSpan <$> tail ss) todoMark :: PandocMonad m => VwParser m Inlines @@ -646,16 +649,16 @@ nFBTTBSB = hasDefMarker :: PandocMonad m => VwParser m () hasDefMarker = () <$ manyTill (noneOf "\n") (string "::" >> oneOf spaceChars) -makeTagSpan' :: String -> Inlines -makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> +makeTagSpan' :: Text -> Inlines +makeTagSpan' s = B.spanWith (T.cons '-' s, [], []) (B.str "") <> B.spanWith (s, ["tag"], []) (B.str s) -makeTagSpan :: String -> Inlines +makeTagSpan :: Text -> Inlines makeTagSpan s = B.space <> makeTagSpan' s -mathTagParser :: PandocMonad m => VwParser m String +mathTagParser :: PandocMonad m => VwParser m Text mathTagParser = do - s <- try $ lookAhead (char '%' >> manyTill (noneOf spaceChars) + s <- try $ lookAhead (char '%' >> manyTillChar (noneOf spaceChars) (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space)) - char '%' >> string s >> char '%' + char '%' >> textStr s >> char '%' return $ mathTagLaTeX s diff --git a/src/Text/Pandoc/RoffChar.hs b/src/Text/Pandoc/RoffChar.hs index f5dee27f5..5e4a34603 100644 --- a/src/Text/Pandoc/RoffChar.hs +++ b/src/Text/Pandoc/RoffChar.hs @@ -18,10 +18,11 @@ module Text.Pandoc.RoffChar ( , combiningAccents ) where import Prelude +import qualified Data.Text as T -- | These are the escapes specifically mentioned in groff_man(7), -- plus @ and ellipsis. -standardEscapes :: [(Char, String)] +standardEscapes :: [(Char, T.Text)] standardEscapes = [ ('\160', "\\ ") , ('\'', "\\[aq]") @@ -40,7 +41,7 @@ standardEscapes = , ('\x2026', "\\&...") -- because u2026 doesn't render on tty ] -characterCodes :: [(Char, String)] +characterCodes :: [(Char, T.Text)] characterCodes = [ ('Ð', "-D") , ('ð', "Sd") @@ -402,7 +403,7 @@ characterCodes = ] -- use like: \\[E a^ aa] -combiningAccents :: [(Char, String)] +combiningAccents :: [(Char, T.Text)] combiningAccents = [ ('\779' , "a\"") , ('\772', "a-") diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index f3fca9c07..d9f330e29 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -24,8 +24,8 @@ import Data.ByteString (ByteString) import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as L -import Data.Char (isAlphaNum, isAscii, toLower) -import Data.List (isPrefixOf) +import qualified Data.Text as T +import Data.Char (isAlphaNum, isAscii) import Network.URI (escapeURIString) import System.FilePath (takeDirectory, takeExtension, (</>)) import Text.HTML.TagSoup @@ -35,24 +35,24 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Shared (isURI, renderTags', trim) -import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.UTF8 (toString, toText, fromText) import Text.Parsec (ParsecT, runParserT) import qualified Text.Parsec as P isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c -makeDataURI :: (MimeType, ByteString) -> String +makeDataURI :: (MimeType, ByteString) -> T.Text makeDataURI (mime, raw) = if textual - then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw) - else "data:" ++ mime' ++ ";base64," ++ toString (encode raw) - where textual = "text/" `Data.List.isPrefixOf` mime - mime' = if textual && ';' `notElem` mime - then mime ++ ";charset=utf-8" + then "data:" <> mime' <> "," <> T.pack (escapeURIString isOk (toString raw)) + else "data:" <> mime' <> ";base64," <> toText (encode raw) + where textual = "text/" `T.isPrefixOf` mime + mime' = if textual && T.any (== ';') mime + then mime <> ";charset=utf-8" else mime -- mime type already has charset -convertTags :: PandocMonad m => [Tag String] -> m [Tag String] +convertTags :: PandocMonad m => [Tag T.Text] -> m [Tag T.Text] convertTags [] = return [] convertTags (t@TagOpen{}:ts) | fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts @@ -69,10 +69,10 @@ convertTags (t@(TagOpen tagname as):ts) enc <- getDataURI (fromAttrib "type" t) y return (x, enc) else return (x,y) -convertTags (t@(TagOpen "script" as):TagClose "script":ts) = +convertTags (t@(TagOpen "script" as):TagClose "script":ts) = case fromAttrib "src" t of - [] -> (t:) <$> convertTags ts - src -> do + "" -> (t:) <$> convertTags ts + src -> do let typeAttr = fromAttrib "type" t res <- getData typeAttr src rest <- convertTags ts @@ -81,13 +81,13 @@ convertTags (t@(TagOpen "script" as):TagClose "script":ts) = (("src",dataUri) : [(x,y) | (x,y) <- as, x /= "src"]) : TagClose "script" : rest Right (mime, bs) - | ("text/javascript" `isPrefixOf` mime || - "application/javascript" `isPrefixOf` mime || - "application/x-javascript" `isPrefixOf` mime) && + | ("text/javascript" `T.isPrefixOf` mime || + "application/javascript" `T.isPrefixOf` mime || + "application/x-javascript" `T.isPrefixOf` mime) && not ("</script" `B.isInfixOf` bs) -> return $ - TagOpen "script" [("type", typeAttr)|not (null typeAttr)] - : TagText (toString bs) + TagOpen "script" [("type", typeAttr)|not (T.null typeAttr)] + : TagText (toText bs) : TagClose "script" : rest | otherwise -> @@ -97,7 +97,7 @@ convertTags (t@(TagOpen "script" as):TagClose "script":ts) = TagClose "script" : rest convertTags (t@(TagOpen "link" as):ts) = case fromAttrib "href" t of - [] -> (t:) <$> convertTags ts + "" -> (t:) <$> convertTags ts src -> do res <- getData (fromAttrib "type" t) src case res of @@ -107,14 +107,14 @@ convertTags (t@(TagOpen "link" as):ts) = (("href",dataUri) : [(x,y) | (x,y) <- as, x /= "href"]) : rest Right (mime, bs) - | "text/css" `isPrefixOf` mime - && null (fromAttrib "media" t) + | "text/css" `T.isPrefixOf` mime + && T.null (fromAttrib "media" t) && not ("</" `B.isInfixOf` bs) -> do rest <- convertTags $ dropWhile (==TagClose "link") ts return $ TagOpen "style" [("type", "text/css")] -- see #5725 - : TagText (toString bs) + : TagText (toText bs) : TagClose "style" : rest | otherwise -> do @@ -130,7 +130,7 @@ cssURLs d orig = do res <- runParserT (parseCSSUrls d) () "css" orig case res of Left e -> do - report $ CouldNotParseCSS (show e) + report $ CouldNotParseCSS $ T.pack $ show e return orig Right bs -> return bs @@ -176,52 +176,52 @@ pCSSUrl d = P.try $ do Left b -> return b Right (mt,b) -> do let enc = makeDataURI (mt, b) - return (B.pack $ "url(" ++ enc ++ ")") + return $ fromText $ "url(" <> enc <> ")" pQuoted :: PandocMonad m - => ParsecT ByteString () m (String, ByteString) + => ParsecT ByteString () m (T.Text, ByteString) pQuoted = P.try $ do quote <- P.oneOf "\"'" - url <- P.manyTill P.anyChar (P.char quote) - let fallback = B.pack ([quote] ++ trim url ++ [quote]) + url <- T.pack <$> P.manyTill P.anyChar (P.char quote) + let fallback = fromText $ T.singleton quote <> trim url <> T.singleton quote return (url, fallback) pUrl :: PandocMonad m - => ParsecT ByteString () m (String, ByteString) + => ParsecT ByteString () m (T.Text, ByteString) pUrl = P.try $ do P.string "url(" P.spaces quote <- P.option Nothing (Just <$> P.oneOf "\"'") - url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote) + url <- T.pack <$> P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote) P.spaces P.char ')' - let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++ - maybe "" (:[]) quote ++ ")") + let fallback = fromText ("url(" <> maybe "" T.singleton quote <> trim url <> + maybe "" T.singleton quote <> ")") return (url, fallback) handleCSSUrl :: PandocMonad m - => FilePath -> (String, ByteString) + => FilePath -> (T.Text, ByteString) -> ParsecT ByteString () m (Either ByteString (MimeType, ByteString)) handleCSSUrl d (url, fallback) = - case escapeURIString (/='|') (trim url) of + case escapeURIString (/='|') (T.unpack $ trim url) of '#':_ -> return $ Left fallback 'd':'a':'t':'a':':':_ -> return $ Left fallback - u -> do let url' = if isURI u then u else d </> u + u -> do let url' = if isURI (T.pack u) then T.pack u else T.pack (d </> u) res <- lift $ getData "" url' case res of - Left uri -> return $ Left (B.pack $ "url(" ++ uri ++ ")") + Left uri -> return $ Left (fromText $ "url(" <> uri <> ")") Right (mt', raw) -> do -- note that the downloaded CSS may -- itself contain url(...). - (mt, b) <- if "text/css" `isPrefixOf` mt' + (mt, b) <- if "text/css" `T.isPrefixOf` mt' -- see #5725: in HTML5, content type -- isn't allowed on style type attribute then ("text/css",) <$> cssURLs d raw else return (mt', raw) return $ Right (mt, b) -getDataURI :: PandocMonad m => MimeType -> String -> m String +getDataURI :: PandocMonad m => MimeType -> T.Text -> m T.Text getDataURI mimetype src = do res <- getData mimetype src case res of @@ -229,35 +229,36 @@ getDataURI mimetype src = do Right x -> return $ makeDataURI x getData :: PandocMonad m - => MimeType -> String - -> m (Either String (MimeType, ByteString)) -getData _ src@('d':'a':'t':'a':':':_) = return $ Left src-- already data: uri -getData mimetype src = do - let ext = map toLower $ takeExtension src - (raw, respMime) <- fetchItem src - let raw' = if ext `elem` [".gz", ".svgz"] - then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks [raw] - else raw - mime <- case (mimetype, respMime) of - ("",Nothing) -> throwError $ PandocSomeError - $ "Could not determine mime type for `" ++ src ++ "'" - (x, Nothing) -> return x - (_, Just x ) -> return x - result <- if "text/css" `isPrefixOf` mime - then do - oldInputs <- getInputFiles - setInputFiles [src] - res <- cssURLs (takeDirectory src) raw' - setInputFiles oldInputs - return res + => MimeType -> T.Text + -> m (Either T.Text (MimeType, ByteString)) +getData mimetype src + | "data:" `T.isPrefixOf` src = return $ Left src -- already data: uri + | otherwise = do + let ext = T.toLower $ T.pack $ takeExtension $ T.unpack src + (raw, respMime) <- fetchItem src + let raw' = if ext `elem` [".gz", ".svgz"] + then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks [raw] + else raw + mime <- case (mimetype, respMime) of + ("",Nothing) -> throwError $ PandocSomeError + $ "Could not determine mime type for `" <> src <> "'" + (x, Nothing) -> return x + (_, Just x ) -> return x + result <- if "text/css" `T.isPrefixOf` mime + then do + oldInputs <- getInputFiles + setInputFiles [T.unpack src] + res <- cssURLs (takeDirectory $ T.unpack src) raw' + setInputFiles oldInputs + return res else return raw' - return $ Right (mime, result) + return $ Right (mime, result) -- | Convert HTML into self-contained HTML, incorporating images, -- scripts, and CSS using data: URIs. -makeSelfContained :: PandocMonad m => String -> m String +makeSelfContained :: PandocMonad m => T.Text -> m T.Text makeSelfContained inp = do let tags = parseTags inp out' <- convertTags tags diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 797a0a0b0..926116e23 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -8,6 +8,7 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Shared Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -22,14 +23,20 @@ Utility functions and definitions used by the various Pandoc modules. module Text.Pandoc.Shared ( -- * List processing splitBy, + splitTextBy, splitByIndices, splitStringByIndices, + splitTextByIndices, substitute, ordNub, -- * Text processing ToString (..), + ToText (..), + tshow, backslashEscapes, escapeStringUsing, + elemText, + notElemText, stripTrailingNewlines, trim, triml, @@ -37,6 +44,7 @@ module Text.Pandoc.Shared ( trimMath, stripFirstAndLast, camelCaseToHyphenated, + camelCaseStrToHyphenated, toRomanNumeral, escapeURI, tabFilter, @@ -90,6 +98,7 @@ module Text.Pandoc.Shared ( defaultBlocksSeparator, -- * Safe read safeRead, + safeStrRead, -- * User data directory defaultUserDataDirs, -- * Version @@ -133,8 +142,8 @@ import Text.DocLayout (charWidth) import Text.Pandoc.Walk -- | Version number of pandoc library. -pandocVersion :: String -pandocVersion = showVersion version +pandocVersion :: T.Text +pandocVersion = T.pack $ showVersion version -- -- List processing @@ -148,6 +157,13 @@ splitBy isSep lst = rest' = dropWhile isSep rest in first:splitBy isSep rest' +splitTextBy :: (Char -> Bool) -> T.Text -> [T.Text] +splitTextBy isSep t + | T.null t = [] + | otherwise = let (first, rest) = T.break isSep t + rest' = T.dropWhile isSep rest + in first : splitTextBy isSep rest' + splitByIndices :: [Int] -> [a] -> [[a]] splitByIndices [] lst = [lst] splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x) xs) rest @@ -160,6 +176,9 @@ splitStringByIndices (x:xs) lst = let (first, rest) = splitAt' x lst in first : splitStringByIndices (map (\y -> y - x) xs) rest +splitTextByIndices :: [Int] -> T.Text -> [T.Text] +splitTextByIndices ns = fmap T.pack . splitStringByIndices ns . T.unpack + splitAt' :: Int -> [Char] -> ([Char],[Char]) splitAt' _ [] = ([],[]) splitAt' n xs | n <= 0 = ([],xs) @@ -195,89 +214,115 @@ instance ToString String where instance ToString T.Text where toString = T.unpack +class ToText a where + toText :: a -> T.Text + +instance ToText String where + toText = T.pack + +instance ToText T.Text where + toText = id + +tshow :: Show a => a -> T.Text +tshow = T.pack . show + -- | Returns an association list of backslash escapes for the -- designated characters. backslashEscapes :: [Char] -- ^ list of special characters to escape - -> [(Char, String)] -backslashEscapes = map (\ch -> (ch, ['\\',ch])) + -> [(Char, T.Text)] +backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch])) -- | Escape a string of characters, using an association list of -- characters and strings. -escapeStringUsing :: [(Char, String)] -> String -> String -escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) = - case lookup x escapeTable of - Just str -> str ++ rest - Nothing -> x:rest - where rest = escapeStringUsing escapeTable xs +escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text +escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl + +-- | @True@ exactly when the @Char@ appears in the @Text@. +elemText :: Char -> T.Text -> Bool +elemText c = T.any (== c) + +-- | @True@ exactly when the @Char@ does not appear in the @Text@. +notElemText :: Char -> T.Text -> Bool +notElemText c = T.all (/= c) -- | Strip trailing newlines from string. -stripTrailingNewlines :: String -> String -stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse +stripTrailingNewlines :: T.Text -> T.Text +stripTrailingNewlines = T.dropWhileEnd (== '\n') -- | Remove leading and trailing space (including newlines) from string. -trim :: String -> String -trim = triml . trimr +trim :: T.Text -> T.Text +trim = T.dropAround (`elemText` " \r\n\t") -- | Remove leading space (including newlines) from string. -triml :: String -> String -triml = dropWhile (`elem` " \r\n\t") +triml :: T.Text -> T.Text +triml = T.dropWhile (`elemText` " \r\n\t") -- | Remove trailing space (including newlines) from string. -trimr :: String -> String -trimr = reverse . triml . reverse +trimr :: T.Text -> T.Text +trimr = T.dropWhileEnd (`elemText` " \r\n\t") -- | Trim leading space and trailing space unless after \. -trimMath :: String -> String -trimMath = triml . reverse . stripspace . reverse +trimMath :: T.Text -> T.Text +trimMath = triml . T.reverse . stripBeginSpace . T.reverse -- no Text.spanEnd where - stripspace (c1:c2:cs) - | c1 `elem` [' ','\t','\n','\r'] - , c2 /= '\\' = stripspace (c2:cs) - stripspace cs = cs + stripBeginSpace t + | T.null pref = t + | Just ('\\', _) <- T.uncons suff = T.cons (T.last pref) suff + | otherwise = suff + where + (pref, suff) = T.span (`elemText` " \t\n\r") t -- | Strip leading and trailing characters from string -stripFirstAndLast :: String -> String -stripFirstAndLast str = - drop 1 $ take (length str - 1) str +stripFirstAndLast :: T.Text -> T.Text +stripFirstAndLast t = case T.uncons t of + Just (_, t') -> case T.unsnoc t' of + Just (t'', _) -> t'' + _ -> t' + _ -> "" -- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). -camelCaseToHyphenated :: String -> String -camelCaseToHyphenated [] = "" -camelCaseToHyphenated (a:b:rest) +camelCaseToHyphenated :: T.Text -> T.Text +camelCaseToHyphenated = T.pack . camelCaseStrToHyphenated . T.unpack + +-- This may not work as expected on general Unicode, if it contains +-- letters with a longer lower case form than upper case. I don't know +-- what the camel case practices of affected scripts are, though. +camelCaseStrToHyphenated :: String -> String +camelCaseStrToHyphenated [] = "" +camelCaseStrToHyphenated (a:b:rest) | isLower a - , isUpper b = a:'-':toLower b:camelCaseToHyphenated rest + , isUpper b = a:'-':toLower b:camelCaseStrToHyphenated rest -- handle ABCDef = abc-def -camelCaseToHyphenated (a:b:c:rest) +camelCaseStrToHyphenated (a:b:c:rest) | isUpper a , isUpper b - , isLower c = toLower a:'-':toLower b:camelCaseToHyphenated (c:rest) -camelCaseToHyphenated (a:rest) = toLower a:camelCaseToHyphenated rest + , isLower c = toLower a:'-':toLower b:camelCaseStrToHyphenated (c:rest) +camelCaseStrToHyphenated (a:rest) = toLower a:camelCaseStrToHyphenated rest -- | Convert number < 4000 to uppercase roman numeral. -toRomanNumeral :: Int -> String +toRomanNumeral :: Int -> T.Text toRomanNumeral x | x >= 4000 || x < 0 = "?" - | x >= 1000 = "M" ++ toRomanNumeral (x - 1000) - | x >= 900 = "CM" ++ toRomanNumeral (x - 900) - | x >= 500 = "D" ++ toRomanNumeral (x - 500) - | x >= 400 = "CD" ++ toRomanNumeral (x - 400) - | x >= 100 = "C" ++ toRomanNumeral (x - 100) - | x >= 90 = "XC" ++ toRomanNumeral (x - 90) - | x >= 50 = "L" ++ toRomanNumeral (x - 50) - | x >= 40 = "XL" ++ toRomanNumeral (x - 40) - | x >= 10 = "X" ++ toRomanNumeral (x - 10) + | x >= 1000 = "M" <> toRomanNumeral (x - 1000) + | x >= 900 = "CM" <> toRomanNumeral (x - 900) + | x >= 500 = "D" <> toRomanNumeral (x - 500) + | x >= 400 = "CD" <> toRomanNumeral (x - 400) + | x >= 100 = "C" <> toRomanNumeral (x - 100) + | x >= 90 = "XC" <> toRomanNumeral (x - 90) + | x >= 50 = "L" <> toRomanNumeral (x - 50) + | x >= 40 = "XL" <> toRomanNumeral (x - 40) + | x >= 10 = "X" <> toRomanNumeral (x - 10) | x == 9 = "IX" - | x >= 5 = "V" ++ toRomanNumeral (x - 5) + | x >= 5 = "V" <> toRomanNumeral (x - 5) | x == 4 = "IV" - | x >= 1 = "I" ++ toRomanNumeral (x - 1) + | x >= 1 = "I" <> toRomanNumeral (x - 1) | otherwise = "" -- | Escape whitespace and some punctuation characters in URI. -escapeURI :: String -> String -escapeURI = escapeURIString (not . needsEscaping) - where needsEscaping c = isSpace c || c `elem` - ['<','>','|','"','{','}','[',']','^', '`'] +escapeURI :: T.Text -> T.Text +escapeURI = T.pack . escapeURIString (not . needsEscaping) . T.unpack + where needsEscaping c = isSpace c || c `elemText` "<>|\"{}[]^`" + -- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0. tabFilter :: Int -- ^ Tab stop @@ -304,8 +349,11 @@ crFilter = T.filter (/= '\r') -- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We -- limit years to the range 1601-9999 (ISO 8601 accepts greater than -- or equal to 1583, but MS Word only accepts dates starting 1601). -normalizeDate :: String -> Maybe String -normalizeDate s = fmap (formatTime defaultTimeLocale "%F") +normalizeDate :: T.Text -> Maybe T.Text +normalizeDate = fmap T.pack . normalizeDate' . T.unpack + +normalizeDate' :: String -> Maybe String +normalizeDate' s = fmap (formatTime defaultTimeLocale "%F") (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day) where rejectBadYear day = case toGregorian day of (y, _, _) | y >= 1601 && y <= 9999 -> Just day @@ -321,26 +369,26 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") -- | Generate infinite lazy list of markers for an ordered list, -- depending on list attributes. -orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] +orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [T.Text] orderedListMarkers (start, numstyle, numdelim) = - let singleton c = [c] - nums = case numstyle of - DefaultStyle -> map show [start..] - Example -> map show [start..] - Decimal -> map show [start..] + let nums = case numstyle of + DefaultStyle -> map tshow [start..] + Example -> map tshow [start..] + Decimal -> map tshow [start..] UpperAlpha -> drop (start - 1) $ cycle $ - map singleton ['A'..'Z'] + map T.singleton ['A'..'Z'] LowerAlpha -> drop (start - 1) $ cycle $ - map singleton ['a'..'z'] + map T.singleton ['a'..'z'] UpperRoman -> map toRomanNumeral [start..] - LowerRoman -> map (map toLower . toRomanNumeral) [start..] + LowerRoman -> map (T.toLower . toRomanNumeral) [start..] inDelim str = case numdelim of - DefaultDelim -> str ++ "." - Period -> str ++ "." - OneParen -> str ++ ")" - TwoParens -> "(" ++ str ++ ")" + DefaultDelim -> str <> "." + Period -> str <> "." + OneParen -> str <> ")" + TwoParens -> "(" <> str <> ")" in map inDelim nums + -- | Extract the leading and trailing spaces from inside an inline element -- and place them outside the element. SoftBreaks count as Spaces for -- these purposes. @@ -387,15 +435,16 @@ deQuote x = x -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). -stringify :: Walkable Inline a => a -> String +stringify :: Walkable Inline a => a -> T.Text stringify = query go . walk (deNote . deQuote) - where go :: Inline -> [Char] + where go :: Inline -> T.Text go Space = " " go SoftBreak = " " go (Str x) = x go (Code _ x) = x go (Math _ x) = x - go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105 + go (RawInline (Format "html") (T.unpack -> ('<':'b':'r':_))) + = " " -- see #2105 go LineBreak = " " go _ = "" @@ -407,7 +456,7 @@ stringify = query go . walk (deNote . deQuote) capitalize :: Walkable Inline a => a -> a capitalize = walk go where go :: Inline -> Inline - go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s) + go (Str s) = Str $ T.toUpper s go x = x -- | Change final list item from @Para@ to @Plain@ if the list contains @@ -463,7 +512,7 @@ isPara _ = False -- | Convert Pandoc inline list to plain text identifier. HTML -- identifiers must start with a letter, and may contain only -- letters, digits, and the characters _-. -inlineListToIdentifier :: Extensions -> [Inline] -> String +inlineListToIdentifier :: Extensions -> [Inline] -> T.Text inlineListToIdentifier exts = dropNonLetter . filterAscii . toIdent . stringify . walk unEmojify where @@ -476,23 +525,23 @@ inlineListToIdentifier exts = unEmoji x = x dropNonLetter | extensionEnabled Ext_gfm_auto_identifiers exts = id - | otherwise = dropWhile (not . isAlpha) + | otherwise = T.dropWhile (not . isAlpha) filterAscii | extensionEnabled Ext_ascii_identifiers exts - = mapMaybe toAsciiChar + = T.pack . mapMaybe toAsciiChar . T.unpack | otherwise = id toIdent | extensionEnabled Ext_gfm_auto_identifiers exts = - filterPunct . spaceToDash . map toLower - | otherwise = intercalate "-" . words . filterPunct . map toLower - filterPunct = filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c) + filterPunct . spaceToDash . T.toLower + | otherwise = T.intercalate "-" . T.words . filterPunct . T.toLower + filterPunct = T.filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c) isAllowedPunct c | extensionEnabled Ext_gfm_auto_identifiers exts = c == '-' || c == '_' || generalCategory c `elem` [NonSpacingMark, SpacingCombiningMark, EnclosingMark, ConnectorPunctuation] | otherwise = c == '_' || c == '-' || c == '.' - spaceToDash = map (\c -> if isSpace c then '-' else c) + spaceToDash = T.map (\c -> if isSpace c then '-' else c) -- | Put a list of Pandoc blocks into a hierarchical structure: @@ -529,7 +578,7 @@ makeSections numbering mbBaseLevel bs = -- don't touch number if already present case lookup "number" kvs of Nothing | numbering -> - ("number", intercalate "." (map show newnum)) : kvs + ("number", T.intercalate "." (map tshow newnum)) : kvs _ -> kvs) return $ Div divattr (Header level' attr title' : sectionContents') : rest' @@ -542,7 +591,7 @@ makeSections numbering mbBaseLevel bs = let inner' = case inner of (Div (dident',dclasses',dkvs') zs@(Header{}:zs') : ws) - | null dident -> + | T.null dident -> Div (dident',dclasses' ++ dclasses,dkvs' ++ dkvs) zs : ws | otherwise -> -- keep id on header so we don't lose anchor Div (dident,dclasses ++ dclasses',dkvs ++ dkvs') @@ -564,7 +613,7 @@ headerLtEq _ _ = False -- | Generate a unique identifier from a list of inlines. -- Second argument is a list of already used identifiers. -uniqueIdent :: Extensions -> [Inline] -> Set.Set String -> String +uniqueIdent :: Extensions -> [Inline] -> Set.Set T.Text -> T.Text uniqueIdent exts title' usedIdents = if baseIdent `Set.member` usedIdents then case find (\x -> not $ numIdent x `Set.member` usedIdents) @@ -577,7 +626,7 @@ uniqueIdent exts title' usedIdents = baseIdent = case inlineListToIdentifier exts title' of "" -> "section" x -> x - numIdent n = baseIdent ++ "-" ++ show n + numIdent n = baseIdent <> "-" <> tshow n -- | True if block is a Header block. isHeaderBlock :: Block -> Bool @@ -664,7 +713,7 @@ handleTaskListItem handleInlines exts bls = -- | Set a field of a 'Meta' object. If the field already has a value, -- convert it into a list with the new value appended to the old value(s). addMetaField :: ToMetaValue a - => String + => T.Text -> a -> Meta -> Meta @@ -686,12 +735,16 @@ makeMeta title authors date = -- | Remove soft breaks between East Asian characters. eastAsianLineBreakFilter :: Pandoc -> Pandoc eastAsianLineBreakFilter = bottomUp go - where go (x:SoftBreak:y:zs) = - case (stringify x, stringify y) of - (xs@(_:_), c:_) - | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs - _ -> x:SoftBreak:y:zs - go xs = xs + where go (x:SoftBreak:y:zs) + | Just (_, b) <- T.unsnoc $ stringify x + , Just (c, _) <- T.uncons $ stringify y + , charWidth b == 2 + , charWidth c == 2 + = x:y:zs + | otherwise + = x:SoftBreak:y:zs + go xs + = xs -- | Builder for underline. -- This probably belongs in Builder.hs in pandoc-types. @@ -702,27 +755,28 @@ underlineSpan = B.spanWith ("", ["underline"], []) -- | Set of HTML elements that are represented as Span with a class equal as -- the element tag itself. htmlSpanLikeElements :: Set.Set T.Text -htmlSpanLikeElements = Set.fromList [T.pack "kbd", T.pack "mark", T.pack "dfn"] +htmlSpanLikeElements = Set.fromList ["kbd", "mark", "dfn"] -- | Returns the first sentence in a list of inlines, and the rest. breakSentence :: [Inline] -> ([Inline], [Inline]) breakSentence [] = ([],[]) breakSentence xs = - let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True - isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True - isSentenceEndInline LineBreak = True - isSentenceEndInline _ = False + let isSentenceEndInline (Str ys) + | Just (_, c) <- T.unsnoc ys = c == '.' || c == '?' + isSentenceEndInline LineBreak = True + isSentenceEndInline _ = False (as, bs) = break isSentenceEndInline xs in case bs of - [] -> (as, []) - [c] -> (as ++ [c], []) - (c:Space:cs) -> (as ++ [c], cs) - (c:SoftBreak:cs) -> (as ++ [c], cs) - (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) - (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) - (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) - (c:cs) -> (as ++ [c] ++ ds, es) - where (ds, es) = breakSentence cs + [] -> (as, []) + [c] -> (as ++ [c], []) + (c:Space:cs) -> (as ++ [c], cs) + (c:SoftBreak:cs) -> (as ++ [c], cs) + (Str ".":Str s@(T.uncons -> Just (')',_)):cs) + -> (as ++ [Str ".", Str s], cs) + (x@(Str (T.stripPrefix ".)" -> Just _)):cs) -> (as ++ [x], cs) + (LineBreak:x@(Str (T.uncons -> Just ('.',_))):cs) -> (as ++[LineBreak], x:cs) + (c:cs) -> (as ++ [c] ++ ds, es) + where (ds, es) = breakSentence cs -- | Split a list of inlines into sentences. splitSentences :: [Inline] -> [[Inline]] @@ -763,10 +817,11 @@ filterIpynbOutput mode = walk go removeANSI (CodeBlock attr code) = CodeBlock attr (removeANSIEscapes code) removeANSI x = x - removeANSIEscapes [] = [] - removeANSIEscapes ('\x1b':'[':cs) = - removeANSIEscapes (drop 1 $ dropWhile (/='m') cs) - removeANSIEscapes (c:cs) = c : removeANSIEscapes cs + removeANSIEscapes t + | Just cs <- T.stripPrefix "\x1b[" t = + removeANSIEscapes $ T.drop 1 $ T.dropWhile (/='m') cs + | Just (c, cs) <- T.uncons t = T.cons c $ removeANSIEscapes cs + | otherwise = "" go x = x -- @@ -774,12 +829,12 @@ filterIpynbOutput mode = walk go -- -- | Render HTML tags. -renderTags' :: [Tag String] -> String +renderTags' :: [Tag T.Text] -> T.Text renderTags' = renderTagsOptions renderOptions{ optMinimize = matchTags ["hr", "br", "img", "meta", "link"] , optRawTag = matchTags ["script", "style"] } - where matchTags tags = flip elem tags . map toLower + where matchTags tags = flip elem tags . T.toLower -- -- File handling @@ -826,8 +881,8 @@ collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories -- Convert the path part of a file: URI to a regular path. -- On windows, @/c:/foo@ should be @c:/foo@. -- On linux, @/foo@ should be @/foo@. -uriPathToPath :: String -> FilePath -uriPathToPath path = +uriPathToPath :: T.Text -> FilePath +uriPathToPath (T.unpack -> path) = #ifdef _WINDOWS case path of '/':ps -> ps @@ -853,7 +908,7 @@ filteredFilesFromArchive zf f = -- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus -- the unofficial schemes doi, javascript, isbn, pmid. -schemes :: Set.Set String +schemes :: Set.Set T.Text schemes = Set.fromList -- Official IANA schemes [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs" @@ -905,11 +960,11 @@ schemes = Set.fromList -- | Check if the string is a valid URL with a IANA or frequently used but -- unofficial scheme (see @schemes@). -isURI :: String -> Bool -isURI = maybe False hasKnownScheme . parseURI +isURI :: T.Text -> Bool +isURI = maybe False hasKnownScheme . parseURI . T.unpack where - hasKnownScheme = (`Set.member` schemes) . map toLower . - filter (/= ':') . uriScheme + hasKnownScheme = (`Set.member` schemes) . T.toLower . + T.filter (/= ':') . T.pack . uriScheme --- --- Squash blocks into inlines @@ -962,12 +1017,14 @@ defaultBlocksSeparator = -- Safe read -- -safeRead :: (MonadPlus m, Read a) => String -> m a -safeRead s = case reads s of +safeRead :: (MonadPlus m, Read a) => T.Text -> m a +safeRead = safeStrRead . T.unpack + +safeStrRead :: (MonadPlus m, Read a) => String -> m a +safeStrRead s = case reads s of (d,x):_ | all isSpace x -> return d _ -> mzero - -- -- User data directory -- diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index 4a53a1c23..324731c11 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Slides Copyright : Copyright (C) 2012-2019 John MacFarlane diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 640197c45..8d92e306b 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -25,13 +25,14 @@ import Text.DocTemplates (Template, compileTemplate, renderTemplate) import Text.Pandoc.Class (PandocMonad, readDataFile) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Text (Text) +import qualified Data.Text as T -- | Get default template for the specified writer. getDefaultTemplate :: PandocMonad m - => String -- ^ Name of writer + => Text -- ^ Name of writer -> m Text getDefaultTemplate writer = do - let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions + let format = T.takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions case format of "native" -> return "" "json" -> return "" @@ -51,7 +52,7 @@ getDefaultTemplate writer = do "markdown_phpextra" -> getDefaultTemplate "markdown" "gfm" -> getDefaultTemplate "commonmark" _ -> do - let fname = "templates" </> "default" <.> format + let fname = "templates" </> "default" <.> T.unpack format UTF8.toText <$> readDataFile fname diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs index 50b172eda..cbee5ef8c 100644 --- a/src/Text/Pandoc/Translations.hs +++ b/src/Text/Pandoc/Translations.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Translations Copyright : Copyright (C) 2017-2019 John MacFarlane @@ -34,7 +35,7 @@ import Data.Aeson.Types (Value(..), FromJSON(..)) import qualified Data.Aeson.Types as Aeson import qualified Data.HashMap.Strict as HM import qualified Data.Map as M -import Data.Text as T +import qualified Data.Text as T import qualified Data.YAML as YAML import GHC.Generics (Generic) import Text.Pandoc.Shared (safeRead) @@ -65,11 +66,11 @@ data Term = | To deriving (Show, Eq, Ord, Generic, Enum, Read) -newtype Translations = Translations (M.Map Term String) +newtype Translations = Translations (M.Map Term T.Text) deriving (Show, Generic, Semigroup, Monoid) instance FromJSON Term where - parseJSON (String t) = case safeRead (T.unpack t) of + parseJSON (String t) = case safeRead t of Just t' -> pure t' Nothing -> Prelude.fail $ "Invalid Term name " ++ show t @@ -77,7 +78,7 @@ instance FromJSON Term where instance YAML.FromYAML Term where parseYAML (YAML.Scalar _ (YAML.SStr t)) = - case safeRead (T.unpack t) of + case safeRead t of Just t' -> pure t' Nothing -> Prelude.fail $ "Invalid Term name " ++ show t @@ -88,11 +89,11 @@ instance FromJSON Translations where xs <- mapM addItem (HM.toList hm) return $ Translations (M.fromList xs) where addItem (k,v) = - case safeRead (T.unpack k) of + case safeRead k of Nothing -> Prelude.fail $ "Invalid Term name " ++ show k Just t -> case v of - (String s) -> return (t, T.unpack $ T.strip s) + (String s) -> return (t, T.strip s) inv -> Aeson.typeMismatch "String" inv parseJSON invalid = Aeson.typeMismatch "Translations" invalid @@ -100,22 +101,22 @@ instance YAML.FromYAML Translations where parseYAML = YAML.withMap "Translations" $ \tr -> Translations .M.fromList <$> mapM addItem (M.toList tr) where addItem (n@(YAML.Scalar _ (YAML.SStr k)), v) = - case safeRead (T.unpack k) of + case safeRead k of Nothing -> YAML.typeMismatch "Term" n Just t -> case v of (YAML.Scalar _ (YAML.SStr s)) -> - return (t, T.unpack (T.strip s)) + return (t, T.strip s) n' -> YAML.typeMismatch "String" n' addItem (n, _) = YAML.typeMismatch "String" n -lookupTerm :: Term -> Translations -> Maybe String +lookupTerm :: Term -> Translations -> Maybe T.Text lookupTerm t (Translations tm) = M.lookup t tm -readTranslations :: String -> Either String Translations +readTranslations :: T.Text -> Either T.Text Translations readTranslations s = - case YAML.decodeStrict $ UTF8.fromString s of - Left (pos,err') -> Left $ err' ++ + case YAML.decodeStrict $ UTF8.fromText s of + Left (pos,err') -> Left $ T.pack $ err' ++ " (line " ++ show (YAML.posLine pos) ++ " column " ++ show (YAML.posColumn pos) ++ ")" Right (t:_) -> Right t diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index c88f860bb..724c22a50 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -74,8 +75,8 @@ import Control.Monad.Except (throwError) import Control.Monad (unless) import Data.Aeson import qualified Data.ByteString.Lazy as BL -import Data.List (intercalate) import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Options @@ -121,7 +122,7 @@ data Writer m = TextWriter (WriterOptions -> Pandoc -> m Text) | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString) -- | Association list of formats and writers. -writers :: PandocMonad m => [ ( String, Writer m) ] +writers :: PandocMonad m => [ (Text, Writer m) ] writers = [ ("native" , TextWriter writeNative) ,("json" , TextWriter $ \o d -> writeJSON o d) @@ -179,11 +180,11 @@ writers = [ ] -- | Retrieve writer, extensions based on formatSpec (format+extensions). -getWriter :: PandocMonad m => String -> m (Writer m, Extensions) +getWriter :: PandocMonad m => Text -> m (Writer m, Extensions) getWriter s = case parseFormatSpec s of Left e -> throwError $ PandocAppError - $ intercalate "\n" [m | Message m <- errorMessages e] + $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e] Right (writerName, extsToEnable, extsToDisable) -> case lookup writerName writers of Nothing -> throwError $ @@ -198,7 +199,7 @@ getWriter s = unless (extensionEnabled ext allExts) $ throwError $ PandocUnsupportedExtensionError - (drop 4 $ show ext) writerName) + (T.drop 4 $ T.pack $ show ext) writerName) (extsToEnable ++ extsToDisable) return (w, exts) diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index c0f215d57..1c4c24f7f 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -22,9 +22,9 @@ AsciiDoc: <http://www.methods.co.nz/asciidoc/> module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc, writeAsciiDoctor) where import Prelude import Control.Monad.State.Strict -import Data.Char (isPunctuation, isSpace, toLower, toUpper) -import Data.List (intercalate, intersperse, stripPrefix) -import Data.Maybe (fromMaybe, isJust, listToMaybe) +import Data.Char (isPunctuation, isSpace) +import Data.List (intercalate, intersperse) +import Data.Maybe (fromMaybe, isJust) import qualified Data.Set as Set import qualified Data.Text as T import Data.Text (Text) @@ -39,11 +39,11 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared -data WriterState = WriterState { defListMarker :: String +data WriterState = WriterState { defListMarker :: Text , orderedListLevel :: Int , bulletListLevel :: Int , intraword :: Bool - , autoIds :: Set.Set String + , autoIds :: Set.Set Text , asciidoctorVariant :: Bool , inList :: Bool , hasMath :: Bool @@ -98,12 +98,12 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do Just tpl -> renderTemplate tpl context -- | Escape special characters for AsciiDoc. -escapeString :: String -> String +escapeString :: Text -> Text escapeString = escapeStringUsing escs where escs = backslashEscapes "{" -- | Ordered list start parser for use in Para below. -olMarker :: Parser [Char] ParserState Char +olMarker :: Parser Text ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && @@ -113,15 +113,18 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker -- | True if string begins with an ordered list marker -- or would be interpreted as an AsciiDoc option command -needsEscaping :: String -> Bool +needsEscaping :: Text -> Bool needsEscaping s = beginsWithOrderedListMarker s || isBracketed s where beginsWithOrderedListMarker str = - case runParser olMarker defaultParserState "para start" (take 10 str) of + case runParser olMarker defaultParserState "para start" (T.take 10 str) of Left _ -> False Right _ -> True - isBracketed ('[':cs) = listToMaybe (reverse cs) == Just ']' - isBracketed _ = False + isBracketed t + | Just ('[', t') <- T.uncons t + , Just (_, ']') <- T.unsnoc t' + = True + | otherwise = False -- | Convert Pandoc block element to asciidoc. blockToAsciiDoc :: PandocMonad m @@ -137,12 +140,13 @@ blockToAsciiDoc opts (Div (id',"section":_,_) blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines return $ contents <> blankline -blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = - blockToAsciiDoc opts (Para [Image attr alt (src,tit)]) +blockToAsciiDoc opts (Para [Image attr alt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = blockToAsciiDoc opts (Para [Image attr alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do contents <- inlineListToAsciiDoc opts inlines -- escape if para starts with ordered list marker - let esc = if needsEscaping (T.unpack $ render Nothing contents) + let esc = if needsEscaping (render Nothing contents) then text "{empty}" else empty return $ esc <> contents <> blankline @@ -154,7 +158,7 @@ blockToAsciiDoc opts (LineBlock lns) = do contents <- joinWithLinefeeds <$> mapM docify lns return $ "[verse]" $$ text "--" $$ contents $$ text "--" $$ blankline blockToAsciiDoc _ b@(RawBlock f s) - | f == "asciidoc" = return $ text s + | f == "asciidoc" = return $ literal s | otherwise = do report $ BlockNotRendered b return empty @@ -165,20 +169,20 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do ids <- gets autoIds let autoId = uniqueIdent (writerExtensions opts) inlines ids modify $ \st -> st{ autoIds = Set.insert autoId ids } - let identifier = if null ident || + let identifier = if T.null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) then empty - else "[[" <> text ident <> "]]" + else "[[" <> literal ident <> "]]" return $ identifier $$ nowrap (text (replicate (level + 1) '=') <> space <> contents) <> blankline blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ flush ( if null classes - then "...." $$ text str $$ "...." - else attrs $$ "----" $$ text str $$ "----") + then "...." $$ literal str $$ "...." + else attrs $$ "----" $$ literal str $$ "----") <> blankline - where attrs = "[" <> text (intercalate "," ("source" : classes)) <> "]" + where attrs = "[" <> literal (T.intercalate "," ("source" : classes)) <> "]" blockToAsciiDoc opts (BlockQuote blocks) = do contents <- blockListToAsciiDoc opts blocks let isBlock (BlockQuote _) = True @@ -258,11 +262,11 @@ blockToAsciiDoc opts (OrderedList (start, sty, _delim) items) = do DefaultStyle -> [] Decimal -> ["arabic"] Example -> [] - _ -> [map toLower (show sty)] - let listStart = if start == 1 then [] else ["start=" ++ show start] - let listoptions = case intercalate ", " (listStyle ++ listStart) of - [] -> empty - x -> brackets (text x) + _ -> [T.toLower (tshow sty)] + let listStart = if start == 1 then [] else ["start=" <> tshow start] + let listoptions = case T.intercalate ", " (listStyle ++ listStart) of + "" -> empty + x -> brackets (literal x) inlist <- gets inList modify $ \st -> st{ inList = True } contents <- mapM (orderedListItemToAsciiDoc opts) items @@ -275,7 +279,7 @@ blockToAsciiDoc opts (DefinitionList items) = do modify $ \st -> st{ inList = inlist } return $ mconcat contents <> blankline blockToAsciiDoc opts (Div (ident,classes,_) bs) = do - let identifier = if null ident then empty else "[[" <> text ident <> "]]" + let identifier = if T.null ident then empty else "[[" <> literal ident <> "]]" let admonitions = ["attention","caution","danger","error","hint", "important","note","tip","warning"] contents <- @@ -290,7 +294,7 @@ blockToAsciiDoc opts (Div (ident,classes,_) bs) = do else ("." <>) <$> blockListToAsciiDoc opts titleBs admonitionBody <- blockListToAsciiDoc opts bodyBs - return $ "[" <> text (map toUpper l) <> "]" $$ + return $ "[" <> literal (T.toUpper l) <> "]" $$ chomp admonitionTitle $$ "====" $$ chomp admonitionBody $$ @@ -365,7 +369,7 @@ definitionListItemToAsciiDoc opts (label, defs) = do defs' <- mapM defsToAsciiDoc defs modify (\st -> st{ defListMarker = marker }) let contents = nest 2 $ vcat $ intersperse divider $ map chomp defs' - return $ labelText <> text marker <> cr <> contents <> cr + return $ labelText <> literal marker <> cr <> contents <> cr -- | Convert list of Pandoc block elements to asciidoc. blockListToAsciiDoc :: PandocMonad m @@ -408,10 +412,11 @@ inlineListToAsciiDoc opts lst = do isSpacy _ SoftBreak = True -- Note that \W characters count as spacy in AsciiDoc -- for purposes of determining interword: - isSpacy End (Str xs) = case reverse xs of - c:_ -> isPunctuation c || isSpace c - _ -> False - isSpacy Start (Str (c:_)) = isPunctuation c || isSpace c + isSpacy End (Str xs) = case T.unsnoc xs of + Just (_, c) -> isPunctuation c || isSpace c + _ -> False + isSpacy Start (Str xs) + | Just (c, _) <- T.uncons xs = isPunctuation c || isSpace c isSpacy _ _ = False setIntraword :: PandocMonad m => Bool -> ADW m () @@ -456,25 +461,25 @@ inlineToAsciiDoc opts (Quoted qt lst) = do | otherwise -> [Str "``"] ++ lst ++ [Str "''"] inlineToAsciiDoc _ (Code _ str) = do isAsciidoctor <- gets asciidoctorVariant - let contents = text (escapeStringUsing (backslashEscapes "`") str) + let contents = literal (escapeStringUsing (backslashEscapes "`") str) return $ if isAsciidoctor then text "`+" <> contents <> "+`" else text "`" <> contents <> "`" -inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str +inlineToAsciiDoc _ (Str str) = return $ literal $ escapeString str inlineToAsciiDoc _ (Math InlineMath str) = do isAsciidoctor <- gets asciidoctorVariant modify $ \st -> st{ hasMath = True } let content = if isAsciidoctor - then text str - else "$" <> text str <> "$" + then literal str + else "$" <> literal str <> "$" return $ "latexmath:[" <> content <> "]" inlineToAsciiDoc _ (Math DisplayMath str) = do isAsciidoctor <- gets asciidoctorVariant modify $ \st -> st{ hasMath = True } let content = if isAsciidoctor - then text str - else "\\[" <> text str <> "\\]" + then literal str + else "\\[" <> literal str <> "\\]" inlist <- gets inList let sepline = if inlist then text "+" @@ -483,7 +488,7 @@ inlineToAsciiDoc _ (Math DisplayMath str) = do (cr <> sepline) $$ "[latexmath]" $$ "++++" $$ content $$ "++++" <> cr inlineToAsciiDoc _ il@(RawInline f s) - | f == "asciidoc" = return $ text s + | f == "asciidoc" = return $ literal s | otherwise = do report $ InlineNotRendered il return empty @@ -501,38 +506,38 @@ inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do -- abs: http://google.cod[Google] -- or my@email.com[email john] linktext <- inlineListToAsciiDoc opts txt - let isRelative = ':' `notElem` src + let isRelative = T.all (/= ':') src let prefix = if isRelative then text "link:" else empty - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) let useAuto = case txt of [Str s] | escapeURI s == srcSuffix -> True _ -> False return $ if useAuto - then text srcSuffix - else prefix <> text src <> "[" <> linktext <> "]" + then literal srcSuffix + else prefix <> literal src <> "[" <> linktext <> "]" inlineToAsciiDoc opts (Image attr alternate (src, tit)) = do -- image:images/logo.png[Company logo, title="blah"] let txt = if null alternate || (alternate == [Str ""]) then [Str "image"] else alternate linktext <- inlineListToAsciiDoc opts txt - let linktitle = if null tit + let linktitle = if T.null tit then empty - else ",title=\"" <> text tit <> "\"" + else ",title=\"" <> literal tit <> "\"" showDim dir = case dimension dir attr of Just (Percent a) -> ["scaledwidth=" <> text (show (Percent a))] Just dim -> - [text (show dir) <> "=" <> text (showInPixel opts dim)] + [text (show dir) <> "=" <> literal (showInPixel opts dim)] Nothing -> [] dimList = showDim Width ++ showDim Height dims = if null dimList then empty else "," <> mconcat (intersperse "," dimList) - return $ "image:" <> text src <> "[" <> linktext <> linktitle <> dims <> "]" + return $ "image:" <> literal src <> "[" <> linktext <> linktitle <> dims <> "]" inlineToAsciiDoc opts (Note [Para inlines]) = inlineToAsciiDoc opts (Note [Plain inlines]) inlineToAsciiDoc opts (Note [Plain inlines]) = do @@ -544,9 +549,9 @@ inlineToAsciiDoc opts (Span (ident,classes,_) ils) = do contents <- inlineListToAsciiDoc opts ils isIntraword <- gets intraword let marker = if isIntraword then "##" else "#" - if null ident && null classes + if T.null ident && null classes then return contents else do - let modifier = brackets $ text $ unwords $ - [ '#':ident | not (null ident)] ++ map ('.':) classes + let modifier = brackets $ literal $ T.unwords $ + [ "#" <> ident | not (T.null ident)] ++ map ("." <>) classes return $ modifier <> marker <> contents <> marker diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 8e6e8af51..e2d2b8e4d 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.CommonMark Copyright : Copyright (C) 2015-2019 John MacFarlane @@ -28,7 +29,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared (capitalize, isHeaderBlock, isTightList, - linesToPara, onlySimpleTableCells, substitute, taskListItemToAscii) + linesToPara, onlySimpleTableCells, taskListItemToAscii, tshow) import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Walk (walk, walkM) import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) @@ -73,7 +74,7 @@ processNotes :: Inline -> State [[Block]] Inline processNotes (Note bs) = do modify (bs :) notes <- get - return $ Str $ "[" ++ show (length notes) ++ "]" + return $ Str $ "[" <> tshow (length notes) <> "]" processNotes x = return x node :: NodeType -> [Node] -> Node @@ -109,14 +110,14 @@ blockToNodes opts (Para xs) ns = return (node PARAGRAPH (inlinesToNodes opts xs) : ns) blockToNodes opts (LineBlock lns) ns = blockToNodes opts (linesToPara lns) ns blockToNodes _ (CodeBlock (_,classes,_) xs) ns = return - (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns) + (node (CODE_BLOCK (T.unwords classes) xs) [] : ns) blockToNodes opts (RawBlock (Format f) xs) ns | f == "html" && isEnabled Ext_raw_html opts - = return (node (HTML_BLOCK (T.pack xs)) [] : ns) + = return (node (HTML_BLOCK xs) [] : ns) | (f == "latex" || f == "tex") && isEnabled Ext_raw_tex opts - = return (node (CUSTOM_BLOCK (T.pack xs) T.empty) [] : ns) + = return (node (CUSTOM_BLOCK xs T.empty) [] : ns) | f == "markdown" - = return (node (CUSTOM_BLOCK (T.pack xs) T.empty) [] : ns) + = return (node (CUSTOM_BLOCK xs T.empty) [] : ns) | otherwise = return ns blockToNodes opts (BlockQuote bs) ns = do nodes <- blocksToNodes opts bs @@ -169,9 +170,9 @@ blockToNodes opts t@(Table capt aligns _widths headers rows) ns = do let capt' = node PARAGRAPH (inlinesToNodes opts capt) -- backslash | in code and raw: let fixPipe (Code attr xs) = - Code attr (substitute "|" "\\|" xs) + Code attr (T.replace "|" "\\|" xs) fixPipe (RawInline format xs) = - RawInline format (substitute "|" "\\|" xs) + RawInline format (T.replace "|" "\\|" xs) fixPipe x = x let toCell [Plain ils] = T.strip $ nodeToCommonmark [] Nothing @@ -276,19 +277,19 @@ inlineToNodes opts (SmallCaps xs) = [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) else (inlinesToNodes opts (capitalize xs) ++) inlineToNodes opts (Link _ ils (url,tit)) = - (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) + (node (LINK url tit) (inlinesToNodes opts ils) :) -- title beginning with fig: indicates implicit figure -inlineToNodes opts (Image alt ils (url,'f':'i':'g':':':tit)) = +inlineToNodes opts (Image alt ils (url,T.stripPrefix "fig:" -> Just tit)) = inlineToNodes opts (Image alt ils (url,tit)) inlineToNodes opts (Image _ ils (url,tit)) = - (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) + (node (IMAGE url tit) (inlinesToNodes opts ils) :) inlineToNodes opts (RawInline (Format f) xs) | f == "html" && isEnabled Ext_raw_html opts - = (node (HTML_INLINE (T.pack xs)) [] :) + = (node (HTML_INLINE xs) [] :) | (f == "latex" || f == "tex") && isEnabled Ext_raw_tex opts - = (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :) + = (node (CUSTOM_INLINE xs T.empty) [] :) | f == "markdown" - = (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :) + = (node (CUSTOM_INLINE xs T.empty) [] :) | otherwise = id inlineToNodes opts (Quoted qt ils) = ((node (HTML_INLINE start) [] : @@ -304,12 +305,12 @@ inlineToNodes opts (Quoted qt ils) = | writerPreferAscii opts -> ("“", "”") | otherwise -> ("“", "”") -inlineToNodes _ (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes _ (Code _ str) = (node (CODE str) [] :) inlineToNodes opts (Math mt str) = case writerHTMLMathMethod opts of WebTeX url -> let core = inlineToNodes opts - (Image nullAttr [Str str] (url ++ urlEncode str, str)) + (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) sep = if mt == DisplayMath then (node LINEBREAK [] :) else id @@ -317,14 +318,14 @@ inlineToNodes opts (Math mt str) = _ -> case mt of InlineMath -> - (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + (node (HTML_INLINE ("\\(" <> str <> "\\)")) [] :) DisplayMath -> - (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :) + (node (HTML_INLINE ("\\[" <> str <> "\\]")) [] :) inlineToNodes opts (Span ("",["emoji"],kvs) [Str s]) = do case lookup "data-emoji" kvs of Just emojiname | isEnabled Ext_emoji opts -> - (node (TEXT (":" <> T.pack emojiname <> ":")) [] :) - _ -> (node (TEXT (T.pack s)) [] :) + (node (TEXT (":" <> emojiname <> ":")) [] :) + _ -> (node (TEXT s) [] :) inlineToNodes opts (Span attr ils) = let nodes = inlinesToNodes opts ils op = tagWithAttributes opts True False "span" attr @@ -336,17 +337,17 @@ inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++) inlineToNodes _ (Note _) = id -- should not occur -- we remove Note elements in preprocessing -stringToNodes :: WriterOptions -> String -> [Node] -> [Node] +stringToNodes :: WriterOptions -> Text -> [Node] -> [Node] stringToNodes opts s - | not (writerPreferAscii opts) = (node (TEXT (T.pack s)) [] :) + | not (writerPreferAscii opts) = (node (TEXT s) [] :) | otherwise = step s where step input = - let (ascii, rest) = span isAscii input - this = node (TEXT (T.pack ascii)) [] - nodes = case rest of - [] -> id - (nonAscii : rest') -> + let (ascii, rest) = T.span isAscii input + this = node (TEXT ascii) [] + nodes = case T.uncons rest of + Nothing -> id + Just (nonAscii, rest') -> let escaped = toHtml5Entities (T.singleton nonAscii) in (node (HTML_INLINE escaped) [] :) . step rest' in (this :) . nodes @@ -354,7 +355,7 @@ stringToNodes opts s toSubscriptInline :: Inline -> Maybe Inline toSubscriptInline Space = Just Space toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils -toSubscriptInline (Str s) = Str <$> traverse toSubscript s +toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s) toSubscriptInline LineBreak = Just LineBreak toSubscriptInline SoftBreak = Just SoftBreak toSubscriptInline _ = Nothing @@ -362,7 +363,7 @@ toSubscriptInline _ = Nothing toSuperscriptInline :: Inline -> Maybe Inline toSuperscriptInline Space = Just Space toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils -toSuperscriptInline (Str s) = Str <$> traverse toSuperscript s +toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s) toSuperscriptInline LineBreak = Just LineBreak toSuperscriptInline SoftBreak = Just SoftBreak toSuperscriptInline _ = Nothing diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index bef1e6265..2ec86fd78 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ConTeXt Copyright : Copyright (C) 2007-2019 John MacFarlane @@ -15,8 +16,8 @@ Conversion of 'Pandoc' format into ConTeXt. module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where import Prelude import Control.Monad.State.Strict -import Data.Char (ord, isDigit, toLower) -import Data.List (intercalate, intersperse) +import Data.Char (ord, isDigit) +import Data.List (intersperse) import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -89,14 +90,14 @@ pandocToConTeXt options (Pandoc meta blocks) = do $ defField "layout" layoutFromMargins $ defField "number-sections" (writerNumberSections options) $ maybe id (\l -> - defField "context-lang" (text l :: Doc Text)) mblang + defField "context-lang" (literal l :: Doc Text)) mblang $ (case T.unpack . render Nothing <$> getField "papersize" metadata of Just (('a':d:ds) :: String) | all isDigit (d:ds) -> resetField "papersize" (T.pack ('A':d:ds)) _ -> id) - $ (case toLower <$> lookupMetaString "pdfa" meta of + $ (case T.toLower $ lookupMetaString "pdfa" meta of "true" -> resetField "pdfa" (T.pack "1b:2005") _ -> id) metadata let context' = defField "context-dir" (maybe mempty toContextDir @@ -114,7 +115,7 @@ toContextDir = fmap (\t -> case t of _ -> t) -- | escape things as needed for ConTeXt -escapeCharForConTeXt :: WriterOptions -> Char -> String +escapeCharForConTeXt :: WriterOptions -> Char -> Text escapeCharForConTeXt opts ch = let ligatures = isEnabled Ext_smart opts in case ch of @@ -133,18 +134,18 @@ escapeCharForConTeXt opts ch = '\x2013' | ligatures -> "--" '\x2019' | ligatures -> "'" '\x2026' -> "\\ldots{}" - x -> [x] + x -> T.singleton x -- | Escape string for ConTeXt -stringToConTeXt :: WriterOptions -> String -> String -stringToConTeXt opts = concatMap (escapeCharForConTeXt opts) +stringToConTeXt :: WriterOptions -> Text -> Text +stringToConTeXt opts = T.concatMap (escapeCharForConTeXt opts) -- | Sanitize labels -toLabel :: String -> String -toLabel z = concatMap go z +toLabel :: Text -> Text +toLabel z = T.concatMap go z where go x - | x `elem` ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) - | otherwise = [x] + | x `elem` ("\\#[]\",{}%()|=" :: String) = "ux" <> T.pack (printf "%x" (ord x)) + | otherwise = T.singleton x -- | Convert Pandoc block element to ConTeXt. blockToConTeXt :: PandocMonad m => Block -> WM m (Doc Text) @@ -157,14 +158,16 @@ blockToConTeXt (Div attr@(_,"section":_,_) return $ header' $$ innerContents $$ footer' blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure -blockToConTeXt (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do - capt <- inlineListToConTeXt txt - img <- inlineToConTeXt (Image attr txt (src, "")) - let (ident, _, _) = attr - label = if null ident - then empty - else "[]" <> brackets (text $ toLabel ident) - return $ blankline $$ "\\placefigure" <> label <> braces capt <> img <> blankline +blockToConTeXt (Para [Image attr txt (src,tgt)]) + | Just _ <- T.stripPrefix "fig:" tgt + = do + capt <- inlineListToConTeXt txt + img <- inlineToConTeXt (Image attr txt (src, "")) + let (ident, _, _) = attr + label = if T.null ident + then empty + else "[]" <> brackets (literal $ toLabel ident) + return $ blankline $$ "\\placefigure" <> label <> braces capt <> img <> blankline blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst return $ contents <> blankline @@ -175,17 +178,17 @@ blockToConTeXt (BlockQuote lst) = do contents <- blockListToConTeXt lst return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline blockToConTeXt (CodeBlock _ str) = - return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline + return $ flush ("\\starttyping" <> cr <> literal str <> cr <> "\\stoptyping") $$ blankline -- blankline because \stoptyping can't have anything after it, inc. '}' blockToConTeXt b@(RawBlock f str) - | f == Format "context" || f == Format "tex" = return $ text str <> blankline + | f == Format "context" || f == Format "tex" = return $ literal str <> blankline | otherwise = empty <$ report (BlockNotRendered b) blockToConTeXt (Div (ident,_,kvs) bs) = do let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" mblang <- fromBCP47 (lookup "lang" kvs) - let wrapRef txt = if null ident + let wrapRef txt = if T.null ident then txt - else ("\\reference" <> brackets (text $ toLabel ident) <> + else ("\\reference" <> brackets (literal $ toLabel ident) <> braces empty <> "%") $$ txt wrapDir = case lookup "dir" kvs of Just "rtl" -> align "righttoleft" @@ -193,7 +196,7 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do _ -> id wrapLang txt = case mblang of Just lng -> "\\start\\language[" - <> text lng <> "]" $$ txt $$ "\\stop" + <> literal lng <> "]" $$ txt $$ "\\stop" Nothing -> txt wrapBlank txt = blankline <> txt <> blankline (wrapBlank . wrapLang . wrapDir . wrapRef) <$> blockListToConTeXt bs @@ -202,29 +205,29 @@ blockToConTeXt (BulletList lst) = do return $ ("\\startitemize" <> if isTightList lst then brackets "packed" else empty) $$ - vcat contents $$ text "\\stopitemize" <> blankline + vcat contents $$ literal "\\stopitemize" <> blankline blockToConTeXt (OrderedList (start, style', delim) lst) = do st <- get let level = stOrderedListLevel st put st {stOrderedListLevel = level + 1} contents <- mapM listItemToConTeXt lst put st {stOrderedListLevel = level} - let start' = if start == 1 then "" else "start=" ++ show start + let start' = if start == 1 then "" else "start=" <> tshow start let delim' = case delim of DefaultDelim -> "" Period -> "stopper=." OneParen -> "stopper=)" TwoParens -> "left=(,stopper=)" - let width = maximum $ map length $ take (length contents) + let width = maximum $ map T.length $ take (length contents) (orderedListMarkers (start, style', delim)) let width' = (toEnum width + 1) / 2 let width'' = if width' > (1.5 :: Double) - then "width=" ++ show width' ++ "em" + then "width=" <> tshow width' <> "em" else "" - let specs2Items = filter (not . null) [start', delim', width''] + let specs2Items = filter (not . T.null) [start', delim', width''] let specs2 = if null specs2Items then "" - else "[" ++ intercalate "," specs2Items ++ "]" + else "[" <> T.intercalate "," specs2Items <> "]" let style'' = '[': (case style' of DefaultStyle -> orderedListStyles !! level Decimal -> 'n' @@ -234,8 +237,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do LowerAlpha -> 'a' UpperAlpha -> 'A') : if isTightList lst then ",packed]" else "]" - let specs = style'' ++ specs2 - return $ "\\startitemize" <> text specs $$ vcat contents $$ + let specs = T.pack style'' <> specs2 + return $ "\\startitemize" <> literal specs $$ vcat contents $$ "\\stopitemize" <> blankline blockToConTeXt (DefinitionList lst) = liftM vcat $ mapM defListItemToConTeXt lst @@ -343,9 +346,9 @@ inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst addStruts xs addStruts (x:xs) = x : addStruts xs addStruts [] = [] - isSpacey Space = True - isSpacey (Str ('\160':_)) = True - isSpacey _ = False + isSpacey Space = True + isSpacey (Str (T.uncons -> Just ('\160',_))) = True + isSpacey _ = False -- | Convert inline element to ConTeXt inlineToConTeXt :: PandocMonad m @@ -369,11 +372,11 @@ inlineToConTeXt (Subscript lst) = do inlineToConTeXt (SmallCaps lst) = do contents <- inlineListToConTeXt lst return $ braces $ "\\sc " <> contents -inlineToConTeXt (Code _ str) | not ('{' `elem` str || '}' `elem` str) = - return $ "\\type" <> braces (text str) +inlineToConTeXt (Code _ str) | not ('{' `elemText` str || '}' `elemText` str) = + return $ "\\type" <> braces (literal str) inlineToConTeXt (Code _ str) = do opts <- gets stOptions - return $ "\\mono" <> braces (text $ stringToConTeXt opts str) + return $ "\\mono" <> braces (literal $ stringToConTeXt opts str) inlineToConTeXt (Quoted SingleQuote lst) = do contents <- inlineListToConTeXt lst return $ "\\quote" <> braces contents @@ -383,15 +386,15 @@ inlineToConTeXt (Quoted DoubleQuote lst) = do inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst inlineToConTeXt (Str str) = do opts <- gets stOptions - return $ text $ stringToConTeXt opts str + return $ literal $ stringToConTeXt opts str inlineToConTeXt (Math InlineMath str) = - return $ char '$' <> text str <> char '$' + return $ char '$' <> literal str <> char '$' inlineToConTeXt (Math DisplayMath str) = - return $ text "\\startformula " <> text str <> text " \\stopformula" <> space + return $ literal "\\startformula " <> literal str <> literal " \\stopformula" <> space inlineToConTeXt il@(RawInline f str) - | f == Format "tex" || f == Format "context" = return $ text str + | f == Format "tex" || f == Format "context" = return $ literal str | otherwise = empty <$ report (InlineNotRendered il) -inlineToConTeXt LineBreak = return $ text "\\crlf" <> cr +inlineToConTeXt LineBreak = return $ literal "\\crlf" <> cr inlineToConTeXt SoftBreak = do wrapText <- gets (writerWrapText . stOptions) return $ case wrapText of @@ -400,39 +403,39 @@ inlineToConTeXt SoftBreak = do WrapPreserve -> cr inlineToConTeXt Space = return space -- Handle HTML-like internal document references to sections -inlineToConTeXt (Link _ txt ('#' : ref, _)) = do +inlineToConTeXt (Link _ txt (T.uncons -> Just ('#', ref), _)) = do opts <- gets stOptions contents <- inlineListToConTeXt txt let ref' = toLabel $ stringToConTeXt opts ref - return $ text "\\goto" + return $ literal "\\goto" <> braces contents - <> brackets (text ref') + <> brackets (literal ref') inlineToConTeXt (Link _ txt (src, _)) = do - let isAutolink = txt == [Str (unEscapeString src)] + let isAutolink = txt == [Str (T.pack $ unEscapeString $ T.unpack src)] st <- get let next = stNextRef st put $ st {stNextRef = next + 1} - let ref = "url" ++ show next + let ref = "url" <> tshow next contents <- inlineListToConTeXt txt return $ "\\useURL" - <> brackets (text ref) - <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) + <> brackets (literal ref) + <> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) <> (if isAutolink then empty else brackets empty <> brackets contents) <> "\\from" - <> brackets (text ref) + <> brackets (literal ref) inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do opts <- gets stOptions - let showDim dir = let d = text (show dir) <> "=" + let showDim dir = let d = literal (tshow dir) <> "=" in case dimension dir attr of Just (Pixel a) -> - [d <> text (showInInch opts (Pixel a)) <> "in"] + [d <> literal (showInInch opts (Pixel a)) <> "in"] Just (Percent a) -> - [d <> text (showFl (a / 100)) <> "\\textwidth"] + [d <> literal (showFl (a / 100)) <> "\\textwidth"] Just dim -> - [d <> text (show dim)] + [d <> literal (tshow dim)] Nothing -> [] dimList = showDim Width ++ showDim Height @@ -441,25 +444,25 @@ inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do else brackets $ mconcat (intersperse "," dimList) clas = if null cls then empty - else brackets $ text $ toLabel $ head cls + else brackets $ literal $ toLabel $ head cls -- Use / for path separators on Windows; see #4918 - fixPathSeparators = map $ \c -> case c of - '\\' -> '/' - _ -> c + fixPathSeparators = T.map $ \c -> case c of + '\\' -> '/' + _ -> c src' = fixPathSeparators $ if isURI src then src - else unEscapeString src - return $ braces $ "\\externalfigure" <> brackets (text src') <> dims <> clas + else T.pack $ unEscapeString $ T.unpack src + return $ braces $ "\\externalfigure" <> brackets (literal src') <> dims <> clas inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents let codeBlock x@(CodeBlock _ _) = [x] codeBlock _ = [] let codeBlocks = query codeBlock contents return $ if null codeBlocks - then text "\\footnote{" <> nest 2 (chomp contents') <> char '}' - else text "\\startbuffer " <> nest 2 (chomp contents') <> - text "\\stopbuffer\\footnote{\\getbuffer}" + then literal "\\footnote{" <> nest 2 (chomp contents') <> char '}' + else literal "\\startbuffer " <> nest 2 (chomp contents') <> + literal "\\stopbuffer\\footnote{\\getbuffer}" inlineToConTeXt (Span (_,_,kvs) ils) = do mblang <- fromBCP47 (lookup "lang" kvs) let wrapDir txt = case lookup "dir" kvs of @@ -467,7 +470,7 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do Just "ltr" -> braces $ "\\lefttoright " <> txt _ -> txt wrapLang txt = case mblang of - Just lng -> "\\start\\language[" <> text lng + Just lng -> "\\start\\language[" <> literal lng <> "]" <> txt <> "\\stop " Nothing -> txt (wrapLang . wrapDir) <$> inlineListToConTeXt ils @@ -482,9 +485,9 @@ sectionHeader (ident,classes,kvs) hdrLevel lst = do opts <- gets stOptions contents <- inlineListToConTeXt lst levelText <- sectionLevelToText opts (ident,classes,kvs) hdrLevel - let ident' = if null ident + let ident' = if T.null ident then empty - else "reference=" <> braces (text (toLabel ident)) + else "reference=" <> braces (literal (toLabel ident)) let contents' = if isEmpty contents then empty else "title=" <> braces contents @@ -515,23 +518,23 @@ sectionLevelToText opts (_,classes,_) hdrLevel = do TopLevelSection -> hdrLevel TopLevelDefault -> hdrLevel let (section, chapter) = if "unnumbered" `elem` classes - then (text "subject", text "title") - else (text "section", text "chapter") + then (literal "subject", literal "title") + else (literal "section", literal "chapter") return $ case level' of - -1 -> text "part" + -1 -> literal "part" 0 -> chapter n | n >= 1 -> text (concat (replicate (n - 1) "sub")) <> section _ -> empty -- cannot happen -fromBCP47 :: PandocMonad m => Maybe String -> WM m (Maybe String) +fromBCP47 :: PandocMonad m => Maybe Text -> WM m (Maybe Text) fromBCP47 mbs = fromBCP47' <$> toLang mbs -- Takes a list of the constituents of a BCP 47 language code -- and irons out ConTeXt's exceptions -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes -fromBCP47' :: Maybe Lang -> Maybe String +fromBCP47' :: Maybe Lang -> Maybe Text fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 6c4f92db0..733b29ac7 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,5 +1,6 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Custom Copyright : Copyright (C) 2012-2019 John MacFarlane @@ -17,9 +18,9 @@ import Prelude import Control.Arrow ((***)) import Control.Exception import Control.Monad (when) -import Data.Char (toLower) import Data.List (intersperse) import qualified Data.Map as M +import qualified Data.Text as T import Data.Text (Text, pack) import Data.Typeable import Foreign.Lua (Lua, Pushable) @@ -36,16 +37,16 @@ import Text.Pandoc.Writers.Shared import qualified Foreign.Lua as Lua -attrToMap :: Attr -> M.Map String String +attrToMap :: Attr -> M.Map T.Text T.Text attrToMap (id',classes,keyvals) = M.fromList $ ("id", id') - : ("class", unwords classes) + : ("class", T.unwords classes) : keyvals newtype Stringify a = Stringify a instance Pushable (Stringify Format) where - push (Stringify (Format f)) = Lua.push (map toLower f) + push (Stringify (Format f)) = Lua.push (T.toLower f) instance Pushable (Stringify [Inline]) where push (Stringify ils) = Lua.push =<< inlineListToCustom ils @@ -82,7 +83,7 @@ instance (Pushable a, Pushable b) => Pushable (KeyValue a b) where Lua.push v Lua.rawset (Lua.nthFromTop 3) -data PandocLuaException = PandocLuaException String +data PandocLuaException = PandocLuaException Text deriving (Show, Typeable) instance Exception PandocLuaException @@ -99,7 +100,7 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do -- check for error in lua script (later we'll change the return type -- to handle this more gracefully): when (stat /= Lua.OK) $ - Lua.tostring' (-1) >>= throw . PandocLuaException . UTF8.toString + Lua.tostring' (-1) >>= throw . PandocLuaException . UTF8.toText rendered <- docToCustom opts doc context <- metaToContext opts (fmap (literal . pack) . blockListToCustom) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index b0472e1d1..a72d121e1 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Docbook Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -15,9 +16,7 @@ Conversion of 'Pandoc' documents to Docbook XML. module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where import Prelude import Control.Monad.Reader -import Data.Char (toLower) import Data.Generics (everywhere, mkT) -import Data.List (isPrefixOf, stripPrefix) import Data.Monoid (Any (..)) import Data.Text (Text) import qualified Data.Text as T @@ -46,26 +45,26 @@ type DB = ReaderT DocBookVersion -- | Convert list of authors to a docbook <author> section authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines authorToDocbook opts name' = do - name <- T.unpack . render Nothing <$> inlinesToDocbook opts name' + name <- render Nothing <$> inlinesToDocbook opts name' let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing return $ B.rawInline "docbook" $ render colwidth $ - if ',' `elem` name + if T.any (== ',') name then -- last name first - let (lastname, rest) = break (==',') name + let (lastname, rest) = T.break (==',') name firstname = triml rest in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> - inTagsSimple "surname" (text $ escapeStringForXML lastname) + inTagsSimple "firstname" (literal $ escapeStringForXML firstname) <> + inTagsSimple "surname" (literal $ escapeStringForXML lastname) else -- last name last - let namewords = words name + let namewords = T.words name lengthname = length namewords (firstname, lastname) = case lengthname of 0 -> ("","") 1 -> ("", name) - n -> (unwords (take (n-1) namewords), last namewords) - in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ - inTagsSimple "surname" (text $ escapeStringForXML lastname) + n -> (T.unwords (take (n-1) namewords), last namewords) + in inTagsSimple "firstname" (literal $ escapeStringForXML firstname) $$ + inTagsSimple "surname" (literal $ escapeStringForXML lastname) writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeDocbook4 opts d = @@ -141,13 +140,13 @@ listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m (Doc Text listItemToDocbook opts item = inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item) -imageToDocbook :: WriterOptions -> Attr -> String -> Doc Text +imageToDocbook :: WriterOptions -> Attr -> Text -> Doc Text imageToDocbook _ attr src = selfClosingTag "imagedata" $ - ("fileref", src) : idAndRole attr ++ dims + ("fileref", src) : idAndRole attr <> dims where - dims = go Width "width" ++ go Height "depth" + dims = go Width "width" <> go Height "depth" go dir dstr = case dimension dir attr of - Just a -> [(dstr, show a)] + Just a -> [(dstr, tshow a)] Nothing -> [] -- | Convert a Pandoc block element to Docbook. @@ -166,20 +165,20 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do 0 -> "chapter" n | n >= 1 && n <= 5 -> if version == DocBook5 then "section" - else "sect" ++ show n + else "sect" <> tshow n _ -> "simplesect" idName = if version == DocBook5 then "xml:id" else "id" - idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')] + idAttr = [(idName, writerIdentifierPrefix opts <> id') | not (T.null id')] nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] else [] - attribs = nsAttr ++ idAttr + attribs = nsAttr <> idAttr title' <- inlinesToDocbook opts ils contents <- blocksToDocbook opts bs return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents blockToDocbook opts (Div (ident,_,_) [Para lst]) = - let attribs = [("id", ident) | not (null ident)] in + let attribs = [("id", ident) | not (T.null ident)] in if hasLineBreaks lst then (flush . nowrap . inTags False "literallayout" attribs) <$> inlinesToDocbook opts lst @@ -187,7 +186,7 @@ blockToDocbook opts (Div (ident,_,_) [Para lst]) = blockToDocbook opts (Div (ident,_,_) bs) = do contents <- blocksToDocbook opts (map plainToPara bs) return $ - (if null ident + (if T.null ident then mempty else selfClosingTag "anchor" [("id", ident)]) $$ contents blockToDocbook _ h@Header{} = do @@ -196,7 +195,7 @@ blockToDocbook _ h@Header{} = do return empty blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure -blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do +blockToDocbook opts (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just _)]) = do alt <- inlinesToDocbook opts txt let capt = if null txt then empty @@ -216,16 +215,16 @@ blockToDocbook opts (LineBlock lns) = blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" <$> blocksToDocbook opts blocks blockToDocbook _ (CodeBlock (_,classes,_) str) = return $ - text ("<programlisting" ++ lang ++ ">") <> cr <> - flush (text (escapeStringForXML str) <> cr <> text "</programlisting>") + literal ("<programlisting" <> lang <> ">") <> cr <> + flush (literal (escapeStringForXML str) <> cr <> literal "</programlisting>") where lang = if null langs then "" - else " language=\"" ++ escapeStringForXML (head langs) ++ + else " language=\"" <> escapeStringForXML (head langs) <> "\"" - isLang l = map toLower l `elem` map (map toLower) languages + isLang l = T.toLower l `elem` map T.toLower languages langsFrom s = if isLang s then [s] - else languagesByExtension . map toLower $ s + else languagesByExtension . T.toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = do let attribs = [("spacing", "compact") | isTightList lst] @@ -241,26 +240,26 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do UpperRoman -> [("numeration", "upperroman")] LowerRoman -> [("numeration", "lowerroman")] spacing = [("spacing", "compact") | isTightList (first:rest)] - attribs = numeration ++ spacing + attribs = numeration <> spacing items <- if start == 1 then listItemsToDocbook opts (first:rest) else do first' <- blocksToDocbook opts (map plainToPara first) rest' <- listItemsToDocbook opts rest return $ - inTags True "listitem" [("override",show start)] first' $$ + inTags True "listitem" [("override",tshow start)] first' $$ rest' return $ inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = do let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst blockToDocbook _ b@(RawBlock f str) - | f == "docbook" = return $ text str -- raw XML block + | f == "docbook" = return $ literal str -- raw XML block | f == "html" = do version <- ask if version == DocBook5 then return empty -- No html in Docbook5 - else return $ text str -- allow html for backwards compatibility + else return $ literal str -- allow html for backwards compatibility | otherwise = do report $ BlockNotRendered b return empty @@ -271,9 +270,9 @@ blockToDocbook opts (Table caption aligns widths headers rows) = do else inTagsIndented "title" <$> inlinesToDocbook opts caption let tableType = if isEmpty captionDoc then "informaltable" else "table" - percent w = show (truncate (100*w) :: Integer) ++ "*" + percent w = tshow (truncate (100*w) :: Integer) <> "*" coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec" - ([("colwidth", percent w) | w > 0] ++ + ([("colwidth", percent w) | w > 0] <> [("align", alignmentToString al)])) widths aligns head' <- if all null headers then return empty @@ -281,7 +280,7 @@ blockToDocbook opts (Table caption aligns widths headers rows) = do body' <- (inTagsIndented "tbody" . vcat) <$> mapM (tableRowToDocbook opts) rows return $ inTagsIndented tableType $ captionDoc $$ - inTags True "tgroup" [("cols", show (length headers))] ( + inTags True "tgroup" [("cols", tshow (length headers))] ( coltags $$ head' $$ body') hasLineBreaks :: [Inline] -> Bool @@ -294,7 +293,7 @@ hasLineBreaks = getAny . query isLineBreak . walk removeNote isLineBreak LineBreak = Any True isLineBreak _ = Any False -alignmentToString :: Alignment -> [Char] +alignmentToString :: Alignment -> Text alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" @@ -321,7 +320,7 @@ inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst -- | Convert an inline element to Docbook. inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m (Doc Text) -inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str +inlineToDocbook _ (Str str) = return $ literal $ escapeStringForXML str inlineToDocbook opts (Emph lst) = inTagsSimple "emphasis" <$> inlinesToDocbook opts lst inlineToDocbook opts (Strong lst) = @@ -341,18 +340,18 @@ inlineToDocbook opts (Quoted _ lst) = inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst inlineToDocbook opts (Span (ident,_,_) ils) = - ((if null ident + ((if T.null ident then mempty else selfClosingTag "anchor" [("id", ident)]) <>) <$> inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = - return $ inTagsSimple "literal" $ text (escapeStringForXML str) + return $ inTagsSimple "literal" $ literal (escapeStringForXML str) inlineToDocbook opts (Math t str) | isMathML (writerHTMLMathMethod opts) = do res <- convertMath writeMathML t str case res of Right r -> return $ inTagsSimple tagtype - $ text $ Xml.ppcElement conf + $ literal $ T.pack $ Xml.ppcElement conf $ fixNS $ removeAttr r Left il -> inlineToDocbook opts il @@ -366,19 +365,19 @@ inlineToDocbook opts (Math t str) fixNS' qname = qname{ Xml.qPrefix = Just "mml" } fixNS = everywhere (mkT fixNS') inlineToDocbook _ il@(RawInline f x) - | f == "html" || f == "docbook" = return $ text x + | f == "html" || f == "docbook" = return $ literal x | otherwise = do report $ InlineNotRendered il return empty -inlineToDocbook _ LineBreak = return $ text "\n" +inlineToDocbook _ LineBreak = return $ literal "\n" -- currently ignore, would require the option to add custom -- styles to the document inlineToDocbook _ Space = return space -- because we use \n for LineBreak, we can't do soft breaks: inlineToDocbook _ SoftBreak = return space inlineToDocbook opts (Link attr txt (src, _)) - | Just email <- stripPrefix "mailto:" src = - let emailLink = inTagsSimple "email" $ text $ + | Just email <- T.stripPrefix "mailto:" src = + let emailLink = inTagsSimple "email" $ literal $ escapeStringForXML email in case txt of [Str s] | escapeURI s == email -> return emailLink @@ -387,17 +386,17 @@ inlineToDocbook opts (Link attr txt (src, _)) char '(' <> emailLink <> char ')' | otherwise = do version <- ask - (if "#" `isPrefixOf` src - then inTags False "link" $ ("linkend", writerIdentifierPrefix opts ++ drop 1 src) : idAndRole attr + (if "#" `T.isPrefixOf` src + then inTags False "link" $ ("linkend", writerIdentifierPrefix opts <> T.drop 1 src) : idAndRole attr else if version == DocBook5 then inTags False "link" $ ("xlink:href", src) : idAndRole attr else inTags False "ulink" $ ("url", src) : idAndRole attr ) <$> inlinesToDocbook opts txt inlineToDocbook opts (Image attr _ (src, tit)) = return $ - let titleDoc = if null tit + let titleDoc = if T.null tit then empty else inTagsIndented "objectinfo" $ - inTagsIndented "title" (text $ escapeStringForXML tit) + inTagsIndented "title" (literal $ escapeStringForXML tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ imageToDocbook opts attr src inlineToDocbook opts (Note contents) = @@ -407,12 +406,12 @@ isMathML :: HTMLMathMethod -> Bool isMathML MathML = True isMathML _ = False -idAndRole :: Attr -> [(String, String)] -idAndRole (id',cls,_) = ident ++ role +idAndRole :: Attr -> [(Text, Text)] +idAndRole (id',cls,_) = ident <> role where - ident = if null id' + ident = if T.null id' then [] else [("id", id')] role = if null cls then [] - else [("role", unwords cls)] + else [("role", T.unwords cls)] diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1a8ea0118..3c387d9d9 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -32,6 +32,7 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting @@ -40,7 +41,7 @@ import Text.Pandoc.BCP47 (getLang, renderLang) import Text.Pandoc.Class (PandocMonad, report, toLang) import qualified Text.Pandoc.Class as P import Data.Time -import Text.Pandoc.UTF8 (fromStringLazy) +import Text.Pandoc.UTF8 (fromTextLazy) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Highlighting (highlight) @@ -107,8 +108,8 @@ data WriterEnv = WriterEnv{ envTextProperties :: EnvProps , envListLevel :: Int , envListNumId :: Int , envInDel :: Bool - , envChangesAuthor :: String - , envChangesDate :: String + , envChangesAuthor :: T.Text + , envChangesDate :: T.Text , envPrintWidth :: Integer } @@ -126,8 +127,8 @@ defaultWriterEnv = WriterEnv{ envTextProperties = mempty data WriterState = WriterState{ stFootnotes :: [Element] - , stComments :: [([(String,String)], [Inline])] - , stSectionIds :: Set.Set String + , stComments :: [([(T.Text, T.Text)], [Inline])] + , stSectionIds :: Set.Set T.Text , stExternalLinks :: M.Map String String , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) , stLists :: [ListMarker] @@ -163,7 +164,6 @@ defaultWriterState = WriterState{ type WS m = ReaderT WriterEnv (StateT WriterState m) - renumIdMap :: Int -> [Element] -> M.Map String String renumIdMap _ [] = M.empty renumIdMap n (e:es) @@ -189,10 +189,16 @@ renumId f renumMap e renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) +findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text +findAttrTextBy x = fmap T.pack . findAttrBy x + +lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text +lookupAttrTextBy x = fmap T.pack . lookupAttrBy x + -- | Certain characters are invalid in XML even if escaped. -- See #1992 -stripInvalidChars :: String -> String -stripInvalidChars = filter isValidChar +stripInvalidChars :: T.Text -> T.Text +stripInvalidChars = T.filter isValidChar -- | See XML reference isValidChar :: Char -> Bool @@ -230,11 +236,11 @@ writeDocx opts doc@(Pandoc meta _) = do -- Gets the template size let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz")) - let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= lookupAttrBy ((=="w") . qName) + let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= lookupAttrTextBy ((=="w") . qName) let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar")) - let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="left") . qName) - let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="right") . qName) + let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= lookupAttrTextBy ((=="left") . qName) + let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= lookupAttrTextBy ((=="right") . qName) -- Get the available area (converting the size and the margins to int and -- doing the difference @@ -248,7 +254,7 @@ writeDocx opts doc@(Pandoc meta _) = do mblang <- toLang $ getLang opts meta let addLang :: Element -> Element addLang e = case mblang >>= \l -> - (return . XMLC.toTree . go (renderLang l) + (return . XMLC.toTree . go (T.unpack $ renderLang l) . XMLC.fromElement) e of Just (Elem e') -> e' _ -> e -- return original @@ -289,7 +295,7 @@ writeDocx opts doc@(Pandoc meta _) = do let env = defaultWriterEnv { envRTL = isRTLmeta , envChangesAuthor = fromMaybe "unknown" username - , envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime + , envChangesDate = T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime , envPrintWidth = maybe 420 (`quot` 20) pgContentWidth } @@ -337,9 +343,9 @@ writeDocx opts doc@(Pandoc meta _) = do [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _) = mkOverrideNode ("/word/" ++ imgpath, - fromMaybe "application/octet-stream" mbMimeType) + maybe "application/octet-stream" T.unpack mbMimeType) let mkMediaOverride imgpath = - mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath) + mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -488,10 +494,10 @@ writeDocx opts doc@(Pandoc meta _) = do numbering <- parseXml refArchive distArchive numpath newNumElts <- mkNumbering (stLists st) let pandocAdded e = - case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of + case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of Just numid -> numid >= (990 :: Int) Nothing -> - case findAttrBy ((== "numId") . qName) e >>= safeRead of + case findAttrTextBy ((== "numId") . qName) e >>= safeRead of Just numid -> numid >= (1000 :: Int) Nothing -> False let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering) @@ -513,11 +519,11 @@ writeDocx opts doc@(Pandoc meta _) = do let extraCoreProps = ["subject","lang","category","description"] let extraCorePropsMap = M.fromList $ zip extraCoreProps ["dc:subject","dc:language","cp:category","dc:description"] - let lookupMetaString' :: String -> Meta -> String + let lookupMetaString' :: T.Text -> Meta -> T.Text lookupMetaString' key' meta' = case key' of - "description" -> intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') - _ -> lookupMetaString key' meta' + "description" -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') + key'' -> lookupMetaString key'' meta' let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -525,11 +531,11 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:dcterms","http://purl.org/dc/terms/") ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] - $ mknode "dc:title" [] (stringify $ docTitle meta) - : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) - : [ mknode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) + $ mktnode "dc:title" [] (stringify $ docTitle meta) + : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta)) + : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) | k <- M.keys (unMeta meta), k `elem` extraCoreProps] - ++ mknode "cp:keywords" [] (intercalate ", " keywords) + ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) @@ -537,7 +543,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- docProps/custom.xml let customProperties :: [(String, String)] - customProperties = [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta) + customProperties = [(T.unpack k, T.unpack $ lookupMetaString k meta) | k <- M.keys (unMeta meta) , k `notElem` (["title", "author", "keywords"] ++ extraCoreProps)] let mkCustomProp (k, v) pid = mknode "property" @@ -584,7 +590,7 @@ writeDocx opts doc@(Pandoc meta _) = do let entryFromArchive arch path = maybe (throwError $ PandocSomeError - $ path ++ " missing in reference docx") + $ T.pack $ path ++ " missing in reference docx") return (findEntryByPath path arch `mplus` findEntryByPath path distArchive) docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" @@ -614,25 +620,24 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive - newParaPropToOpenXml :: ParaStyleName -> Element newParaPropToOpenXml (fromStyleName -> s) = - let styleId = filter (not . isSpace) s + let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "paragraph") , ("w:customStyle", "1") - , ("w:styleId", styleId)] - [ mknode "w:name" [("w:val", s)] () + , ("w:styleId", T.unpack styleId)] + [ mknode "w:name" [("w:val", T.unpack s)] () , mknode "w:basedOn" [("w:val","BodyText")] () , mknode "w:qFormat" [] () ] newTextPropToOpenXml :: CharStyleName -> Element newTextPropToOpenXml (fromStyleName -> s) = - let styleId = filter (not . isSpace) s + let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "character") , ("w:customStyle", "1") - , ("w:styleId", styleId)] - [ mknode "w:name" [("w:val", s)] () + , ("w:styleId", T.unpack styleId)] + [ mknode "w:name" [("w:val", T.unpack s)] () , mknode "w:basedOn" [("w:val","BodyTextChar")] () ] @@ -821,8 +826,8 @@ writeOpenXML opts (Pandoc meta blocks) = do abstract <- if null abstract' then return [] else withParaPropM (pStyleM "Abstract") $ blocksToOpenXML opts abstract' - let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs - convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs + let convertSpace (Str x : Space : Str y : xs) = Str (x <> " " <> y) : xs + convertSpace (Str x : Str y : xs) = Str (x <> y) : xs convertSpace xs = xs let blocks' = bottomUp convertSpace blocks doc' <- setFirstPara >> blocksToOpenXML opts blocks' @@ -831,7 +836,7 @@ writeOpenXML opts (Pandoc meta blocks) = do let toComment (kvs, ils) = do annotation <- inlinesToOpenXML opts ils return $ - mknode "w:comment" [('w':':':k,v) | (k,v) <- kvs] + mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs] [ mknode "w:p" [] $ [ mknode "w:pPr" [] [ mknode "w:pStyle" [("w:val", "CommentText")] () ] @@ -858,13 +863,13 @@ pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element pStyleM styleName = do pStyleMap <- gets (smParaStyle . stStyleMaps) let sty' = getStyleIdFromName styleName pStyleMap - return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () + return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] () rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM styleName = do cStyleMap <- gets (smCharStyle . stStyleMaps) let sty' = getStyleIdFromName styleName cStyleMap - return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] () + return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] () getUniqueId :: (PandocMonad m) => WS m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -875,7 +880,7 @@ getUniqueId = do return $ show n -- | Key for specifying user-defined docx styles. -dynamicStyleKey :: String +dynamicStyleKey :: T.Text dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. @@ -886,7 +891,7 @@ blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] blockToOpenXML' _ Null = return [] blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do stylemod <- case lookup dynamicStyleKey kvs of - Just (fromString -> sty) -> do + Just (fromString . T.unpack -> sty) -> do modify $ \s -> s{stDynamicParaProps = Set.insert sty (stDynamicParaProps s)} @@ -904,14 +909,14 @@ blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do else id header <- dirmod $ stylemod $ blocksToOpenXML opts hs contents <- dirmod $ bibmod $ stylemod $ blocksToOpenXML opts bs' - wrapBookmark ident $ header ++ contents + wrapBookmark ident $ header <> contents blockToOpenXML' opts (Header lev (ident,_,_) lst) = do setFirstPara paraProps <- withParaPropM (pStyleM (fromString $ "Heading "++show lev)) $ getParaProps False contents <- inlinesToOpenXML opts lst - if null ident - then return [mknode "w:p" [] (paraProps ++contents)] + if T.null ident + then return [mknode "w:p" [] (paraProps ++ contents)] else do let bookmarkName = ident modify $ \s -> s{ stSectionIds = Set.insert bookmarkName @@ -924,7 +929,7 @@ blockToOpenXML' opts (Plain lst) = do prop <- pStyleM "Compact" if isInTable then withParaProp prop block else block -- title beginning with fig: indicates that the image is a figure -blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do +blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do setFirstPara prop <- pStyleM $ if null alt @@ -1021,7 +1026,7 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do ( mknode "w:tblStyle" [("w:val","Table")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ] () : - [ mknode "w:tblCaption" [("w:val", captionStr)] () + [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths @@ -1122,19 +1127,19 @@ withParaProp d p = withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a withParaPropM = (. flip withParaProp) . (>>=) -formattedString :: PandocMonad m => String -> WS m [Element] +formattedString :: PandocMonad m => T.Text -> WS m [Element] formattedString str = -- properly handle soft hyphens - case splitBy (=='\173') str of + case splitTextBy (=='\173') str of [w] -> formattedString' w ws -> do sh <- formattedRun [mknode "w:softHyphen" [] ()] intercalate sh <$> mapM formattedString' ws -formattedString' :: PandocMonad m => String -> WS m [Element] +formattedString' :: PandocMonad m => T.Text -> WS m [Element] formattedString' str = do inDel <- asks envInDel - formattedRun [ mknode (if inDel then "w:delText" else "w:t") + formattedRun [ mktnode (if inDel then "w:delText" else "w:t") [("xml:space","preserve")] (stripInvalidChars str) ] formattedRun :: PandocMonad m => [Element] -> WS m [Element] @@ -1163,21 +1168,21 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do let ident' = fromMaybe ident (lookup "id" kvs) kvs' = filter (("id" /=) . fst) kvs modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st } - return [ mknode "w:commentRangeStart" [("w:id", ident')] () ] + return [ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ] inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = -- prefer the "id" in kvs, since that is the one produced by the docx -- reader. let ident' = fromMaybe ident (lookup "id" kvs) in - return [ mknode "w:commentRangeEnd" [("w:id", ident')] () + return [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] () , mknode "w:r" [] [ mknode "w:rPr" [] [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] - , mknode "w:commentReference" [("w:id", ident')] () ] + , mknode "w:commentReference" [("w:id", T.unpack ident')] () ] ] inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do stylemod <- case lookup dynamicStyleKey kvs of - Just (fromString -> sty) -> do + Just (fromString . T.unpack -> sty) -> do modify $ \s -> s{stDynamicTextProps = Set.insert sty (stDynamicTextProps s)} @@ -1208,8 +1213,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do x <- f return [ mknode "w:ins" [("w:id", show insId), - ("w:author", author), - ("w:date", date)] x ] + ("w:author", T.unpack author), + ("w:date", T.unpack date)] x ] else return id delmod <- if "deletion" `elem` classes then do @@ -1220,8 +1225,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do x <- f return [mknode "w:del" [("w:id", show delId), - ("w:author", author), - ("w:date", date)] x] + ("w:author", T.unpack author), + ("w:date", T.unpack date)] x] else return id contents <- insmod $ delmod $ dirmod $ stylemod $ pmod $ inlinesToOpenXML opts ils @@ -1264,7 +1269,7 @@ inlineToOpenXML' opts (Code attrs str) = do let alltoktypes = [KeywordTok ..] tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (fromString $ show tt)) alltoktypes let unhighlighted = intercalate [br] `fmap` - mapM formattedString (lines str) + mapM formattedString (T.lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] @@ -1278,7 +1283,7 @@ inlineToOpenXML' opts (Code attrs str) = do formatOpenXML attrs str of Right h -> return h Left msg -> do - unless (null msg) $ report $ CouldNotHighlight msg + unless (T.null msg) $ report $ CouldNotHighlight msg unhighlighted inlineToOpenXML' opts (Note bs) = do notes <- gets stFootnotes @@ -1287,7 +1292,7 @@ inlineToOpenXML' opts (Note bs) = do let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker + let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs @@ -1303,27 +1308,27 @@ inlineToOpenXML' opts (Note bs) = do [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: -inlineToOpenXML' opts (Link _ txt ('#':xs,_)) = do +inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return - [ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ] + [ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ] -- external link: inlineToOpenXML' opts (Link _ txt (src,_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks - id' <- case M.lookup src extlinks of + id' <- case M.lookup (T.unpack src) extlinks of Just i -> return i Nothing -> do i <- ("rId"++) `fmap` getUniqueId modify $ \st -> st{ stExternalLinks = - M.insert src i extlinks } + M.insert (T.unpack src) i extlinks } return i return [ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do pageWidth <- asks envPrintWidth imgs <- gets stImages let - stImage = M.lookup src imgs + stImage = M.lookup (T.unpack src) imgs generateImgElt (ident, _, _, img) = let (xpt,ypt) = desiredSizeInPoints opts attr @@ -1336,7 +1341,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do ,("noChangeAspect","1")] () nvPicPr = mknode "pic:nvPicPr" [] [ mknode "pic:cNvPr" - [("descr",src),("id","0"),("name","Picture")] () + [("descr",T.unpack src),("id","0"),("name","Picture")] () , cNvPicPr ] blipFill = mknode "pic:blipFill" [] [ mknode "a:blip" [("r:embed",ident)] () @@ -1371,8 +1376,8 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () , mknode "wp:docPr" - [ ("descr", stringify alt) - , ("title", title) + [ ("descr", T.unpack $ stringify alt) + , ("title", T.unpack title) , ("id","1") , ("name","Picture") ] () @@ -1389,7 +1394,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do let imgext = case mt >>= extensionFromMimeType of - Just x -> '.':x + Just x -> "." <> x Nothing -> case imageType img of Just Png -> ".png" Just Jpeg -> ".jpeg" @@ -1399,21 +1404,21 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just Svg -> ".svg" Just Emf -> ".emf" Nothing -> "" - imgpath = "media/" ++ ident ++ imgext + imgpath = "media/" <> ident <> T.unpack imgext mbMimeType = mt <|> getMimeType imgpath imgData = (ident, imgpath, mbMimeType, img) - if null imgext + if T.null imgext then -- without an extension there is no rule for content type inlinesToOpenXML opts alt -- return alt to avoid corrupted docx else do -- insert mime type to use in constructing [Content_Types].xml - modify $ \st -> st { stImages = M.insert src imgData $ stImages st } + modify $ \st -> st { stImages = M.insert (T.unpack src) imgData $ stImages st } return [generateImgElt imgData] ) `catchError` ( \e -> do - report $ CouldNotFetchResource src (show e) + report $ CouldNotFetchResource src $ T.pack (show e) -- emit alt text inlinesToOpenXML opts alt ) @@ -1460,22 +1465,22 @@ withDirection x = do , envTextProperties = EnvProps textStyle textProps' } -wrapBookmark :: (PandocMonad m) => String -> [Element] -> WS m [Element] -wrapBookmark [] contents = return contents +wrapBookmark :: (PandocMonad m) => T.Text -> [Element] -> WS m [Element] +wrapBookmark "" contents = return contents wrapBookmark ident contents = do id' <- getUniqueId let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') - ,("w:name", toBookmarkName ident)] () + ,("w:name", T.unpack $ toBookmarkName ident)] () bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return $ bookmarkStart : contents ++ [bookmarkEnd] -- Word imposes a 40 character limit on bookmark names and requires -- that they begin with a letter. So we just use a hash of the -- identifier when otherwise we'd have an illegal bookmark name. -toBookmarkName :: String -> String -toBookmarkName s = - case s of - (c:_) | isLetter c - , length s <= 40 -> s - _ -> 'X' : drop 1 (showDigest (sha1 (fromStringLazy s))) +toBookmarkName :: T.Text -> T.Text +toBookmarkName s + | Just (c, _) <- T.uncons s + , isLetter c + , T.length s <= 40 = s + | otherwise = T.pack $ 'X' : drop 1 (showDigest (sha1 (fromTextLazy $ TL.fromStrict s))) diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs index 4f0b0c3f9..18956ee52 100644 --- a/src/Text/Pandoc/Writers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs @@ -27,6 +27,7 @@ module Text.Pandoc.Writers.Docx.StyleMap ( StyleMaps(..) import Text.Pandoc.Readers.Docx.Parse.Styles import Codec.Archive.Zip import qualified Data.Map as M +import qualified Data.Text as T import Data.String import Data.Char (isSpace) import Prelude @@ -38,7 +39,7 @@ type CharStyleNameMap = M.Map CharStyleName CharStyle getStyleIdFromName :: (Ord sn, FromStyleName sn, IsString (StyleId sty), HasStyleId sty) => sn -> M.Map sn sty -> StyleId sty getStyleIdFromName s = maybe (fallback s) getStyleId . M.lookup s - where fallback = fromString . filter (not . isSpace) . fromStyleName + where fallback = fromString . T.unpack . T.filter (not . isSpace) . fromStyleName hasStyleName :: (Ord sn, HasStyleId sty) => sn -> M.Map sn sty -> Bool diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 8111da9ba..541939f3b 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.DokuWiki Copyright : Copyright (C) 2008-2019 John MacFarlane @@ -27,15 +28,16 @@ import Control.Monad (zipWithM) import Control.Monad.Reader (ReaderT, asks, local, runReaderT) import Control.Monad.State.Strict (StateT, evalStateT) import Data.Default (Default (..)) -import Data.List (intercalate, intersect, isPrefixOf, transpose) -import Data.Text (Text, pack) +import Data.List (intersect, transpose) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, isURI, linesToPara, - removeFormatting, substitute, trimr) + removeFormatting, trimr, tshow) import Text.Pandoc.Templates (renderTemplate) import Text.DocLayout (render, literal) import Text.Pandoc.Writers.Shared (defField, metaToContext) @@ -44,7 +46,7 @@ data WriterState = WriterState { } data WriterEnvironment = WriterEnvironment { - stIndent :: String -- Indent after the marker at the beginning of list items + stIndent :: Text -- Indent after the marker at the beginning of list items , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list , stBackSlashLB :: Bool -- True if we should produce formatted strings with newlines (as in a table cell) } @@ -72,57 +74,58 @@ pandocToDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> DokuWiki m Text pandocToDokuWiki opts (Pandoc meta blocks) = do metadata <- metaToContext opts - (fmap (literal . pack . trimr) . blockListToDokuWiki opts) - (fmap (literal . pack . trimr) . inlineListToDokuWiki opts) + (fmap (literal . trimr) . blockListToDokuWiki opts) + (fmap (literal . trimr) . inlineListToDokuWiki opts) meta body <- blockListToDokuWiki opts blocks - let main = pack body - let context = defField "body" main + let context = defField "body" body $ defField "toc" (writerTableOfContents opts) metadata return $ case writerTemplate opts of - Nothing -> main + Nothing -> body Just tpl -> render Nothing $ renderTemplate tpl context -- | Escape special characters for DokuWiki. -escapeString :: String -> String -escapeString = substitute "__" "%%__%%" . - substitute "**" "%%**%%" . - substitute "//" "%%//%%" +escapeString :: Text -> Text +escapeString = T.replace "__" "%%__%%" . + T.replace "**" "%%**%%" . + T.replace "//" "%%//%%" -- | Convert Pandoc block element to DokuWiki. blockToDokuWiki :: PandocMonad m => WriterOptions -- ^ Options -> Block -- ^ Block element - -> DokuWiki m String + -> DokuWiki m Text blockToDokuWiki _ Null = return "" blockToDokuWiki opts (Div _attrs bs) = do contents <- blockListToDokuWiki opts bs - return $ contents ++ "\n" + return $ contents <> "\n" blockToDokuWiki opts (Plain inlines) = inlineListToDokuWiki opts inlines -- title beginning with fig: indicates that the image is a figure -- dokuwiki doesn't support captions - so combine together alt and caption into alt -blockToDokuWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return "" - else (" " ++) `fmap` inlineListToDokuWiki opts txt - let opt = if null txt - then "" - else "|" ++ if null tit then capt else tit ++ capt - return $ "{{" ++ src ++ imageDims opts attr ++ opt ++ "}}\n" +blockToDokuWiki opts (Para [Image attr txt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = do + capt <- if null txt + then return "" + else (" " <>) `fmap` inlineListToDokuWiki opts txt + let opt = if null txt + then "" + else "|" <> if T.null tit then capt else tit <> capt + return $ "{{" <> src <> imageDims opts attr <> opt <> "}}\n" blockToDokuWiki opts (Para inlines) = do indent <- asks stIndent useTags <- asks stUseTags contents <- inlineListToDokuWiki opts inlines return $ if useTags - then "<HTML><p></HTML>" ++ contents ++ "<HTML></p></HTML>" - else contents ++ if null indent then "\n" else "" + then "<HTML><p></HTML>" <> contents <> "<HTML></p></HTML>" + else contents <> if T.null indent then "\n" else "" blockToDokuWiki opts (LineBlock lns) = blockToDokuWiki opts $ linesToPara lns @@ -131,7 +134,7 @@ blockToDokuWiki _ b@(RawBlock f str) | f == Format "dokuwiki" = return str -- See https://www.dokuwiki.org/wiki:syntax -- use uppercase HTML tag for block-level content: - | f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>" + | f == Format "html" = return $ "<HTML>\n" <> str <> "\n</HTML>" | otherwise = "" <$ report (BlockNotRendered b) @@ -141,8 +144,8 @@ blockToDokuWiki opts (Header level _ inlines) = do -- emphasis, links etc. not allowed in headers, apparently, -- so we remove formatting: contents <- inlineListToDokuWiki opts $ removeFormatting inlines - let eqs = replicate ( 7 - level ) '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" + let eqs = T.replicate ( 7 - level ) "=" + return $ eqs <> " " <> contents <> " " <> eqs <> "\n" blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", @@ -154,43 +157,43 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", "visualfoxpro", "winbatch", "xml", "xpp", "z80"] - return $ "<code" ++ + return $ "<code" <> (case at of [] -> ">\n" - (x:_) -> " " ++ x ++ ">\n") ++ str ++ "\n</code>" + (x:_) -> " " <> x <> ">\n") <> str <> "\n</code>" blockToDokuWiki opts (BlockQuote blocks) = do contents <- blockListToDokuWiki opts blocks if isSimpleBlockQuote blocks - then return $ unlines $ map ("> " ++) $ lines contents - else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>" + then return $ T.unlines $ map ("> " <>) $ T.lines contents + else return $ "<HTML><blockquote>\n" <> contents <> "</blockquote></HTML>" blockToDokuWiki opts (Table capt aligns _ headers rows) = do captionDoc <- if null capt then return "" else do c <- inlineListToDokuWiki opts capt - return $ "" ++ c ++ "\n" + return $ "" <> c <> "\n" headers' <- if all null headers then return [] else zipWithM (tableItemToDokuWiki opts) aligns headers rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows - let widths = map (maximum . map length) $ transpose (headers':rows') + let widths = map (maximum . map T.length) $ transpose (headers':rows') let padTo (width, al) s = - case width - length s of + case width - T.length s of x | x > 0 -> if al == AlignLeft || al == AlignDefault - then s ++ replicate x ' ' + then s <> T.replicate x " " else if al == AlignRight - then replicate x ' ' ++ s - else replicate (x `div` 2) ' ' ++ - s ++ replicate (x - x `div` 2) ' ' + then T.replicate x " " <> s + else T.replicate (x `div` 2) " " <> + s <> T.replicate (x - x `div` 2) " " | otherwise -> s - let renderRow sep cells = sep ++ - intercalate sep (zipWith padTo (zip widths aligns) cells) ++ sep - return $ captionDoc ++ - (if null headers' then "" else renderRow "^" headers' ++ "\n") ++ - unlines (map (renderRow "|") rows') + let renderRow sep cells = sep <> + T.intercalate sep (zipWith padTo (zip widths aligns) cells) <> sep + return $ captionDoc <> + (if null headers' then "" else renderRow "^" headers' <> "\n") <> + T.unlines (map (renderRow "|") rows') blockToDokuWiki opts x@(BulletList items) = do oldUseTags <- asks stUseTags @@ -201,12 +204,12 @@ blockToDokuWiki opts x@(BulletList items) = do then do contents <- local (\s -> s { stUseTags = True }) (mapM (listItemToDokuWiki opts) items) - return $ "<HTML><ul></HTML>\n" ++ vcat contents ++ "<HTML></ul></HTML>\n" + return $ "<HTML><ul></HTML>\n" <> vcat contents <> "<HTML></ul></HTML>\n" else do - contents <- local (\s -> s { stIndent = stIndent s ++ " " + contents <- local (\s -> s { stIndent = stIndent s <> " " , stBackSlashLB = backSlash}) (mapM (listItemToDokuWiki opts) items) - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" blockToDokuWiki opts x@(OrderedList attribs items) = do oldUseTags <- asks stUseTags @@ -217,12 +220,12 @@ blockToDokuWiki opts x@(OrderedList attribs items) = do then do contents <- local (\s -> s { stUseTags = True }) (mapM (orderedListItemToDokuWiki opts) items) - return $ "<HTML><ol" ++ listAttribsToString attribs ++ "></HTML>\n" ++ vcat contents ++ "<HTML></ol></HTML>\n" + return $ "<HTML><ol" <> listAttribsToString attribs <> "></HTML>\n" <> vcat contents <> "<HTML></ol></HTML>\n" else do - contents <- local (\s -> s { stIndent = stIndent s ++ " " + contents <- local (\s -> s { stIndent = stIndent s <> " " , stBackSlashLB = backSlash}) (mapM (orderedListItemToDokuWiki opts) items) - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" -- TODO Need to decide how to make definition lists work on dokuwiki - I don't think there -- is a specific representation of them. @@ -236,76 +239,76 @@ blockToDokuWiki opts x@(DefinitionList items) = do then do contents <- local (\s -> s { stUseTags = True }) (mapM (definitionListItemToDokuWiki opts) items) - return $ "<HTML><dl></HTML>\n" ++ vcat contents ++ "<HTML></dl></HTML>\n" + return $ "<HTML><dl></HTML>\n" <> vcat contents <> "<HTML></dl></HTML>\n" else do - contents <- local (\s -> s { stIndent = stIndent s ++ " " + contents <- local (\s -> s { stIndent = stIndent s <> " " , stBackSlashLB = backSlash}) (mapM (definitionListItemToDokuWiki opts) items) - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" -- Auxiliary functions for lists: -- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String +listAttribsToString :: ListAttributes -> Text listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle + let numstyle' = camelCaseToHyphenated $ tshow numstyle in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ + then " start=\"" <> tshow startnum <> "\"" + else "") <> (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + then " style=\"list-style-type: " <> numstyle' <> ";\"" else "") -- | Convert bullet list item (list of blocks) to DokuWiki. listItemToDokuWiki :: PandocMonad m - => WriterOptions -> [Block] -> DokuWiki m String + => WriterOptions -> [Block] -> DokuWiki m Text listItemToDokuWiki opts items = do useTags <- asks stUseTags if useTags then do contents <- blockListToDokuWiki opts items - return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" + return $ "<HTML><li></HTML>" <> contents <> "<HTML></li></HTML>" else do bs <- mapM (blockToDokuWiki opts) items let contents = case items of - [_, CodeBlock _ _] -> concat bs + [_, CodeBlock _ _] -> T.concat bs _ -> vcat bs indent <- asks stIndent backSlash <- asks stBackSlashLB - let indent' = if backSlash then drop 2 indent else indent - return $ indent' ++ "* " ++ contents + let indent' = if backSlash then T.drop 2 indent else indent + return $ indent' <> "* " <> contents -- | Convert ordered list item (list of blocks) to DokuWiki. -- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki -orderedListItemToDokuWiki :: PandocMonad m => WriterOptions -> [Block] -> DokuWiki m String +orderedListItemToDokuWiki :: PandocMonad m => WriterOptions -> [Block] -> DokuWiki m Text orderedListItemToDokuWiki opts items = do contents <- blockListToDokuWiki opts items useTags <- asks stUseTags if useTags - then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" + then return $ "<HTML><li></HTML>" <> contents <> "<HTML></li></HTML>" else do indent <- asks stIndent backSlash <- asks stBackSlashLB - let indent' = if backSlash then drop 2 indent else indent - return $ indent' ++ "- " ++ contents + let indent' = if backSlash then T.drop 2 indent else indent + return $ indent' <> "- " <> contents -- | Convert definition list item (label, list of blocks) to DokuWiki. definitionListItemToDokuWiki :: PandocMonad m => WriterOptions -> ([Inline],[[Block]]) - -> DokuWiki m String + -> DokuWiki m Text definitionListItemToDokuWiki opts (label, items) = do labelText <- inlineListToDokuWiki opts label contents <- mapM (blockListToDokuWiki opts) items useTags <- asks stUseTags if useTags - then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++ - intercalate "\n" (map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents) + then return $ "<HTML><dt></HTML>" <> labelText <> "<HTML></dt></HTML>\n" <> + T.intercalate "\n" (map (\d -> "<HTML><dd></HTML>" <> d <> "<HTML></dd></HTML>") contents) else do indent <- asks stIndent backSlash <- asks stBackSlashLB - let indent' = if backSlash then drop 2 indent else indent - return $ indent' ++ "* **" ++ labelText ++ "** " ++ concat contents + let indent' = if backSlash then T.drop 2 indent else indent + return $ indent' <> "* **" <> labelText <> "** " <> T.concat contents -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. isSimpleList :: Block -> Bool @@ -333,17 +336,17 @@ isSimpleBlockQuote :: [Block] -> Bool isSimpleBlockQuote bs = all isPlainOrPara bs -- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat = intercalate "\n" +vcat :: [Text] -> Text +vcat = T.intercalate "\n" -- | For each string in the input list, convert all newlines to -- dokuwiki escaped newlines. Then concat the list using double linebreaks. -backSlashLineBreaks :: [String] -> String -backSlashLineBreaks ls = vcatBackSlash $ map escape ls +backSlashLineBreaks :: [Text] -> Text +backSlashLineBreaks ls = vcatBackSlash $ map (T.pack . escape . T.unpack) ls where - vcatBackSlash = intercalate "\\\\ \\\\ " -- simulate paragraphs. - escape ['\n'] = "" -- remove trailing newlines - escape ('\n':cs) = "\\\\ " ++ escape cs + vcatBackSlash = T.intercalate "\\\\ \\\\ " -- simulate paragraphs. + escape ['\n'] = "" -- remove trailing newlines + escape ('\n':cs) = "\\\\ " <> escape cs escape (c:cs) = c : escape cs escape [] = [] @@ -353,11 +356,11 @@ tableItemToDokuWiki :: PandocMonad m => WriterOptions -> Alignment -> [Block] - -> DokuWiki m String + -> DokuWiki m Text tableItemToDokuWiki opts align' item = do let mkcell x = (if align' == AlignRight || align' == AlignCenter then " " - else "") ++ x ++ + else "") <> x <> (if align' == AlignLeft || align' == AlignCenter then " " else "") @@ -369,7 +372,7 @@ tableItemToDokuWiki opts align' item = do blockListToDokuWiki :: PandocMonad m => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> DokuWiki m String + -> DokuWiki m Text blockListToDokuWiki opts blocks = do backSlash <- asks stBackSlashLB let blocks' = consolidateRawBlocks blocks @@ -380,51 +383,51 @@ blockListToDokuWiki opts blocks = do consolidateRawBlocks :: [Block] -> [Block] consolidateRawBlocks [] = [] consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs) - | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs) + | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 <> "\n" <> b2) : xs) consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs -- | Convert list of Pandoc inline elements to DokuWiki. inlineListToDokuWiki :: PandocMonad m - => WriterOptions -> [Inline] -> DokuWiki m String + => WriterOptions -> [Inline] -> DokuWiki m Text inlineListToDokuWiki opts lst = - concat <$> mapM (inlineToDokuWiki opts) lst + T.concat <$> mapM (inlineToDokuWiki opts) lst -- | Convert Pandoc inline element to DokuWiki. inlineToDokuWiki :: PandocMonad m - => WriterOptions -> Inline -> DokuWiki m String + => WriterOptions -> Inline -> DokuWiki m Text inlineToDokuWiki opts (Span _attrs ils) = inlineListToDokuWiki opts ils inlineToDokuWiki opts (Emph lst) = do contents <- inlineListToDokuWiki opts lst - return $ "//" ++ contents ++ "//" + return $ "//" <> contents <> "//" inlineToDokuWiki opts (Strong lst) = do contents <- inlineListToDokuWiki opts lst - return $ "**" ++ contents ++ "**" + return $ "**" <> contents <> "**" inlineToDokuWiki opts (Strikeout lst) = do contents <- inlineListToDokuWiki opts lst - return $ "<del>" ++ contents ++ "</del>" + return $ "<del>" <> contents <> "</del>" inlineToDokuWiki opts (Superscript lst) = do contents <- inlineListToDokuWiki opts lst - return $ "<sup>" ++ contents ++ "</sup>" + return $ "<sup>" <> contents <> "</sup>" inlineToDokuWiki opts (Subscript lst) = do contents <- inlineListToDokuWiki opts lst - return $ "<sub>" ++ contents ++ "</sub>" + return $ "<sub>" <> contents <> "</sub>" inlineToDokuWiki opts (SmallCaps lst) = inlineListToDokuWiki opts lst inlineToDokuWiki opts (Quoted SingleQuote lst) = do contents <- inlineListToDokuWiki opts lst - return $ "\8216" ++ contents ++ "\8217" + return $ "\8216" <> contents <> "\8217" inlineToDokuWiki opts (Quoted DoubleQuote lst) = do contents <- inlineListToDokuWiki opts lst - return $ "\8220" ++ contents ++ "\8221" + return $ "\8220" <> contents <> "\8221" inlineToDokuWiki opts (Cite _ lst) = inlineListToDokuWiki opts lst @@ -438,11 +441,11 @@ inlineToDokuWiki _ (Code _ str) = -- characters. -- It does mean that if pandoc could ever read dokuwiki, and so round-trip the format, -- any formatting inside inlined code blocks would be lost, or presented incorrectly. - return $ "''%%" ++ str ++ "%%''" + return $ "''%%" <> str <> "%%''" inlineToDokuWiki _ (Str str) = return $ escapeString str -inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim +inlineToDokuWiki _ (Math mathType str) = return $ delim <> str <> delim -- note: str should NOT be escaped where delim = case mathType of DisplayMath -> "$$" @@ -450,7 +453,7 @@ inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim inlineToDokuWiki _ il@(RawInline f str) | f == Format "dokuwiki" = return str - | f == Format "html" = return $ "<html>" ++ str ++ "</html>" + | f == Format "html" = return $ "<html>" <> str <> "</html>" | otherwise = "" <$ report (InlineNotRendered il) inlineToDokuWiki _ LineBreak = do @@ -470,34 +473,34 @@ inlineToDokuWiki _ Space = return " " inlineToDokuWiki opts (Link _ txt (src, _)) = do label <- inlineListToDokuWiki opts txt case txt of - [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">" + [Str s] | "mailto:" `T.isPrefixOf` src -> return $ "<" <> s <> ">" | escapeURI s == src -> return src _ -> if isURI src - then return $ "[[" ++ src ++ "|" ++ label ++ "]]" - else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" - where src' = case src of - '/':xs -> xs -- with leading / it's a - _ -> src -- link to a help page + then return $ "[[" <> src <> "|" <> label <> "]]" + else return $ "[[" <> src' <> "|" <> label <> "]]" + where src' = case T.uncons src of + Just ('/',xs) -> xs -- with leading / it's a + _ -> src -- link to a help page inlineToDokuWiki opts (Image attr alt (source, tit)) = do alt' <- inlineListToDokuWiki opts alt let txt = case (tit, alt) of ("", []) -> "" - ("", _ ) -> "|" ++ alt' - (_ , _ ) -> "|" ++ tit - return $ "{{" ++ source ++ imageDims opts attr ++ txt ++ "}}" + ("", _ ) -> "|" <> alt' + (_ , _ ) -> "|" <> tit + return $ "{{" <> source <> imageDims opts attr <> txt <> "}}" inlineToDokuWiki opts (Note contents) = do contents' <- blockListToDokuWiki opts contents - return $ "((" ++ contents' ++ "))" + return $ "((" <> contents' <> "))" -- note - may not work for notes with multiple blocks -imageDims :: WriterOptions -> Attr -> String +imageDims :: WriterOptions -> Attr -> Text imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) where toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing checkPct maybeDim = maybeDim - go (Just w) Nothing = "?" ++ w - go (Just w) (Just h) = "?" ++ w ++ "x" ++ h - go Nothing (Just h) = "?0x" ++ h + go (Just w) Nothing = "?" <> w + go (Just w) (Just h) = "?" <> w <> "x" <> h + go Nothing (Just h) = "?0x" <> h go Nothing Nothing = "" diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 37c78bba8..4a1c27ce6 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -49,7 +49,7 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), ObfuscationMethod (NoObfuscation), WrapOption (..), WriterOptions (..)) import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags', - safeRead, stringify, trim, uniqueIdent) + safeRead, stringify, trim, uniqueIdent, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.UUID (getUUID) import Text.Pandoc.Walk (query, walk, walkM) @@ -176,10 +176,10 @@ getEPUBMetadata opts meta = do let localeLang = case mLang of Just lang -> - map (\c -> if c == '_' then '-' else c) $ - takeWhile (/='.') lang + TS.map (\c -> if c == '_' then '-' else c) $ + TS.takeWhile (/='.') lang Nothing -> "en-US" - return m{ epubLanguage = localeLang } + return m{ epubLanguage = TS.unpack localeLang } else return m let fixDate m = if null (epubDate m) @@ -194,7 +194,7 @@ getEPUBMetadata opts meta = do then return m else do let authors' = map stringify $ docAuthors meta - let toAuthor name = Creator{ creatorText = name + let toAuthor name = Creator{ creatorText = TS.unpack name , creatorRole = Just "aut" , creatorFileAs = Nothing } return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } @@ -253,18 +253,18 @@ addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md = addMetadataFromXML _ md = md metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = s -metaValueToString (MetaInlines ils) = stringify ils -metaValueToString (MetaBlocks bs) = stringify bs +metaValueToString (MetaString s) = TS.unpack s +metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils +metaValueToString (MetaBlocks bs) = TS.unpack $ stringify bs metaValueToString (MetaBool True) = "true" metaValueToString (MetaBool False) = "false" metaValueToString _ = "" -metaValueToPaths:: MetaValue -> [FilePath] +metaValueToPaths :: MetaValue -> [FilePath] metaValueToPaths (MetaList xs) = map metaValueToString xs metaValueToPaths x = [metaValueToString x] -getList :: String -> Meta -> (MetaValue -> a) -> [a] +getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a] getList s meta handleMetaValue = case lookupMeta s meta of Just (MetaList xs) -> map handleMetaValue xs @@ -288,7 +288,7 @@ getTitle meta = getList "title" meta handleMetaValue , titleType = metaValueToString <$> M.lookup "type" m } handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing -getCreator :: String -> Meta -> [Creator] +getCreator :: TS.Text -> Meta -> [Creator] getCreator s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m @@ -296,7 +296,7 @@ getCreator s meta = getList s meta handleMetaValue , creatorRole = metaValueToString <$> M.lookup "role" m } handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing -getDate :: String -> Meta -> [Date] +getDate :: TS.Text -> Meta -> [Date] getDate s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Date{ dateText = fromMaybe "" $ @@ -305,7 +305,7 @@ getDate s meta = getList s meta handleMetaValue handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } -simpleList :: String -> Meta -> [String] +simpleList :: TS.Text -> Meta -> [String] simpleList s meta = case lookupMeta s meta of Just (MetaList xs) -> map metaValueToString xs @@ -366,11 +366,11 @@ metadataFromMeta opts meta = EPUBMetadata{ _ -> Nothing ibooksFields = case lookupMeta "ibooks" meta of Just (MetaMap mp) - -> M.toList $ M.map metaValueToString mp + -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp _ -> [] calibreFields = case lookupMeta "calibre" meta of Just (MetaMap mp) - -> M.toList $ M.map metaValueToString mp + -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp _ -> [] -- | Produce an EPUB2 file from a Pandoc document. @@ -396,9 +396,9 @@ writeEPUB :: PandocMonad m writeEPUB epubVersion opts doc = do let epubSubdir = writerEpubSubdirectory opts -- sanity check on epubSubdir - unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ throwError $ PandocEpubSubdirectoryError epubSubdir - let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = epubSubdir } + let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir } evalStateT (pandocToEPUB epubVersion opts doc) initState pandocToEPUB :: PandocMonad m @@ -422,7 +422,7 @@ pandocToEPUB version opts doc = do [] -> case epubTitle metadata of [] -> "UNTITLED" (x:_) -> titleText x - x -> stringify x + x -> TS.unpack $ stringify x -- stylesheet stylesheets <- case epubStylesheets metadata of @@ -468,13 +468,13 @@ pandocToEPUB version opts doc = do case imageSize opts' (B.toStrict imgContent) of Right sz -> return $ sizeInPixels sz Left err' -> (0, 0) <$ report - (CouldNotDetermineImageSize img err') + (CouldNotDetermineImageSize (TS.pack img) err') cpContent <- lift $ writeHtml opts'{ writerVariables = Context (M.fromList [ ("coverpage", toVal' "true"), - ("pagetitle", toVal' $ - escapeStringForXML plainTitle), + ("pagetitle", toVal $ + escapeStringForXML $ TS.pack plainTitle), ("cover-image", toVal' coverImage), ("cover-image-width", toVal' $ show coverImageWidth), @@ -494,8 +494,8 @@ pandocToEPUB version opts doc = do Context (M.fromList [ ("titlepage", toVal' "true"), ("body-type", toVal' "frontmatter"), - ("pagetitle", toVal' $ - escapeStringForXML plainTitle)]) + ("pagetitle", toVal $ + escapeStringForXML $ TS.pack plainTitle)]) <> cssvars True <> vars } (Pandoc meta []) tpEntry <- mkEntry "text/title_page.xhtml" tpContent @@ -504,7 +504,7 @@ pandocToEPUB version opts doc = do let matchingGlob f = do xs <- lift $ P.glob f when (null xs) $ - report $ CouldNotFetchResource f "glob did not match any font files" + report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files" return xs let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< lift (P.readFileLazy f) @@ -551,16 +551,16 @@ pandocToEPUB version opts doc = do let chapters' = secsToChapters secs - let extractLinkURL' :: Int -> Inline -> [(String, String)] + let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)] extractLinkURL' num (Span (ident, _, _) _) - | not (null ident) = [(ident, showChapter num ++ ('#':ident))] + | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL' _ _ = [] - let extractLinkURL :: Int -> Block -> [(String, String)] + let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)] extractLinkURL num (Div (ident, _, _) _) - | not (null ident) = [(ident, showChapter num ++ ('#':ident))] + | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL num (Header _ (ident, _, _) _) - | not (null ident) = [(ident, showChapter num ++ ('#':ident))] + | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL num b = query (extractLinkURL' num) b let reftable = concat $ zipWith (\(Chapter bs) num -> @@ -568,10 +568,10 @@ pandocToEPUB version opts doc = do chapters' [1..] let fixInternalReferences :: Inline -> Inline - fixInternalReferences (Link attr lab ('#':xs, tit)) = - case lookup xs reftable of + fixInternalReferences (Link attr lab (src, tit)) + | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of Just ys -> Link attr lab (ys, tit) - Nothing -> Link attr lab ('#':xs, tit) + Nothing -> Link attr lab (src, tit) fixInternalReferences x = x -- internal reference IDs change when we chunk the file, @@ -645,14 +645,14 @@ pandocToEPUB version opts doc = do ("href", makeRelative epubSubdir $ eRelativePath ent), ("media-type", - fromMaybe "application/octet-stream" + maybe "application/octet-stream" TS.unpack $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), ("href", makeRelative epubSubdir $ eRelativePath ent), - ("media-type", fromMaybe "" $ + ("media-type", maybe "" TS.unpack $ getMimeType $ eRelativePath ent)] $ () let tocTitle = fromMaybe plainTitle $ @@ -724,7 +724,7 @@ pandocToEPUB version opts doc = do let tocLevel = writerTOCDepth opts let navPointNode :: PandocMonad m - => (Int -> [Inline] -> String -> [Element] -> Element) + => (Int -> [Inline] -> TS.Text -> [Element] -> Element) -> Block -> StateT Int m [Element] navPointNode formatter (Div (ident,_,_) (Header lvl (_,_,kvs) ils : children)) = do @@ -734,29 +734,29 @@ pandocToEPUB version opts doc = do n <- get modify (+1) let num = fromMaybe "" $ lookup "number" kvs - let tit = if writerNumberSections opts && not (null num) + let tit = if writerNumberSections opts && not (TS.null num) then Span ("", ["section-header-number"], []) [Str num] : Space : ils else ils src <- case lookup ident reftable of Just x -> return x Nothing -> throwError $ PandocSomeError $ - ident ++ " not found in reftable" + ident <> " not found in reftable" subs <- concat <$> mapM (navPointNode formatter) children return [formatter n tit src subs] navPointNode formatter (Div _ bs) = concat <$> mapM (navPointNode formatter) bs navPointNode _ _ = return [] - let navMapFormatter :: Int -> [Inline] -> String -> [Element] -> Element + let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" $ stringify tit - , unode "content" ! [("src", "text/" ++ src)] $ () + [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit + , unode "content" ! [("src", "text/" <> TS.unpack src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) + [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta) , unode "content" ! [("src", "text/title_page.xhtml")] $ () ] @@ -784,11 +784,11 @@ pandocToEPUB version opts doc = do ] tocEntry <- mkEntry "toc.ncx" tocData - let navXhtmlFormatter :: Int -> [Inline] -> String -> [Element] -> Element + let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! [("id", "toc-li-" ++ show n)] $ (unode "a" ! - [("href", "text/" ++ src)] + [("href", "text/" <> TS.unpack src)] $ titElements) : case subs of [] -> [] @@ -799,12 +799,12 @@ pandocToEPUB version opts doc = do opts{ writerTemplate = Nothing , writerVariables = Context (M.fromList - [("pagetitle", toVal' $ - escapeStringForXML plainTitle)]) + [("pagetitle", toVal $ + escapeStringForXML $ TS.pack plainTitle)]) <> writerVariables opts} (Pandoc nullMeta [Plain $ walk clean tit])) of - Left _ -> TS.pack $ stringify tit + Left _ -> stringify tit Right x -> x -- can't have <a> elements inside generated links... clean (Link _ ils _) = Span ("", [], []) ils @@ -815,7 +815,7 @@ pandocToEPUB version opts doc = do tocBlocks <- lift $ evalStateT (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1 let navBlocks = [RawBlock (Format "html") - $ showElement $ -- prettyprinting introduces bad spaces + $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle @@ -836,7 +836,7 @@ pandocToEPUB version opts doc = do else [] let landmarks = if null landmarkItems then [] - else [RawBlock (Format "html") $ ppElement $ + else [RawBlock (Format "html") $ TS.pack $ ppElement $ unode "nav" ! [("epub:type","landmarks") ,("id","landmarks") ,("hidden","hidden")] $ @@ -995,49 +995,49 @@ showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" transformTag :: PandocMonad m - => Tag String - -> E m (Tag String) + => Tag TS.Text + -> E m (Tag TS.Text) transformTag tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && isNothing (lookup "data-external" attr) = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef src - newposter <- modifyMediaRef poster + newsrc <- modifyMediaRef $ TS.unpack src + newposter <- modifyMediaRef $ TS.unpack poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", "../" ++ newsrc) | not (null newsrc)] ++ - [("poster", "../" ++ newposter) | not (null newposter)] + [("src", "../" <> newsrc) | not (TS.null newsrc)] ++ + [("poster", "../" <> newposter) | not (TS.null newposter)] return $ TagOpen name attr' transformTag tag = return tag modifyMediaRef :: PandocMonad m => FilePath - -> E m FilePath + -> E m TS.Text modifyMediaRef "" = return "" modifyMediaRef oldsrc = do media <- gets stMediaPaths case lookup oldsrc media of - Just (n,_) -> return n + Just (n,_) -> return $ TS.pack n Nothing -> catchError - (do (img, mbMime) <- P.fetchItem oldsrc - let ext = fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) - (('.':) <$> (mbMime >>= extensionFromMimeType)) + (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc + let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack + (("." <>) <$> (mbMime >>= extensionFromMimeType)) newName <- getMediaNextNewName ext let newPath = "media/" ++ newName entry <- mkEntry newPath (B.fromChunks . (:[]) $ img) modify $ \st -> st{ stMediaPaths = (oldsrc, (newPath, Just entry)):media} - return newPath) + return $ TS.pack newPath) (\e -> do - report $ CouldNotFetchResource oldsrc (show e) - return oldsrc) + report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e) + return $ TS.pack oldsrc) getMediaNextNewName :: PandocMonad m => String -> E m String getMediaNextNewName ext = do nextId <- gets stMediaNextId modify $ \st -> st { stMediaNextId = nextId + 1 } let nextName = "file" ++ show nextId ++ ext - (P.fetchItem nextName >> getMediaNextNewName ext) `catchError` const (return nextName) + (P.fetchItem (TS.pack nextName) >> getMediaNextNewName ext) `catchError` const (return nextName) transformBlock :: PandocMonad m => Block @@ -1054,14 +1054,14 @@ transformInline :: PandocMonad m -> Inline -> E m Inline transformInline _opts (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef src - return $ Image attr lab ("../" ++ newsrc, tit) + newsrc <- modifyMediaRef $ TS.unpack src + return $ Image attr lab ("../" <> newsrc, tit) transformInline opts (x@(Math t m)) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef (url ++ urlEncode m) + newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m)) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) - [Image nullAttr [x] ("../" ++ newsrc, "")] + [Image nullAttr [x] ("../" <> newsrc, "")] transformInline _opts (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw @@ -1081,7 +1081,7 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . unEntity ('&':'#':xs) = let (ds,ys) = break (==';') xs rest = drop 1 ys - in case safeRead ('\'':'\\':ds ++ "'") of + in case safeRead (TS.pack $ "'\\" <> ds <> "'") of Just x -> x : unEntity rest Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs @@ -1090,7 +1090,7 @@ mediaTypeOf :: FilePath -> Maybe MimeType mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of - Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y + Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y _ -> Nothing -- Returns filename for chapter number. @@ -1102,7 +1102,7 @@ addIdentifiers :: WriterOptions -> [Block] -> [Block] addIdentifiers opts bs = evalState (mapM go bs) Set.empty where go (Header n (ident,classes,kvs) ils) = do ids <- get - let ident' = if null ident + let ident' = if TS.null ident then uniqueIdent (writerExtensions opts) ils ids else ident modify $ Set.insert ident' @@ -1111,13 +1111,16 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM normalizeDate' :: String -> Maybe String -normalizeDate' xs = - let xs' = trim xs in - case xs' of - [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY - [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM - -> Just xs' - _ -> normalizeDate xs' +normalizeDate' = fmap TS.unpack . go . trim . TS.pack + where + go xs + | TS.length xs == 4 -- YYY + , TS.all isDigit xs = Just xs + | (y, s) <- TS.splitAt 4 xs -- YYY-MM + , Just ('-', m) <- TS.uncons s + , TS.length m == 2 + , TS.all isDigit y && TS.all isDigit m = Just xs + | otherwise = normalizeDate xs toRelator :: String -> Maybe String toRelator x diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 744eb2a06..8cb29c269 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.FB2 Copyright : Copyright (C) 2011-2012 Sergey Astanin @@ -23,11 +24,12 @@ import Control.Monad (zipWithM) import Control.Monad.Except (catchError) import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify) import Data.ByteString.Base64 (encode) -import qualified Data.ByteString.Char8 as B8 -import Data.Char (isAscii, isControl, isSpace, toLower) +import Data.Char (isAscii, isControl, isSpace) import Data.Either (lefts, rights) -import Data.List (intercalate, isPrefixOf, stripPrefix) +import Data.List (intercalate) import Data.Text (Text, pack) +import qualified Data.Text as T +import qualified Data.Text.Encoding as TE import Network.HTTP (urlEncode) import Text.XML.Light import qualified Text.XML.Light as X @@ -40,15 +42,15 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, - makeSections) + makeSections, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. data FbRenderState = FbRenderState - { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text - , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path - , parentListMarker :: String -- ^ list marker of the parent ordered list + { footnotes :: [ (Int, Text, [Content]) ] -- ^ #, ID, text + , imagesToFetch :: [ (Text, Text) ] -- ^ filename, URL or path + , parentListMarker :: Text -- ^ list marker of the parent ordered list , writerOptions :: WriterOptions } deriving (Show) @@ -98,8 +100,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do description :: PandocMonad m => Meta -> FBM m Content description meta' = do let genre = case lookupMetaString "genre" meta' of - "" -> el "genre" "unrecognised" - s -> el "genre" s + "" -> el "genre" ("unrecognised" :: String) + s -> el "genre" (T.unpack s) bt <- booktitle meta' let as = authors meta' dd <- docdate meta' @@ -110,7 +112,7 @@ description meta' = do Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s] Just (MetaString s) -> [el "lang" $ iso639 s] _ -> [] - where iso639 = takeWhile (/= '-') -- Convert BCP 47 to ISO 639 + where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 let coverimage url = do let img = Image nullAttr mempty (url, "") im <- insertImage InlineImage img @@ -122,7 +124,7 @@ description meta' = do return $ el "description" [ el "title-info" (genre : (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang)) - , el "document-info" [el "program-used" "pandoc"] + , el "document-info" [el "program-used" ("pandoc" :: String)] ] booktitle :: PandocMonad m => Meta -> FBM m [Content] @@ -178,7 +180,7 @@ renderSection lvl (Div (id',"section":_,_) (Header _ _ title : xs)) = do then return [] else list . el "title" <$> formatTitle title content <- cMapM (renderSection (lvl + 1)) xs - let sectionContent = if null id' + let sectionContent = if T.null id' then el "section" (title' ++ content) else el "section" ([uattr "id" id'], title' ++ content) return [sectionContent] @@ -213,19 +215,19 @@ renderFootnotes = do -- | Fetch images and encode them for the FictionBook XML. -- Return image data and a list of hrefs of the missing images. -fetchImages :: PandocMonad m => [(String,String)] -> m ([Content],[String]) +fetchImages :: PandocMonad m => [(Text,Text)] -> m ([Content],[Text]) fetchImages links = do imgs <- mapM (uncurry fetchImage) links return (rights imgs, lefts imgs) -- | Fetch image data from disk or from network and make a <binary> XML section. -- Return either (Left hrefOfMissingImage) or (Right xmlContent). -fetchImage :: PandocMonad m => String -> String -> m (Either String Content) +fetchImage :: PandocMonad m => Text -> Text -> m (Either Text Content) fetchImage href link = do mbimg <- case (isURI link, readDataURI link) of (True, Just (mime,_,True,base64)) -> - let mime' = map toLower mime + let mime' = T.toLower mime in if mime' == "image/png" || mime' == "image/jpeg" then return (Just (mime',base64)) else return Nothing @@ -237,9 +239,9 @@ fetchImage href link = do report $ CouldNotDetermineMimeType link return Nothing Just mime -> return $ Just (mime, - B8.unpack $ encode bs)) + TE.decodeUtf8 $ encode bs)) (\e -> - do report $ CouldNotFetchResource link (show e) + do report $ CouldNotFetchResource link (tshow e) return Nothing) case mbimg of Just (imgtype, imgdata) -> @@ -247,52 +249,52 @@ fetchImage href link = do ( [uattr "id" href , uattr "content-type" imgtype] , txt imgdata ) - _ -> return (Left ('#':href)) + _ -> return (Left ("#" <> href)) -- | Extract mime type and encoded data from the Data URI. -readDataURI :: String -- ^ URI - -> Maybe (String,String,Bool,String) +readDataURI :: Text -- ^ URI + -> Maybe (Text,Text,Bool,Text) -- ^ Maybe (mime,charset,isBase64,data) readDataURI uri = - case stripPrefix "data:" uri of + case T.stripPrefix "data:" uri of Nothing -> Nothing Just rest -> - let meta = takeWhile (/= ',') rest -- without trailing ',' - uridata = drop (length meta + 1) rest - parts = split (== ';') meta + let meta = T.takeWhile (/= ',') rest -- without trailing ',' + uridata = T.drop (T.length meta + 1) rest + parts = T.split (== ';') meta (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts in Just (mime,cs,enc,uridata) where upd str m@(mime,cs,enc) - | isMimeType str = (str,cs,enc) - | Just str' <- stripPrefix "charset=" str = (mime,str',enc) - | str == "base64" = (mime,cs,True) - | otherwise = m + | isMimeType str = (str,cs,enc) + | Just str' <- T.stripPrefix "charset=" str = (mime,str',enc) + | str == "base64" = (mime,cs,True) + | otherwise = m -- Without parameters like ;charset=...; see RFC 2045, 5.1 -isMimeType :: String -> Bool +isMimeType :: Text -> Bool isMimeType s = - case split (=='/') s of + case T.split (=='/') s of [mtype,msubtype] -> - (map toLower mtype `elem` types - || "x-" `isPrefixOf` map toLower mtype) - && all valid mtype - && all valid msubtype + (T.toLower mtype `elem` types + || "x-" `T.isPrefixOf` T.toLower mtype) + && T.all valid mtype + && T.all valid msubtype _ -> False where types = ["text","image","audio","video","application","message","multipart"] valid c = isAscii c && not (isControl c) && not (isSpace c) && - c `notElem` "()<>@,;:\\\"/[]?=" + c `notElem` ("()<>@,;:\\\"/[]?=" :: String) -footnoteID :: Int -> String -footnoteID i = "n" ++ show i +footnoteID :: Int -> Text +footnoteID i = "n" <> tshow i -mkitem :: PandocMonad m => String -> [Block] -> FBM m [Content] +mkitem :: PandocMonad m => Text -> [Block] -> FBM m [Content] mkitem mrk bs = do pmrk <- gets parentListMarker - let nmrk = pmrk ++ mrk ++ " " + let nmrk = pmrk <> mrk <> " " modify (\s -> s { parentListMarker = nmrk}) item <- cMapM blockToXml $ plainToPara $ indentBlocks nmrk bs modify (\s -> s { parentListMarker = pmrk }) -- old parent marker @@ -303,11 +305,12 @@ blockToXml :: PandocMonad m => Block -> FBM m [Content] blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula -- title beginning with fig: indicates that the image is a figure -blockToXml (Para [Image atr alt (src,'f':'i':'g':':':tit)]) = - insertImage NormalImage (Image atr alt (src,tit)) +blockToXml (Para [Image atr alt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = insertImage NormalImage (Image atr alt (src,tit)) blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . - map (el "p" . el "code") . lines $ s + map (el "p" . el "code" . T.unpack) . T.lines $ s blockToXml (RawBlock f str) = if f == Format "fb2" then return $ XI.parseXML str @@ -329,7 +332,7 @@ blockToXml (DefinitionList defs) = cMapM mkdef defs where mkdef (term, bss) = do - items <- cMapM (cMapM blockToXml . plainToPara . indentBlocks (replicate 4 ' ')) bss + items <- cMapM (cMapM blockToXml . plainToPara . indentBlocks (T.replicate 4 " ")) bss t <- wrap "strong" term return (el "p" t : items) blockToXml h@Header{} = do @@ -376,13 +379,13 @@ unPlain x = x -- Simulate increased indentation level. Will not really work -- for multi-line paragraphs. -indentPrefix :: String -> Block -> Block +indentPrefix :: Text -> Block -> Block indentPrefix spacer = indentBlock where indentBlock (Plain ins) = Plain (Str spacer:ins) indentBlock (Para ins) = Para (Str spacer:ins) indentBlock (CodeBlock a s) = - let s' = unlines . map (spacer++) . lines $ s + let s' = T.unlines . map (spacer<>) . T.lines $ s in CodeBlock a s' indentBlock (BlockQuote bs) = BlockQuote (map indent bs) indentBlock (Header l attr' ins) = Header l attr' (indentLines ins) @@ -396,12 +399,12 @@ indent :: Block -> Block indent = indentPrefix spacer where -- indentation space - spacer :: String - spacer = replicate 4 ' ' + spacer :: Text + spacer = T.replicate 4 " " -indentBlocks :: String -> [Block] -> [Block] +indentBlocks :: Text -> [Block] -> [Block] indentBlocks _ [] = [] -indentBlocks prefix (x:xs) = indentPrefix prefix x : map (indentPrefix $ replicate (length prefix) ' ') xs +indentBlocks prefix (x:xs) = indentPrefix prefix x : map (indentPrefix $ T.replicate (T.length prefix) " ") xs -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: PandocMonad m => Inline -> FBM m [Content] @@ -420,7 +423,7 @@ toXml (Quoted DoubleQuote ss) = do inner <- cMapM toXml ss return $ [txt "“"] ++ inner ++ [txt "”"] toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles -toXml (Code _ s) = return [el "code" s] +toXml (Code _ s) = return [el "code" $ T.unpack s] toXml Space = return [txt " "] toXml SoftBreak = return [txt "\n"] toXml LineBreak = return [txt "\n"] @@ -438,40 +441,40 @@ toXml (Note bs) = do let fn_id = footnoteID n fn_desc <- cMapM blockToXml bs modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns }) - let fn_ref = txt $ "[" ++ show n ++ "]" - return . list $ el "a" ( [ attr ("l","href") ('#':fn_id) + let fn_ref = txt $ "[" <> tshow n <> "]" + return . list $ el "a" ( [ attr ("l","href") ("#" <> fn_id) , uattr "type" "note" ] , fn_ref ) -insertMath :: PandocMonad m => ImageMode -> String -> FBM m [Content] +insertMath :: PandocMonad m => ImageMode -> Text -> FBM m [Content] insertMath immode formula = do htmlMath <- fmap (writerHTMLMathMethod . writerOptions) get case htmlMath of WebTeX url -> do let alt = [Code nullAttr formula] - let imgurl = url ++ urlEncode formula + let imgurl = url <> T.pack (urlEncode $ T.unpack formula) let img = Image nullAttr alt (imgurl, "") insertImage immode img - _ -> return [el "code" formula] + _ -> return [el "code" $ T.unpack formula] insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] insertImage immode (Image _ alt (url,ttl)) = do images <- imagesToFetch `liftM` get let n = 1 + length images - let fname = "image" ++ show n + let fname = "image" <> tshow n modify (\s -> s { imagesToFetch = (fname, url) : images }) - let ttlattr = case (immode, null ttl) of + let ttlattr = case (immode, T.null ttl) of (NormalImage, False) -> [ uattr "title" ttl ] _ -> [] return . list $ el "image" $ - [ attr ("l","href") ('#':fname) - , attr ("l","type") (show immode) - , uattr "alt" (cMap plain alt) ] + [ attr ("l","href") ("#" <> fname) + , attr ("l","type") (tshow immode) + , uattr "alt" (T.pack $ cMap plain alt) ] ++ ttlattr insertImage _ _ = error "unexpected inline instead of image" -replaceImagesWithAlt :: [String] -> Content -> Content +replaceImagesWithAlt :: [Text] -> Content -> Content replaceImagesWithAlt missingHrefs body = let cur = XC.fromContent body cur' = replaceAll cur @@ -507,8 +510,8 @@ replaceImagesWithAlt missingHrefs body = (Just alt', Just imtype') -> if imtype' == show NormalImage then el "p" alt' - else txt alt' - (Just alt', Nothing) -> txt alt' -- no type attribute + else txt $ T.pack alt' + (Just alt', Nothing) -> txt $ T.pack alt' -- no type attribute _ -> n -- don't replace if alt text is not found replaceNode n = n -- @@ -529,7 +532,7 @@ list = (:[]) -- | Convert an 'Inline' to plaintext. plain :: Inline -> String -plain (Str s) = s +plain (Str s) = T.unpack s plain (Emph ss) = cMap plain ss plain (Span _ ss) = cMap plain ss plain (Strong ss) = cMap plain ss @@ -539,13 +542,13 @@ plain (Subscript ss) = cMap plain ss plain (SmallCaps ss) = cMap plain ss plain (Quoted _ ss) = cMap plain ss plain (Cite _ ss) = cMap plain ss -- FIXME -plain (Code _ s) = s +plain (Code _ s) = T.unpack s plain Space = " " plain SoftBreak = " " plain LineBreak = "\n" -plain (Math _ s) = s +plain (Math _ s) = T.unpack s plain (RawInline _ _) = "" -plain (Link _ text (url,_)) = concat (map plain text ++ [" <", url, ">"]) +plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"]) plain (Image _ alt _) = cMap plain alt plain (Note _) = "" -- FIXME @@ -563,16 +566,16 @@ spaceBeforeAfter cs = in [emptyline] ++ cs ++ [emptyline] -- | Create a plain-text XML content. -txt :: String -> Content -txt s = Text $ CData CDataText s Nothing +txt :: Text -> Content +txt s = Text $ CData CDataText (T.unpack s) Nothing -- | Create an XML attribute with an unqualified name. -uattr :: String -> String -> Text.XML.Light.Attr -uattr name = Attr (uname name) +uattr :: String -> Text -> Text.XML.Light.Attr +uattr name = Attr (uname name) . T.unpack -- | Create an XML attribute with a qualified name from given namespace. -attr :: (String, String) -> String -> Text.XML.Light.Attr -attr (ns, name) = Attr (qname ns name) +attr :: (String, String) -> Text -> Text.XML.Light.Attr +attr (ns, name) = Attr (qname ns name) . T.unpack -- | Unqualified name uname :: String -> QName diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index f042bda21..e858f3a6c 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -30,12 +30,10 @@ module Text.Pandoc.Writers.HTML ( tagWithAttributes ) where import Control.Monad.State.Strict -import Data.Char (ord, toLower) -import Data.List (intercalate, intersperse, isPrefixOf, partition, delete) -import Data.List.Split (splitWhen) +import Data.Char (ord) +import Data.List (intercalate, intersperse, partition, delete) import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe) import qualified Data.Set as Set -import Data.String (fromString) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL @@ -112,19 +110,21 @@ defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, -- Helpers to render HTML with the appropriate function. -strToHtml :: String -> Html -strToHtml ('\'':xs) = preEscapedString "\'" `mappend` strToHtml xs -strToHtml ('"' :xs) = preEscapedString "\"" `mappend` strToHtml xs -strToHtml (x:xs) | needsVariationSelector x - = preEscapedString [x, '\xFE0E'] `mappend` - case xs of - ('\xFE0E':ys) -> strToHtml ys - _ -> strToHtml xs -strToHtml xs@(_:_) = case break (\c -> c == '\'' || c == '"' || - needsVariationSelector c) xs of - (_ ,[]) -> toHtml xs - (ys,zs) -> toHtml ys `mappend` strToHtml zs -strToHtml [] = "" +strToHtml :: Text -> Html +strToHtml = strToHtml' . T.unpack + where + strToHtml' ('\'':xs) = preEscapedString "\'" `mappend` strToHtml' xs + strToHtml' ('"' :xs) = preEscapedString "\"" `mappend` strToHtml' xs + strToHtml' (x:xs) | needsVariationSelector x + = preEscapedString [x, '\xFE0E'] `mappend` + case xs of + ('\xFE0E':ys) -> strToHtml' ys + _ -> strToHtml' xs + strToHtml' xs@(_:_) = case break (\c -> c == '\'' || c == '"' || + needsVariationSelector c) xs of + (_ ,[]) -> toHtml xs + (ys,zs) -> toHtml ys `mappend` strToHtml' zs + strToHtml' [] = "" -- See #5469: this prevents iOS from substituting emojis. needsVariationSelector :: Char -> Bool @@ -223,14 +223,14 @@ writeHtmlString' st opts d = do case getField "pagetitle" context of Just (s :: Text) | not (T.null s) -> return context _ -> do - let fallback = + let fallback = T.pack $ case lookupContext "sourcefile" (writerVariables opts) of Nothing -> "Untitled" Just [] -> "Untitled" Just (x:_) -> takeBaseName $ T.unpack x report $ NoTitleElement fallback - return $ resetField "pagetitle" (T.pack fallback) context + return $ resetField "pagetitle" fallback context return $ render Nothing $ renderTemplate tpl (defField "body" (renderHtml' body) context') @@ -285,7 +285,7 @@ pandocToHtml opts (Pandoc meta blocks) = do _ -> mempty KaTeX url -> do H.script ! - A.src (toValue $ url ++ "katex.min.js") $ mempty + A.src (toValue $ url <> "katex.min.js") $ mempty nl opts let katexFlushLeft = case lookupContext "classoption" metadata of @@ -306,7 +306,7 @@ pandocToHtml opts (Pandoc meta blocks) = do ] nl opts H.link ! A.rel "stylesheet" ! - A.href (toValue $ url ++ "katex.min.css") + A.href (toValue $ url <> "katex.min.css") _ -> case lookupContext "mathml-script" (writerVariables opts) of @@ -329,7 +329,7 @@ pandocToHtml opts (Pandoc meta blocks) = do (case writerHTMLMathMethod opts of MathJax u -> defField "mathjax" True . defField "mathjaxurl" - (T.pack $ takeWhile (/='?') u) + (T.takeWhile (/='?') u) _ -> defField "mathjax" False) $ defField "quotes" (stQuotes st) $ -- for backwards compatibility we populate toc @@ -337,12 +337,12 @@ pandocToHtml opts (Pandoc meta blocks) = do -- boolean: maybe id (defField "toc") toc $ maybe id (defField "table-of-contents") toc $ - defField "author-meta" (map T.pack authsMeta) $ - maybe id (defField "date-meta" . T.pack) + defField "author-meta" authsMeta $ + maybe id (defField "date-meta") (normalizeDate dateMeta) $ defField "pagetitle" - (T.pack . stringifyHTML . docTitle $ meta) $ - defField "idprefix" (T.pack $ writerIdentifierPrefix opts) $ + (stringifyHTML . docTitle $ meta) $ + defField "idprefix" (writerIdentifierPrefix opts) $ -- these should maybe be set in pandoc.hs defField "slidy-url" ("https://www.w3.org/Talks/Tools/Slidy2" :: Text) $ @@ -354,11 +354,11 @@ pandocToHtml opts (Pandoc meta blocks) = do return (thebody, context) -- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix -prefixedId :: WriterOptions -> String -> Attribute +prefixedId :: WriterOptions -> Text -> Attribute prefixedId opts s = case s of "" -> mempty - _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s + _ -> A.id $ toValue $ writerIdentifierPrefix opts <> s toList :: PandocMonad m => (Html -> Html) @@ -414,7 +414,7 @@ tableOfContents opts sects = do let opts' = case slideVariant of RevealJsSlides -> opts{ writerIdentifierPrefix = - '/' : writerIdentifierPrefix opts } + "/" <> writerIdentifierPrefix opts } _ -> opts case toTableOfContents opts sects of bl@(BulletList (_:_)) -> Just <$> blockToHtml opts' bl @@ -446,64 +446,64 @@ footnoteSection opts notes = do H.ol (mconcat notes >> nl opts) >> nl opts) -- | Parse a mailto link; return Just (name, domain) or Nothing. -parseMailto :: String -> Maybe (String, String) +parseMailto :: Text -> Maybe (Text, Text) parseMailto s = - case break (==':') s of - (xs,':':addr) | map toLower xs == "mailto" -> do - let (name', rest) = span (/='@') addr - let domain = drop 1 rest + case T.break (==':') s of + (xs,T.uncons -> Just (':',addr)) | T.toLower xs == "mailto" -> do + let (name', rest) = T.span (/='@') addr + let domain = T.drop 1 rest return (name', domain) _ -> Prelude.fail "not a mailto: URL" -- | Obfuscate a "mailto:" link. obfuscateLink :: PandocMonad m - => WriterOptions -> Attr -> Html -> String + => WriterOptions -> Attr -> Html -> Text -> StateT WriterState m Html obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = addAttrs opts attr $ H.a ! A.href (toValue s) $ txt -obfuscateLink opts attr (TL.unpack . renderHtml -> txt) s = +obfuscateLink opts attr (TL.toStrict . renderHtml -> txt) s = let meth = writerEmailObfuscation opts - s' = map toLower (take 7 s) ++ drop 7 s + s' = T.toLower (T.take 7 s) <> T.drop 7 s in case parseMailto s' of (Just (name', domain)) -> - let domain' = substitute "." " dot " domain + let domain' = T.replace "." " dot " domain at' = obfuscateChar '@' (linkText, altText) = - if txt == drop 7 s' -- autolink - then ("e", name' ++ " at " ++ domain') - else ("'" ++ obfuscateString txt ++ "'", - txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") + if txt == T.drop 7 s' -- autolink + then ("e", name' <> " at " <> domain') + else ("'" <> obfuscateString txt <> "'", + txt <> " (" <> name' <> " at " <> domain' <> ")") (_, classNames, _) = attr - classNamesStr = concatMap (' ':) classNames + classNamesStr = T.concat $ map (" "<>) classNames in case meth of ReferenceObfuscation -> -- need to use preEscapedString or &'s are escaped to & in URL return $ - preEscapedString $ "<a href=\"" ++ obfuscateString s' - ++ "\" class=\"email\">" ++ obfuscateString txt ++ "</a>" + preEscapedText $ "<a href=\"" <> obfuscateString s' + <> "\" class=\"email\">" <> obfuscateString txt <> "</a>" JavascriptObfuscation -> return $ (H.script ! A.type_ "text/javascript" $ - preEscapedString ("\n<!--\nh='" ++ - obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ - obfuscateString name' ++ "';e=n+a+h;\n" ++ - "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail" ++ - classNamesStr ++ "\">'+" ++ - linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >> - H.noscript (preEscapedString $ obfuscateString altText) - _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth + preEscapedText ("\n<!--\nh='" <> + obfuscateString domain <> "';a='" <> at' <> "';n='" <> + obfuscateString name' <> "';e=n+a+h;\n" <> + "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail" <> + classNamesStr <> "\">'+" <> + linkText <> "+'<\\/'+'a'+'>');\n// -->\n")) >> + H.noscript (preEscapedText $ obfuscateString altText) + _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " <> tshow meth _ -> addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email -- | Obfuscate character as entity. -obfuscateChar :: Char -> String +obfuscateChar :: Char -> Text obfuscateChar char = let num = ord char - numstr = if even num then show num else "x" ++ showHex num "" - in "&#" ++ numstr ++ ";" + numstr = if even num then show num else "x" <> showHex num "" + in "&#" <> T.pack numstr <> ";" -- | Obfuscate string using entities. -obfuscateString :: String -> String -obfuscateString = concatMap obfuscateChar . fromEntities +obfuscateString :: Text -> Text +obfuscateString = T.concatMap obfuscateChar . fromEntities -- | Create HTML tag with attributes. tagWithAttributes :: WriterOptions @@ -525,7 +525,7 @@ addAttrs :: PandocMonad m addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr toAttrs :: PandocMonad m - => [(String, String)] -> StateT WriterState m [Attribute] + => [(Text, Text)] -> StateT WriterState m [Attribute] toAttrs kvs = do html5 <- gets stHtml5 mbEpubVersion <- gets stEPUBVersion @@ -533,18 +533,18 @@ toAttrs kvs = do if html5 then if x `Set.member` (html5Attributes <> rdfaAttributes) - || ':' `elem` x -- e.g. epub: namespace - || "data-" `isPrefixOf` x - || "aria-" `isPrefixOf` x - then Just $ customAttribute (fromString x) (toValue y) - else Just $ customAttribute (fromString ("data-" ++ x)) + || T.any (== ':') x -- e.g. epub: namespace + || "data-" `T.isPrefixOf` x + || "aria-" `T.isPrefixOf` x + then Just $ customAttribute (textTag x) (toValue y) + else Just $ customAttribute (textTag ("data-" <> x)) (toValue y) else if mbEpubVersion == Just EPUB2 && not (x `Set.member` (html4Attributes <> rdfaAttributes) || - "xml:" `isPrefixOf` x) + "xml:" `T.isPrefixOf` x) then Nothing - else Just $ customAttribute (fromString x) (toValue y)) + else Just $ customAttribute (textTag x) (toValue y)) kvs attrsToHtml :: PandocMonad m @@ -552,8 +552,8 @@ attrsToHtml :: PandocMonad m attrsToHtml opts (id',classes',keyvals) = do attrs <- toAttrs keyvals return $ - [prefixedId opts id' | not (null id')] ++ - [A.class_ (toValue $ unwords classes') | not (null classes')] ++ attrs + [prefixedId opts id' | not (T.null id')] ++ + [A.class_ (toValue $ T.unwords classes') | not (null classes')] ++ attrs imgAttrsToHtml :: PandocMonad m => WriterOptions -> Attr -> StateT WriterState m [Attribute] @@ -568,23 +568,23 @@ imgAttrsToHtml opts attr = do isNotDim ("height", _) = False isNotDim _ = True -dimensionsToAttrList :: Attr -> [(String, String)] +dimensionsToAttrList :: Attr -> [(Text, Text)] dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height where - consolidateStyles :: [(String, String)] -> [(String, String)] + consolidateStyles :: [(Text, Text)] -> [(Text, Text)] consolidateStyles xs = case partition isStyle xs of ([], _) -> xs - (ss, rest) -> ("style", intercalate ";" $ map snd ss) : rest + (ss, rest) -> ("style", T.intercalate ";" $ map snd ss) : rest isStyle ("style", _) = True isStyle _ = False go dir = case dimension dir attr of - (Just (Pixel a)) -> [(show dir, show a)] - (Just x) -> [("style", show dir ++ ":" ++ show x)] + (Just (Pixel a)) -> [(tshow dir, tshow a)] + (Just x) -> [("style", tshow dir <> ":" <> tshow x)] Nothing -> [] figure :: PandocMonad m - => WriterOptions -> Attr -> [Inline] -> (String, String) + => WriterOptions -> Attr -> [Inline] -> (Text, Text) -> StateT WriterState m Html figure opts attr txt (s,tit) = do img <- inlineToHtml opts (Image attr [Str ""] (s,tit)) @@ -601,14 +601,14 @@ figure opts attr txt (s,tit) = do else H.div ! A.class_ "figure" $ mconcat [nl opts, img, nl opts, capt, nl opts] -showSecNum :: [Int] -> String -showSecNum = intercalate "." . map show +showSecNum :: [Int] -> Text +showSecNum = T.intercalate "." . map tshow -getNumber :: WriterOptions -> Attr -> String +getNumber :: WriterOptions -> Attr -> Text getNumber opts (_,_,kvs) = showSecNum $ zipWith (+) num (writerNumberOffset opts ++ repeat 0) where - num = maybe [] (map (fromMaybe 0 . safeRead) . splitWhen (=='.')) $ + num = maybe [] (map (fromMaybe 0 . safeRead) . T.split (=='.')) $ lookup "number" kvs -- | Convert Pandoc block element to HTML. @@ -625,7 +625,7 @@ blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)]) inlineToHtml opts (Image attr txt (src, tit)) _ -> figure opts attr txt (src, tit) -- title beginning with fig: indicates that the image is a figure -blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = +blockToHtml opts (Para [Image attr txt (s,T.stripPrefix "fig:" -> Just tit)]) = figure opts attr txt (s,tit) blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst @@ -661,7 +661,7 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs) RevealJsSlides -> "fragment" _ -> "incremental" let inDiv zs = (RawBlock (Format "html") ("<div class=\"" - ++ fragmentClass ++ "\">")) : + <> fragmentClass <> "\">")) : (zs ++ [RawBlock (Format "html") "</div>"]) let (titleBlocks, innerSecs) = if titleSlide @@ -675,8 +675,8 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs) let classes' = ["title-slide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && not html5 ] ++ - ["level" ++ show level | slide || writerSectionDivs opts ] - ++ dclasses + ["level" <> tshow level | slide || writerSectionDivs opts ] + <> dclasses let secttag = if html5 then H5.section else H.div @@ -709,11 +709,11 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do html5 <- gets stHtml5 slideVariant <- gets stSlideVariant let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++ - [("style", "width:" ++ w ++ ";") + [("style", "width:" <> w <> ";") | ("width",w) <- kvs', "column" `elem` classes] ++ [("role", "doc-bibliography") | ident == "refs" && html5] ++ [("role", "doc-biblioentry") - | "ref-item" `isPrefixOf` ident && html5] + | "ref-item" `T.isPrefixOf` ident && html5] let speakerNotes = "notes" `elem` classes -- we don't want incremental output inside speaker notes, see #1394 let opts' = if | speakerNotes -> opts{ writerIncremental = False } @@ -751,7 +751,7 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do blockToHtml opts (RawBlock f str) = do ishtml <- isRawHtml f if ishtml - then return $ preEscapedString str + then return $ preEscapedText str else if (f == Format "latex" || f == Format "tex") && allowsMathEnvironments (writerHTMLMathMethod opts) && isMathEnvironment str @@ -763,22 +763,22 @@ blockToHtml _ HorizontalRule = do html5 <- gets stHtml5 return $ if html5 then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do - id'' <- if null id' + id'' <- if T.null id' then do modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 } codeblocknum <- gets stCodeBlockNum - return (writerIdentifierPrefix opts ++ "cb" ++ show codeblocknum) - else return (writerIdentifierPrefix opts ++ id') + return (writerIdentifierPrefix opts <> "cb" <> tshow codeblocknum) + else return (writerIdentifierPrefix opts <> id') let tolhs = isEnabled Ext_literate_haskell opts && - any (\c -> map toLower c == "haskell") classes && - any (\c -> map toLower c == "literate") classes + any (\c -> T.toLower c == "haskell") classes && + any (\c -> T.toLower c == "literate") classes classes' = if tolhs - then map (\c -> if map toLower c == "haskell" + then map (\c -> if T.toLower c == "haskell" then "literatehaskell" else c) classes else classes adjCode = if tolhs - then unlines . map ("> " ++) . lines $ rawCode + then T.unlines . map ("> " <>) . T.lines $ rawCode else rawCode hlCode = if isJust (writerHighlightStyle opts) then highlight (writerSyntaxMap opts) formatHtmlBlock @@ -786,7 +786,7 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do else Left "" case hlCode of Left msg -> do - unless (null msg) $ + unless (T.null msg) $ report $ CouldNotHighlight msg addAttrs opts (id',classes,keyvals) $ H.pre $ H.code $ toHtml adjCode @@ -819,7 +819,7 @@ blockToHtml opts (BlockQuote blocks) = do blockToHtml opts (Header level attr@(_,classes,_) lst) = do contents <- inlineListToHtml opts lst let secnum = getNumber opts attr - let contents' = if writerNumberSections opts && not (null secnum) + let contents' = if writerNumberSections opts && not (T.null secnum) && "unnumbered" `notElem` classes then (H.span ! A.class_ "header-section-number" $ toHtml secnum) >> strToHtml " " >> contents @@ -841,7 +841,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do html5 <- gets stHtml5 let numstyle' = case numstyle of Example -> "decimal" - _ -> camelCaseToHyphenated $ show numstyle + _ -> camelCaseToHyphenated $ tshow numstyle let attribs = [A.start $ toValue startnum | startnum /= 1] ++ [A.class_ "example" | numstyle == Example] ++ (if numstyle /= DefaultStyle @@ -854,7 +854,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do LowerRoman -> "i" UpperRoman -> "I" _ -> "1"] - else [A.style $ toValue $ "list-style-type: " ++ + else [A.style $ toValue $ "list-style-type: " <> numstyle'] else []) l <- ordList opts contents @@ -874,7 +874,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do cs <- inlineListToHtml opts capt return $ H.caption cs >> nl opts html5 <- gets stHtml5 - let percent w = show (truncate (100*w) :: Integer) ++ "%" + let percent w = show (truncate (100*w) :: Integer) <> "%" let coltags = if all (== 0.0) widths then mempty else do @@ -882,7 +882,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do nl opts mapM_ (\w -> do if html5 - then H.col ! A.style (toValue $ "width: " ++ + then H.col ! A.style (toValue $ "width: " <> percent w) else H.col ! A.width (toValue $ percent w) nl opts) widths @@ -901,8 +901,8 @@ blockToHtml opts (Table capt aligns widths headers rows') = do -- table, or some browsers give us skinny columns with lots of space between: return $ if totalWidth == 0 || totalWidth == 1 then tbl - else tbl ! A.style (toValue $ "width:" ++ - show (round (totalWidth * 100) :: Int) ++ "%;") + else tbl ! A.style (toValue $ "width:" <> + show (round (totalWidth * 100) :: Int) <> "%;") tableRowToHtml :: PandocMonad m => WriterOptions @@ -940,7 +940,7 @@ tableItemToHtml opts tag' align' item = do html5 <- gets stHtml5 let alignStr = alignmentToString align' let attribs = if html5 - then A.style (toValue $ "text-align: " ++ alignStr ++ ";") + then A.style (toValue $ "text-align: " <> alignStr <> ";") else A.align (toValue alignStr) let tag'' = if null alignStr then tag' @@ -967,8 +967,8 @@ inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . mconcat -- | Annotates a MathML expression with the tex source -annotateMML :: XML.Element -> String -> XML.Element -annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)]) +annotateMML :: XML.Element -> Text -> XML.Element +annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, T.unpack tex)]) where cs = case elChildren e of [] -> unode "mrow" () @@ -989,9 +989,9 @@ inlineToHtml opts inline = do (Str str) -> return $ strToHtml str Space -> return $ strToHtml " " SoftBreak -> return $ case writerWrapText opts of - WrapNone -> preEscapedString " " - WrapAuto -> preEscapedString " " - WrapPreserve -> preEscapedString "\n" + WrapNone -> preEscapedText " " + WrapAuto -> preEscapedText " " + WrapPreserve -> preEscapedText "\n" LineBreak -> return $ do if html5 then H5.br else H.br strToHtml "\n" @@ -999,9 +999,8 @@ inlineToHtml opts inline = do (Span (id',classes,kvs) ils) -> let spanLikeTag = case classes of (c:_) -> do - let c' = T.pack c - guard (c' `Set.member` htmlSpanLikeElements) - pure $ customParent (textTag c') + guard (c `Set.member` htmlSpanLikeElements) + pure $ customParent (textTag c) _ -> Nothing in case spanLikeTag of Just tag -> do @@ -1019,7 +1018,7 @@ inlineToHtml opts inline = do | "csl-no-smallcaps" `elem` classes] kvs' = if null styles then kvs - else ("style", concat styles) : kvs + else ("style", T.concat styles) : kvs classes' = [ c | c <- classes , c `notElem` [ "csl-no-emph" , "csl-no-strong" @@ -1032,7 +1031,7 @@ inlineToHtml opts inline = do (Code attr@(ids,cs,kvs) str) -> case hlCode of Left msg -> do - unless (null msg) $ + unless (T.null msg) $ report $ CouldNotHighlight msg addAttrs opts (ids,cs',kvs) $ maybe H.code id sampOrVar $ @@ -1079,7 +1078,7 @@ inlineToHtml opts inline = do `fmap` inlineListToHtml opts lst (Math t str) -> do modify (\st -> st {stMath = True}) - let mathClass = toValue $ ("math " :: String) ++ + let mathClass = toValue $ ("math " :: Text) <> if t == InlineMath then "inline" else "display" case writerHTMLMathMethod opts of WebTeX url -> do @@ -1088,7 +1087,7 @@ inlineToHtml opts inline = do InlineMath -> "\\textstyle " DisplayMath -> "\\displaystyle " let m = imtag ! A.style "vertical-align:middle" - ! A.src (toValue $ url ++ urlEncode (s ++ str)) + ! A.src (toValue $ url <> T.pack (urlEncode (T.unpack $ s <> str))) ! A.alt (toValue str) ! A.title (toValue str) let brtag = if html5 then H5.br else H.br @@ -1113,8 +1112,8 @@ inlineToHtml opts inline = do inlineToHtml opts il MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $ case t of - InlineMath -> "\\(" ++ str ++ "\\)" - DisplayMath -> "\\[" ++ str ++ "\\]" + InlineMath -> "\\(" <> str <> "\\)" + DisplayMath -> "\\[" <> str <> "\\]" KaTeX _ -> return $ H.span ! A.class_ mathClass $ toHtml $ case t of InlineMath -> str @@ -1129,7 +1128,7 @@ inlineToHtml opts inline = do (RawInline f str) -> do ishtml <- isRawHtml f if ishtml - then return $ preEscapedString str + then return $ preEscapedText str else if (f == Format "latex" || f == Format "tex") && allowsMathEnvironments (writerHTMLMathMethod opts) && isMathEnvironment str @@ -1137,21 +1136,21 @@ inlineToHtml opts inline = do else do report $ InlineNotRendered inline return mempty - (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do + (Link attr txt (s,_)) | "mailto:" `T.isPrefixOf` s -> do linkText <- inlineListToHtml opts txt obfuscateLink opts attr linkText s (Link (ident,classes,kvs) txt (s,tit)) -> do linkText <- inlineListToHtml opts txt slideVariant <- gets stSlideVariant - let s' = case s of - '#':xs -> let prefix = if slideVariant == RevealJsSlides + let s' = case T.uncons s of + Just ('#',xs) -> let prefix = if slideVariant == RevealJsSlides then "/" else writerIdentifierPrefix opts - in '#' : prefix ++ xs + in "#" <> prefix <> xs _ -> s let link = H.a ! A.href (toValue s') $ linkText link' <- addAttrs opts (ident, classes, kvs) link - return $ if null tit + return $ if T.null tit then link' else link' ! A.title (toValue tit) (Image attr txt (s,tit)) -> do @@ -1164,7 +1163,7 @@ inlineToHtml opts inline = do (if isReveal then customAttribute "data-src" $ toValue s else A.src $ toValue s) : - [A.title $ toValue tit | not (null tit)] ++ + [A.title $ toValue tit | not (T.null tit)] ++ attrs imageTag = (if html5 then H5.img else H.img , [A.alt $ toValue alternate | not (null txt)] ) @@ -1174,7 +1173,7 @@ inlineToHtml opts inline = do else alternate in (tg $ H.a ! A.href (toValue s) $ toHtml linkTxt , [A5.controls ""] ) - normSrc = maybe s uriPath (parseURIReference s) + normSrc = maybe (T.unpack s) uriPath (parseURIReference $ T.unpack s) (tag, specAttrs) = case mediaCategory normSrc of Just "image" -> imageTag Just "video" -> mediaTag H5.video "Video" @@ -1186,18 +1185,18 @@ inlineToHtml opts inline = do (Note contents) -> do notes <- gets stNotes let number = length notes + 1 - let ref = show number + let ref = tshow number htmlContents <- blockListToNote opts ref contents epubVersion <- gets stEPUBVersion -- push contents onto front of notes modify $ \st -> st {stNotes = htmlContents:notes} slideVariant <- gets stSlideVariant - let revealSlash = ['/' | slideVariant == RevealJsSlides] - let link = H.a ! A.href (toValue $ "#" ++ - revealSlash ++ - writerIdentifierPrefix opts ++ "fn" ++ ref) + let revealSlash = T.pack ['/' | slideVariant == RevealJsSlides] + let link = H.a ! A.href (toValue $ "#" <> + revealSlash <> + writerIdentifierPrefix opts <> "fn" <> ref) ! A.class_ "footnote-ref" - ! prefixedId opts ("fnref" ++ ref) + ! prefixedId opts ("fnref" <> ref) $ (if isJust epubVersion then id else H.sup) @@ -1208,7 +1207,7 @@ inlineToHtml opts inline = do "role" "doc-noteref" _ -> link (Cite cits il)-> do contents <- inlineListToHtml opts (walk addRoleToLink il) - let citationIds = unwords $ map citationId cits + let citationIds = T.unwords $ map citationId cits let result = H.span ! A.class_ "citation" $ contents return $ if html5 then result ! customAttribute "data-cites" (toValue citationIds) @@ -1220,7 +1219,7 @@ addRoleToLink (Link (id',classes,kvs) ils (src,tit)) = addRoleToLink x = x blockListToNote :: PandocMonad m - => WriterOptions -> String -> [Block] + => WriterOptions -> Text -> [Block] -> StateT WriterState m Html blockListToNote opts ref blocks = do html5 <- gets stHtml5 @@ -1228,7 +1227,7 @@ blockListToNote opts ref blocks = do -- that block. Otherwise, insert a new Plain block with the backlink. let kvs = if html5 then [("role","doc-backlink")] else [] let backlink = [Link ("",["footnote-back"],kvs) - [Str "↩"] ("#" ++ "fnref" ++ ref,[])] + [Str "↩"] ("#" <> "fnref" <> ref,"")] let blocks' = if null blocks then [] else let lastBlock = last blocks @@ -1241,7 +1240,7 @@ blockListToNote opts ref blocks = do _ -> otherBlocks ++ [lastBlock, Plain backlink] contents <- blockListToHtml opts blocks' - let noteItem = H.li ! prefixedId opts ("fn" ++ ref) $ contents + let noteItem = H.li ! prefixedId opts ("fn" <> ref) $ contents epubVersion <- gets stEPUBVersion let noteItem' = case epubVersion of Just EPUB3 -> noteItem ! @@ -1251,10 +1250,10 @@ blockListToNote opts ref blocks = do _ -> noteItem return $ nl opts >> noteItem' -isMathEnvironment :: String -> Bool -isMathEnvironment s = "\\begin{" `isPrefixOf` s && +isMathEnvironment :: Text -> Bool +isMathEnvironment s = "\\begin{" `T.isPrefixOf` s && envName `elem` mathmlenvs - where envName = takeWhile (/= '}') (drop 7 s) + where envName = T.takeWhile (/= '}') (T.drop 7 s) mathmlenvs = [ "align" , "align*" , "alignat" @@ -1295,7 +1294,7 @@ isRawHtml f = do return $ f == Format "html" || ((html5 && f == Format "html5") || f == Format "html4") -html5Attributes :: Set.Set String +html5Attributes :: Set.Set Text html5Attributes = Set.fromList [ "abbr" , "accept" @@ -1504,7 +1503,7 @@ html5Attributes = Set.fromList ] -- See https://en.wikipedia.org/wiki/RDFa, https://www.w3.org/TR/rdfa-primer/ -rdfaAttributes :: Set.Set String +rdfaAttributes :: Set.Set Text rdfaAttributes = Set.fromList [ "about" , "rel" @@ -1520,7 +1519,7 @@ rdfaAttributes = Set.fromList , "prefix" ] -html4Attributes :: Set.Set String +html4Attributes :: Set.Set Text html4Attributes = Set.fromList [ "abbr" , "accept" diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 1d70913c5..e6c07aaf7 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -19,6 +19,7 @@ import Prelude import Control.Monad.State.Strict import Data.Default import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging @@ -71,7 +72,7 @@ notesToHaddock opts notes = return $ text "#notes#" <> blankline <> contents -- | Escape special characters for Haddock. -escapeString :: String -> String +escapeString :: Text -> Text escapeString = escapeStringUsing haddockEscapes where haddockEscapes = backslashEscapes "\\/'`\"@<" @@ -88,8 +89,9 @@ blockToHaddock opts (Plain inlines) = do contents <- inlineListToHaddock opts inlines return $ contents <> cr -- title beginning with fig: indicates figure -blockToHaddock opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = - blockToHaddock opts (Para [Image attr alt (src,tit)]) +blockToHaddock opts (Para [Image attr alt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = blockToHaddock opts (Para [Image attr alt (src,tit)]) blockToHaddock opts (Para inlines) = -- TODO: if it contains linebreaks, we need to use a @...@ block (<> blankline) `fmap` blockToHaddock opts (Plain inlines) @@ -97,7 +99,7 @@ blockToHaddock opts (LineBlock lns) = blockToHaddock opts $ linesToPara lns blockToHaddock _ b@(RawBlock f str) | f == "haddock" = - return $ text str <> text "\n" + return $ literal str <> text "\n" | otherwise = do report $ BlockNotRendered b return empty @@ -105,13 +107,13 @@ blockToHaddock opts HorizontalRule = return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline blockToHaddock opts (Header level (ident,_,_) inlines) = do contents <- inlineListToHaddock opts inlines - let attr' = if null ident + let attr' = if T.null ident then empty - else cr <> text "#" <> text ident <> text "#" + else cr <> text "#" <> literal ident <> text "#" return $ nowrap (text (replicate level '=') <> space <> contents) <> attr' <> blankline blockToHaddock _ (CodeBlock (_,_,_) str) = - return $ prefixed "> " (text str) <> blankline + return $ prefixed "> " (literal str) <> blankline -- Nothing in haddock corresponds to block quotes: blockToHaddock opts (BlockQuote blocks) = blockListToHaddock opts blocks @@ -130,8 +132,8 @@ blockToHaddock opts (BulletList items) = do blockToHaddock opts (OrderedList (start,_,delim) items) = do let attribs = (start, Decimal, delim) let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' + let markers' = map (\m -> if T.length m < 3 + then m <> T.replicate (3 - T.length m) " " else m) markers contents <- zipWithM (orderedListItemToHaddock opts) markers' items return $ (if isTightList items then vcat else vsep) contents <> blankline @@ -154,15 +156,15 @@ bulletListItemToHaddock opts items = do -- | Convert ordered list item (a list of blocks) to haddock orderedListItemToHaddock :: PandocMonad m => WriterOptions -- ^ options - -> String -- ^ list item marker + -> Text -- ^ list item marker -> [Block] -- ^ list item (list of blocks) -> StateT WriterState m (Doc Text) orderedListItemToHaddock opts marker items = do contents <- blockListToHaddock opts items - let sps = case length marker - writerTabStop opts of + let sps = case T.length marker - writerTabStop opts of n | n > 0 -> text $ replicate n ' ' _ -> text " " - let start = text marker <> sps + let start = literal marker <> sps return $ hang (writerTabStop opts) start contents $$ if endsWithPlain items then cr @@ -202,8 +204,8 @@ inlineToHaddock :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m (Doc Text) inlineToHaddock opts (Span (ident,_,_) ils) = do contents <- inlineListToHaddock opts ils - if not (null ident) && null ils - then return $ "#" <> text ident <> "#" + if not (T.null ident) && null ils + then return $ "#" <> literal ident <> "#" else return contents inlineToHaddock opts (Emph lst) = do contents <- inlineListToHaddock opts lst @@ -228,15 +230,15 @@ inlineToHaddock opts (Quoted DoubleQuote lst) = do contents <- inlineListToHaddock opts lst return $ "“" <> contents <> "”" inlineToHaddock _ (Code _ str) = - return $ "@" <> text (escapeString str) <> "@" + return $ "@" <> literal (escapeString str) <> "@" inlineToHaddock _ (Str str) = - return $ text $ escapeString str + return $ literal $ escapeString str inlineToHaddock _ (Math mt str) = return $ case mt of - DisplayMath -> cr <> "\\[" <> text str <> "\\]" <> cr - InlineMath -> "\\(" <> text str <> "\\)" + DisplayMath -> cr <> "\\[" <> literal str <> "\\]" <> cr + InlineMath -> "\\(" <> literal str <> "\\)" inlineToHaddock _ il@(RawInline f str) - | f == "haddock" = return $ text str + | f == "haddock" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty @@ -250,12 +252,12 @@ inlineToHaddock opts SoftBreak = inlineToHaddock _ Space = return space inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst inlineToHaddock _ (Link _ txt (src, _)) = do - let linktext = text $ escapeString $ stringify txt + let linktext = literal $ escapeString $ stringify txt let useAuto = isURI src && case txt of [Str s] | escapeURI s == src -> True _ -> False - return $ nowrap $ "<" <> text src <> + return $ nowrap $ "<" <> literal src <> (if useAuto then empty else space <> linktext) <> ">" inlineToHaddock opts (Image attr alternate (source, tit)) = do linkhaddock <- inlineToHaddock opts (Link attr alternate (source, tit)) @@ -264,5 +266,5 @@ inlineToHaddock opts (Image attr alternate (source, tit)) = do inlineToHaddock opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get - let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st) + let ref = literal $ writerIdentifierPrefix opts <> tshow (length $ stNotes st) return $ "<#notes [" <> ref <> "]>" diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 84a48d8b4..9c367dd73 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -2,6 +2,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ICML @@ -20,10 +21,10 @@ module Text.Pandoc.Writers.ICML (writeICML) where import Prelude import Control.Monad.Except (catchError) import Control.Monad.State.Strict -import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix) +import Data.List (intersperse) import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set -import Data.Text as Text (breakOnAll, pack) +import qualified Data.Text as Text import Data.Text (Text) import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class as P @@ -32,18 +33,18 @@ import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout -import Text.Pandoc.Shared (isURI, linesToPara, splitBy) +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Math (texMathToInlines) import Text.Pandoc.Writers.Shared import Text.Pandoc.XML -type Style = [String] -type Hyperlink = [(Int, String)] +type Style = [Text] +type Hyperlink = [(Int, Text)] data WriterState = WriterState{ - blockStyles :: Set.Set String - , inlineStyles :: Set.Set String + blockStyles :: Set.Set Text + , inlineStyles :: Set.Set Text , links :: Hyperlink , listDepth :: Int , maxListDepth :: Int @@ -61,14 +62,14 @@ defaultWriterState = WriterState{ } -- inline names (appear in InDesign's character styles pane) -emphName :: String -strongName :: String -strikeoutName :: String -superscriptName :: String -subscriptName :: String -smallCapsName :: String -codeName :: String -linkName :: String +emphName :: Text +strongName :: Text +strikeoutName :: Text +superscriptName :: Text +subscriptName :: Text +smallCapsName :: Text +codeName :: Text +linkName :: Text emphName = "Italic" strongName = "Bold" strikeoutName = "Strikeout" @@ -79,31 +80,31 @@ codeName = "Code" linkName = "Link" -- block element names (appear in InDesign's paragraph styles pane) -paragraphName :: String -figureName :: String -imgCaptionName :: String -codeBlockName :: String -blockQuoteName :: String -orderedListName :: String -bulletListName :: String -defListTermName :: String -defListDefName :: String -headerName :: String -tableName :: String -tableHeaderName :: String -tableCaptionName :: String -alignLeftName :: String -alignRightName :: String -alignCenterName :: String -firstListItemName :: String -beginsWithName :: String -lowerRomanName :: String -upperRomanName :: String -lowerAlphaName :: String -upperAlphaName :: String -subListParName :: String -footnoteName :: String -citeName :: String +paragraphName :: Text +figureName :: Text +imgCaptionName :: Text +codeBlockName :: Text +blockQuoteName :: Text +orderedListName :: Text +bulletListName :: Text +defListTermName :: Text +defListDefName :: Text +headerName :: Text +tableName :: Text +tableHeaderName :: Text +tableCaptionName :: Text +alignLeftName :: Text +alignRightName :: Text +alignCenterName :: Text +firstListItemName :: Text +beginsWithName :: Text +lowerRomanName :: Text +upperRomanName :: Text +lowerAlphaName :: Text +upperAlphaName :: Text +subListParName :: Text +footnoteName :: Text +citeName :: Text paragraphName = "Paragraph" figureName = "Figure" imgCaptionName = "Caption" @@ -153,9 +154,9 @@ writeICML opts (Pandoc meta blocks) = do Just tpl -> renderTemplate tpl context -- | Auxiliary functions for parStylesToDoc and charStylesToDoc. -contains :: String -> (String, (String, String)) -> [(String, String)] +contains :: Text -> (Text, (Text, Text)) -> [(Text, Text)] contains s rule = - [snd rule | (fst rule) `isInfixOf` s] + [snd rule | (fst rule) `Text.isInfixOf` s] -- | The monospaced font to use as default. monospacedFont :: Doc Text @@ -170,7 +171,7 @@ defaultListIndent :: Int defaultListIndent = 10 -- other constants -lineSeparator :: String +lineSeparator :: Text lineSeparator = "
" -- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles. @@ -178,7 +179,7 @@ parStylesToDoc :: WriterState -> Doc Text parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st where makeStyle s = - let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str) + let countSubStrs sub str = length $ Text.breakOnAll sub str attrs = concatMap (contains s) [ (defListTermName, ("BulletsAndNumberingListType", "BulletList")) , (defListTermName, ("FontStyle", "Bold")) @@ -186,14 +187,14 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st , (alignLeftName, ("Justification", "LeftAlign")) , (alignRightName, ("Justification", "RightAlign")) , (alignCenterName, ("Justification", "CenterAlign")) - , (headerName++"1", ("PointSize", "36")) - , (headerName++"2", ("PointSize", "30")) - , (headerName++"3", ("PointSize", "24")) - , (headerName++"4", ("PointSize", "18")) - , (headerName++"5", ("PointSize", "14")) + , (headerName<>"1", ("PointSize", "36")) + , (headerName<>"2", ("PointSize", "30")) + , (headerName<>"3", ("PointSize", "24")) + , (headerName<>"4", ("PointSize", "18")) + , (headerName<>"5", ("PointSize", "14")) ] -- what is the most nested list type, if any? - (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s + (isBulletList, isOrderedList) = findList $ reverse $ splitTextBy (==' ') s where findList [] = (False, False) findList (x:xs) | x == bulletListName = (True, False) @@ -201,23 +202,23 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st | otherwise = findList xs nBuls = countSubStrs bulletListName s nOrds = countSubStrs orderedListName s - attrs' = numbering ++ listType ++ indent ++ attrs + attrs' = numbering <> listType <> indent <> attrs where - numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)] + numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", tshow nOrds)] | otherwise = [] - listType | isOrderedList && not (subListParName `isInfixOf` s) + listType | isOrderedList && not (subListParName `Text.isInfixOf` s) = [("BulletsAndNumberingListType", "NumberedList")] - | isBulletList && not (subListParName `isInfixOf` s) + | isBulletList && not (subListParName `Text.isInfixOf` s) = [("BulletsAndNumberingListType", "BulletList")] | otherwise = [] - indent = [("LeftIndent", show indt)] + indent = [("LeftIndent", tshow indt)] where nBlockQuotes = countSubStrs blockQuoteName s nDefLists = countSubStrs defListDefName s indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists) props = inTags True "Properties" [] (basedOn $$ tabList $$ numbForm) where - font = if codeBlockName `isInfixOf` s + font = if codeBlockName `Text.isInfixOf` s then monospacedFont else empty basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font @@ -232,12 +233,12 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st ] else empty makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name) - numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..." - | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..." - | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..." - | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..." + numbForm | Text.isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..." + | Text.isInfixOf upperRomanName s = makeNumb "I, II, III, IV..." + | Text.isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..." + | Text.isInfixOf upperAlphaName s = makeNumb "A, B, C, D..." | otherwise = empty - in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props + in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"<>s), ("Name", s)] ++ attrs') props -- | Convert a WriterState with its inline styles to the ICML listing of Character Styles. charStylesToDoc :: WriterState -> Doc Text @@ -250,25 +251,25 @@ charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st , (subscriptName, ("Position", "Subscript")) , (smallCapsName, ("Capitalization", "SmallCaps")) ] - attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs - | isInfixOf strongName s = ("FontStyle", "Bold") : attrs - | isInfixOf emphName s = ("FontStyle", "Italic") : attrs - | otherwise = attrs + attrs' | Text.isInfixOf emphName s && Text.isInfixOf strongName s + = ("FontStyle", "Bold Italic") : attrs + | Text.isInfixOf strongName s = ("FontStyle", "Bold") : attrs + | Text.isInfixOf emphName s = ("FontStyle", "Italic") : attrs + | otherwise = attrs props = inTags True "Properties" [] $ inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font where font = - if codeName `isInfixOf` s + if codeName `Text.isInfixOf` s then monospacedFont else empty - in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props + in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"<>s), ("Name", s)] ++ attrs') props -- | Escape colon characters as %3a -escapeColons :: String -> String -escapeColons (x:xs) - | x == ':' = "%3a" ++ escapeColons xs - | otherwise = x : escapeColons xs -escapeColons [] = [] +escapeColons :: Text -> Text +escapeColons = Text.concatMap $ \x -> case x of + ':' -> "%3a" + _ -> Text.singleton x -- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks. hyperlinksToDoc :: Hyperlink -> Doc Text @@ -278,15 +279,15 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs hyp (ident, url) = hdest $$ hlink where hdest = selfClosingTag "HyperlinkURLDestination" - [("Self", "HyperlinkURLDestination/"++escapeColons url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 - hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), - ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] + [("Self", "HyperlinkURLDestination/"<>escapeColons url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 + hlink = inTags True "Hyperlink" [("Self","uf-"<>tshow ident), ("Name",url), + ("Source","htss-"<>tshow ident), ("Visible","true"), ("DestinationUniqueKey","1")] $ inTags True "Properties" [] $ inTags False "BorderColor" [("type","enumeration")] (text "Black") - $$ inTags False "Destination" [("type","object")] (text $ "HyperlinkURLDestination/"++escapeColons (escapeStringForXML url)) -- HyperlinkURLDestination with more than one colon crashes CS6 + $$ inTags False "Destination" [("type","object")] (literal $ "HyperlinkURLDestination/"<>escapeColons (escapeStringForXML url)) -- HyperlinkURLDestination with more than one colon crashes CS6 -- | Key for specifying user-defined styles -dynamicStyleKey :: String +dynamicStyleKey :: Text dynamicStyleKey = "custom-style" -- | Convert a list of Pandoc blocks to ICML. @@ -299,7 +300,7 @@ blocksToICML opts style lst = do blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m (Doc Text) blockToICML opts style (Plain lst) = parStyle opts style lst -- title beginning with fig: indicates that the image is a figure -blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do +blockToICML opts style (Para img@[Image _ txt (_,Text.stripPrefix "fig:" -> Just _)]) = do figure <- parStyle opts (figureName:style) img caption <- parStyle opts (imgCaptionName:style) txt return $ intersperseBrs [figure, caption] @@ -308,7 +309,7 @@ blockToICML opts style (LineBlock lns) = blockToICML opts style $ linesToPara lns blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) [Str str] blockToICML _ _ b@(RawBlock f str) - | f == Format "icml" = return $ text str + | f == Format "icml" = return $ literal str | otherwise = do report $ BlockNotRendered b return empty @@ -317,7 +318,7 @@ blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedL blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst blockToICML opts style (DefinitionList lst) = intersperseBrs `fmap` mapM (definitionListItemToICML opts style) lst blockToICML opts style (Header lvl (_, cls, _) lst) = - let stl = (headerName ++ show lvl ++ unnumbered):style + let stl = (headerName <> tshow lvl <> unnumbered):style unnumbered = if "unnumbered" `elem` cls then " (unnumbered)" else "" @@ -348,7 +349,7 @@ blockToICML opts style (Table caption aligns widths headers rows) = | otherwise = stl c <- blocksToICML opts stl' cell let cl = return $ inTags True "Cell" - [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c + [("Name", tshow colNr <>":"<> tshow rowNr), ("AppliedCellStyle","CellStyle/Cell")] c liftM2 ($$) cl $ colsToICML rest restAligns rowNr (colNr+1) in do let tabl = if noHeader @@ -356,14 +357,14 @@ blockToICML opts style (Table caption aligns widths headers rows) = else headers:rows cells <- rowsToICML tabl (0::Int) let colWidths w = - [("SingleColumnWidth",show $ 500 * w) | w > 0] - let tupToDoc tup = selfClosingTag "Column" $ ("Name",show $ fst tup) : colWidths (snd tup) + [("SingleColumnWidth",tshow $ 500 * w) | w > 0] + let tupToDoc tup = selfClosingTag "Column" $ ("Name",tshow $ fst tup) : colWidths (snd tup) let colDescs = vcat $ zipWith (curry tupToDoc) [0..nrCols-1] widths let tableDoc = return $ inTags True "Table" [ ("AppliedTableStyle","TableStyle/Table") , ("HeaderRowCount", nrHeaders) - , ("BodyRowCount", show nrRows) - , ("ColumnCount", show nrCols) + , ("BodyRowCount", tshow nrRows) + , ("ColumnCount", tshow nrCols) ] (colDescs $$ cells) liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption blockToICML opts style (Div (_, _, kvs) lst) = @@ -372,7 +373,7 @@ blockToICML opts style (Div (_, _, kvs) lst) = blockToICML _ _ Null = return empty -- | Convert a list of lists of blocks to ICML list items. -listItemsToICML :: PandocMonad m => WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS m (Doc Text) +listItemsToICML :: PandocMonad m => WriterOptions -> Text -> Style -> Maybe ListAttributes -> [[Block]] -> WS m (Doc Text) listItemsToICML _ _ _ _ [] = return empty listItemsToICML opts listType style attribs (first:rest) = do st <- get @@ -397,7 +398,7 @@ listItemToICML opts style isFirst attribs item = doN UpperAlpha = [upperAlphaName] doN _ = [] bw = - [beginsWithName ++ show beginsWith | beginsWith > 1] + [beginsWithName <> tshow beginsWith | beginsWith > 1] in doN numbStl ++ bw makeNumbStart Nothing = [] stl = if isFirst @@ -426,7 +427,7 @@ inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (merge -- | Convert an inline element to ICML. inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m (Doc Text) -inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str +inlineToICML _ style (Str str) = charStyle style $ literal $ escapeStringForXML str inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst @@ -438,19 +439,19 @@ inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ mergeStrings opts $ [Str "“"] ++ lst ++ [Str "”"] inlineToICML opts style (Cite _ lst) = inlinesToICML opts (citeName:style) lst -inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str +inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ literal $ escapeStringForXML str inlineToICML _ style Space = charStyle style space inlineToICML opts style SoftBreak = case writerWrapText opts of WrapAuto -> charStyle style space WrapNone -> charStyle style space WrapPreserve -> charStyle style cr -inlineToICML _ style LineBreak = charStyle style $ text lineSeparator +inlineToICML _ style LineBreak = charStyle style $ literal lineSeparator inlineToICML opts style (Math mt str) = lift (texMathToInlines mt str) >>= (fmap mconcat . mapM (inlineToICML opts style)) inlineToICML _ _ il@(RawInline f str) - | f == Format "icml" = return $ text str + | f == Format "icml" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty @@ -462,7 +463,7 @@ inlineToICML opts style (Link _ lst (url, title)) = do else 1 + fst (head $ links st) newst = st{ links = (ident, url):links st } cont = inTags True "HyperlinkTextSource" - [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content + [("Self","htss-"<>tshow ident), ("Name",title), ("Hidden","false")] content in (cont, newst) inlineToICML opts style (Image attr _ target) = imageICML opts style attr target inlineToICML opts style (Note lst) = footnoteToICML opts style lst @@ -492,7 +493,7 @@ mergeStrings opts = mergeStrings' . map spaceToStr _ -> Str " " spaceToStr x = x - mergeStrings' (Str x : Str y : zs) = mergeStrings' (Str (x ++ y) : zs) + mergeStrings' (Str x : Str y : zs) = mergeStrings' (Str (x <> y) : zs) mergeStrings' (x : xs) = x : mergeStrings' xs mergeStrings' [] = [] @@ -503,20 +504,21 @@ intersperseBrs = vcat . intersperse (selfClosingTag "Br" []) . filter (not . isE -- | Wrap a list of inline elements in an ICML Paragraph Style parStyle :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m (Doc Text) parStyle opts style lst = - let slipIn x y = if null y + let slipIn x y = if Text.null y then x - else x ++ " > " ++ y - stlStr = foldr slipIn [] $ reverse style - stl = if null stlStr + else x <> " > " <> y + stlStr = foldr slipIn "" $ reverse style + stl = if Text.null stlStr then "" - else "ParagraphStyle/" ++ stlStr + else "ParagraphStyle/" <> stlStr attrs = ("AppliedParagraphStyle", stl) attrs' = if firstListItemName `elem` style then let ats = attrs : [("NumberingContinue", "false")] - begins = filter (isPrefixOf beginsWithName) style + begins = filter (Text.isPrefixOf beginsWithName) style in if null begins then ats - else let i = fromMaybe "" $ stripPrefix beginsWithName $ head begins + else let i = fromMaybe "" $ Text.stripPrefix beginsWithName + $ head begins in ("NumberingStartAt", i) : ats else [attrs] in do @@ -531,18 +533,18 @@ charStyle style content = doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content in state $ \st -> - let styles = if null stlStr + let styles = if Text.null stlStr then st else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } in (doc, styles) -- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute. -styleToStrAttr :: Style -> (String, [(String, String)]) +styleToStrAttr :: Style -> (Text, [(Text, Text)]) styleToStrAttr style = - let stlStr = unwords $ Set.toAscList $ Set.fromList style + let stlStr = Text.unwords $ Set.toAscList $ Set.fromList style stl = if null style then "$ID/NormalCharacterStyle" - else "CharacterStyle/" ++ stlStr + else "CharacterStyle/" <> stlStr attrs = [("AppliedCharacterStyle", stl)] in (stlStr, attrs) @@ -557,35 +559,35 @@ imageICML opts style attr (src, _) = do report $ CouldNotDetermineImageSize src msg return def) (\e -> do - report $ CouldNotFetchResource src (show e) + report $ CouldNotFetchResource src $ tshow e return def) let (ow, oh) = sizeInPoints imgS (imgWidth, imgHeight) = desiredSizeInPoints opts attr imgS hw = showFl $ ow / 2 hh = showFl $ oh / 2 - scale = showFl (imgWidth / ow) ++ " 0 0 " ++ showFl (imgHeight / oh) - src' = if isURI src then src else "file:" ++ src + scale = showFl (imgWidth / ow) <> " 0 0 " <> showFl (imgHeight / oh) + src' = if isURI src then src else "file:" <> src (stlStr, attrs) = styleToStrAttr style props = inTags True "Properties" [] $ inTags True "PathGeometry" [] $ inTags True "GeometryPathType" [("PathOpen","false")] $ inTags True "PathPointArray" [] $ vcat [ - selfClosingTag "PathPointType" [("Anchor", "-"++hw++" -"++hh), - ("LeftDirection", "-"++hw++" -"++hh), ("RightDirection", "-"++hw++" -"++hh)] - , selfClosingTag "PathPointType" [("Anchor", "-"++hw++" "++hh), - ("LeftDirection", "-"++hw++" "++hh), ("RightDirection", "-"++hw++" "++hh)] - , selfClosingTag "PathPointType" [("Anchor", hw++" "++hh), - ("LeftDirection", hw++" "++hh), ("RightDirection", hw++" "++hh)] - , selfClosingTag "PathPointType" [("Anchor", hw++" -"++hh), - ("LeftDirection", hw++" -"++hh), ("RightDirection", hw++" -"++hh)] + selfClosingTag "PathPointType" [("Anchor", "-"<>hw<>" -"<>hh), + ("LeftDirection", "-"<>hw<>" -"<>hh), ("RightDirection", "-"<>hw<>" -"<>hh)] + , selfClosingTag "PathPointType" [("Anchor", "-"<>hw<>" "<>hh), + ("LeftDirection", "-"<>hw<>" "<>hh), ("RightDirection", "-"<>hw<>" "<>hh)] + , selfClosingTag "PathPointType" [("Anchor", hw<>" "<>hh), + ("LeftDirection", hw<>" "<>hh), ("RightDirection", hw<>" "<>hh)] + , selfClosingTag "PathPointType" [("Anchor", hw<>" -"<>hh), + ("LeftDirection", hw<>" -"<>hh), ("RightDirection", hw<>" -"<>hh)] ] image = inTags True "Image" - [("Self","ue6"), ("ItemTransform", scale++" -"++hw++" -"++hh)] + [("Self","ue6"), ("ItemTransform", scale<>" -"<>hw<>" -"<>hh)] $ vcat [ inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')] ] doc = inTags True "CharacterStyleRange" attrs $ inTags True "Rectangle" [("Self","uec"), ("StrokeWeight", "0"), - ("ItemTransform", scale++" "++hw++" -"++hh)] (props $$ image) + ("ItemTransform", scale<>" "<>hw<>" -"<>hh)] (props $$ image) state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } ) diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs index c58afed9d..75d3d8f9b 100644 --- a/src/Text/Pandoc/Writers/Ipynb.hs +++ b/src/Text/Pandoc/Writers/Ipynb.hs @@ -19,7 +19,6 @@ where import Prelude import Control.Monad.State import qualified Data.Map as M -import Data.Char (toLower) import Data.Maybe (catMaybes, fromMaybe) import Text.Pandoc.Options import Text.Pandoc.Definition @@ -30,6 +29,7 @@ import Text.Pandoc.Class import Text.Pandoc.Logging import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Data.Aeson as Aeson import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Shared (safeRead, isURI) @@ -94,8 +94,8 @@ addAttachment :: PandocMonad m addAttachment (Image attr lab (src,tit)) | not (isURI src) = do (img, mbmt) <- fetchItem src - let mt = maybe "application/octet-stream" (T.pack) mbmt - modify $ M.insert (T.pack src) + let mt = fromMaybe "application/octet-stream" mbmt + modify $ M.insert src (MimeBundle (M.insert mt (BinaryData img) mempty)) return $ Image attr lab ("attachment:" <> src, tit) addAttachment x = return x @@ -121,7 +121,7 @@ extractCells opts (Div (_id,classes,kvs) xs : bs) , "code" `elem` classes = do let (codeContent, rest) = case xs of - (CodeBlock _ t : ys) -> (T.pack t, ys) + (CodeBlock _ t : ys) -> (t, ys) ys -> (mempty, ys) let meta = pairsToJSONMeta kvs outputs <- catMaybes <$> mapM blockToOutput rest @@ -139,7 +139,7 @@ extractCells opts (Div (_id,classes,kvs) xs : bs) case consolidateAdjacentRawBlocks xs of [RawBlock (Format f) raw] -> do let format' = - case map toLower f of + case T.toLower f of "html" -> "text/html" "revealjs" -> "text/html" "latex" -> "text/latex" @@ -148,11 +148,11 @@ extractCells opts (Div (_id,classes,kvs) xs : bs) _ -> f (Cell{ cellType = Raw - , cellSource = Source $ breakLines $ T.pack raw + , cellSource = Source $ breakLines raw , cellMetadata = if format' == "ipynb" -- means no format given then mempty else M.insert "format" - (Aeson.String $ T.pack format') mempty + (Aeson.String format') mempty , cellAttachments = Nothing } :) <$> extractCells opts bs _ -> extractCells opts bs extractCells opts (CodeBlock (_id,classes,kvs) raw : bs) @@ -164,7 +164,7 @@ extractCells opts (CodeBlock (_id,classes,kvs) raw : bs) codeExecutionCount = exeCount , codeOutputs = [] } - , cellSource = Source $ breakLines $ T.pack raw + , cellSource = Source $ breakLines raw , cellMetadata = meta , cellAttachments = Nothing } :) <$> extractCells opts bs extractCells opts (b:bs) = do @@ -177,13 +177,13 @@ extractCells opts (b:bs) = do blockToOutput :: PandocMonad m => Block -> m (Maybe (Output a)) blockToOutput (Div (_,["output","stream",sname],_) (CodeBlock _ t:_)) = return $ Just - $ Stream{ streamName = T.pack sname - , streamText = Source (breakLines $ T.pack t) } + $ Stream{ streamName = sname + , streamText = Source (breakLines t) } blockToOutput (Div (_,["output","error"],kvs) (CodeBlock _ t:_)) = return $ Just - $ Err{ errName = maybe mempty T.pack (lookup "ename" kvs) - , errValue = maybe mempty T.pack (lookup "evalue" kvs) - , errTraceback = breakLines $ T.pack t } + $ Err{ errName = fromMaybe mempty (lookup "ename" kvs) + , errValue = fromMaybe mempty (lookup "evalue" kvs) + , errTraceback = breakLines t } blockToOutput (Div (_,["output","execute_result"],kvs) bs) = do (data', metadata') <- extractData bs return $ Just @@ -207,28 +207,28 @@ extractData bs = do (img, mbmt) <- fetchItem src case mbmt of Just mt -> return - (M.insert (T.pack mt) (BinaryData img) mmap, + (M.insert mt (BinaryData img) mmap, meta <> pairsToJSONMeta kvs) Nothing -> (mmap, meta) <$ report (BlockNotRendered b) go (mmap, meta) b@(CodeBlock (_,["json"],_) code) = - case decode (UTF8.fromStringLazy code) of + case decode (UTF8.fromTextLazy $ TL.fromStrict code) of Just v -> return (M.insert "application/json" (JsonData v) mmap, meta) Nothing -> (mmap, meta) <$ report (BlockNotRendered b) go (mmap, meta) (CodeBlock ("",[],[]) code) = - return (M.insert "text/plain" (TextualData (T.pack code)) mmap, meta) + return (M.insert "text/plain" (TextualData code) mmap, meta) go (mmap, meta) (RawBlock (Format "html") raw) = - return (M.insert "text/html" (TextualData (T.pack raw)) mmap, meta) + return (M.insert "text/html" (TextualData raw) mmap, meta) go (mmap, meta) (RawBlock (Format "latex") raw) = - return (M.insert "text/latex" (TextualData (T.pack raw)) mmap, meta) + return (M.insert "text/latex" (TextualData raw) mmap, meta) go (mmap, meta) (Div _ bs') = foldM go (mmap, meta) bs' go (mmap, meta) b = (mmap, meta) <$ report (BlockNotRendered b) -pairsToJSONMeta :: [(String, String)] -> JSONMeta +pairsToJSONMeta :: [(Text, Text)] -> JSONMeta pairsToJSONMeta kvs = - M.fromList [(T.pack k, case Aeson.decode (UTF8.fromStringLazy v) of + M.fromList [(k, case Aeson.decode (UTF8.fromTextLazy $ TL.fromStrict v) of Just val -> val - Nothing -> String (T.pack v)) + Nothing -> String v) | (k,v) <- kvs , k /= "execution_count" ] diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 44ddba9a0..14df21ea8 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.JATS Copyright : Copyright (C) 2017-2019 John MacFarlane @@ -18,9 +19,8 @@ module Text.Pandoc.Writers.JATS ( writeJATS ) where import Prelude import Control.Monad.Reader import Control.Monad.State -import Data.Char (toLower) import Data.Generics (everywhere, mkT) -import Data.List (partition, isPrefixOf) +import Data.List (partition) import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime) @@ -88,7 +88,7 @@ docToJATS opts (Pandoc meta blocks) = do case getField "date" metadata of Nothing -> NullVal Just (SimpleVal (x :: Doc Text)) -> - case parseDate (T.unpack $ render Nothing x) of + case parseDate (render Nothing x) of Nothing -> NullVal Just day -> let (y,m,d) = toGregorian day @@ -158,7 +158,7 @@ deflistItemToJATS opts term defs = do -- | Convert a list of lists of blocks to a list of JATS list items. listItemsToJATS :: PandocMonad m - => WriterOptions -> Maybe [String] -> [[Block]] -> JATS m (Doc Text) + => WriterOptions -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text) listItemsToJATS opts markers items = case markers of Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items @@ -166,41 +166,41 @@ listItemsToJATS opts markers items = -- | Convert a list of blocks into a JATS list item. listItemToJATS :: PandocMonad m - => WriterOptions -> Maybe String -> [Block] -> JATS m (Doc Text) + => WriterOptions -> Maybe Text -> [Block] -> JATS m (Doc Text) listItemToJATS opts mbmarker item = do contents <- wrappedBlocksToJATS (not . isParaOrList) opts (walk demoteHeaderAndRefs item) return $ inTagsIndented "list-item" $ - maybe empty (\lbl -> inTagsSimple "label" (text lbl)) mbmarker + maybe empty (\lbl -> inTagsSimple "label" (text $ T.unpack lbl)) mbmarker $$ contents -imageMimeType :: String -> [(String, String)] -> (String, String) +imageMimeType :: Text -> [(Text, Text)] -> (Text, Text) imageMimeType src kvs = - let mbMT = getMimeType src + let mbMT = getMimeType (T.unpack src) maintype = fromMaybe "image" $ lookup "mimetype" kvs `mplus` - (takeWhile (/='/') <$> mbMT) + (T.takeWhile (/='/') <$> mbMT) subtype = fromMaybe "" $ lookup "mime-subtype" kvs `mplus` - ((drop 1 . dropWhile (/='/')) <$> mbMT) + ((T.drop 1 . T.dropWhile (/='/')) <$> mbMT) in (maintype, subtype) -languageFor :: [String] -> String +languageFor :: [Text] -> Text languageFor classes = case langs of (l:_) -> escapeStringForXML l [] -> "" - where isLang l = map toLower l `elem` map (map toLower) languages + where isLang l = T.toLower l `elem` map T.toLower languages langsFrom s = if isLang s then [s] - else languagesByExtension . map toLower $ s + else languagesByExtension . T.toLower $ s langs = concatMap langsFrom classes -codeAttr :: Attr -> (String, [(String, String)]) +codeAttr :: Attr -> (Text, [(Text, Text)]) codeAttr (ident,classes,kvs) = (lang, attr) where - attr = [("id",ident) | not (null ident)] ++ - [("language",lang) | not (null lang)] ++ + attr = [("id",ident) | not (T.null ident)] ++ + [("language",lang) | not (T.null lang)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["code-type", "code-version", "executable", "language-version", "orientation", @@ -211,7 +211,7 @@ codeAttr (ident,classes,kvs) = (lang, attr) blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text) blockToJATS _ Null = return empty blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do - let idAttr = [("id", writerIdentifierPrefix opts ++ id') | not (null id')] + let idAttr = [("id", writerIdentifierPrefix opts <> id') | not (T.null id')] let otherAttrs = ["sec-type", "specific-use"] let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs] title' <- inlinesToJATS opts ils @@ -219,21 +219,21 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do return $ inTags True "sec" attribs $ inTagsSimple "title" title' $$ contents -- Bibliography reference: -blockToJATS opts (Div ('r':'e':'f':'-':_,_,_) [Para lst]) = +blockToJATS opts (Div (T.stripPrefix "ref-" -> Just _,_,_) [Para lst]) = inlinesToJATS opts lst blockToJATS opts (Div ("refs",_,_) xs) = do contents <- blocksToJATS opts xs return $ inTagsIndented "ref-list" contents blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] return $ inTags True cls attr contents blockToJATS opts (Div (ident,_,kvs) bs) = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] @@ -245,13 +245,13 @@ blockToJATS opts (Header _ _ title) = do blockToJATS opts (Plain lst) = blockToJATS opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToJATS opts (Para [Image (ident,_,kvs) txt - (src,'f':'i':'g':':':tit)]) = do + (src,T.stripPrefix "fig:" -> Just tit)]) = do alt <- inlinesToJATS opts txt let (maintype, subtype) = imageMimeType src kvs let capt = if null txt then empty else inTagsSimple "caption" $ inTagsSimple "p" alt - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation", "position", "specific-use"]] let graphicattr = [("mimetype",maintype), @@ -262,11 +262,11 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt capt $$ selfClosingTag "graphic" graphicattr blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do let (maintype, subtype) = imageMimeType src kvs - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ - [("xlink:title", tit) | not (null tit)] ++ + [("xlink:title", tit) | not (T.null tit)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift", "content-type", "specific-use", "xlink:actuate", "xlink:href", "xlink:role", "xlink:show", @@ -279,9 +279,9 @@ blockToJATS opts (LineBlock lns) = blockToJATS opts (BlockQuote blocks) = inTagsIndented "disp-quote" <$> blocksToJATS opts blocks blockToJATS _ (CodeBlock a str) = return $ - inTags False tag attr (flush (text (escapeStringForXML str))) + inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str))) where (lang, attr) = codeAttr a - tag = if null lang then "preformat" else "code" + tag = if T.null lang then "preformat" else "code" blockToJATS _ (BulletList []) = return empty blockToJATS opts (BulletList lst) = inTags True "list" [("list-type", "bullet")] <$> @@ -307,16 +307,16 @@ blockToJATS opts (OrderedList (start, numstyle, delimstyle) items) = do blockToJATS opts (DefinitionList lst) = inTags True "def-list" [] <$> deflistItemsToJATS opts lst blockToJATS _ b@(RawBlock f str) - | f == "jats" = return $ text str -- raw XML block + | f == "jats" = return $ text $ T.unpack str -- raw XML block | otherwise = do report $ BlockNotRendered b return empty blockToJATS _ HorizontalRule = return empty -- not semantic blockToJATS opts (Table [] aligns widths headers rows) = do - let percent w = show (truncate (100*w) :: Integer) ++ "*" + let percent w = tshow (truncate (100*w) :: Integer) <> "*" let coltags = vcat $ zipWith (\w al -> selfClosingTag "col" ([("width", percent w) | w > 0] ++ - [("align", alignmentToString al)])) widths aligns + [("align", alignmentToText al)])) widths aligns thead <- if all null headers then return empty else inTagsIndented "thead" <$> tableRowToJATS opts True headers @@ -328,8 +328,8 @@ blockToJATS opts (Table caption aligns widths headers rows) = do tbl <- blockToJATS opts (Table [] aligns widths headers rows) return $ inTags True "table-wrap" [] $ captionDoc $$ tbl -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of +alignmentToText :: Alignment -> Text +alignmentToText alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" @@ -364,7 +364,7 @@ inlinesToJATS opts lst = hcat <$> mapM (inlineToJATS opts) (fixCitations lst) x : Str (stringify ys) : fixCitations zs where needsFixing (RawInline (Format "jats") z) = - "<pub-id pub-id-type=" `isPrefixOf` z + "<pub-id pub-id-type=" `T.isPrefixOf` z needsFixing _ = False isRawInline (RawInline{}) = True isRawInline _ = False @@ -373,7 +373,7 @@ inlinesToJATS opts lst = hcat <$> mapM (inlineToJATS opts) (fixCitations lst) -- | Convert an inline element to JATS. inlineToJATS :: PandocMonad m => WriterOptions -> Inline -> JATS m (Doc Text) -inlineToJATS _ (Str str) = return $ text $ escapeStringForXML str +inlineToJATS _ (Str str) = return $ text $ T.unpack $ escapeStringForXML str inlineToJATS opts (Emph lst) = inTagsSimple "italic" <$> inlinesToJATS opts lst inlineToJATS opts (Strong lst) = @@ -393,11 +393,11 @@ inlineToJATS opts (Quoted DoubleQuote lst) = do contents <- inlinesToJATS opts lst return $ char '“' <> contents <> char '”' inlineToJATS _ (Code a str) = - return $ inTags False tag attr $ text (escapeStringForXML str) + return $ inTags False tag attr $ literal (escapeStringForXML str) where (lang, attr) = codeAttr a - tag = if null lang then "monospace" else "code" + tag = if T.null lang then "monospace" else "code" inlineToJATS _ il@(RawInline f x) - | f == "jats" = return $ text x + | f == "jats" = return $ literal x | otherwise = do report $ InlineNotRendered il return empty @@ -412,12 +412,12 @@ inlineToJATS opts (Note contents) = do let notenum = case notes of (n, _):_ -> n + 1 [] -> 1 - thenote <- inTags True "fn" [("id","fn" ++ show notenum)] + thenote <- inTags True "fn" [("id","fn" <> tshow notenum)] <$> wrappedBlocksToJATS (not . isPara) opts (walk demoteHeaderAndRefs contents) modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes } return $ inTags False "xref" [("ref-type", "fn"), - ("rid", "fn" ++ show notenum)] + ("rid", "fn" <> tshow notenum)] $ text (show notenum) inlineToJATS opts (Cite _ lst) = -- TODO revisit this after examining the jats.csl pipeline @@ -425,7 +425,7 @@ inlineToJATS opts (Cite _ lst) = inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils inlineToJATS opts (Span (ident,_,kvs) ils) = do contents <- inlinesToJATS opts ils - let attr = [("id",ident) | not (null ident)] ++ + let attr = [("id",ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs , k `elem` ["content-type", "rationale", @@ -447,7 +447,7 @@ inlineToJATS _ (Math t str) = do InlineMath -> "inline-formula" let rawtex = inTagsSimple "tex-math" $ text "<![CDATA[" <> - text str <> + literal str <> text "]]>" return $ inTagsSimple tagtype $ case res of @@ -455,11 +455,11 @@ inlineToJATS _ (Math t str) = do cr <> rawtex $$ text (Xml.ppcElement conf $ fixNS r) Left _ -> rawtex -inlineToJATS _ (Link _attr [Str t] ('m':'a':'i':'l':'t':'o':':':email, _)) +inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _)) | escapeURI t == email = - return $ inTagsSimple "email" $ text (escapeStringForXML email) -inlineToJATS opts (Link (ident,_,kvs) txt ('#':src, _)) = do - let attr = [("id", ident) | not (null ident)] ++ + return $ inTagsSimple "email" $ literal (escapeStringForXML email) +inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do + let attr = [("id", ident) | not (T.null ident)] ++ [("alt", stringify txt) | not (null txt)] ++ [("rid", src)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] @@ -469,10 +469,10 @@ inlineToJATS opts (Link (ident,_,kvs) txt ('#':src, _)) = do contents <- inlinesToJATS opts txt return $ inTags False "xref" attr contents inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [("ext-link-type", "uri"), ("xlink:href", src)] ++ - [("xlink:title", tit) | not (null tit)] ++ + [("xlink:title", tit) | not (T.null tit)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["assigning-authority", "specific-use", "xlink:actuate", "xlink:role", "xlink:show", @@ -480,18 +480,18 @@ inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do contents <- inlinesToJATS opts txt return $ inTags False "ext-link" attr contents inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do - let mbMT = getMimeType src + let mbMT = getMimeType (T.unpack src) let maintype = fromMaybe "image" $ lookup "mimetype" kvs `mplus` - (takeWhile (/='/') <$> mbMT) + (T.takeWhile (/='/') <$> mbMT) let subtype = fromMaybe "" $ lookup "mime-subtype" kvs `mplus` - ((drop 1 . dropWhile (/='/')) <$> mbMT) - let attr = [("id", ident) | not (null ident)] ++ + ((T.drop 1 . T.dropWhile (/='/')) <$> mbMT) + let attr = [("id", ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ - [("xlink:title", tit) | not (null tit)] ++ + [("xlink:title", tit) | not (T.null tit)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift", "content-type", "specific-use", "xlink:actuate", "xlink:href", "xlink:role", "xlink:show", @@ -517,8 +517,8 @@ demoteHeaderAndRefs (Div ("refs",cls,kvs) bs) = Div ("",cls,kvs) bs demoteHeaderAndRefs x = x -parseDate :: String -> Maybe Day -parseDate s = msum (map (\fs -> parsetimeWith fs s) formats) :: Maybe Day +parseDate :: Text -> Maybe Day +parseDate s = msum (map (\fs -> parsetimeWith fs $ T.unpack s) formats) :: Maybe Day where parsetimeWith = parseTimeM True defaultTimeLocale formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", "%e %B %Y", "%b. %e, %Y", "%B %e, %Y", diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index b610dd8bf..d26dae4c7 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -17,7 +17,6 @@ JIRA: module Text.Pandoc.Writers.Jira ( writeJira ) where import Prelude import Control.Monad.State.Strict -import Data.Char (toLower) import Data.Foldable (find) import Data.Text (Text, pack) import Text.Pandoc.Class (PandocMonad, report) @@ -97,7 +96,7 @@ anchor :: Attr -> Text anchor (ident,_,_) = if ident == "" then "" - else "{anchor:" <> pack ident <> "}" + else "{anchor:" <> ident <> "}" -- | Append a newline character unless we are in a list. appendNewlineUnlessInList :: PandocMonad m @@ -130,7 +129,7 @@ blockToJira opts (LineBlock lns) = blockToJira _ b@(RawBlock f str) = if f == Format "jira" - then return (pack str) + then return str else "" <$ report (BlockNotRendered b) blockToJira _ HorizontalRule = return "----\n" @@ -141,14 +140,14 @@ blockToJira opts (Header level attr inlines) = do return $ prefix <> anchor attr <> contents <> "\n" blockToJira _ (CodeBlock attr@(_,classes,_) str) = do - let lang = find (\c -> map toLower c `elem` knownLanguages) classes + let lang = find (\c -> T.toLower c `elem` knownLanguages) classes let start = case lang of Nothing -> "{code}" - Just l -> "{code:" <> pack l <> "}" + Just l -> "{code:" <> l <> "}" let anchorMacro = anchor attr appendNewlineUnlessInList . T.intercalate "\n" $ (if anchorMacro == "" then id else (anchorMacro :)) - [start, pack str, "{code}"] + [start, str, "{code}"] blockToJira opts (BlockQuote [p@(Para _)]) = do contents <- blockToJira opts p @@ -274,9 +273,9 @@ inlineToJira opts (Quoted DoubleQuote lst) = do inlineToJira opts (Cite _ lst) = inlineListToJira opts lst inlineToJira _ (Code attr str) = - return (anchor attr <> "{{" <> pack str <> "}}") + return (anchor attr <> "{{" <> str <> "}}") -inlineToJira _ (Str str) = return $ escapeStringForJira (pack str) +inlineToJira _ (Str str) = return $ escapeStringForJira str inlineToJira opts (Math InlineMath str) = lift (texMathToInlines InlineMath str) >>= inlineListToJira opts @@ -288,7 +287,7 @@ inlineToJira opts (Math DisplayMath str) = do inlineToJira _opts il@(RawInline f str) = if f == Format "jira" - then return (pack str) + then return str else "" <$ report (InlineNotRendered il) inlineToJira _ LineBreak = return "\n" @@ -302,12 +301,12 @@ inlineToJira opts (Link _attr txt (src, _title)) = do return $ T.concat [ "[" , if null txt then "" else linkText <> "|" - , pack src + , src , "]" ] inlineToJira _opts (Image attr _alt (src, _title)) = - return . T.concat $ [anchor attr, "!", pack src, "!"] + return . T.concat $ [anchor attr, "!", src, "!"] inlineToJira opts (Note contents) = do curNotes <- gets stNotes @@ -318,7 +317,7 @@ inlineToJira opts (Note contents) = do return $ "[" <> pack (show newnum) <> "]" -- | Language codes recognized by jira -knownLanguages :: [String] +knownLanguages :: [Text] knownLanguages = [ "actionscript", "ada", "applescript", "bash", "c", "c#", "c++" , "css", "erlang", "go", "groovy", "haskell", "html", "javascript" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f56b3a657..8b46edfef 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -2,6 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.LaTeX Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -22,9 +23,8 @@ import Control.Applicative ((<|>)) import Control.Monad.State.Strict import Data.Monoid (Any(..)) import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, - isPunctuation, ord, toLower) -import Data.List (foldl', intercalate, intersperse, nubBy, - stripPrefix, (\\), uncons) + isPunctuation, ord) +import Data.List (foldl', intersperse, nubBy, (\\), uncons) import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) import qualified Data.Map as M import Data.Text (Text) @@ -70,7 +70,7 @@ data WriterState = , stCsquotes :: Bool -- true if document uses csquotes , stHighlighting :: Bool -- true if document has highlighted code , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit - , stInternalLinks :: [String] -- list of internal link targets + , stInternalLinks :: [Text] -- list of internal link targets , stBeamer :: Bool -- produce beamer , stEmptyLine :: Bool -- true if no content on line , stHasCslRefs :: Bool -- has a Div with class refs @@ -132,8 +132,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do _ -> blocks else blocks -- see if there are internal links - let isInternalLink (Link _ _ ('#':xs,_)) = [xs] - isInternalLink _ = [] + let isInternalLink (Link _ _ (s,_)) + | Just ('#', xs) <- T.uncons s = [xs] + isInternalLink _ = [] modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options @@ -149,7 +150,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do let documentClass = case (lookupContext "documentclass" (writerVariables options)) `mplus` - (T.pack . stringify <$> lookupMeta "documentclass" meta) of + (stringify <$> lookupMeta "documentclass" meta) of Just x -> x Nothing | beamer -> "beamer" | otherwise -> case writerTopLevelDivision options of @@ -188,8 +189,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do ] let toPolyObj :: Lang -> Val Text toPolyObj lang = MapVal $ Context $ - M.fromList [ ("name" , SimpleVal $ text name) - , ("options" , SimpleVal $ text opts) ] + M.fromList [ ("name" , SimpleVal $ literal name) + , ("options" , SimpleVal $ literal opts) ] where (name, opts) = toPolyglossia lang mblang <- toLang $ case getLang options meta of @@ -201,15 +202,15 @@ pandocToLaTeX options (Pandoc meta blocks) = do let dirs = query (extract "dir") blocks let context = defField "toc" (writerTableOfContents options) $ - defField "toc-depth" (T.pack . show $ + defField "toc-depth" (tshow $ (writerTOCDepth options - if stHasChapters st then 1 else 0)) $ defField "body" main $ - defField "title-meta" (T.pack titleMeta) $ + defField "title-meta" titleMeta $ defField "author-meta" - (T.pack $ intercalate "; " authorsMeta) $ + (T.intercalate "; " authorsMeta) $ defField "documentclass" documentClass $ defField "verbatim-in-note" (stVerbInNote st) $ defField "tables" (stTable st) $ @@ -245,42 +246,42 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "csl-refs" (stHasCslRefs st) $ defField "csl-hanging-indent" (stCslHangingIndent st) $ defField "geometry" geometryFromMargins $ - (case T.unpack . render Nothing <$> + (case T.uncons . render Nothing <$> getField "papersize" metadata of - -- uppercase a4, a5, etc. - Just (('A':d:ds) :: String) - | all isDigit (d:ds) -> resetField "papersize" - (T.pack ('a':d:ds)) - _ -> id) + -- uppercase a4, a5, etc. + Just (Just ('A', ds)) + | not (T.null ds) && T.all isDigit ds + -> resetField "papersize" ("a" <> ds) + _ -> id) metadata let context' = -- note: lang is used in some conditionals in the template, -- so we need to set it if we have any babel/polyglossia: maybe id (\l -> defField "lang" - ((text $ renderLang l) :: Doc Text)) mblang + (literal $ renderLang l)) mblang $ maybe id (\l -> defField "babel-lang" - ((text $ toBabel l) :: Doc Text)) mblang + (literal $ toBabel l)) mblang $ defField "babel-otherlangs" - (map ((text . toBabel) :: Lang -> Doc Text) docLangs) + (map (literal . toBabel) docLangs) $ defField "babel-newcommands" (vcat $ - map (\(poly, babel) -> (text :: String -> Doc Text) $ + map (\(poly, babel) -> literal $ -- \textspanish and \textgalician are already used by babel -- save them as \oritext... and let babel use that if poly `elem` ["spanish", "galician"] - then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++ - "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++ - "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext" - ++ poly ++ "}}\n" ++ - "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++ - "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" - ++ poly ++ "}{##2}}}" + then "\\let\\oritext" <> poly <> "\\text" <> poly <> "\n" <> + "\\AddBabelHook{" <> poly <> "}{beforeextras}" <> + "{\\renewcommand{\\text" <> poly <> "}{\\oritext" + <> poly <> "}}\n" <> + "\\AddBabelHook{" <> poly <> "}{afterextras}" <> + "{\\renewcommand{\\text" <> poly <> "}[2][]{\\foreignlanguage{" + <> poly <> "}{##2}}}" else (if poly == "latin" -- see #4161 then "\\providecommand{\\textlatin}{}\n\\renewcommand" - else "\\newcommand") ++ "{\\text" ++ poly ++ - "}[2][]{\\foreignlanguage{" ++ babel ++ "}{#2}}\n" ++ - "\\newenvironment{" ++ poly ++ - "}[2][]{\\begin{otherlanguage}{" ++ - babel ++ "}}{\\end{otherlanguage}}" + else "\\newcommand") <> "{\\text" <> poly <> + "}[2][]{\\foreignlanguage{" <> babel <> "}{#2}}\n" <> + "\\newenvironment{" <> poly <> + "}[2][]{\\begin{otherlanguage}{" <> + babel <> "}}{\\end{otherlanguage}}" ) -- eliminate duplicates that have same polyglossia name $ nubBy (\a b -> fst a == fst b) @@ -305,15 +306,16 @@ data StringContext = TextString deriving (Eq) -- escape things as needed for LaTeX -stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String +stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text stringToLaTeX context zs = do opts <- gets stOptions - return $ - foldr (go opts context) mempty $ + return $ T.pack $ + foldr (go opts context) mempty $ T.unpack $ if writerPreferAscii opts - then T.unpack $ Normalize.normalize Normalize.NFD $ T.pack zs + then Normalize.normalize Normalize.NFD zs else zs where + go :: WriterOptions -> StringContext -> Char -> String -> String go opts ctx x xs = let ligatures = isEnabled Ext_smart opts && ctx == TextString isUrl = ctx == URLString @@ -324,12 +326,12 @@ stringToLaTeX context zs = do emits s = case mbAccentCmd of Just cmd -> - cmd ++ "{" ++ s ++ "}" ++ drop 1 xs -- drop combining accent - Nothing -> s ++ xs + cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent + Nothing -> s <> xs emitc c = case mbAccentCmd of Just cmd -> - cmd ++ "{" ++ [c] ++ "}" ++ drop 1 xs -- drop combining accent + cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent Nothing -> c : xs emitcseq cs = do case xs of @@ -434,17 +436,17 @@ accents = M.fromList , ('\8413', "\\textcircled") ] -toLabel :: PandocMonad m => String -> LW m String +toLabel :: PandocMonad m => Text -> LW m Text toLabel z = go `fmap` stringToLaTeX URLString z - where go [] = "" - go (x:xs) - | (isLetter x || isDigit x) && isAscii x = x:go xs - | x `elem` ("_-+=:;." :: String) = x:go xs - | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs + where + go = T.concatMap $ \x -> case x of + _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x + | x `elemText` "_-+=:;." -> T.singleton x + | otherwise -> T.pack $ "ux" <> printf "%x" (ord x) -- | Puts contents into LaTeX command. -inCmd :: String -> Doc Text -> Doc Text -inCmd cmd contents = char '\\' <> text cmd <> braces contents +inCmd :: Text -> Doc Text -> Doc Text +inCmd cmd contents = char '\\' <> literal cmd <> braces contents toSlides :: PandocMonad m => [Block] -> LW m [Block] toSlides bs = do @@ -475,10 +477,10 @@ blockToLaTeX :: PandocMonad m blockToLaTeX Null = return empty blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do ref <- toLabel identifier - let anchor = if null identifier + let anchor = if T.null identifier then empty else cr <> "\\protect\\hypertarget" <> - braces (text ref) <> braces empty + braces (literal ref) <> braces empty title' <- inlineListToLaTeX ils contents <- blockListToLaTeX bs wrapDiv attr $ ("\\begin{block}" <> braces title' <> anchor) $$ @@ -502,23 +504,23 @@ blockToLaTeX (Div (identifier,"slide":dclasses,dkvs) , isNothing (lookup "fragile" kvs) , "fragile" `notElem` classes] ++ [k | k <- classes, k `elem` frameoptions] ++ - [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions] + [k <> "=" <> v | (k,v) <- kvs, k `elem` frameoptions] let options = if null optionslist then empty - else brackets (text (intercalate "," optionslist)) + else brackets (literal (T.intercalate "," optionslist)) slideTitle <- if ils == [Str "\0"] -- marker for hrule then return empty else braces <$> inlineListToLaTeX ils ref <- toLabel identifier - let slideAnchor = if null identifier + let slideAnchor = if T.null identifier then empty else cr <> "\\protect\\hypertarget" <> - braces (text ref) <> braces empty + braces (literal ref) <> braces empty contents <- blockListToLaTeX bs >>= wrapDiv (identifier,classes,kvs) return $ ("\\begin{frame}" <> options <> slideTitle <> slideAnchor) $$ contents $$ "\\end{frame}" -blockToLaTeX (Div (identifier@(_:_),dclasses,dkvs) +blockToLaTeX (Div (identifier@(T.uncons -> Just (_,_)),dclasses,dkvs) (Header lvl ("",hclasses,hkvs) ils : bs)) = do -- move identifier from div to header blockToLaTeX (Div ("",dclasses,dkvs) @@ -557,21 +559,23 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do blockToLaTeX (Plain lst) = inlineListToLaTeX lst -- title beginning with fig: indicates that the image is a figure -blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do - (capt, captForLof, footnotes) <- getCaption True txt - lab <- labelFor ident - let caption = "\\caption" <> captForLof <> braces capt <> lab - img <- inlineToLaTeX (Image attr txt (src,tit)) - innards <- hypertarget True ident $ - "\\centering" $$ img $$ caption <> cr - let figure = cr <> "\\begin{figure}" $$ innards $$ "\\end{figure}" - st <- get - return $ (if stInMinipage st - -- can't have figures in notes or minipage (here, table cell) - -- http://www.tex.ac.uk/FAQ-ouparmd.html - then cr <> "\\begin{center}" $$ img $+$ capt $$ - "\\end{center}" - else figure) $$ footnotes +blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = do + (capt, captForLof, footnotes) <- getCaption True txt + lab <- labelFor ident + let caption = "\\caption" <> captForLof <> braces capt <> lab + img <- inlineToLaTeX (Image attr txt (src,tit)) + innards <- hypertarget True ident $ + "\\centering" $$ img $$ caption <> cr + let figure = cr <> "\\begin{figure}" $$ innards $$ "\\end{figure}" + st <- get + return $ (if stInMinipage st + -- can't have figures in notes or minipage (here, table cell) + -- http://www.tex.ac.uk/FAQ-ouparmd.html + then cr <> "\\begin{center}" $$ img $+$ capt $$ + "\\end{center}" + else figure) $$ footnotes -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do beamer <- gets stBeamer @@ -606,7 +610,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do else linkAnchor' <> "%" let lhsCodeBlock = do modify $ \s -> s{ stLHS = True } - return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ + return $ flush (linkAnchor $$ "\\begin{code}" $$ literal str $$ "\\end{code}") $$ cr let rawCodeBlock = do st <- get @@ -614,41 +618,41 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do then modify (\s -> s{ stVerbInNote = True }) >> return "Verbatim" else return "verbatim" - return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ - text str $$ text ("\\end{" ++ env ++ "}")) <> cr + return $ flush (linkAnchor $$ literal ("\\begin{" <> env <> "}") $$ + literal str $$ literal ("\\end{" <> env <> "}")) <> cr let listingsCodeBlock = do st <- get ref <- toLabel identifier let params = if writerListings (stOptions st) then (case getListingsLanguage classes of - Just l -> [ "language=" ++ mbBraced l ] + Just l -> [ "language=" <> mbBraced l ] Nothing -> []) ++ [ "numbers=left" | "numberLines" `elem` classes || "number" `elem` classes || "number-lines" `elem` classes ] ++ [ (if key == "startFrom" then "firstnumber" - else key) ++ "=" ++ mbBraced attr | + else key) <> "=" <> mbBraced attr | (key,attr) <- keyvalAttr, key `notElem` ["exports", "tangle", "results"] -- see #4889 ] ++ (if identifier == "" then [] - else [ "label=" ++ ref ]) + else [ "label=" <> ref ]) else [] printParams | null params = empty | otherwise = brackets $ hcat (intersperse ", " - (map text params)) - return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ + (map literal params)) + return $ flush ("\\begin{lstlisting}" <> printParams $$ literal str $$ "\\end{lstlisting}") $$ cr let highlightedCodeBlock = case highlight (writerSyntaxMap opts) formatLaTeXBlock ("",classes,keyvalAttr) str of Left msg -> do - unless (null msg) $ + unless (T.null msg) $ report $ CouldNotHighlight msg rawCodeBlock Right h -> do @@ -667,7 +671,7 @@ blockToLaTeX b@(RawBlock f x) = do beamer <- gets stBeamer if (f == Format "latex" || f == Format "tex" || (f == Format "beamer" && beamer)) - then return $ text x + then return $ literal x else do report $ BlockNotRendered b return empty @@ -680,7 +684,7 @@ blockToLaTeX (BulletList lst) = do let spacing = if isTightList lst then text "\\tightlist" else empty - return $ text ("\\begin{itemize}" ++ inc) $$ spacing $$ vcat items $$ + return $ text ("\\begin{itemize}" <> inc) $$ spacing $$ vcat items $$ "\\end{itemize}" blockToLaTeX (OrderedList _ []) = return empty -- otherwise latex error blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do @@ -712,7 +716,7 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do LowerAlpha -> "a" Example -> "1" DefaultStyle -> "1" - let enum = text $ "enum" ++ map toLower (toRomanNumeral oldlevel) + let enum = literal $ "enum" <> T.toLower (toRomanNumeral oldlevel) let stylecommand | numstyle == DefaultStyle && numdelim == DefaultDelim = empty | beamer && numstyle == Decimal && numdelim == Period = empty @@ -726,7 +730,7 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do let spacing = if isTightList lst then text "\\tightlist" else empty - return $ text ("\\begin{enumerate}" ++ inc) + return $ text ("\\begin{enumerate}" <> inc) $$ stylecommand $$ resetcounter $$ spacing @@ -741,7 +745,7 @@ blockToLaTeX (DefinitionList lst) = do let spacing = if all isTightList (map snd lst) then text "\\tightlist" else empty - return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ + return $ text ("\\begin{description}" <> inc) $$ spacing $$ vcat items $$ "\\end{description}" blockToLaTeX HorizontalRule = return @@ -771,7 +775,7 @@ blockToLaTeX (Table caption aligns widths heads rows) = do else "\\caption" <> captForLof <> braces captionText <> "\\tabularnewline" rows' <- mapM (tableRowToLaTeX False aligns widths) rows - let colDescriptors = text $ concatMap toColDescriptor aligns + let colDescriptors = literal $ T.concat $ map toColDescriptor aligns modify $ \s -> s{ stTable = True } notes <- notesToLaTeX <$> gets stNotes return $ "\\begin{longtable}[]" <> @@ -806,7 +810,7 @@ getCaption externalNotes txt = do else return empty return (capt, captForLof, footnotes) -toColDescriptor :: Alignment -> String +toColDescriptor :: Alignment -> Text toColDescriptor align = case align of AlignLeft -> "l" @@ -853,9 +857,9 @@ fixLineBreaks' ils = case splitBy (== LineBreak) ils of [] -> [] [xs] -> xs chunks -> RawInline "tex" "\\vtop{" : - concatMap tohbox chunks ++ + concatMap tohbox chunks <> [RawInline "tex" "}"] - where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++ + where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <> [RawInline "tex" "}"] -- We also change display math to inline math, since display @@ -933,8 +937,9 @@ defListItemToLaTeX (term, defs) = do modify $ \s -> s{stInItem = False} -- put braces around term if it contains an internal link, -- since otherwise we get bad bracket interactions: \item[\hyperref[..] - let isInternalLink (Link _ _ ('#':_,_)) = True - isInternalLink _ = False + let isInternalLink (Link _ _ (src,_)) + | Just ('#', _) <- T.uncons src = True + isInternalLink _ = False let term'' = if any isInternalLink term then braces term' else term' @@ -949,8 +954,8 @@ defListItemToLaTeX (term, defs) = do -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: PandocMonad m - => [String] -- classes - -> [Char] + => [Text] -- classes + -> Text -> Int -> [Inline] -> LW m (Doc Text) @@ -958,9 +963,9 @@ sectionHeader classes ident level lst = do let unnumbered = "unnumbered" `elem` classes let unlisted = "unlisted" `elem` classes txt <- inlineListToLaTeX lst - plain <- stringToLaTeX TextString $ concatMap stringify lst + plain <- stringToLaTeX TextString $ T.concat $ map stringify lst let removeInvalidInline (Note _) = [] - removeInvalidInline (Span (id', _, _) _) | not (null id') = [] + removeInvalidInline (Span (id', _, _) _) | not (T.null id') = [] removeInvalidInline Image{} = [] removeInvalidInline x = [x] let lstNoNotes = foldr (mappend . (\x -> walkM removeInvalidInline x)) mempty lst @@ -972,11 +977,11 @@ sectionHeader classes ident level lst = do then return empty else return $ brackets txtNoNotes - let contents = if render Nothing txt == T.pack plain + let contents = if render Nothing txt == plain then braces txt else braces (text "\\texorpdfstring" <> braces txt - <> braces (text plain)) + <> braces (literal plain)) book <- gets stHasChapters opts <- gets stOptions let topLevelDivision = if book && writerTopLevelDivision opts == TopLevelDefault @@ -1036,45 +1041,45 @@ wrapDiv (_,classes,kvs) t = do then \contents -> let w = maybe "0.48" fromPct (lookup "width" kvs) in inCmd "begin" "column" <> - braces (text w <> "\\textwidth") + braces (literal w <> "\\textwidth") $$ contents $$ inCmd "end" "column" else id fromPct xs = - case reverse xs of - '%':ds -> case safeRead (reverse ds) of - Just digits -> showFl (digits / 100 :: Double) - Nothing -> xs - _ -> xs + case T.unsnoc xs of + Just (ds, '%') -> case safeRead ds of + Just digits -> showFl (digits / 100 :: Double) + Nothing -> xs + _ -> xs wrapDir = case lookup "dir" kvs of Just "rtl" -> align "RTL" Just "ltr" -> align "LTR" _ -> id wrapLang txt = case lang of Just lng -> let (l, o) = toPolyglossiaEnv lng - ops = if null o + ops = if T.null o then "" - else brackets $ text o - in inCmd "begin" (text l) <> ops + else brackets $ literal o + in inCmd "begin" (literal l) <> ops $$ blankline <> txt <> blankline - $$ inCmd "end" (text l) + $$ inCmd "end" (literal l) Nothing -> txt return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t -hypertarget :: PandocMonad m => Bool -> String -> Doc Text -> LW m (Doc Text) +hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text) hypertarget _ "" x = return x hypertarget addnewline ident x = do - ref <- text `fmap` toLabel ident + ref <- literal `fmap` toLabel ident return $ text "\\hypertarget" <> braces ref <> braces ((if addnewline && not (isEmpty x) then ("%" <> cr) else empty) <> x) -labelFor :: PandocMonad m => String -> LW m (Doc Text) +labelFor :: PandocMonad m => Text -> LW m (Doc Text) labelFor "" = return empty labelFor ident = do - ref <- text `fmap` toLabel ident + ref <- literal `fmap` toLabel ident return $ text "\\label" <> braces ref -- | Convert list of inline elements to LaTeX. @@ -1088,11 +1093,12 @@ inlineListToLaTeX lst = -- so we turn nbsps after hard breaks to \hspace commands. -- this is mostly used in verse. where fixLineInitialSpaces [] = [] - fixLineInitialSpaces (LineBreak : Str s@('\160':_) : xs) = - LineBreak : fixNbsps s ++ fixLineInitialSpaces xs + fixLineInitialSpaces (LineBreak : Str s : xs) + | Just ('\160', _) <- T.uncons s + = LineBreak : fixNbsps s <> fixLineInitialSpaces xs fixLineInitialSpaces (x:xs) = x : fixLineInitialSpaces xs - fixNbsps s = let (ys,zs) = span (=='\160') s - in replicate (length ys) hspace ++ [Str zs] + fixNbsps s = let (ys,zs) = T.span (=='\160') s + in replicate (T.length ys) hspace <> [Str zs] hspace = RawInline "latex" "\\hspace*{0.333em}" -- We need \hfill\break for a line break at the start -- of a paragraph. See #5591. @@ -1119,11 +1125,11 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do ["LR" | ("dir", "ltr") `elem` kvs] ++ (case lang of Just lng -> let (l, o) = toPolyglossia lng - ops = if null o then "" else ("[" ++ o ++ "]") - in ["text" ++ l ++ ops] + ops = if T.null o then "" else ("[" <> o <> "]") + in ["text" <> l <> ops] Nothing -> []) contents <- inlineListToLaTeX ils - return $ (if null id' + return $ (if T.null id' then empty else "\\protect" <> linkAnchor) <> (if null cmds @@ -1167,13 +1173,13 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do , k `notElem` ["exports","tangle","results"]] let listingsopt = if null listingsopts then "" - else "[" ++ - intercalate ", " - (map (\(k,v) -> k ++ "=" ++ v) - listingsopts) ++ "]" + else "[" <> + T.intercalate ", " + (map (\(k,v) -> k <> "=" <> v) + listingsopts) <> "]" inNote <- gets stInNote when inNote $ modify $ \s -> s{ stVerbInNote = True } - let chr = case "!\"'()*,-./:;?@" \\ str of + let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of (c:_) -> c [] -> '!' let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#") str @@ -1181,16 +1187,17 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do -- (defined in the default template) so that we don't have -- to change the way we escape characters depending on whether -- the lstinline is inside another command. See #1629: - return $ text $ "\\passthrough{\\lstinline" ++ listingsopt ++ [chr] ++ str' ++ [chr] ++ "}" - let rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) + return $ literal $ "\\passthrough{\\lstinline" <> + listingsopt <> T.singleton chr <> str' <> T.singleton chr <> "}" + let rawCode = liftM (literal . (\s -> "\\texttt{" <> escapeSpaces s <> "}")) $ stringToLaTeX CodeString str - where escapeSpaces = concatMap - (\c -> if c == ' ' then "\\ " else [c]) + where escapeSpaces = T.concatMap + (\c -> if c == ' ' then "\\ " else T.singleton c) let highlightCode = case highlight (writerSyntaxMap opts) formatLaTeXInline ("",classes,[]) str of Left msg -> do - unless (null msg) $ report $ CouldNotHighlight msg + unless (T.null msg) $ report $ CouldNotHighlight msg rawCode Right h -> modify (\st -> st{ stHighlighting = True }) >> return (text (T.unpack h)) @@ -1225,20 +1232,20 @@ inlineToLaTeX (Quoted qt lst) = do else char '\x2018' <> inner <> char '\x2019' inlineToLaTeX (Str str) = do setEmptyLine False - liftM text $ stringToLaTeX TextString str + liftM literal $ stringToLaTeX TextString str inlineToLaTeX (Math InlineMath str) = do setEmptyLine False - return $ "\\(" <> text (handleMathComment str) <> "\\)" + return $ "\\(" <> literal (handleMathComment str) <> "\\)" inlineToLaTeX (Math DisplayMath str) = do setEmptyLine False - return $ "\\[" <> text (handleMathComment str) <> "\\]" + return $ "\\[" <> literal (handleMathComment str) <> "\\]" inlineToLaTeX il@(RawInline f str) = do beamer <- gets stBeamer if (f == Format "latex" || f == Format "tex" || (f == Format "beamer" && beamer)) then do setEmptyLine False - return $ text str + return $ literal str else do report $ InlineNotRendered il return empty @@ -1253,30 +1260,33 @@ inlineToLaTeX SoftBreak = do WrapNone -> return space WrapPreserve -> return cr inlineToLaTeX Space = return space -inlineToLaTeX (Link _ txt ('#':ident, _)) = do - contents <- inlineListToLaTeX txt - lab <- toLabel ident - return $ text "\\protect\\hyperlink" <> braces (text lab) <> braces contents -inlineToLaTeX (Link _ txt (src, _)) = +inlineToLaTeX (Link _ txt (src,_)) + | Just ('#', ident) <- T.uncons src + = do + contents <- inlineListToLaTeX txt + lab <- toLabel ident + return $ text "\\protect\\hyperlink" <> braces (literal lab) <> braces contents + | otherwise = case txt of - [Str x] | unEscapeString x == unEscapeString src -> -- autolink + [Str x] | unEscapeString (T.unpack x) == unEscapeString (T.unpack src) -> -- autolink do modify $ \s -> s{ stUrl = True } src' <- stringToLaTeX URLString (escapeURI src) - return $ text $ "\\url{" ++ src' ++ "}" - [Str x] | Just rest <- stripPrefix "mailto:" src, - unEscapeString x == unEscapeString rest -> -- email autolink + return $ literal $ "\\url{" <> src' <> "}" + [Str x] | Just rest <- T.stripPrefix "mailto:" src, + unEscapeString (T.unpack x) == unEscapeString (T.unpack rest) -> -- email autolink do modify $ \s -> s{ stUrl = True } src' <- stringToLaTeX URLString (escapeURI src) contents <- inlineListToLaTeX txt - return $ "\\href" <> braces (text src') <> + return $ "\\href" <> braces (literal src') <> braces ("\\nolinkurl" <> braces contents) _ -> do contents <- inlineListToLaTeX txt src' <- stringToLaTeX URLString (escapeURI src) - return $ text ("\\href{" ++ src' ++ "}{") <> + return $ literal ("\\href{" <> src' <> "}{") <> contents <> char '}' -inlineToLaTeX il@(Image _ _ ('d':'a':'t':'a':':':_, _)) = do - report $ InlineNotRendered il - return empty +inlineToLaTeX il@(Image _ _ (src, _)) + | Just _ <- T.stripPrefix "data:" src = do + report $ InlineNotRendered il + return empty inlineToLaTeX (Image attr _ (source, _)) = do setEmptyLine False modify $ \s -> s{ stGraphics = True } @@ -1284,9 +1294,9 @@ inlineToLaTeX (Image attr _ (source, _)) = do let showDim dir = let d = text (show dir) <> "=" in case dimension dir attr of Just (Pixel a) -> - [d <> text (showInInch opts (Pixel a)) <> "in"] + [d <> literal (showInInch opts (Pixel a)) <> "in"] Just (Percent a) -> - [d <> text (showFl (a / 100)) <> + [d <> literal (showFl (a / 100)) <> case dir of Width -> "\\textwidth" Height -> "\\textheight" @@ -1300,18 +1310,18 @@ inlineToLaTeX (Image attr _ (source, _)) = do Height | isJust (dimension Width attr) -> [d <> "\\textheight"] _ -> [] - dimList = showDim Width ++ showDim Height + dimList = showDim Width <> showDim Height dims = if null dimList then empty else brackets $ mconcat (intersperse "," dimList) source' = if isURI source then source - else unEscapeString source + else T.pack $ unEscapeString $ T.unpack source source'' <- stringToLaTeX URLString source' inHeading <- gets stInHeading return $ (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <> - dims <> braces (text source'') + dims <> braces (literal source'') inlineToLaTeX (Note contents) = do setEmptyLine False externalNotes <- gets stExternalNotes @@ -1336,13 +1346,14 @@ inlineToLaTeX (Note contents) = do -- A comment at the end of math needs to be followed by a newline, -- or the closing delimiter gets swallowed. -handleMathComment :: String -> String +handleMathComment :: Text -> Text handleMathComment s = - let (_, ys) = break (\c -> c == '\n' || c == '%') $ reverse s - in case ys of - '%':'\\':_ -> s - '%':_ -> s ++ "\n" - _ -> s + let (_, ys) = T.break (\c -> c == '\n' || c == '%') $ T.reverse s -- no T.breakEnd + in case T.uncons ys of + Just ('%', ys') -> case T.uncons ys' of + Just ('\\', _) -> s + _ -> s <> "\n" + _ -> s protectCode :: Inline -> [Inline] protectCode x@(Code _ _) = [ltx "\\mbox{" , x , ltx "}"] @@ -1379,7 +1390,7 @@ citationsToNatbib cits head cits s = citationSuffix $ last cits - ks = intercalate ", " $ map citationId cits + ks = T.intercalate ", " $ map citationId cits citationsToNatbib (c:cs) | citationMode c == AuthorInText = do author <- citeCommand "citeauthor" [] [] (citationId c) @@ -1403,31 +1414,34 @@ citationsToNatbib cits = do NormalCitation -> citeCommand "citealp" p s k citeCommand :: PandocMonad m - => String -> [Inline] -> [Inline] -> String -> LW m (Doc Text) + => Text -> [Inline] -> [Inline] -> Text -> LW m (Doc Text) citeCommand c p s k = do args <- citeArguments p s k - return $ text ("\\" ++ c) <> args + return $ literal ("\\" <> c) <> args citeArguments :: PandocMonad m - => [Inline] -> [Inline] -> String -> LW m (Doc Text) + => [Inline] -> [Inline] -> Text -> LW m (Doc Text) citeArguments p s k = do let s' = stripLocatorBraces $ case s of - (Str - [x] : r) | isPunctuation x -> dropWhile (== Space) r - (Str (x:xs) : r) | isPunctuation x -> Str xs : r - _ -> s + (Str t : r) -> case T.uncons t of + Just (x, xs) + | T.null xs + , isPunctuation x -> dropWhile (== Space) r + | isPunctuation x -> Str xs : r + _ -> s + _ -> s pdoc <- inlineListToLaTeX p sdoc <- inlineListToLaTeX s' let optargs = case (isEmpty pdoc, isEmpty sdoc) of (True, True ) -> empty (True, False) -> brackets sdoc (_ , _ ) -> brackets pdoc <> brackets sdoc - return $ optargs <> braces (text k) + return $ optargs <> braces (literal k) -- strip off {} used to define locator in pandoc-citeproc; see #5722 stripLocatorBraces :: [Inline] -> [Inline] stripLocatorBraces = walk go - where go (Str xs) = Str $ filter (\c -> c /= '{' && c /= '}') xs + where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs go x = x citationsToBiblatex :: PandocMonad m => [Citation] -> LW m (Doc Text) @@ -1453,7 +1467,7 @@ citationsToBiblatex (c:cs) AuthorInText -> "\\textcite" NormalCitation -> "\\autocite" return $ text cmd <> - braces (text (intercalate "," (map citationId (c:cs)))) + braces (literal (T.intercalate "," (map citationId (c:cs)))) | otherwise = do let cmd = case citationMode c of SuppressAuthor -> "\\autocites*" @@ -1470,17 +1484,17 @@ citationsToBiblatex (c:cs) citationsToBiblatex _ = return empty -- Determine listings language from list of class attributes. -getListingsLanguage :: [String] -> Maybe String +getListingsLanguage :: [Text] -> Maybe Text getListingsLanguage xs = foldr ((<|>) . toListingsLanguage) Nothing xs -mbBraced :: String -> String -mbBraced x = if not (all isAlphaNum x) +mbBraced :: Text -> Text +mbBraced x = if not (T.all isAlphaNum x) then "{" <> x <> "}" else x -- Extract a key from divs and spans -extract :: String -> Block -> [String] +extract :: Text -> Block -> [Text] extract key (Div attr _) = lookKey key attr extract key (Plain ils) = query (extractInline key) ils extract key (Para ils) = query (extractInline key) ils @@ -1488,16 +1502,16 @@ extract key (Header _ _ ils) = query (extractInline key) ils extract _ _ = [] -- Extract a key from spans -extractInline :: String -> Inline -> [String] +extractInline :: Text -> Inline -> [Text] extractInline key (Span attr _) = lookKey key attr extractInline _ _ = [] -- Look up a key in an attribute and give a list of its values -lookKey :: String -> Attr -> [String] -lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs +lookKey :: Text -> Attr -> [Text] +lookKey key (_,_,kvs) = maybe [] T.words $ lookup key kvs -- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: Lang -> (String, String) +toPolyglossiaEnv :: Lang -> (Text, Text) toPolyglossiaEnv l = case toPolyglossia l of ("arabic", o) -> ("Arabic", o) @@ -1506,7 +1520,7 @@ toPolyglossiaEnv l = -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Polyglossia (language, options) tuple -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: Lang -> (String, String) +toPolyglossia :: Lang -> (Text, Text) toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") @@ -1546,7 +1560,7 @@ toPolyglossia x = (commonFromBcp47 x, "") -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf -- List of supported languages (slightly outdated): -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: Lang -> String +toBabel :: Lang -> Text toBabel (Lang "de" _ "AT" vars) | "1901" `elem` vars = "austrian" | otherwise = "naustrian" @@ -1578,7 +1592,7 @@ toBabel x = commonFromBcp47 x -- Takes a list of the constituents of a BCP 47 language code -- and converts it to a string shared by Babel and Polyglossia. -- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: Lang -> String +commonFromBcp47 :: Lang -> Text commonFromBcp47 (Lang "pt" _ "BR" _) = "brazil" -- Note: documentation says "brazilian" works too, but it doesn't seem to work -- on some systems. See #2953. diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index f8c895e3c..d9eeb3bfa 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Man Copyright : Copyright (C) 2007-2019 John MacFarlane @@ -12,10 +13,10 @@ Conversion of 'Pandoc' documents to roff man page format. -} -module Text.Pandoc.Writers.Man ( writeMan) where +module Text.Pandoc.Writers.Man ( writeMan ) where import Prelude import Control.Monad.State.Strict -import Data.List (intersperse, stripPrefix) +import Data.List (intersperse) import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -73,13 +74,13 @@ pandocToMan opts (Pandoc meta blocks) = do $ setFieldsFromTitle $ defField "has-tables" hasTables $ defField "hyphenate" True - $ defField "pandoc-version" (T.pack pandocVersion) metadata + $ defField "pandoc-version" pandocVersion metadata return $ render colwidth $ case writerTemplate opts of Nothing -> main Just tpl -> renderTemplate tpl context -escString :: WriterOptions -> String -> String +escString :: WriterOptions -> Text -> Text escString _ = escapeString AsciiOnly -- for better portability -- | Return man representation of notes. @@ -117,30 +118,30 @@ blockToMan opts (Para inlines) = do blockToMan opts (LineBlock lns) = blockToMan opts $ linesToPara lns blockToMan _ b@(RawBlock f str) - | f == Format "man" = return $ text str + | f == Format "man" = return $ literal str | otherwise = do report $ BlockNotRendered b return empty -blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" +blockToMan _ HorizontalRule = return $ literal ".PP" $$ literal " * * * * *" blockToMan opts (Header level _ inlines) = do contents <- inlineListToMan opts inlines let heading = case level of 1 -> ".SH " _ -> ".SS " - return $ nowrap $ text heading <> contents + return $ nowrap $ literal heading <> contents blockToMan opts (CodeBlock _ str) = return $ - text ".IP" $$ - text ".nf" $$ - text "\\f[C]" $$ - ((case str of - '.':_ -> text "\\&" - _ -> mempty) <> - text (escString opts str)) $$ - text "\\f[R]" $$ - text ".fi" + literal ".IP" $$ + literal ".nf" $$ + literal "\\f[C]" $$ + ((case T.uncons str of + Just ('.',_) -> literal "\\&" + _ -> mempty) <> + literal (escString opts str)) $$ + literal "\\f[R]" $$ + literal ".fi" blockToMan opts (BlockQuote blocks) = do contents <- blockListToMan opts blocks - return $ text ".RS" $$ contents $$ text ".RE" + return $ literal ".RS" $$ contents $$ literal ".RE" blockToMan opts (Table caption alignments widths headers rows) = let aligncode AlignLeft = "l" aligncode AlignRight = "r" @@ -151,24 +152,24 @@ blockToMan opts (Table caption alignments widths headers rows) = modify $ \st -> st{ stHasTables = True } let iwidths = if all (== 0) widths then repeat "" - else map (printf "w(%0.1fn)" . (70 *)) widths + else map (T.pack . printf "w(%0.1fn)" . (70 *)) widths -- 78n default width - 8n indent = 70n - let coldescriptions = text $ unwords - (zipWith (\align width -> aligncode align ++ width) - alignments iwidths) ++ "." + let coldescriptions = literal $ T.unwords + (zipWith (\align width -> aligncode align <> width) + alignments iwidths) <> "." colheadings <- mapM (blockListToMan opts) headers - let makeRow cols = text "T{" $$ - vcat (intersperse (text "T}@T{") cols) $$ - text "T}" + let makeRow cols = literal "T{" $$ + vcat (intersperse (literal "T}@T{") cols) $$ + literal "T}" let colheadings' = if all null headers then empty else makeRow colheadings $$ char '_' body <- mapM (\row -> do cols <- mapM (blockListToMan opts) row return $ makeRow cols) rows - return $ text ".PP" $$ caption' $$ - text ".TS" $$ text "tab(@);" $$ coldescriptions $$ - colheadings' $$ vcat body $$ text ".TE" + return $ literal ".PP" $$ caption' $$ + literal ".TS" $$ literal "tab(@);" $$ coldescriptions $$ + colheadings' $$ vcat body $$ literal ".TE" blockToMan opts (BulletList items) = do contents <- mapM (bulletListItemToMan opts) items @@ -176,7 +177,7 @@ blockToMan opts (BulletList items) = do blockToMan opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs let indent = 1 + - maximum (map length markers) + maximum (map T.length markers) contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ zip markers items return (vcat contents) @@ -192,20 +193,20 @@ bulletListItemToMan opts (Para first:rest) = bulletListItemToMan opts (Plain first:rest) = do first' <- blockToMan opts (Plain first) rest' <- blockListToMan opts rest - let first'' = text ".IP \\[bu] 2" $$ first' + let first'' = literal ".IP \\[bu] 2" $$ first' let rest'' = if null rest then empty - else text ".RS 2" $$ rest' $$ text ".RE" + else literal ".RS 2" $$ rest' $$ literal ".RE" return (first'' $$ rest'') bulletListItemToMan opts (first:rest) = do first' <- blockToMan opts first rest' <- blockListToMan opts rest - return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" + return $ literal "\\[bu] .RS 2" $$ first' $$ rest' $$ literal ".RE" -- | Convert ordered list item (a list of blocks) to man. orderedListItemToMan :: PandocMonad m => WriterOptions -- ^ options - -> String -- ^ order marker for list item + -> Text -- ^ order marker for list item -> Int -- ^ number of spaces to indent -> [Block] -- ^ list item (list of blocks) -> StateT WriterState m (Doc Text) @@ -216,10 +217,10 @@ orderedListItemToMan opts num indent (first:rest) = do first' <- blockToMan opts first rest' <- blockListToMan opts rest let num' = printf ("%" ++ show (indent - 1) ++ "s") num - let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' + let first'' = literal (".IP \"" <> T.pack num' <> "\" " <> tshow indent) $$ first' let rest'' = if null rest then empty - else text ".RS 4" $$ rest' $$ text ".RE" + else literal ".RS 4" $$ rest' $$ literal ".RE" return $ first'' $$ rest'' -- | Convert definition list item (label, list of blocks) to man. @@ -245,9 +246,9 @@ definitionListItemToMan opts (label, defs) = do return $ first' $$ if null xs then empty - else text ".RS" $$ rest' $$ text ".RE" + else literal ".RS" $$ rest' $$ literal ".RE" [] -> return empty - return $ text ".TP" $$ nowrap labelText $$ contents + return $ literal ".TP" $$ nowrap labelText $$ contents makeCodeBold :: [Inline] -> [Inline] makeCodeBold = walk go @@ -275,7 +276,7 @@ inlineToMan opts (Strong lst) = withFontFeature 'B' (inlineListToMan opts lst) inlineToMan opts (Strikeout lst) = do contents <- inlineListToMan opts lst - return $ text "[STRIKEOUT:" <> contents <> char ']' + return $ literal "[STRIKEOUT:" <> contents <> char ']' inlineToMan opts (Superscript lst) = do contents <- inlineListToMan opts lst return $ char '^' <> contents <> char '^' @@ -288,48 +289,48 @@ inlineToMan opts (Quoted SingleQuote lst) = do return $ char '`' <> contents <> char '\'' inlineToMan opts (Quoted DoubleQuote lst) = do contents <- inlineListToMan opts lst - return $ text "\\[lq]" <> contents <> text "\\[rq]" + return $ literal "\\[lq]" <> contents <> literal "\\[rq]" inlineToMan opts (Cite _ lst) = inlineListToMan opts lst inlineToMan opts (Code _ str) = - withFontFeature 'C' (return (text $ escString opts str)) -inlineToMan opts (Str str@('.':_)) = - return $ afterBreak "\\&" <> text (escString opts str) -inlineToMan opts (Str str) = return $ text $ escString opts str + withFontFeature 'C' (return (literal $ escString opts str)) +inlineToMan opts (Str str@(T.uncons -> Just ('.',_))) = + return $ afterBreak "\\&" <> literal (escString opts str) +inlineToMan opts (Str str) = return $ literal $ escString opts str inlineToMan opts (Math InlineMath str) = lift (texMathToInlines InlineMath str) >>= inlineListToMan opts inlineToMan opts (Math DisplayMath str) = do contents <- lift (texMathToInlines DisplayMath str) >>= inlineListToMan opts - return $ cr <> text ".RS" $$ contents $$ text ".RE" + return $ cr <> literal ".RS" $$ contents $$ literal ".RE" inlineToMan _ il@(RawInline f str) - | f == Format "man" = return $ text str + | f == Format "man" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty inlineToMan _ LineBreak = return $ - cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr + cr <> literal ".PD 0" $$ literal ".P" $$ literal ".PD" <> cr inlineToMan _ SoftBreak = return space inlineToMan _ Space = return space inlineToMan opts (Link _ txt (src, _)) | not (isURI src) = inlineListToMan opts txt -- skip relative links | otherwise = do linktext <- inlineListToMan opts txt - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) return $ case txt of [Str s] | escapeURI s == srcSuffix -> - char '<' <> text srcSuffix <> char '>' - _ -> linktext <> text " (" <> text src <> char ')' + char '<' <> literal srcSuffix <> char '>' + _ -> linktext <> literal " (" <> literal src <> char ')' inlineToMan opts (Image attr alternate (source, tit)) = do let txt = if null alternate || (alternate == [Str ""]) || (alternate == [Str source]) -- to prevent autolinks then [Str "image"] else alternate linkPart <- inlineToMan opts (Link attr txt (source, tit)) - return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' + return $ char '[' <> literal "IMAGE: " <> linkPart <> char ']' inlineToMan _ (Note contents) = do -- add to notes in state modify $ \st -> st{ stNotes = contents : stNotes st } notes <- gets stNotes - let ref = show (length notes) - return $ char '[' <> text ref <> char ']' + let ref = tshow (length notes) + return $ char '[' <> literal ref <> char ']' diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 06b6da3a5..0d89c0004 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,8 +1,9 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Markdown Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -22,8 +23,7 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Char (isSpace, isAlphaNum) import Data.Default -import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose, - isPrefixOf) +import Data.List (find, intersperse, sortBy, transpose) import qualified Data.Map as M import Data.Maybe (fromMaybe, catMaybes) import Data.Ord (comparing) @@ -48,7 +48,7 @@ import Text.Pandoc.Writers.Math (texMathToInlines) import Text.Pandoc.XML (toHtml5Entities) type Notes = [[Block]] -type Ref = (String, Target, Attr) +type Ref = (Text, Target, Attr) type Refs = [Ref] type MD m = ReaderT WriterEnv (StateT WriterState m) @@ -77,7 +77,7 @@ data WriterState = WriterState { stNotes :: Notes , stKeys :: M.Map Key (M.Map (Target, Attr) Int) , stLastIdx :: Int - , stIds :: Set.Set String + , stIds :: Set.Set Text , stNoteNum :: Int } @@ -246,11 +246,11 @@ keyToMarkdown :: PandocMonad m -> Ref -> MD m (Doc Text) keyToMarkdown opts (label', (src, tit), attr) = do - let tit' = if null tit + let tit' = if T.null tit then empty - else space <> "\"" <> text tit <> "\"" + else space <> "\"" <> literal tit <> "\"" return $ nest 2 $ hang 2 - ("[" <> text label' <> "]:" <> space) (text src <> tit') + ("[" <> literal label' <> "]:" <> space) (literal src <> tit') <+> linkAttributes opts attr -- | Return markdown representation of notes. @@ -265,24 +265,24 @@ notesToMarkdown opts notes = do noteToMarkdown :: PandocMonad m => WriterOptions -> Int -> [Block] -> MD m (Doc Text) noteToMarkdown opts num blocks = do contents <- blockListToMarkdown opts blocks - let num' = text $ writerIdentifierPrefix opts ++ show num + let num' = literal $ writerIdentifierPrefix opts <> tshow num let marker = if isEnabled Ext_footnotes opts - then text "[^" <> num' <> text "]:" - else text "[" <> num' <> text "]" + then literal "[^" <> num' <> literal "]:" + else literal "[" <> num' <> literal "]" let markerSize = 4 + offset num' let spacer = case writerTabStop opts - markerSize of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " + n | n > 0 -> literal $ T.replicate n " " + _ -> literal " " return $ if isEnabled Ext_footnotes opts then hang (writerTabStop opts) (marker <> spacer) contents else marker <> spacer <> contents -- | Escape special characters for Markdown. -escapeString :: WriterOptions -> String -> String -escapeString opts = +escapeText :: WriterOptions -> Text -> Text +escapeText opts = (if writerPreferAscii opts - then T.unpack . toHtml5Entities . T.pack - else id) . go + then toHtml5Entities + else id) . T.pack . go . T.unpack where go [] = [] go (c:cs) = @@ -321,12 +321,12 @@ escapeString opts = attrsToMarkdown :: Attr -> Doc Text attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] where attribId = case attribs of - ([],_,_) -> empty + ("",_,_) -> empty (i,_,_) -> "#" <> escAttr i attribClasses = case attribs of (_,[],_) -> empty (_,cs,_) -> hsep $ - map (escAttr . ('.':)) + map (escAttr . ("."<>)) cs attribKeys = case attribs of (_,_,[]) -> empty @@ -334,10 +334,10 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] map (\(k,v) -> escAttr k <> "=\"" <> escAttr v <> "\"") ks - escAttr = mconcat . map escAttrChar - escAttrChar '"' = text "\\\"" - escAttrChar '\\' = text "\\\\" - escAttrChar c = text [c] + escAttr = mconcat . map escAttrChar . T.unpack + escAttrChar '"' = literal "\\\"" + escAttrChar '\\' = literal "\\\\" + escAttrChar c = literal $ T.singleton c linkAttributes :: WriterOptions -> Attr -> Doc Text linkAttributes opts attr = @@ -346,7 +346,7 @@ linkAttributes opts attr = else empty -- | Ordered list start parser for use in Para below. -olMarker :: Parser [Char] ParserState Char +olMarker :: Parser Text ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && @@ -355,9 +355,9 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker else spaceChar -- | True if string begins with an ordered list marker -beginsWithOrderedListMarker :: String -> Bool +beginsWithOrderedListMarker :: Text -> Bool beginsWithOrderedListMarker str = - case runParser olMarker defaultParserState "para start" (take 10 str) of + case runParser olMarker defaultParserState "para start" (T.take 10 str) of Left _ -> False Right _ -> True @@ -403,9 +403,9 @@ blockToMarkdown' opts (Div attrs ils) = do case () of _ | isEnabled Ext_fenced_divs opts && attrs /= nullAttr -> - nowrap (text ":::" <+> attrsToMarkdown attrs) $$ + nowrap (literal ":::" <+> attrsToMarkdown attrs) $$ chomp contents $$ - text ":::" <> blankline + literal ":::" <> blankline | isEnabled Ext_native_divs opts || (isEnabled Ext_raw_html opts && isEnabled Ext_markdown_in_html_blocks opts) -> @@ -425,38 +425,36 @@ blockToMarkdown' opts (Plain inlines) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - let rendered = T.unpack $ render colwidth contents - let escapeMarker (x:xs) | x `elem` (".()" :: String) = '\\':x:xs - | otherwise = x : escapeMarker xs - escapeMarker [] = [] + let rendered = render colwidth contents + let escapeMarker = T.concatMap $ \x -> if x `elemText` ".()" + then T.pack ['\\', x] + else T.singleton x + let spaceOrNothing = (not isPlain &&) . maybe True (isSpace . fst) . T.uncons let contents' = - case rendered of - '%':_ | isEnabled Ext_pandoc_title_block opts && - isEnabled Ext_all_symbols_escapable opts -> - "\\" <> contents - '+':s:_ | not isPlain && isSpace s -> "\\" <> contents - '*':s:_ | not isPlain && isSpace s -> "\\" <> contents - '-':s:_ | not isPlain && isSpace s -> "\\" <> contents - '+':[] | not isPlain -> "\\" <> contents - '*':[] | not isPlain -> "\\" <> contents - '-':[] | not isPlain -> "\\" <> contents - '|':_ | (isEnabled Ext_line_blocks opts || - isEnabled Ext_pipe_tables opts) - && isEnabled Ext_all_symbols_escapable opts - -> "\\" <> contents - _ | not isPlain && beginsWithOrderedListMarker rendered - && isEnabled Ext_all_symbols_escapable opts - -> text $ escapeMarker rendered - | otherwise -> contents + case T.uncons rendered of + Just ('%', _) + | isEnabled Ext_pandoc_title_block opts && + isEnabled Ext_all_symbols_escapable opts -> "\\" <> contents + Just ('+', s) | spaceOrNothing s -> "\\" <> contents + Just ('*', s) | spaceOrNothing s -> "\\" <> contents + Just ('-', s) | spaceOrNothing s -> "\\" <> contents + Just ('|', _) | (isEnabled Ext_line_blocks opts || + isEnabled Ext_pipe_tables opts) + && isEnabled Ext_all_symbols_escapable opts + -> "\\" <> contents + _ | not isPlain && beginsWithOrderedListMarker rendered + && isEnabled Ext_all_symbols_escapable opts + -> literal $ escapeMarker rendered + | otherwise -> contents return $ contents' <> cr -- title beginning with fig: indicates figure -blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) +blockToMarkdown' opts (Para [Image attr alt (src,tgt@(T.stripPrefix "fig:" -> Just tit))]) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - ((<> blankline) . text . T.unpack . T.strip) <$> + ((<> blankline) . literal . T.strip) <$> writeHtml5String opts{ writerTemplate = Nothing } - (Pandoc nullMeta [Para [Image attr alt (src,"fig:" ++ tit)]]) + (Pandoc nullMeta [Para [Image attr alt (src,tgt)]]) | otherwise = blockToMarkdown opts (Para [Image attr alt (src,tit)]) blockToMarkdown' opts (Para inlines) = (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) @@ -464,39 +462,39 @@ blockToMarkdown' opts (LineBlock lns) = if isEnabled Ext_line_blocks opts then do mdLines <- mapM (inlineListToMarkdown opts) lns - return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline + return $ (vcat $ map (hang 2 (literal "| ")) mdLines) <> blankline else blockToMarkdown opts $ linesToPara lns blockToMarkdown' opts b@(RawBlock f str) = do plain <- asks envPlain let Format fmt = f let rawAttribBlock = return $ - (text "```{=" <> text fmt <> "}") $$ - text str $$ - (text "```" <> text "\n") + (literal "```{=" <> literal fmt <> "}") $$ + literal str $$ + (literal "```" <> literal "\n") let renderEmpty = mempty <$ report (BlockNotRendered b) case () of _ | plain -> renderEmpty | isEnabled Ext_raw_attribute opts -> rawAttribBlock | f `elem` ["markdown", "markdown_github", "markdown_phpextra", "markdown_mmd", "markdown_strict"] -> - return $ text str <> text "\n" + return $ literal str <> literal "\n" | f `elem` ["html", "html5", "html4"] -> case () of _ | isEnabled Ext_markdown_attribute opts -> return $ - text (addMarkdownAttribute str) <> text "\n" + literal (addMarkdownAttribute str) <> literal "\n" | isEnabled Ext_raw_html opts -> return $ - text str <> text "\n" + literal str <> literal "\n" | isEnabled Ext_raw_attribute opts -> rawAttribBlock | otherwise -> renderEmpty | f `elem` ["latex", "tex"] -> case () of _ | isEnabled Ext_raw_tex opts -> return $ - text str <> text "\n" + literal str <> literal "\n" | isEnabled Ext_raw_attribute opts -> rawAttribBlock | otherwise -> renderEmpty | otherwise -> renderEmpty blockToMarkdown' opts HorizontalRule = do - return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline + return $ blankline <> literal (T.replicate (writerColumns opts) "-") <> blankline blockToMarkdown' opts (Header level attr inlines) = do -- first, if we're putting references at the end of a section, we -- put them here. @@ -516,7 +514,7 @@ blockToMarkdown' opts (Header level attr inlines) = do (id',[],[]) | isEnabled Ext_auto_identifiers opts && id' == autoId -> empty (id',_,_) | isEnabled Ext_mmd_header_identifiers opts -> - space <> brackets (text id') + space <> brackets (literal id') _ | isEnabled Ext_header_attributes opts -> space <> attrsToMarkdown attr | otherwise -> empty @@ -533,44 +531,44 @@ blockToMarkdown' opts (Header level attr inlines) = do then blanklines 3 <> contents <> blanklines 2 else contents <> blankline | setext -> - contents <> attr' <> cr <> text (replicate (offset contents) '=') <> + contents <> attr' <> cr <> literal (T.replicate (offset contents) "=") <> blankline 2 | plain -> if isEnabled Ext_gutenberg opts then blanklines 2 <> contents <> blankline else contents <> blankline | setext -> - contents <> attr' <> cr <> text (replicate (offset contents) '-') <> + contents <> attr' <> cr <> literal (T.replicate (offset contents) "-") <> blankline -- ghc interprets '#' characters in column 1 as linenum specifiers. _ | plain || isEnabled Ext_literate_haskell opts -> contents <> blankline - _ -> text (replicate level '#') <> space <> contents <> attr' <> blankline + _ -> literal (T.replicate level "#") <> space <> contents <> attr' <> blankline return $ refs <> hdr blockToMarkdown' opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes && "literate" `elem` classes && isEnabled Ext_literate_haskell opts = - return $ prefixed "> " (text str) <> blankline + return $ prefixed "> " (literal str) <> blankline blockToMarkdown' opts (CodeBlock attribs str) = return $ case attribs == nullAttr of False | isEnabled Ext_backtick_code_blocks opts -> - backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline + backticks <> attrs <> cr <> literal str <> cr <> backticks <> blankline | isEnabled Ext_fenced_code_blocks opts -> - tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline - _ -> nest (writerTabStop opts) (text str) <> blankline - where endline c = text $ case [length ln - | ln <- map trim (lines str) - , [c,c,c] `isPrefixOf` ln - , all (== c) ln] of - [] -> replicate 3 c - xs -> replicate (maximum xs + 1) c + tildes <> attrs <> cr <> literal str <> cr <> tildes <> blankline + _ -> nest (writerTabStop opts) (literal str) <> blankline + where endline c = literal $ case [T.length ln + | ln <- map trim (T.lines str) + , T.pack [c,c,c] `T.isPrefixOf` ln + , T.all (== c) ln] of + [] -> T.replicate 3 $ T.singleton c + xs -> T.replicate (maximum xs + 1) $ T.singleton c backticks = endline '`' tildes = endline '~' attrs = if isEnabled Ext_fenced_code_attributes opts then nowrap $ " " <> attrsToMarkdown attribs else case attribs of - (_,(cls:_),_) -> " " <> text cls + (_,(cls:_),_) -> " " <> literal cls _ -> empty blockToMarkdown' opts (BlockQuote blocks) = do plain <- asks envPlain @@ -635,9 +633,9 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do rows (id,) <$> pipeTable (all null headers) aligns' rawHeaders rawRows | isEnabled Ext_raw_html opts -> fmap (id,) $ - (text . T.unpack) <$> + literal <$> (writeHtml5String opts{ writerTemplate = Nothing } $ Pandoc nullMeta [t]) - | otherwise -> return $ (id, text "[TABLE]") + | otherwise -> return $ (id, literal "[TABLE]") return $ nst (tbl $$ caption'') $$ blankline blockToMarkdown' opts (BulletList items) = do contents <- inList $ mapM (bulletListItemToMarkdown opts) items @@ -648,8 +646,8 @@ blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim let attribs = (start', sty', delim') let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' + let markers' = map (\m -> if T.length m < 3 + then m <> T.replicate (3 - T.length m) " " else m) markers contents <- inList $ mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ @@ -662,7 +660,7 @@ blockToMarkdown' opts (DefinitionList items) = do inList :: Monad m => MD m a -> MD m a inList p = local (\env -> env {envInList = True}) p -addMarkdownAttribute :: String -> String +addMarkdownAttribute :: Text -> Text addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of (xs,(TagOpen t attrs:rest)) -> @@ -675,29 +673,29 @@ pipeTable :: PandocMonad m => Bool -> [Alignment] -> [Doc Text] -> [[Doc Text]] -> MD m (Doc Text) pipeTable headless aligns rawHeaders rawRows = do - let sp = text " " + let sp = literal " " let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty blockFor AlignRight x y = rblock (x + 2) (sp <> y) <> lblock 0 empty blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows) - let torow cs = nowrap $ text "|" <> - hcat (intersperse (text "|") $ + let torow cs = nowrap $ literal "|" <> + hcat (intersperse (literal "|") $ zipWith3 blockFor aligns widths (map chomp cs)) - <> text "|" - let toborder (a, w) = text $ case a of - AlignLeft -> ':':replicate (w + 1) '-' - AlignCenter -> ':':replicate w '-' ++ ":" - AlignRight -> replicate (w + 1) '-' ++ ":" - AlignDefault -> replicate (w + 2) '-' + <> literal "|" + let toborder (a, w) = literal $ case a of + AlignLeft -> ":" <> T.replicate (w + 1) "-" + AlignCenter -> ":" <> T.replicate w "-" <> ":" + AlignRight -> T.replicate (w + 1) "-" <> ":" + AlignDefault -> T.replicate (w + 2) "-" -- note: pipe tables can't completely lack a -- header; for a headerless table, we need a header of empty cells. -- see jgm/pandoc#1996. let header = if headless then torow (replicate (length aligns) empty) else torow rawHeaders - let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ - map toborder $ zip aligns widths) <> text "|" + let border = nowrap $ literal "|" <> hcat (intersperse (literal "|") $ + map toborder $ zip aligns widths) <> literal "|" let body = vcat $ map torow rawRows return $ header $$ border $$ body @@ -729,15 +727,15 @@ pandocTable opts multiline headless aligns widths rawHeaders rawRows = do let widthsInChars | isSimple = map numChars columns | otherwise = zipWith relWidth widths columns - let makeRow = hcat . intersperse (lblock 1 (text " ")) . + let makeRow = hcat . intersperse (lblock 1 (literal " ")) . (zipWith3 alignHeader aligns widthsInChars) let rows' = map makeRow rawRows let head' = makeRow rawHeaders - let underline = mconcat $ intersperse (text " ") $ - map (\width -> text (replicate width '-')) widthsInChars + let underline = mconcat $ intersperse (literal " ") $ + map (\width -> literal (T.replicate width "-")) widthsInChars let border = if multiline - then text (replicate (sum widthsInChars + - length widthsInChars - 1) '-') + then literal (T.replicate (sum widthsInChars + + length widthsInChars - 1) "-") else if headless then underline else empty @@ -767,8 +765,8 @@ bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m (D bulletListItemToMarkdown opts bs = do let exts = writerExtensions opts contents <- blockListToMarkdown opts $ taskListItemToAscii exts bs - let sps = replicate (writerTabStop opts - 2) ' ' - let start = text ('-' : ' ' : sps) + let sps = T.replicate (writerTabStop opts - 2) " " + let start = literal $ "- " <> sps -- remove trailing blank line if item ends with a tight list let contents' = if itemEndsWithTightList bs then chomp contents <> cr @@ -778,19 +776,19 @@ bulletListItemToMarkdown opts bs = do -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: PandocMonad m => WriterOptions -- ^ options - -> String -- ^ list item marker + -> Text -- ^ list item marker -> [Block] -- ^ list item (list of blocks) -> MD m (Doc Text) orderedListItemToMarkdown opts marker bs = do let exts = writerExtensions opts contents <- blockListToMarkdown opts $ taskListItemToAscii exts bs - let sps = case writerTabStop opts - length marker of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " + let sps = case writerTabStop opts - T.length marker of + n | n > 0 -> literal $ T.replicate n " " + _ -> literal " " let ind = if isEnabled Ext_four_space_rule opts then writerTabStop opts - else max (writerTabStop opts) (length marker + 1) - let start = text marker <> sps + else max (writerTabStop opts) (T.length marker + 1) + let start = literal marker <> sps -- remove trailing blank line if item ends with a tight list let contents' = if itemEndsWithTightList bs then chomp contents <> cr @@ -811,8 +809,8 @@ definitionListItemToMarkdown opts (label, defs) = do isPlain <- asks envPlain let leader = if isPlain then " " else ": " let sps = case writerTabStop opts - 3 of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " + n | n > 0 -> literal $ T.replicate n " " + _ -> literal " " let isTight = case defs of ((Plain _ : _): _) -> True _ -> False @@ -828,7 +826,7 @@ definitionListItemToMarkdown opts (label, defs) = do return $ blankline <> nowrap labelText $$ (if isTight then empty else blankline) <> contents <> blankline else do - return $ nowrap (chomp labelText <> text " " <> cr) <> + return $ nowrap (chomp labelText <> literal " " <> cr) <> vsep (map vsep defs') <> blankline -- | Convert list of Pandoc block elements to markdown. @@ -860,12 +858,12 @@ blockListToMarkdown opts blocks = do fixBlocks (Plain ils : bs) = Para ils : fixBlocks bs fixBlocks (r@(RawBlock f raw) : b : bs) - | not (null raw) - , last raw /= '\n' = + | not (T.null raw) + , T.last raw /= '\n' = case b of Plain{} -> r : fixBlocks (b:bs) RawBlock{} -> r : fixBlocks (b:bs) - _ -> RawBlock f (raw ++ "\n") : fixBlocks (b:bs) -- #4629 + _ -> RawBlock f (raw <> "\n") : fixBlocks (b:bs) -- #4629 fixBlocks (x : xs) = x : fixBlocks xs fixBlocks [] = [] isListBlock (BulletList _) = True @@ -880,10 +878,10 @@ blockListToMarkdown opts blocks = do mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . mconcat getKey :: Doc Text -> Key -getKey = toKey . T.unpack . render Nothing +getKey = toKey . render Nothing -findUsableIndex :: [String] -> Int -> Int -findUsableIndex lbls i = if (show i) `elem` lbls +findUsableIndex :: [Text] -> Int -> Int +findUsableIndex lbls i = if (tshow i) `elem` lbls then findUsableIndex lbls (i + 1) else i @@ -897,7 +895,7 @@ getNextIndex = do -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. -getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m String +getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text getReference attr label target = do refs <- gets stRefs case find (\(_,t,a) -> t == target && a == attr) refs of @@ -910,9 +908,9 @@ getReference attr label target = do then do i <- getNextIndex modify $ \s -> s{ stLastIdx = i } - return (show i, i) + return (tshow i, i) else - return (T.unpack (render Nothing label), 0) + return (render Nothing label, 0) modify (\s -> s{ stRefs = (lab', target, attr) : refs, stKeys = M.insert (getKey label) @@ -923,10 +921,10 @@ getReference attr label target = do Just km -> do -- we have refs with this label case M.lookup (target, attr) km of Just i -> do - let lab' = T.unpack $ render Nothing $ + let lab' = render Nothing $ label <> if i == 0 then mempty - else text (show i) + else literal (tshow i) -- make sure it's in stRefs; it may be -- a duplicate that was printed in a previous -- block: @@ -937,7 +935,7 @@ getReference attr label target = do Nothing -> do -- but this one is to a new target i <- getNextIndex modify $ \s -> s{ stLastIdx = i } - let lab' = show i + let lab' = tshow i modify (\s -> s{ stRefs = (lab', target, attr) : refs, stKeys = M.insert (getKey label) @@ -955,28 +953,28 @@ inlineListToMarkdown opts lst = do (Link _ _ _) -> case is of -- If a link is followed by another link, or '[', '(' or ':' -- then we don't shortcut - (Link _ _ _):_ -> unshortcutable - Space:(Link _ _ _):_ -> unshortcutable - Space:(Str('[':_)):_ -> unshortcutable - Space:(RawInline _ ('[':_)):_ -> unshortcutable - Space:(Cite _ _):_ -> unshortcutable - SoftBreak:(Link _ _ _):_ -> unshortcutable - SoftBreak:(Str('[':_)):_ -> unshortcutable - SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable - SoftBreak:(Cite _ _):_ -> unshortcutable - LineBreak:(Link _ _ _):_ -> unshortcutable - LineBreak:(Str('[':_)):_ -> unshortcutable - LineBreak:(RawInline _ ('[':_)):_ -> unshortcutable - LineBreak:(Cite _ _):_ -> unshortcutable - (Cite _ _):_ -> unshortcutable - Str ('[':_):_ -> unshortcutable - Str ('(':_):_ -> unshortcutable - Str (':':_):_ -> unshortcutable - (RawInline _ ('[':_)):_ -> unshortcutable - (RawInline _ ('(':_)):_ -> unshortcutable - (RawInline _ (':':_)):_ -> unshortcutable - (RawInline _ (' ':'[':_)):_ -> unshortcutable - _ -> shortcutable + (Link _ _ _):_ -> unshortcutable + Space:(Link _ _ _):_ -> unshortcutable + Space:(Str(thead -> Just '[')):_ -> unshortcutable + Space:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + SoftBreak:(Link _ _ _):_ -> unshortcutable + SoftBreak:(Str(thead -> Just '[')):_ -> unshortcutable + SoftBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + SoftBreak:(Cite _ _):_ -> unshortcutable + LineBreak:(Link _ _ _):_ -> unshortcutable + LineBreak:(Str(thead -> Just '[')):_ -> unshortcutable + LineBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + LineBreak:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str (thead -> Just '['):_ -> unshortcutable + Str (thead -> Just '('):_ -> unshortcutable + Str (thead -> Just ':'):_ -> unshortcutable + (RawInline _ (thead -> Just '[')):_ -> unshortcutable + (RawInline _ (thead -> Just '(')):_ -> unshortcutable + (RawInline _ (thead -> Just ':')):_ -> unshortcutable + (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable + _ -> shortcutable _ -> shortcutable where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) unshortcutable = do @@ -984,6 +982,7 @@ inlineListToMarkdown opts lst = do (\env -> env { envRefShortcutable = False }) (inlineToMarkdown opts i) fmap (iMark <>) (go is) + thead = fmap fst . T.uncons isSp :: Inline -> Bool isSp Space = True @@ -992,22 +991,22 @@ isSp _ = False avoidBadWrapsInList :: [Inline] -> [Inline] avoidBadWrapsInList [] = [] -avoidBadWrapsInList (s:Str ('>':cs):xs) | isSp s = - Str (' ':'>':cs) : avoidBadWrapsInList xs -avoidBadWrapsInList (s:Str [c]:[]) - | isSp s && c `elem` ['-','*','+'] = Str [' ', c] : [] -avoidBadWrapsInList (s:Str [c]:Space:xs) - | isSp s && c `elem` ['-','*','+'] = - Str [' ', c] : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s = + Str (" >" <> cs) : avoidBadWrapsInList xs +avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):[]) + | T.null cs && isSp s && c `elem` ['-','*','+'] = Str (T.pack [' ', c]) : [] +avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs) + | T.null cs && isSp s && c `elem` ['-','*','+'] = + Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs avoidBadWrapsInList (s:Str cs:Space:xs) | isSp s && isOrderedListMarker cs = - Str (' ':cs) : Space : avoidBadWrapsInList xs + Str (" " <> cs) : Space : avoidBadWrapsInList xs avoidBadWrapsInList (s:Str cs:[]) - | isSp s && isOrderedListMarker cs = Str (' ':cs) : [] + | isSp s && isOrderedListMarker cs = Str (" " <> cs) : [] avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs -isOrderedListMarker :: String -> Bool -isOrderedListMarker xs = not (null xs) && (last xs `elem` ['.',')']) && +isOrderedListMarker :: Text -> Bool +isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) && isRight (runParser (anyOrderedListMarker >> eof) defaultParserState "" xs) @@ -1020,7 +1019,7 @@ inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text) inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = do case lookup "data-emoji" kvs of Just emojiname | isEnabled Ext_emoji opts -> - return $ ":" <> text emojiname <> ":" + return $ ":" <> literal emojiname <> ":" _ -> inlineToMarkdown opts (Str s) inlineToMarkdown opts (Span attrs ils) = do plain <- asks envPlain @@ -1035,7 +1034,7 @@ inlineToMarkdown opts (Span attrs ils) = do in "[" <> contents <> "]" <> attrs' | isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts -> - tagWithAttrs "span" attrs <> contents <> text "</span>" + tagWithAttrs "span" attrs <> contents <> literal "</span>" | otherwise -> contents inlineToMarkdown _ (Emph []) = return empty inlineToMarkdown opts (Emph lst) = do @@ -1074,10 +1073,10 @@ inlineToMarkdown opts (Superscript lst) = else if isEnabled Ext_raw_html opts then "<sup>" <> contents <> "</sup>" else - let rendered = T.unpack $ render Nothing contents - in case mapM toSuperscript rendered of - Just r -> text r - Nothing -> text $ "^(" ++ rendered ++ ")" + let rendered = render Nothing contents + in case mapM toSuperscript (T.unpack rendered) of + Just r -> literal $ T.pack r + Nothing -> literal $ "^(" <> rendered <> ")" inlineToMarkdown _ (Subscript []) = return empty inlineToMarkdown opts (Subscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do @@ -1087,10 +1086,10 @@ inlineToMarkdown opts (Subscript lst) = else if isEnabled Ext_raw_html opts then "<sub>" <> contents <> "</sub>" else - let rendered = T.unpack $ render Nothing contents - in case mapM toSubscript rendered of - Just r -> text r - Nothing -> text $ "_(" ++ rendered ++ ")" + let rendered = render Nothing contents + in case mapM toSubscript (T.unpack rendered) of + Just r -> literal $ T.pack r + Nothing -> literal $ "_(" <> rendered <> ")" inlineToMarkdown opts (SmallCaps lst) = do plain <- asks envPlain if not plain && @@ -1114,19 +1113,19 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do then "“" <> contents <> "”" else "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = do - let tickGroups = filter (\s -> '`' `elem` s) $ group str + let tickGroups = filter (T.any (== '`')) $ T.group str let longest = if null tickGroups then 0 - else maximum $ map length tickGroups - let marker = replicate (longest + 1) '`' + else maximum $ map T.length tickGroups + let marker = T.replicate (longest + 1) "`" let spacer = if (longest == 0) then "" else " " let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr then attrsToMarkdown attr else empty plain <- asks envPlain if plain - then return $ text str - else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs + then return $ literal str + else return $ literal (marker <> spacer <> str <> spacer <> marker) <> attrs inlineToMarkdown opts (Str str) = do isPlain <- asks envPlain let str' = (if isEnabled Ext_smart opts @@ -1134,18 +1133,18 @@ inlineToMarkdown opts (Str str) = do else id) $ if isPlain then str - else escapeString opts str - return $ text str' + else escapeText opts str + return $ literal str' inlineToMarkdown opts (Math InlineMath str) = case writerHTMLMathMethod opts of WebTeX url -> inlineToMarkdown opts - (Image nullAttr [Str str] (url ++ urlEncode str, str)) + (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$" <> text str <> "$" + return $ "$" <> literal str <> "$" | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\(" <> text str <> "\\)" + return $ "\\(" <> literal str <> "\\)" | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\(" <> text str <> "\\\\)" + return $ "\\\\(" <> literal str <> "\\\\)" | otherwise -> do plain <- asks envPlain texMathToInlines InlineMath str >>= @@ -1155,40 +1154,40 @@ inlineToMarkdown opts (Math DisplayMath str) = case writerHTMLMathMethod opts of WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` inlineToMarkdown opts (Image nullAttr [Str str] - (url ++ urlEncode str, str)) + (url <> T.pack (urlEncode $ T.unpack str), str)) _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$$" <> text str <> "$$" + return $ "$$" <> literal str <> "$$" | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\[" <> text str <> "\\]" + return $ "\\[" <> literal str <> "\\]" | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\[" <> text str <> "\\\\]" + return $ "\\\\[" <> literal str <> "\\\\]" | otherwise -> (\x -> cr <> x <> cr) `fmap` (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) inlineToMarkdown opts il@(RawInline f str) = do - let tickGroups = filter (\s -> '`' `elem` s) $ group str + let tickGroups = filter (T.any (== '`')) $ T.group str let numticks = if null tickGroups then 1 - else 1 + maximum (map length tickGroups) + else 1 + maximum (map T.length tickGroups) plain <- asks envPlain let Format fmt = f let rawAttribInline = return $ - text (replicate numticks '`') <> text str <> - text (replicate numticks '`') <> text "{=" <> text fmt <> text "}" + literal (T.replicate numticks "`") <> literal str <> + literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}" let renderEmpty = mempty <$ report (InlineNotRendered il) case () of _ | plain -> renderEmpty | f `elem` ["markdown", "markdown_github", "markdown_phpextra", "markdown_mmd", "markdown_strict"] -> - return $ text str + return $ literal str | isEnabled Ext_raw_attribute opts -> rawAttribInline | f `elem` ["html", "html5", "html4"] -> case () of - _ | isEnabled Ext_raw_html opts -> return $ text str + _ | isEnabled Ext_raw_html opts -> return $ literal str | isEnabled Ext_raw_attribute opts -> rawAttribInline | otherwise -> renderEmpty | f `elem` ["latex", "tex"] -> case () of - _ | isEnabled Ext_raw_tex opts -> return $ text str + _ | isEnabled Ext_raw_tex opts -> return $ literal str | isEnabled Ext_raw_attribute opts -> rawAttribInline | otherwise -> renderEmpty | otherwise -> renderEmpty @@ -1220,12 +1219,12 @@ inlineToMarkdown opts (Cite (c:cs) lst) rest <- mapM convertOne cs let inbr = suffs <+> joincits rest br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ text ("@" ++ citationId c) <+> br + return $ literal ("@" <> citationId c) <+> br else do cits <- mapM convertOne (c:cs) - return $ text "[" <> joincits cits <> text "]" + return $ literal "[" <> joincits cits <> literal "]" where - joincits = hcat . intersperse (text "; ") . filter (not . isEmpty) + joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty) convertOne Citation { citationId = k , citationPrefix = pinlines , citationSuffix = sinlines @@ -1233,9 +1232,9 @@ inlineToMarkdown opts (Cite (c:cs) lst) = do pdoc <- inlineListToMarkdown opts pinlines sdoc <- inlineListToMarkdown opts sinlines - let k' = text (modekey m ++ "@" ++ k) + let k' = literal (modekey m <> "@" <> k) r = case sinlines of - Str (y:_):_ | y `elem` (",;]@" :: String) -> k' <> sdoc + Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc _ -> k' <+> sdoc return $ pdoc <+> r modekey SuppressAuthor = "-" @@ -1244,15 +1243,15 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . T.unpack . T.strip) <$> + (literal . T.strip) <$> writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [lnk]]) | otherwise = do plain <- asks envPlain linktext <- inlineListToMarkdown opts txt - let linktitle = if null tit + let linktitle = if T.null tit then empty - else text $ " \"" ++ tit ++ "\"" - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) + else literal $ " \"" <> tit <> "\"" + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True @@ -1262,12 +1261,12 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) let useShortcutRefLinks = shortcutable && isEnabled Ext_shortcut_reference_links opts reftext <- if useRefLinks - then text <$> getReference attr linktext (src, tit) + then literal <$> getReference attr linktext (src, tit) else return mempty return $ if useAuto then if plain - then text srcSuffix - else "<" <> text srcSuffix <> ">" + then literal srcSuffix + else "<" <> literal srcSuffix <> ">" else if useRefLinks then let first = "[" <> linktext <> "]" second = if getKey linktext == getKey reftext @@ -1279,13 +1278,13 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) else if plain then linktext else "[" <> linktext <> "](" <> - text src <> linktitle <> ")" <> + literal src <> linktitle <> ")" <> linkAttributes opts attr inlineToMarkdown opts img@(Image attr alternate (source, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . T.unpack . T.strip) <$> + (literal . T.strip) <$> writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]]) | otherwise = do plain <- asks envPlain @@ -1300,7 +1299,7 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) inlineToMarkdown opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get - let ref = text $ writerIdentifierPrefix opts ++ show (stNoteNum st + (length $ stNotes st) - 1) + let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + (length $ stNotes st) - 1) if isEnabled Ext_footnotes opts then return $ "[^" <> ref <> "]" else return $ "[" <> ref <> "]" diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs index 3905a3abc..feb4b6dea 100644 --- a/src/Text/Pandoc/Writers/Math.hs +++ b/src/Text/Pandoc/Writers/Math.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Writers.Math ( texMathToInlines , convertMath @@ -8,6 +9,7 @@ module Text.Pandoc.Writers.Math where import Prelude +import qualified Data.Text as T import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Logging @@ -19,7 +21,7 @@ import Text.Pandoc.Options (defaultMathJaxURL, defaultKaTeXURL) -- can't be converted. texMathToInlines :: PandocMonad m => MathType - -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> T.Text -- ^ String to parse (assumes @'\n'@ line endings) -> m [Inline] texMathToInlines mt inp = do res <- convertMath writePandoc mt inp @@ -30,8 +32,8 @@ texMathToInlines mt inp = do return [mkFallback mt inp] Left il -> return [il] -mkFallback :: MathType -> String -> Inline -mkFallback mt str = Str (delim ++ str ++ delim) +mkFallback :: MathType -> T.Text -> Inline +mkFallback mt str = Str (delim <> str <> delim) where delim = case mt of DisplayMath -> "$$" InlineMath -> "$" @@ -40,7 +42,7 @@ mkFallback mt str = Str (delim ++ str ++ delim) -- issuing a warning and producing a fallback (a raw string) -- on failure. convertMath :: PandocMonad m - => (DisplayType -> [Exp] -> a) -> MathType -> String + => (DisplayType -> [Exp] -> a) -> MathType -> T.Text -> m (Either Inline a) convertMath writer mt str = case writer dt <$> readTeX str of diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index dc7b2575e..ad292200c 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.MediaWiki Copyright : Copyright (C) 2008-2019 John MacFarlane @@ -16,9 +18,10 @@ module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki, highlightingLangs ) where import Prelude import Control.Monad.Reader import Control.Monad.State.Strict -import Data.List (intercalate) +import Data.Maybe (fromMaybe) import qualified Data.Set as Set -import Data.Text (Text, pack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -37,7 +40,7 @@ data WriterState = WriterState { data WriterReader = WriterReader { options :: WriterOptions -- Writer options - , listLevel :: String -- String at beginning of list items, e.g. "**" + , listLevel :: [Char] -- String at beginning of list items, e.g. "**" , useTags :: Bool -- True if we should use HTML tags because we're in a complex list } @@ -55,15 +58,15 @@ pandocToMediaWiki :: PandocMonad m => Pandoc -> MediaWikiWriter m Text pandocToMediaWiki (Pandoc meta blocks) = do opts <- asks options metadata <- metaToContext opts - (fmap (literal . pack . trimr) . blockListToMediaWiki) - (fmap (literal . pack . trimr) . inlineListToMediaWiki) + (fmap (literal . trimr) . blockListToMediaWiki) + (fmap (literal . trimr) . inlineListToMediaWiki) meta body <- blockListToMediaWiki blocks notesExist <- gets stNotes let notes = if notesExist then "\n<references />" else "" - let main = pack $ body ++ notes + let main = body <> notes let context = defField "body" main $ defField "toc" (writerTableOfContents opts) metadata return $ @@ -72,43 +75,43 @@ pandocToMediaWiki (Pandoc meta blocks) = do Just tpl -> render Nothing $ renderTemplate tpl context -- | Escape special characters for MediaWiki. -escapeString :: String -> String -escapeString = escapeStringForXML +escapeText :: Text -> Text +escapeText = escapeStringForXML -- | Convert Pandoc block element to MediaWiki. blockToMediaWiki :: PandocMonad m => Block -- ^ Block element - -> MediaWikiWriter m String + -> MediaWikiWriter m Text blockToMediaWiki Null = return "" blockToMediaWiki (Div attrs bs) = do contents <- blockListToMediaWiki bs - return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++ - contents ++ "\n\n" ++ "</div>" + return $ render Nothing (tagWithAttrs "div" attrs) <> "\n\n" <> + contents <> "\n\n" <> "</div>" blockToMediaWiki (Plain inlines) = inlineListToMediaWiki inlines -- title beginning with fig: indicates that the image is a figure -blockToMediaWiki (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToMediaWiki (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do capt <- inlineListToMediaWiki txt img <- imageToMediaWiki attr - let opt = if null tit + let opt = if T.null tit then - if null capt + if T.null capt then "" - else "alt=" ++ capt - else "alt=" ++ tit - return $ "[[" ++ - intercalate "|" - (filter (not . null) ["File:" ++ src + else "alt=" <> capt + else "alt=" <> tit + return $ "[[" <> + T.intercalate "|" + (filter (not . T.null) ["File:" <> src , "thumb" , "none" , img , opt , capt - ]) ++ + ]) <> "]]\n" blockToMediaWiki (Para inlines) = do @@ -116,8 +119,8 @@ blockToMediaWiki (Para inlines) = do lev <- asks listLevel contents <- inlineListToMediaWiki inlines return $ if tags - then "<p>" ++ contents ++ "</p>" - else contents ++ if null lev then "\n" else "" + then "<p>" <> contents <> "</p>" + else contents <> if null lev then "\n" else "" blockToMediaWiki (LineBlock lns) = blockToMediaWiki $ linesToPara lns @@ -131,109 +134,109 @@ blockToMediaWiki HorizontalRule = return "\n-----\n" blockToMediaWiki (Header level _ inlines) = do contents <- inlineListToMediaWiki inlines - let eqs = replicate level '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" + let eqs = T.replicate level "=" + return $ eqs <> " " <> contents <> " " <> eqs <> "\n" blockToMediaWiki (CodeBlock (_,classes,_) str) = do let at = Set.fromList classes `Set.intersection` highlightingLangs return $ case Set.toList at of - [] -> "<pre" ++ (if null classes + [] -> "<pre" <> (if null classes then ">" - else " class=\"" ++ unwords classes ++ "\">") ++ - escapeString str ++ "</pre>" - (l:_) -> "<source lang=\"" ++ l ++ "\">" ++ str ++ "</source>" + else " class=\"" <> T.unwords classes <> "\">") <> + escapeText str <> "</pre>" + (l:_) -> "<source lang=\"" <> l <> "\">" <> str <> "</source>" -- note: no escape! even for <! blockToMediaWiki (BlockQuote blocks) = do contents <- blockListToMediaWiki blocks - return $ "<blockquote>" ++ contents ++ "</blockquote>" + return $ "<blockquote>" <> contents <> "</blockquote>" blockToMediaWiki (Table capt aligns widths headers rows') = do caption <- if null capt then return "" else do c <- inlineListToMediaWiki capt - return $ "|+ " ++ trimr c ++ "\n" + return $ "|+ " <> trimr c <> "\n" let headless = all null headers let allrows = if headless then rows' else headers:rows' - tableBody <- intercalate "|-\n" `fmap` + tableBody <- T.intercalate "|-\n" `fmap` mapM (tableRowToMediaWiki headless aligns widths) (zip [1..] allrows) - return $ "{|\n" ++ caption ++ tableBody ++ "|}\n" + return $ "{|\n" <> caption <> tableBody <> "|}\n" blockToMediaWiki x@(BulletList items) = do tags <- fmap (|| not (isSimpleList x)) $ asks useTags if tags then do contents <- local (\ s -> s { useTags = True }) $ mapM listItemToMediaWiki items - return $ "<ul>\n" ++ vcat contents ++ "</ul>\n" + return $ "<ul>\n" <> vcat contents <> "</ul>\n" else do lev <- asks listLevel - contents <- local (\s -> s { listLevel = listLevel s ++ "*" }) $ mapM listItemToMediaWiki items - return $ vcat contents ++ if null lev then "\n" else "" + contents <- local (\s -> s { listLevel = listLevel s <> "*" }) $ mapM listItemToMediaWiki items + return $ vcat contents <> if null lev then "\n" else "" blockToMediaWiki x@(OrderedList attribs items) = do tags <- fmap (|| not (isSimpleList x)) $ asks useTags if tags then do contents <- local (\s -> s { useTags = True }) $ mapM listItemToMediaWiki items - return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ "</ol>\n" + return $ "<ol" <> listAttribsToText attribs <> ">\n" <> vcat contents <> "</ol>\n" else do lev <- asks listLevel - contents <- local (\s -> s { listLevel = listLevel s ++ "#" }) $ mapM listItemToMediaWiki items - return $ vcat contents ++ if null lev then "\n" else "" + contents <- local (\s -> s { listLevel = listLevel s <> "#" }) $ mapM listItemToMediaWiki items + return $ vcat contents <> if null lev then "\n" else "" blockToMediaWiki x@(DefinitionList items) = do tags <- fmap (|| not (isSimpleList x)) $ asks useTags if tags then do contents <- local (\s -> s { useTags = True }) $ mapM definitionListItemToMediaWiki items - return $ "<dl>\n" ++ vcat contents ++ "</dl>\n" + return $ "<dl>\n" <> vcat contents <> "</dl>\n" else do lev <- asks listLevel - contents <- local (\s -> s { listLevel = listLevel s ++ ";" }) $ mapM definitionListItemToMediaWiki items - return $ vcat contents ++ if null lev then "\n" else "" + contents <- local (\s -> s { listLevel = listLevel s <> ";" }) $ mapM definitionListItemToMediaWiki items + return $ vcat contents <> if null lev then "\n" else "" -- Auxiliary functions for lists: -- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String -listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle +listAttribsToText :: ListAttributes -> Text +listAttribsToText (startnum, numstyle, _) = + let numstyle' = camelCaseToHyphenated $ tshow numstyle in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ + then " start=\"" <> tshow startnum <> "\"" + else "") <> (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + then " style=\"list-style-type: " <> numstyle' <> ";\"" else "") -- | Convert bullet or ordered list item (list of blocks) to MediaWiki. -listItemToMediaWiki :: PandocMonad m => [Block] -> MediaWikiWriter m String +listItemToMediaWiki :: PandocMonad m => [Block] -> MediaWikiWriter m Text listItemToMediaWiki items = do contents <- blockListToMediaWiki items tags <- asks useTags if tags - then return $ "<li>" ++ contents ++ "</li>" + then return $ "<li>" <> contents <> "</li>" else do marker <- asks listLevel - return $ marker ++ " " ++ contents + return $ T.pack marker <> " " <> contents -- | Convert definition list item (label, list of blocks) to MediaWiki. definitionListItemToMediaWiki :: PandocMonad m => ([Inline],[[Block]]) - -> MediaWikiWriter m String + -> MediaWikiWriter m Text definitionListItemToMediaWiki (label, items) = do labelText <- inlineListToMediaWiki label contents <- mapM blockListToMediaWiki items tags <- asks useTags if tags - then return $ "<dt>" ++ labelText ++ "</dt>\n" ++ - intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents) + then return $ "<dt>" <> labelText <> "</dt>\n" <> + T.intercalate "\n" (map (\d -> "<dd>" <> d <> "</dd>") contents) else do marker <- asks listLevel - return $ marker ++ " " ++ labelText ++ "\n" ++ - intercalate "\n" (map (\d -> init marker ++ ": " ++ d) contents) + return $ T.pack marker <> " " <> labelText <> "\n" <> + T.intercalate "\n" (map (\d -> T.pack (init marker) <> ": " <> d) contents) -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. isSimpleList :: Block -> Bool @@ -271,8 +274,8 @@ isPlainOrPara (Para _) = True isPlainOrPara _ = False -- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat = intercalate "\n" +vcat :: [Text] -> Text +vcat = T.intercalate "\n" -- Auxiliary functions for tables: @@ -281,119 +284,119 @@ tableRowToMediaWiki :: PandocMonad m -> [Alignment] -> [Double] -> (Int, [[Block]]) - -> MediaWikiWriter m String + -> MediaWikiWriter m Text tableRowToMediaWiki headless alignments widths (rownum, cells) = do cells' <- mapM (tableCellToMediaWiki headless rownum) $ zip3 alignments widths cells - return $ unlines cells' + return $ T.unlines cells' tableCellToMediaWiki :: PandocMonad m => Bool -> Int -> (Alignment, Double, [Block]) - -> MediaWikiWriter m String + -> MediaWikiWriter m Text tableCellToMediaWiki headless rownum (alignment, width, bs) = do contents <- blockListToMediaWiki bs let marker = if rownum == 1 && not headless then "!" else "|" - let percent w = show (truncate (100*w) :: Integer) ++ "%" - let attrs = ["align=" ++ show (alignmentToString alignment) | - alignment /= AlignDefault && alignment /= AlignLeft] ++ - ["width=\"" ++ percent width ++ "\"" | + let percent w = tshow (truncate (100*w) :: Integer) <> "%" + let attrs = ["align=" <> tshow (alignmentToText alignment) | + alignment /= AlignDefault && alignment /= AlignLeft] <> + ["width=\"" <> percent width <> "\"" | width /= 0.0 && rownum == 1] let attr = if null attrs then "" - else unwords attrs ++ "|" + else T.unwords attrs <> "|" let sep = case bs of [Plain _] -> " " [Para _] -> " " [] -> "" _ -> "\n" - return $ marker ++ attr ++ sep ++ trimr contents + return $ marker <> attr <> sep <> trimr contents -alignmentToString :: Alignment -> String -alignmentToString alignment = case alignment of +alignmentToText :: Alignment -> Text +alignmentToText alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" AlignDefault -> "left" -imageToMediaWiki :: PandocMonad m => Attr -> MediaWikiWriter m String +imageToMediaWiki :: PandocMonad m => Attr -> MediaWikiWriter m Text imageToMediaWiki attr = do opts <- gets stOptions let (_, cls, _) = attr toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing checkPct maybeDim = maybeDim - go (Just w) Nothing = w ++ "px" - go (Just w) (Just h) = w ++ "x" ++ h ++ "px" - go Nothing (Just h) = "x" ++ h ++ "px" + go (Just w) Nothing = w <> "px" + go (Just w) (Just h) = w <> "x" <> h <> "px" + go Nothing (Just h) = "x" <> h <> "px" go Nothing Nothing = "" dims = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) classes = if null cls then "" - else "class=" ++ unwords cls - return $ intercalate "|" $ filter (not . null) [dims, classes] + else "class=" <> T.unwords cls + return $ T.intercalate "|" $ filter (not . T.null) [dims, classes] -- | Convert list of Pandoc block elements to MediaWiki. blockListToMediaWiki :: PandocMonad m => [Block] -- ^ List of block elements - -> MediaWikiWriter m String + -> MediaWikiWriter m Text blockListToMediaWiki blocks = fmap vcat $ mapM blockToMediaWiki blocks -- | Convert list of Pandoc inline elements to MediaWiki. -inlineListToMediaWiki :: PandocMonad m => [Inline] -> MediaWikiWriter m String +inlineListToMediaWiki :: PandocMonad m => [Inline] -> MediaWikiWriter m Text inlineListToMediaWiki lst = - fmap concat $ mapM inlineToMediaWiki lst + fmap T.concat $ mapM inlineToMediaWiki lst -- | Convert Pandoc inline element to MediaWiki. -inlineToMediaWiki :: PandocMonad m => Inline -> MediaWikiWriter m String +inlineToMediaWiki :: PandocMonad m => Inline -> MediaWikiWriter m Text inlineToMediaWiki (Span attrs ils) = do contents <- inlineListToMediaWiki ils - return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "</span>" + return $ render Nothing (tagWithAttrs "span" attrs) <> contents <> "</span>" inlineToMediaWiki (Emph lst) = do contents <- inlineListToMediaWiki lst - return $ "''" ++ contents ++ "''" + return $ "''" <> contents <> "''" inlineToMediaWiki (Strong lst) = do contents <- inlineListToMediaWiki lst - return $ "'''" ++ contents ++ "'''" + return $ "'''" <> contents <> "'''" inlineToMediaWiki (Strikeout lst) = do contents <- inlineListToMediaWiki lst - return $ "<s>" ++ contents ++ "</s>" + return $ "<s>" <> contents <> "</s>" inlineToMediaWiki (Superscript lst) = do contents <- inlineListToMediaWiki lst - return $ "<sup>" ++ contents ++ "</sup>" + return $ "<sup>" <> contents <> "</sup>" inlineToMediaWiki (Subscript lst) = do contents <- inlineListToMediaWiki lst - return $ "<sub>" ++ contents ++ "</sub>" + return $ "<sub>" <> contents <> "</sub>" inlineToMediaWiki (SmallCaps lst) = inlineListToMediaWiki lst inlineToMediaWiki (Quoted SingleQuote lst) = do contents <- inlineListToMediaWiki lst - return $ "\8216" ++ contents ++ "\8217" + return $ "\8216" <> contents <> "\8217" inlineToMediaWiki (Quoted DoubleQuote lst) = do contents <- inlineListToMediaWiki lst - return $ "\8220" ++ contents ++ "\8221" + return $ "\8220" <> contents <> "\8221" inlineToMediaWiki (Cite _ lst) = inlineListToMediaWiki lst inlineToMediaWiki (Code _ str) = - return $ "<code>" ++ escapeString str ++ "</code>" + return $ "<code>" <> escapeText str <> "</code>" -inlineToMediaWiki (Str str) = return $ escapeString str +inlineToMediaWiki (Str str) = return $ escapeText str inlineToMediaWiki (Math mt str) = return $ - "<math display=\"" ++ - (if mt == DisplayMath then "block" else "inline") ++ - "\">" ++ str ++ "</math>" + "<math display=\"" <> + (if mt == DisplayMath then "block" else "inline") <> + "\">" <> str <> "</math>" -- note: str should NOT be escaped inlineToMediaWiki il@(RawInline f str) @@ -420,35 +423,34 @@ inlineToMediaWiki (Link _ txt (src, _)) = do case txt of [Str s] | isURI src && escapeURI s == src -> return src _ -> return $ if isURI src - then "[" ++ src ++ " " ++ label ++ "]" - else "[[" ++ src' ++ "|" ++ label ++ "]]" - where src' = case src of - '/':xs -> xs -- with leading / it's a - _ -> src -- link to a help page + then "[" <> src <> " " <> label <> "]" + else "[[" <> src' <> "|" <> label <> "]]" + -- with leading / it's a link to a help page + where src' = fromMaybe src $ T.stripPrefix "/" src inlineToMediaWiki (Image attr alt (source, tit)) = do img <- imageToMediaWiki attr alt' <- inlineListToMediaWiki alt - let txt = if null alt' - then if null tit + let txt = if T.null alt' + then if T.null tit then "" else tit else alt' - return $ "[[" ++ - intercalate "|" - (filter (not . null) - [ "File:" ++ source + return $ "[[" <> + T.intercalate "|" + (filter (not . T.null) + [ "File:" <> source , img , txt - ]) ++ "]]" + ]) <> "]]" inlineToMediaWiki (Note contents) = do contents' <- blockListToMediaWiki contents modify (\s -> s { stNotes = True }) - return $ "<ref>" ++ stripTrailingNewlines contents' ++ "</ref>" + return $ "<ref>" <> stripTrailingNewlines contents' <> "</ref>" -- note - does not work for notes with multiple blocks -highlightingLangs :: Set.Set String +highlightingLangs :: Set.Set Text highlightingLangs = Set.fromList [ "abap", "abl", diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 634255604..7e0a58134 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Ms Copyright : Copyright (C) 2007-2019 John MacFarlane @@ -21,7 +23,7 @@ TODO: module Text.Pandoc.Writers.Ms ( writeMs ) where import Prelude import Control.Monad.State.Strict -import Data.Char (isLower, isUpper, toUpper, ord) +import Data.Char (isLower, isUpper, ord) import Data.List (intercalate, intersperse) import qualified Data.Map as Map import Data.Maybe (catMaybes, fromMaybe) @@ -75,32 +77,33 @@ pandocToMs opts (Pandoc meta blocks) = do let context = defField "body" main $ defField "has-inline-math" hasInlineMath $ defField "hyphenate" True - $ defField "pandoc-version" (T.pack pandocVersion) + $ defField "pandoc-version" pandocVersion $ defField "toc" (writerTableOfContents opts) - $ defField "title-meta" (T.pack titleMeta) - $ defField "author-meta" (T.pack $ intercalate "; " authorsMeta) + $ defField "title-meta" titleMeta + $ defField "author-meta" (T.intercalate "; " authorsMeta) $ defField "highlighting-macros" highlightingMacros metadata return $ render colwidth $ case writerTemplate opts of Nothing -> main Just tpl -> renderTemplate tpl context -escapeStr :: WriterOptions -> String -> String +escapeStr :: WriterOptions -> Text -> Text escapeStr opts = escapeString (if writerPreferAscii opts then AsciiOnly else AllowUTF8) -escapeUri :: String -> String -escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c) +escapeUri :: Text -> Text +escapeUri = T.pack . escapeURIString (\c -> c /= '@' && isAllowedInURI c) . T.unpack -toSmallCaps :: WriterOptions -> String -> String -toSmallCaps _ [] = [] -toSmallCaps opts (c:cs) - | isLower c = let (lowers,rest) = span isLower (c:cs) - in "\\s-2" ++ escapeStr opts (map toUpper lowers) ++ - "\\s0" ++ toSmallCaps opts rest - | isUpper c = let (uppers,rest) = span isUpper (c:cs) - in escapeStr opts uppers ++ toSmallCaps opts rest - | otherwise = escapeStr opts [c] ++ toSmallCaps opts cs +toSmallCaps :: WriterOptions -> Text -> Text +toSmallCaps opts s = case T.uncons s of + Nothing -> "" + Just (c, cs) + | isLower c -> let (lowers,rest) = T.span isLower s + in "\\s-2" <> escapeStr opts (T.toUpper lowers) <> + "\\s0" <> toSmallCaps opts rest + | isUpper c -> let (uppers,rest) = T.span isUpper s + in escapeStr opts uppers <> toSmallCaps opts rest + | otherwise -> escapeStr opts (T.singleton c) <> toSmallCaps opts cs -- We split inline lists into sentences, and print one sentence per -- line. roff treats the line-ending period differently. @@ -112,11 +115,11 @@ blockToMs :: PandocMonad m -> MS m (Doc Text) blockToMs _ Null = return empty blockToMs opts (Div (ident,_,_) bs) = do - let anchor = if null ident + let anchor = if T.null ident then empty else nowrap $ - text ".pdfhref M " - <> doubleQuotes (text (toAscii ident)) + literal ".pdfhref M " + <> doubleQuotes (literal (toAscii ident)) setFirstPara res <- blockListToMs opts bs setFirstPara @@ -124,38 +127,38 @@ blockToMs opts (Div (ident,_,_) bs) = do blockToMs opts (Plain inlines) = liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines blockToMs opts (Para [Image attr alt (src,_tit)]) - | let ext = takeExtension src in (ext == ".ps" || ext == ".eps") = do + | let ext = takeExtension (T.unpack src) in (ext == ".ps" || ext == ".eps") = do let (mbW,mbH) = (inPoints opts <$> dimension Width attr, inPoints opts <$> dimension Height attr) let sizeAttrs = case (mbW, mbH) of (Just wp, Nothing) -> space <> doubleQuotes - (text (show (floor wp :: Int) ++ "p")) + (literal (tshow (floor wp :: Int) <> "p")) (Just wp, Just hp) -> space <> doubleQuotes - (text (show (floor wp :: Int) ++ "p")) <> + (literal (tshow (floor wp :: Int) <> "p")) <> space <> - doubleQuotes (text (show (floor hp :: Int))) + doubleQuotes (literal (tshow (floor hp :: Int))) _ -> empty capt <- inlineListToMs' opts alt - return $ nowrap (text ".PSPIC -C " <> - doubleQuotes (text (escapeStr opts src)) <> + return $ nowrap (literal ".PSPIC -C " <> + doubleQuotes (literal (escapeStr opts src)) <> sizeAttrs) $$ - text ".ce 1000" $$ + literal ".ce 1000" $$ capt $$ - text ".ce 0" + literal ".ce 0" blockToMs opts (Para inlines) = do firstPara <- gets stFirstPara resetFirstPara contents <- liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines - return $ text (if firstPara then ".LP" else ".PP") $$ contents + return $ literal (if firstPara then ".LP" else ".PP") $$ contents blockToMs _ b@(RawBlock f str) - | f == Format "ms" = return $ text str + | f == Format "ms" = return $ literal str | otherwise = do report $ BlockNotRendered b return empty blockToMs _ HorizontalRule = do resetFirstPara - return $ text ".HLINE" + return $ literal ".HLINE" blockToMs opts (Header level (ident,classes,_) inlines) = do setFirstPara modify $ \st -> st{ stInHeader = True } @@ -165,33 +168,33 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do "unnumbered" `notElem` classes then (".NH", "\\*[SN]") else (".SH", "") - let anchor = if null ident + let anchor = if T.null ident then empty else nowrap $ - text ".pdfhref M " - <> doubleQuotes (text (toAscii ident)) - let bookmark = text ".pdfhref O " <> text (show level ++ " ") <> - doubleQuotes (text $ secnum ++ - (if null secnum + literal ".pdfhref M " + <> doubleQuotes (literal (toAscii ident)) + let bookmark = literal ".pdfhref O " <> literal (tshow level <> " ") <> + doubleQuotes (literal $ secnum <> + (if T.null secnum then "" - else " ") ++ + else " ") <> escapeStr opts (stringify inlines)) - let backlink = nowrap (text ".pdfhref L -D " <> - doubleQuotes (text (toAscii ident)) <> space <> text "\\") <> cr <> - text " -- " + let backlink = nowrap (literal ".pdfhref L -D " <> + doubleQuotes (literal (toAscii ident)) <> space <> literal "\\") <> cr <> + literal " -- " let tocEntry = if writerTableOfContents opts && level <= writerTOCDepth opts - then text ".XS" + then literal ".XS" $$ backlink <> doubleQuotes ( - nowrap (text (replicate level '\t') <> - (if null secnum + nowrap (literal (T.replicate level "\t") <> + (if T.null secnum then empty - else text secnum <> text "\\~\\~") + else literal secnum <> literal "\\~\\~") <> contents)) - $$ text ".XE" + $$ literal ".XE" else empty modify $ \st -> st{ stFirstPara = True } - return $ (text heading <> space <> text (show level)) $$ + return $ (literal heading <> space <> literal (tshow level)) $$ contents $$ bookmark $$ anchor $$ @@ -200,12 +203,12 @@ blockToMs opts (CodeBlock attr str) = do hlCode <- highlightCode opts attr str setFirstPara return $ - text ".IP" $$ - text ".nf" $$ - text "\\f[C]" $$ + literal ".IP" $$ + literal ".nf" $$ + literal "\\f[C]" $$ hlCode $$ - text "\\f[]" $$ - text ".fi" + literal "\\f[]" $$ + literal ".fi" blockToMs opts (LineBlock ls) = do setFirstPara -- use .LP, see #5588 blockToMs opts $ Para $ intercalate [LineBreak] ls @@ -213,7 +216,7 @@ blockToMs opts (BlockQuote blocks) = do setFirstPara contents <- blockListToMs opts blocks setFirstPara - return $ text ".RS" $$ contents $$ text ".RE" + return $ literal ".RS" $$ contents $$ literal ".RE" blockToMs opts (Table caption alignments widths headers rows) = let aligncode AlignLeft = "l" aligncode AlignRight = "r" @@ -223,15 +226,15 @@ blockToMs opts (Table caption alignments widths headers rows) = caption' <- inlineListToMs' opts caption let iwidths = if all (== 0) widths then repeat "" - else map (printf "w(%0.1fn)" . (70 *)) widths + else map (T.pack . printf "w(%0.1fn)" . (70 *)) widths -- 78n default width - 8n indent = 70n - let coldescriptions = text $ unwords - (zipWith (\align width -> aligncode align ++ width) - alignments iwidths) ++ "." + let coldescriptions = literal $ T.unwords + (zipWith (\align width -> aligncode align <> width) + alignments iwidths) <> "." colheadings <- mapM (blockListToMs opts) headers - let makeRow cols = text "T{" $$ - vcat (intersperse (text "T}\tT{") cols) $$ - text "T}" + let makeRow cols = literal "T{" $$ + vcat (intersperse (literal "T}\tT{") cols) $$ + literal "T}" let colheadings' = if all null headers then empty else makeRow colheadings $$ char '_' @@ -239,9 +242,9 @@ blockToMs opts (Table caption alignments widths headers rows) = cols <- mapM (blockListToMs opts) row return $ makeRow cols) rows setFirstPara - return $ text ".PP" $$ caption' $$ - text ".TS" $$ text "delim(@@) tab(\t);" $$ coldescriptions $$ - colheadings' $$ vcat body $$ text ".TE" + return $ literal ".PP" $$ caption' $$ + literal ".TS" $$ literal "delim(@@) tab(\t);" $$ coldescriptions $$ + colheadings' $$ vcat body $$ literal ".TE" blockToMs opts (BulletList items) = do contents <- mapM (bulletListItemToMs opts) items @@ -250,7 +253,7 @@ blockToMs opts (BulletList items) = do blockToMs opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs let indent = 2 + - maximum (map length markers) + maximum (map T.length markers) contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $ zip markers items setFirstPara @@ -268,20 +271,20 @@ bulletListItemToMs opts (Para first:rest) = bulletListItemToMs opts (Plain first:rest) = do first' <- blockToMs opts (Plain first) rest' <- blockListToMs opts rest - let first'' = text ".IP \\[bu] 3" $$ first' + let first'' = literal ".IP \\[bu] 3" $$ first' let rest'' = if null rest then empty - else text ".RS 3" $$ rest' $$ text ".RE" + else literal ".RS 3" $$ rest' $$ literal ".RE" return (first'' $$ rest'') bulletListItemToMs opts (first:rest) = do first' <- blockToMs opts first rest' <- blockListToMs opts rest - return $ text "\\[bu] .RS 3" $$ first' $$ rest' $$ text ".RE" + return $ literal "\\[bu] .RS 3" $$ first' $$ rest' $$ literal ".RE" -- | Convert ordered list item (a list of blocks) to ms. orderedListItemToMs :: PandocMonad m => WriterOptions -- ^ options - -> String -- ^ order marker for list item + -> Text -- ^ order marker for list item -> Int -- ^ number of spaces to indent -> [Block] -- ^ list item (list of blocks) -> MS m (Doc Text) @@ -291,12 +294,12 @@ orderedListItemToMs opts num indent (Para first:rest) = orderedListItemToMs opts num indent (first:rest) = do first' <- blockToMs opts first rest' <- blockListToMs opts rest - let num' = printf ("%" ++ show (indent - 1) ++ "s") num - let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' + let num' = T.pack $ printf ("%" <> show (indent - 1) <> "s") num + let first'' = literal (".IP \"" <> num' <> "\" " <> tshow indent) $$ first' let rest'' = if null rest then empty - else text ".RS " <> text (show indent) $$ - rest' $$ text ".RE" + else literal ".RS " <> literal (tshow indent) $$ + rest' $$ literal ".RE" return $ first'' $$ rest'' -- | Convert definition list item (label, list of blocks) to ms. @@ -317,8 +320,8 @@ definitionListItemToMs opts (label, defs) = do rest' <- liftM vcat $ mapM (\item -> blockToMs opts item) rest first' <- blockToMs opts first - return $ first' $$ text ".RS" $$ rest' $$ text ".RE" - return $ nowrap (text ".IP " <> doubleQuotes labelText) $$ contents + return $ first' $$ literal ".RS" $$ rest' $$ literal ".RE" + return $ nowrap (literal ".IP " <> doubleQuotes labelText) $$ contents -- | Convert list of Pandoc block elements to ms. blockListToMs :: PandocMonad m @@ -353,13 +356,13 @@ inlineToMs opts (Strikeout lst) = do contents <- inlineListToMs opts lst -- we use grey color instead of strikeout, which seems quite -- hard to do in roff for arbitrary bits of text - return $ text "\\m[strikecolor]" <> contents <> text "\\m[]" + return $ literal "\\m[strikecolor]" <> contents <> literal "\\m[]" inlineToMs opts (Superscript lst) = do contents <- inlineListToMs opts lst - return $ text "\\*{" <> contents <> text "\\*}" + return $ literal "\\*{" <> contents <> literal "\\*}" inlineToMs opts (Subscript lst) = do contents <- inlineListToMs opts lst - return $ text "\\*<" <> contents <> text "\\*>" + return $ literal "\\*<" <> contents <> literal "\\*>" inlineToMs opts (SmallCaps lst) = do -- see https://lists.gnu.org/archive/html/groff/2015-01/msg00016.html modify $ \st -> st{ stSmallCaps = not (stSmallCaps st) } @@ -371,40 +374,40 @@ inlineToMs opts (Quoted SingleQuote lst) = do return $ char '`' <> contents <> char '\'' inlineToMs opts (Quoted DoubleQuote lst) = do contents <- inlineListToMs opts lst - return $ text "\\[lq]" <> contents <> text "\\[rq]" + return $ literal "\\[lq]" <> contents <> literal "\\[rq]" inlineToMs opts (Cite _ lst) = inlineListToMs opts lst inlineToMs opts (Code attr str) = do hlCode <- highlightCode opts attr str withFontFeature 'C' (return hlCode) inlineToMs opts (Str str) = do - let shim = case str of - '.':_ -> afterBreak (T.pack "\\&") - _ -> empty + let shim = case T.uncons str of + Just ('.',_) -> afterBreak "\\&" + _ -> empty smallcaps <- gets stSmallCaps if smallcaps - then return $ shim <> text (toSmallCaps opts str) - else return $ shim <> text (escapeStr opts str) + then return $ shim <> literal (toSmallCaps opts str) + else return $ shim <> literal (escapeStr opts str) inlineToMs opts (Math InlineMath str) = do modify $ \st -> st{ stHasInlineMath = True } res <- convertMath writeEqn InlineMath str case res of Left il -> inlineToMs opts il - Right r -> return $ text "@" <> text r <> text "@" + Right r -> return $ literal "@" <> literal r <> literal "@" inlineToMs opts (Math DisplayMath str) = do res <- convertMath writeEqn InlineMath str case res of Left il -> do contents <- inlineToMs opts il - return $ cr <> text ".RS" $$ contents $$ text ".RE" + return $ cr <> literal ".RS" $$ contents $$ literal ".RE" Right r -> return $ - cr <> text ".EQ" $$ text r $$ text ".EN" <> cr + cr <> literal ".EQ" $$ literal r $$ literal ".EN" <> cr inlineToMs _ il@(RawInline f str) - | f == Format "ms" = return $ text str + | f == Format "ms" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty -inlineToMs _ LineBreak = return $ cr <> text ".br" <> cr +inlineToMs _ LineBreak = return $ cr <> literal ".br" <> cr inlineToMs opts SoftBreak = handleNotes opts $ case writerWrapText opts of @@ -412,27 +415,27 @@ inlineToMs opts SoftBreak = WrapNone -> space WrapPreserve -> cr inlineToMs opts Space = handleNotes opts space -inlineToMs opts (Link _ txt ('#':ident, _)) = do +inlineToMs opts (Link _ txt (T.uncons -> Just ('#',ident), _)) = do -- internal link contents <- inlineListToMs' opts $ map breakToSpace txt - return $ text "\\c" <> cr <> nowrap (text ".pdfhref L -D " <> - doubleQuotes (text (toAscii ident)) <> text " -A " <> - doubleQuotes (text "\\c") <> space <> text "\\") <> cr <> - text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" + return $ literal "\\c" <> cr <> nowrap (literal ".pdfhref L -D " <> + doubleQuotes (literal (toAscii ident)) <> literal " -A " <> + doubleQuotes (literal "\\c") <> space <> literal "\\") <> cr <> + literal " -- " <> doubleQuotes (nowrap contents) <> cr <> literal "\\&" inlineToMs opts (Link _ txt (src, _)) = do -- external link contents <- inlineListToMs' opts $ map breakToSpace txt - return $ text "\\c" <> cr <> nowrap (text ".pdfhref W -D " <> - doubleQuotes (text (escapeUri src)) <> text " -A " <> - doubleQuotes (text "\\c") <> space <> text "\\") <> cr <> - text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" + return $ literal "\\c" <> cr <> nowrap (literal ".pdfhref W -D " <> + doubleQuotes (literal (escapeUri src)) <> literal " -A " <> + doubleQuotes (literal "\\c") <> space <> literal "\\") <> cr <> + literal " -- " <> doubleQuotes (nowrap contents) <> cr <> literal "\\&" inlineToMs opts (Image _ alternate (_, _)) = - return $ char '[' <> text "IMAGE: " <> - text (escapeStr opts (stringify alternate)) + return $ char '[' <> literal "IMAGE: " <> + literal (escapeStr opts (stringify alternate)) <> char ']' inlineToMs _ (Note contents) = do modify $ \st -> st{ stNotes = contents : stNotes st } - return $ text "\\**" + return $ literal "\\**" handleNotes :: PandocMonad m => WriterOptions -> Doc Text -> MS m (Doc Text) handleNotes opts fallback = do @@ -451,7 +454,7 @@ handleNote opts bs = do (Para ils : rest) -> Plain ils : rest _ -> bs contents <- blockListToMs opts bs' - return $ cr <> text ".FS" $$ contents $$ text ".FE" <> cr + return $ cr <> literal ".FS" $$ contents $$ literal ".FE" <> cr setFirstPara :: PandocMonad m => MS m () setFirstPara = modify $ \st -> st{ stFirstPara = True } @@ -467,38 +470,38 @@ breakToSpace x = x -- Highlighting styleToMs :: Style -> Doc Text -styleToMs sty = vcat $ colordefs ++ map (toMacro sty) alltoktypes +styleToMs sty = vcat $ colordefs <> map (toMacro sty) alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok colordefs = map toColorDef allcolors - toColorDef c = text (".defcolor " ++ - hexColor c ++ " rgb #" ++ hexColor c) + toColorDef c = literal (".defcolor " <> + hexColor c <> " rgb #" <> hexColor c) allcolors = catMaybes $ ordNub $ [defaultColor sty, backgroundColor sty, - lineNumberColor sty, lineNumberBackgroundColor sty] ++ + lineNumberColor sty, lineNumberBackgroundColor sty] <> concatMap (colorsForToken. snd) (Map.toList (tokenStyles sty)) colorsForToken ts = [tokenColor ts, tokenBackground ts] -hexColor :: Color -> String -hexColor (RGB r g b) = printf "%02x%02x%02x" r g b +hexColor :: Color -> Text +hexColor (RGB r g b) = T.pack $ printf "%02x%02x%02x" r g b toMacro :: Style -> TokenType -> Doc Text toMacro sty toktype = - nowrap (text ".ds " <> text (show toktype) <> text " " <> + nowrap (literal ".ds " <> literal (tshow toktype) <> literal " " <> setbg <> setcolor <> setfont <> - text "\\\\$1" <> + literal "\\\\$1" <> resetfont <> resetcolor <> resetbg) where setcolor = maybe empty fgcol tokCol - resetcolor = maybe empty (const $ text "\\\\m[]") tokCol + resetcolor = maybe empty (const $ literal "\\\\m[]") tokCol setbg = empty -- maybe empty bgcol tokBg resetbg = empty -- maybe empty (const $ text "\\\\M[]") tokBg - fgcol c = text $ "\\\\m[" ++ hexColor c ++ "]" - -- bgcol c = text $ "\\\\M[" ++ hexColor c ++ "]" + fgcol c = literal $ "\\\\m[" <> hexColor c <> "]" + -- bgcol c = literal $ "\\\\M[" <> hexColor c <> "]" setfont = if tokBold || tokItalic - then text $ "\\\\f[C" ++ ['B' | tokBold] ++ - ['I' | tokItalic] ++ "]" + then literal $ T.pack $ "\\\\f[C" <> ['B' | tokBold] <> + ['I' | tokItalic] <> "]" else empty resetfont = if tokBold || tokItalic - then text "\\\\f[C]" + then literal "\\\\f[C]" else empty tokSty = Map.lookup toktype (tokenStyles sty) tokCol = (tokSty >>= tokenColor) `mplus` defaultColor sty @@ -513,24 +516,24 @@ msFormatter :: WriterOptions -> FormatOptions -> [SourceLine] -> Doc Text msFormatter opts _fmtopts = vcat . map fmtLine where fmtLine = hcat . map fmtToken - fmtToken (toktype, tok) = text "\\*" <> - brackets (text (show toktype) <> text " \"" - <> text (escapeStr opts (T.unpack tok)) <> text "\"") + fmtToken (toktype, tok) = literal "\\*" <> + brackets (literal (tshow toktype) <> literal " \"" + <> literal (escapeStr opts tok) <> literal "\"") -highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m (Doc Text) +highlightCode :: PandocMonad m => WriterOptions -> Attr -> Text -> MS m (Doc Text) highlightCode opts attr str = case highlight (writerSyntaxMap opts) (msFormatter opts) attr str of Left msg -> do - unless (null msg) $ report $ CouldNotHighlight msg - return $ text (escapeStr opts str) + unless (T.null msg) $ report $ CouldNotHighlight msg + return $ literal (escapeStr opts str) Right h -> do modify (\st -> st{ stHighlighting = True }) return h -- This is used for PDF anchors. -toAscii :: String -> String -toAscii = concatMap +toAscii :: Text -> Text +toAscii = T.concatMap (\c -> case toAsciiChar c of - Nothing -> '_':'u':show (ord c) ++ "_" - Just '/' -> '_':'u':show (ord c) ++ "_" -- see #4515 - Just c' -> [c']) + Nothing -> "_u" <> tshow (ord c) <> "_" + Just '/' -> "_u" <> tshow (ord c) <> "_" -- see #4515 + Just c' -> T.singleton c') diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index c6ff70f5b..b70345b3a 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Muse Copyright : Copyright (C) 2017-2019 Alexander Krotov @@ -31,7 +32,7 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace) import Data.Default -import Data.List (intersperse, isInfixOf, transpose) +import Data.List (intersperse, transpose) import qualified Data.Set as Set import qualified Data.Text as T import Data.Text (Text) @@ -66,7 +67,7 @@ data WriterEnv = data WriterState = WriterState { stNotes :: Notes , stNoteNum :: Int - , stIds :: Set.Set String + , stIds :: Set.Set Text , stUseTags :: Bool -- ^ Use tags for emphasis, for example because previous character is a letter } @@ -161,7 +162,7 @@ simpleTable caption headers rows = do rows' <- mapM (mapM blockListToMuse) rows let widthsInChars = maximum . map offset <$> transpose (headers' : rows') let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks - where sep' = lblock (length sep) $ text sep + where sep' = lblock (T.length sep) $ literal sep let makeRow sep = hpipeBlocks sep . zipWith lblock widthsInChars let head' = makeRow " || " headers' rows'' <- mapM (\row -> makeRow rowSeparator <$> mapM blockListToMuse row) rows @@ -192,12 +193,12 @@ blockToMuse (Para inlines) = do return $ contents <> blankline blockToMuse (LineBlock lns) = do lns' <- local (\env -> env { envOneLine = True }) $ mapM inlineListToMuse lns - return $ nowrap $ vcat (map (text "> " <>) lns') <> blankline + return $ nowrap $ vcat (map (literal "> " <>) lns') <> blankline blockToMuse (CodeBlock (_,_,_) str) = - return $ "<example>" $$ text str $$ "</example>" $$ blankline + return $ "<example>" $$ literal str $$ "</example>" $$ blankline blockToMuse (RawBlock (Format format) str) = - return $ blankline $$ "<literal style=\"" <> text format <> "\">" $$ - text str $$ "</literal>" $$ blankline + return $ blankline $$ "<literal style=\"" <> literal format <> "\">" $$ + literal str $$ "</literal>" $$ blankline blockToMuse (BlockQuote blocks) = do contents <- flatBlockListToMuse blocks return $ blankline @@ -212,10 +213,10 @@ blockToMuse (OrderedList (start, style, _) items) = do topLevel <- asks envTopLevel return $ (if topLevel then nest 1 else id) (vcat contents) $$ blankline where orderedListItemToMuse :: PandocMonad m - => String -- ^ marker for list item + => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> Muse m (Doc Text) - orderedListItemToMuse marker item = hang (length marker + 1) (text marker <> space) + orderedListItemToMuse marker item = hang (T.length marker + 1) (literal marker <> space) <$> blockListToMuse item blockToMuse (BulletList items) = do contents <- mapM bulletListItemToMuse items @@ -253,10 +254,10 @@ blockToMuse (Header level (ident,_,_) inlines) = do let autoId = uniqueIdent (writerExtensions opts) inlines ids modify $ \st -> st{ stIds = Set.insert autoId ids } - let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) + let attr' = if T.null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) then empty - else "#" <> text ident <> cr - let header' = if topLevel then text (replicate level '*') <> space else mempty + else "#" <> literal ident <> cr + let header' = if topLevel then literal (T.replicate level "*") <> space else mempty return $ blankline <> attr' $$ nowrap (header' <> contents) <> blankline -- https://www.gnu.org/software/emacs-muse/manual/muse.html#Horizontal-Rules-and-Anchors blockToMuse HorizontalRule = return $ blankline $$ "----" $$ blankline @@ -297,14 +298,14 @@ noteToMuse :: PandocMonad m -> [Block] -> Muse m (Doc Text) noteToMuse num note = do - res <- hang (length marker) (text marker) <$> + res <- hang (T.length marker) (literal marker) <$> local (\env -> env { envInsideBlock = True , envInlineStart = True , envAfterSpace = True }) (blockListToMuse note) return $ res <> blankline where - marker = "[" ++ show num ++ "] " + marker = "[" <> tshow num <> "] " -- | Return Muse representation of block and accumulated notes. blockToMuseWithNotes :: PandocMonad m @@ -330,30 +331,26 @@ blockToMuseWithNotes blk = do else return b -- | Escape special characters for Muse. -escapeString :: String -> String -escapeString s = - "<verbatim>" ++ - substitute "</verbatim>" "<</verbatim><verbatim>/verbatim>" s ++ +escapeText :: Text -> Text +escapeText s = + "<verbatim>" <> + T.replace "</verbatim>" "<</verbatim><verbatim>/verbatim>" s <> "</verbatim>" -- | Replace newlines with spaces -replaceNewlines :: String -> String -replaceNewlines ('\n':xs) = ' ':replaceNewlines xs -replaceNewlines (x:xs) = x:replaceNewlines xs -replaceNewlines [] = [] - -startsWithMarker :: (Char -> Bool) -> String -> Bool -startsWithMarker f (' ':xs) = startsWithMarker f xs -startsWithMarker f (x:xs) = - f x && (startsWithMarker f xs || startsWithDot xs) +replaceNewlines :: Text -> Text +replaceNewlines = T.map $ \c -> + if c == '\n' then ' ' else c + +startsWithMarker :: (Char -> Bool) -> Text -> Bool +startsWithMarker f t = case T.uncons $ T.dropWhile f' t of + Just ('.', xs) -> T.null xs || isSpace (T.head xs) + _ -> False where - startsWithDot ['.'] = True - startsWithDot ('.':c:_) = isSpace c - startsWithDot _ = False -startsWithMarker _ [] = False + f' c = c == ' ' || f c -containsNotes :: Char -> Char -> String -> Bool -containsNotes left right = p +containsNotes :: Char -> Char -> Text -> Bool +containsNotes left right = p . T.unpack -- This ought to be a parser where p (left':xs) | left' == left = q xs || p xs | otherwise = p xs @@ -370,29 +367,29 @@ containsNotes left right = p s [] = False -- | Return True if string should be escaped with <verbatim> tags -shouldEscapeString :: PandocMonad m - => String +shouldEscapeText :: PandocMonad m + => Text -> Muse m Bool -shouldEscapeString s = do +shouldEscapeText s = do insideLink <- asks envInsideLinkDescription - return $ null s || - any (`elem` ("#*<=|" :: String)) s || - "::" `isInfixOf` s || - "~~" `isInfixOf` s || - "[[" `isInfixOf` s || - ">>>" `isInfixOf` s || - ("]" `isInfixOf` s && insideLink) || + return $ T.null s || + T.any (`elem` ("#*<=|" :: String)) s || + "::" `T.isInfixOf` s || + "~~" `T.isInfixOf` s || + "[[" `T.isInfixOf` s || + ">>>" `T.isInfixOf` s || + ("]" `T.isInfixOf` s && insideLink) || containsNotes '[' ']' s || containsNotes '{' '}' s -- | Escape special characters for Muse if needed. -conditionalEscapeString :: PandocMonad m - => String - -> Muse m String -conditionalEscapeString s = do - shouldEscape <- shouldEscapeString s +conditionalEscapeText :: PandocMonad m + => Text + -> Muse m Text +conditionalEscapeText s = do + shouldEscape <- shouldEscapeText s return $ if shouldEscape - then escapeString s + then escapeText s else s -- Expand Math and Cite before normalizing inline list @@ -425,23 +422,23 @@ normalizeInlineList (Str "" : xs) normalizeInlineList (x : Str "" : xs) = normalizeInlineList (x:xs) normalizeInlineList (Str x1 : Str x2 : xs) - = normalizeInlineList $ Str (x1 ++ x2) : xs + = normalizeInlineList $ Str (x1 <> x2) : xs normalizeInlineList (Emph x1 : Emph x2 : ils) - = normalizeInlineList $ Emph (x1 ++ x2) : ils + = normalizeInlineList $ Emph (x1 <> x2) : ils normalizeInlineList (Strong x1 : Strong x2 : ils) - = normalizeInlineList $ Strong (x1 ++ x2) : ils + = normalizeInlineList $ Strong (x1 <> x2) : ils normalizeInlineList (Strikeout x1 : Strikeout x2 : ils) - = normalizeInlineList $ Strikeout (x1 ++ x2) : ils + = normalizeInlineList $ Strikeout (x1 <> x2) : ils normalizeInlineList (Superscript x1 : Superscript x2 : ils) - = normalizeInlineList $ Superscript (x1 ++ x2) : ils + = normalizeInlineList $ Superscript (x1 <> x2) : ils normalizeInlineList (Subscript x1 : Subscript x2 : ils) - = normalizeInlineList $ Subscript (x1 ++ x2) : ils + = normalizeInlineList $ Subscript (x1 <> x2) : ils normalizeInlineList (SmallCaps x1 : SmallCaps x2 : ils) - = normalizeInlineList $ SmallCaps (x1 ++ x2) : ils + = normalizeInlineList $ SmallCaps (x1 <> x2) : ils normalizeInlineList (Code _ x1 : Code _ x2 : ils) - = normalizeInlineList $ Code nullAttr (x1 ++ x2) : ils + = normalizeInlineList $ Code nullAttr (x1 <> x2) : ils normalizeInlineList (RawInline f1 x1 : RawInline f2 x2 : ils) | f1 == f2 - = normalizeInlineList $ RawInline f1 (x1 ++ x2) : ils + = normalizeInlineList $ RawInline f1 (x1 <> x2) : ils -- Do not join Span's during normalization normalizeInlineList (x:xs) = x : normalizeInlineList xs normalizeInlineList [] = [] @@ -461,33 +458,41 @@ startsWithSpace _ = False endsWithSpace :: [Inline] -> Bool endsWithSpace [Space] = True endsWithSpace [SoftBreak] = True -endsWithSpace [Str s] = stringStartsWithSpace $ reverse s +endsWithSpace [Str s] = stringEndsWithSpace s endsWithSpace (_:xs) = endsWithSpace xs endsWithSpace [] = False -urlEscapeBrackets :: String -> String -urlEscapeBrackets (']':xs) = '%':'5':'D':urlEscapeBrackets xs -urlEscapeBrackets (x:xs) = x:urlEscapeBrackets xs -urlEscapeBrackets [] = [] +urlEscapeBrackets :: Text -> Text +urlEscapeBrackets = T.concatMap $ \c -> case c of + ']' -> "%5D" + _ -> T.singleton c -isHorizontalRule :: String -> Bool -isHorizontalRule s = length s >= 4 && all (== '-') s +isHorizontalRule :: Text -> Bool +isHorizontalRule s = T.length s >= 4 && T.all (== '-') s -stringStartsWithSpace :: String -> Bool -stringStartsWithSpace (x:_) = isSpace x -stringStartsWithSpace "" = False +stringStartsWithSpace :: Text -> Bool +stringStartsWithSpace = maybe False (isSpace . fst) . T.uncons + +stringEndsWithSpace :: Text -> Bool +stringEndsWithSpace = maybe False (isSpace . snd) . T.unsnoc fixOrEscape :: Bool -> Inline -> Bool -fixOrEscape sp (Str "-") = sp -fixOrEscape sp (Str s@('-':x:_)) = (sp && isSpace x) || isHorizontalRule s -fixOrEscape sp (Str ";") = not sp -fixOrEscape sp (Str (';':x:_)) = not sp && isSpace x -fixOrEscape _ (Str ">") = True -fixOrEscape _ (Str ('>':x:_)) = isSpace x -fixOrEscape sp (Str s) = (sp && (startsWithMarker isDigit s || - startsWithMarker isAsciiLower s || - startsWithMarker isAsciiUpper s)) - || stringStartsWithSpace s +fixOrEscape b (Str s) = fixOrEscapeStr b s + where + fixOrEscapeStr sp t = case T.uncons t of + Just ('-', xs) + | T.null xs -> sp + | otherwise -> (sp && isSpace (T.head xs)) || isHorizontalRule t + Just (';', xs) + | T.null xs -> not sp + | otherwise -> not sp && isSpace (T.head xs) + Just ('>', xs) + | T.null xs -> True + | otherwise -> isSpace (T.head xs) + _ -> (sp && (startsWithMarker isDigit s || + startsWithMarker isAsciiLower s || + startsWithMarker isAsciiUpper s)) + || stringStartsWithSpace s fixOrEscape _ Space = True fixOrEscape _ SoftBreak = True fixOrEscape _ _ = False @@ -496,8 +501,8 @@ inlineListStartsWithAlnum :: PandocMonad m => [Inline] -> Muse m Bool inlineListStartsWithAlnum (Str s:_) = do - esc <- shouldEscapeString s - return $ esc || isAlphaNum (head s) + esc <- shouldEscapeText s + return $ esc || isAlphaNum (T.head s) inlineListStartsWithAlnum _ = return False -- | Convert list of Pandoc inline elements to Muse @@ -527,7 +532,7 @@ renderInlineList (x:xs) = do , envNearAsterisks = False }) $ renderInlineList xs if start && fixOrEscape afterSpace x - then pure (text "<verbatim></verbatim>" <> r <> lst') + then pure (literal "<verbatim></verbatim>" <> r <> lst') else pure (r <> lst') -- | Normalize and convert list of Pandoc inline elements to Muse. @@ -551,23 +556,23 @@ inlineListToMuse' lst = do , envAfterSpace = afterSpace || not topLevel }) $ inlineListToMuse lst -emphasis :: PandocMonad m => String -> String -> [Inline] -> Muse m (Doc Text) +emphasis :: PandocMonad m => Text -> Text -> [Inline] -> Muse m (Doc Text) emphasis b e lst = do contents <- local (\env -> env { envInsideAsterisks = inAsterisks }) $ inlineListToMuse lst modify $ \st -> st { stUseTags = useTags } - return $ text b <> contents <> text e - where inAsterisks = last b == '*' || head e == '*' - useTags = last e /= '>' + return $ literal b <> contents <> literal e + where inAsterisks = T.last b == '*' || T.head e == '*' + useTags = T.last e /= '>' -- | Convert Pandoc inline element to Muse. inlineToMuse :: PandocMonad m => Inline -> Muse m (Doc Text) inlineToMuse (Str str) = do - escapedStr <- conditionalEscapeString $ replaceNewlines str - let useTags = isAlphaNum $ last escapedStr -- escapedStr is never empty because empty strings are escaped + escapedStr <- conditionalEscapeText $ replaceNewlines str + let useTags = isAlphaNum $ T.last escapedStr -- escapedStr is never empty because empty strings are escaped modify $ \st -> st { stUseTags = useTags } - return $ text escapedStr + return $ literal escapedStr inlineToMuse (Emph [Strong lst]) = do useTags <- gets stUseTags let lst' = normalizeInlineList lst @@ -625,15 +630,16 @@ inlineToMuse Cite {} = inlineToMuse (Code _ str) = do useTags <- gets stUseTags modify $ \st -> st { stUseTags = False } - return $ if useTags || null str || '=' `elem` str || isSpace (head str) || isSpace (last str) - then "<code>" <> text (substitute "</code>" "<</code><code>/code>" str) <> "</code>" - else "=" <> text str <> "=" + return $ if useTags || T.null str || T.any (== '=') str + || isSpace (T.head str) || isSpace (T.last str) + then "<code>" <> literal (T.replace "</code>" "<</code><code>/code>" str) <> "</code>" + else "=" <> literal str <> "=" inlineToMuse Math{} = throwError $ PandocShouldNeverHappenError "Math should be expanded before normalization" inlineToMuse (RawInline (Format f) str) = do modify $ \st -> st { stUseTags = False } - return $ "<literal style=\"" <> text f <> "\">" <> text str <> "</literal>" + return $ "<literal style=\"" <> literal f <> "\">" <> literal str <> "</literal>" inlineToMuse LineBreak = do oneline <- asks envOneLine modify $ \st -> st { stUseTags = False } @@ -650,27 +656,27 @@ inlineToMuse (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> do modify $ \st -> st { stUseTags = False } - return $ "[[" <> text (escapeLink x) <> "]]" + return $ "[[" <> literal (escapeLink x) <> "]]" _ -> do contents <- local (\env -> env { envInsideLinkDescription = True }) $ inlineListToMuse txt modify $ \st -> st { stUseTags = False } - return $ "[[" <> text (escapeLink src) <> "][" <> contents <> "]]" - where escapeLink lnk = if isImageUrl lnk then "URL:" ++ urlEscapeBrackets lnk else urlEscapeBrackets lnk + return $ "[[" <> literal (escapeLink src) <> "][" <> contents <> "]]" + where escapeLink lnk = if isImageUrl lnk then "URL:" <> urlEscapeBrackets lnk else urlEscapeBrackets lnk -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] - isImageUrl = (`elem` imageExtensions) . takeExtension -inlineToMuse (Image attr alt (source,'f':'i':'g':':':title)) = + isImageUrl = (`elem` imageExtensions) . takeExtension . T.unpack +inlineToMuse (Image attr alt (source,T.stripPrefix "fig:" -> Just title)) = inlineToMuse (Image attr alt (source,title)) inlineToMuse (Image attr@(_, classes, _) inlines (source, title)) = do opts <- asks envOptions alt <- local (\env -> env { envInsideLinkDescription = True }) $ inlineListToMuse inlines - title' <- if null title + title' <- if T.null title then if null inlines then return "" else return $ "[" <> alt <> "]" - else do s <- local (\env -> env { envInsideLinkDescription = True }) $ conditionalEscapeString title - return $ "[" <> text s <> "]" + else do s <- local (\env -> env { envInsideLinkDescription = True }) $ conditionalEscapeText title + return $ "[" <> literal s <> "]" let width = case dimension Width attr of - Just (Percent x) | isEnabled Ext_amuse opts -> " " ++ show (round x :: Integer) + Just (Percent x) | isEnabled Ext_amuse opts -> " " <> tshow (round x :: Integer) _ -> "" let leftalign = if "align-left" `elem` classes then " l" @@ -679,7 +685,7 @@ inlineToMuse (Image attr@(_, classes, _) inlines (source, title)) = do then " r" else "" modify $ \st -> st { stUseTags = False } - return $ "[[" <> text (urlEscapeBrackets source ++ width ++ leftalign ++ rightalign) <> "]" <> title' <> "]" + return $ "[[" <> literal (urlEscapeBrackets source <> width <> leftalign <> rightalign) <> "]" <> title' <> "]" inlineToMuse (Note contents) = do -- add to notes in state notes <- gets stNotes @@ -687,19 +693,19 @@ inlineToMuse (Note contents) = do , stUseTags = False } n <- gets stNoteNum - let ref = show $ n + length notes - return $ "[" <> text ref <> "]" + let ref = tshow $ n + length notes + return $ "[" <> literal ref <> "]" inlineToMuse (Span (anchor,names,kvs) inlines) = do contents <- inlineListToMuse inlines let (contents', hasDir) = case lookup "dir" kvs of Just "rtl" -> ("<<<" <> contents <> ">>>", True) Just "ltr" -> (">>>" <> contents <> "<<<", True) _ -> (contents, False) - let anchorDoc = if null anchor + let anchorDoc = if T.null anchor then mempty - else text ('#':anchor) <> space + else literal ("#" <> anchor) <> space modify $ \st -> st { stUseTags = False } - return $ anchorDoc <> (if null inlines && not (null anchor) + return $ anchorDoc <> (if null inlines && not (T.null anchor) then mempty else (if null names then (if hasDir then contents' else "<class>" <> contents' <> "</class>") - else "<class name=\"" <> text (head names) <> "\">" <> contents' <> "</class>")) + else "<class name=\"" <> literal (head names) <> "\">" <> contents' <> "</class>")) diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 3d8bfbca7..a5ea4b641 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.ODT Copyright : Copyright (C) 2008-2019 John MacFarlane @@ -18,9 +19,9 @@ import Control.Monad.Except (catchError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Generics (everywhere', mkT) -import Data.List (isPrefixOf, intercalate) -import Data.Maybe (fromMaybe) +import Data.List (isPrefixOf) import qualified Data.Map as Map +import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Time import System.FilePath (takeDirectory, takeExtension, (<.>)) @@ -33,7 +34,7 @@ import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.DocLayout -import Text.Pandoc.Shared (stringify, pandocVersion) +import Text.Pandoc.Shared (stringify, pandocVersion, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, fixDisplayMath) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) @@ -89,7 +90,7 @@ pandocToODT opts doc@(Pandoc meta _) = do Nothing -> empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) - ,("manifest:full-path", fp) + ,("manifest:full-path", T.pack fp) ,("manifest:version", "1.2") ] let files = [ ent | ent <- filesInArchive archive, @@ -114,7 +115,7 @@ pandocToODT opts doc@(Pandoc meta _) = do let userDefinedMetaFields = [k | k <- Map.keys (unMeta meta) , k `notElem` ["title", "lang", "author" , "description", "subject", "keywords"]] - let escapedText = text . escapeStringForXML + let escapedText = text . T.unpack . escapeStringForXML let keywords = case lookupMeta "keywords" meta of Just (MetaList xs) -> map stringify xs _ -> [] @@ -136,17 +137,17 @@ pandocToODT opts doc@(Pandoc meta _) = do ,("xmlns:ooo","http://openoffice.org/2004/office") ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") ,("office:version","1.2")] ( inTags True "office:meta" [] $ - ( metaTag "meta:generator" ("Pandoc/" ++ pandocVersion) + ( metaTag "meta:generator" ("Pandoc/" <> pandocVersion) $$ metaTag "dc:title" (stringify title) $$ metaTag "dc:description" - (intercalate "\n" (map stringify $ + (T.intercalate "\n" (map stringify $ lookupMetaBlocks "description" meta)) $$ metaTag "dc:subject" (lookupMetaString "subject" meta) $$ - metaTag "meta:keyword" (intercalate ", " keywords) + metaTag "meta:keyword" (T.intercalate ", " keywords) $$ case lang of Just l -> metaTag "dc:language" (renderLang l) @@ -156,8 +157,8 @@ pandocToODT opts doc@(Pandoc meta _) = do $$ metaTag "dc:creator" a $$ metaTag "meta:creation-date" d $$ metaTag "dc:date" d - ) (formatTime defaultTimeLocale "%FT%XZ" utctime) - (intercalate "; " (map stringify authors)) + ) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) + (T.intercalate "; " (map stringify authors)) $$ vcat userDefinedMeta ) @@ -190,9 +191,9 @@ updateStyleWithLang (Just lang) arch = do addLang :: Lang -> Element -> Element addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) - = Attr n (langLanguage lang) + = Attr n (T.unpack $ langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n (langRegion lang) + = Attr n (T.unpack $ langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements @@ -206,12 +207,12 @@ transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError return (100, 100) let dims = case (getDim Width, getDim Height) of - (Just w, Just h) -> [("width", show w), ("height", show h)] - (Just w@(Percent _), Nothing) -> [("rel-width", show w),("rel-height", "scale"),("width", show ptX ++ "pt"),("height", show ptY ++ "pt")] - (Nothing, Just h@(Percent _)) -> [("rel-width", "scale"),("rel-height", show h),("width", show ptX ++ "pt"),("height", show ptY ++ "pt")] - (Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")] - (Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)] - _ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")] + (Just w, Just h) -> [("width", tshow w), ("height", tshow h)] + (Just w@(Percent _), Nothing) -> [("rel-width", tshow w),("rel-height", "scale"),("width", tshow ptX <> "pt"),("height", tshow ptY <> "pt")] + (Nothing, Just h@(Percent _)) -> [("rel-width", "scale"),("rel-height", tshow h),("width", tshow ptX <> "pt"),("height", tshow ptY <> "pt")] + (Just w@(Inch i), Nothing) -> [("width", tshow w), ("height", tshow (i / ratio) <> "in")] + (Nothing, Just h@(Inch i)) -> [("width", tshow (i * ratio) <> "in"), ("height", tshow h)] + _ -> [("width", tshow ptX <> "pt"), ("height", tshow ptY <> "pt")] where ratio = ptX / ptY getDim dir = case dimension dir attr of @@ -220,16 +221,16 @@ transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError Nothing -> Nothing let newattr = (id', cls, dims) entries <- gets stEntries - let extension = fromMaybe (takeExtension $ takeWhile (/='?') src) + let extension = maybe (takeExtension $ takeWhile (/='?') $ T.unpack src) T.unpack (mbMimeType >>= extensionFromMimeType) let newsrc = "Pictures/" ++ show (length entries) <.> extension let toLazy = B.fromChunks . (:[]) epochtime <- floor `fmap` lift P.getPOSIXTime let entry = toEntry newsrc epochtime $ toLazy img modify $ \st -> st{ stEntries = entry : entries } - return $ Image newattr lab (newsrc, t)) + return $ Image newattr lab (T.pack newsrc, t)) (\e -> do - report $ CouldNotFetchResource src (show e) + report $ CouldNotFetchResource src $ T.pack (show e) return $ Emph lab) transformPicMath _ (Math t math) = do @@ -257,7 +258,7 @@ transformPicMath _ (Math t math) = do ,("text:anchor-type","paragraph")] else [("draw:style-name","fr1") ,("text:anchor-type","as-char")]) $ - selfClosingTag "draw:object" [("xlink:href", dirname) + selfClosingTag "draw:object" [("xlink:href", T.pack dirname) , ("xlink:type", "simple") , ("xlink:show", "embed") , ("xlink:actuate", "onLoad")] diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index 97ff86156..3f1d9701c 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.OOXML Copyright : Copyright (C) 2012-2019 John MacFarlane @@ -11,6 +12,7 @@ Functions common to OOXML writers (Docx and Powerpoint) -} module Text.Pandoc.Writers.OOXML ( mknode + , mktnode , nodename , toLazy , renderXml @@ -31,6 +33,7 @@ import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 import Data.Maybe (mapMaybe) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad) import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light as XML @@ -39,6 +42,9 @@ mknode :: Node t => String -> [(String,String)] -> t -> Element mknode s attrs = add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) +mktnode :: String -> [(String,String)] -> T.Text -> Element +mktnode s attrs = mknode s attrs . T.unpack + nodename :: String -> QName nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } where (name, prefix) = case break (==':') s of @@ -57,10 +63,10 @@ parseXml refArchive distArchive relpath = case findEntryByPath relpath refArchive `mplus` findEntryByPath relpath distArchive of Nothing -> throwError $ PandocSomeError $ - relpath ++ " missing in reference file" + T.pack relpath <> " missing in reference file" Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of Nothing -> throwError $ PandocSomeError $ - relpath ++ " corrupt in reference file" + T.pack relpath <> " corrupt in reference file" Just d -> return d -- Copied from Util diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index cf6f9a037..3f5c0d341 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Writers.OPML Copyright : Copyright (C) 2013-2019 John MacFarlane @@ -56,12 +57,12 @@ writeHtmlInlines ils = T.strip <$> writeHtml5String def (Pandoc nullMeta [Plain ils]) -- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT -showDateTimeRFC822 :: UTCTime -> String -showDateTimeRFC822 = formatTime defaultTimeLocale "%a, %d %b %Y %X %Z" +showDateTimeRFC822 :: UTCTime -> Text +showDateTimeRFC822 = T.pack . formatTime defaultTimeLocale "%a, %d %b %Y %X %Z" -convertDate :: [Inline] -> String +convertDate :: [Inline] -> Text convertDate ils = maybe "" showDateTimeRFC822 $ - parseTimeM True defaultTimeLocale "%F" =<< normalizeDate (stringify ils) + parseTimeM True defaultTimeLocale "%F" . T.unpack =<< normalizeDate (stringify ils) -- | Convert a Block to OPML. blockToOPML :: PandocMonad m => WriterOptions -> Block -> m (Doc Text) @@ -73,8 +74,8 @@ blockToOPML opts (Div (_,"section":_,_) (Header _ _ title : xs)) = do md <- if null blocks then return mempty else writeMarkdown def $ Pandoc nullMeta blocks - let attrs = ("text", T.unpack htmlIls) : - [("_note", T.unpack $ T.stripEnd md) | not (null blocks)] + let attrs = ("text", htmlIls) : + [("_note", T.stripEnd md) | not (null blocks)] rest' <- vcat <$> mapM (blockToOPML opts) rest return $ inTags True "outline" attrs rest' blockToOPML _ _ = return empty diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 9c6867797..58d4698a8 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -2,6 +2,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.OpenDocument Copyright : Copyright (C) 2008-2019 Andrea Rossato and John MacFarlane @@ -24,6 +25,7 @@ import Data.Maybe (fromMaybe) import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.BCP47 (Lang (..), parseBCP47) import Text.Pandoc.Class (PandocMonad, report, translateTerm, setTranslations, toLang) @@ -31,7 +33,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout -import Text.Pandoc.Shared (linesToPara) +import Text.Pandoc.Shared (linesToPara, tshow) import Text.Pandoc.Templates (renderTemplate) import qualified Text.Pandoc.Translations as Term (Term(Figure, Table)) import Text.Pandoc.Writers.Math @@ -56,7 +58,7 @@ data WriterState = , stParaStyles :: [Doc Text] , stListStyles :: [(Int, [Doc Text])] , stTextStyles :: Map.Map (Set.Set TextStyle) - (String, Doc Text) + (Text, Doc Text) , stTextStyleAttr :: Set.Set TextStyle , stIndentPara :: Int , stInDefinition :: Bool @@ -97,7 +99,7 @@ addParaStyle :: PandocMonad m => Doc Text -> OD m () addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s } addTextStyle :: PandocMonad m - => Set.Set TextStyle -> (String, Doc Text) -> OD m () + => Set.Set TextStyle -> (Text, Doc Text) -> OD m () addTextStyle attrs i = modify $ \s -> s { stTextStyles = Map.insert attrs i (stTextStyles s) } @@ -130,10 +132,10 @@ inParagraphTags d = do else return [("text:style-name", "Text_20_body")] return $ inTags False "text:p" a d -inParagraphTagsWithStyle :: String -> Doc Text -> Doc Text +inParagraphTagsWithStyle :: Text -> Doc Text -> Doc Text inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] -inSpanTags :: String -> Doc Text -> Doc Text +inSpanTags :: Text -> Doc Text -> Doc Text inSpanTags s = inTags False "text:span" [("text:style-name",s)] withTextStyle :: PandocMonad m => TextStyle -> OD m a -> OD m a @@ -155,7 +157,7 @@ inTextStyle d = do Just (styleName, _) -> return $ inTags False "text:span" [("text:style-name",styleName)] d Nothing -> do - let styleName = "T" ++ show (Map.size styles + 1) + let styleName = "T" <> tshow (Map.size styles + 1) addTextStyle at (styleName, inTags False "style:style" [("style:name", styleName) @@ -184,11 +186,11 @@ formulaStyle mt = inTags False "style:style" ,("style:horizontal-rel", "paragraph-content") ,("style:wrap", "none")] -inHeaderTags :: PandocMonad m => Int -> String -> Doc Text -> OD m (Doc Text) +inHeaderTags :: PandocMonad m => Int -> Text -> Doc Text -> OD m (Doc Text) inHeaderTags i ident d = - return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) - , ("text:outline-level", show i)] - $ if null ident + return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" <> tshow i) + , ("text:outline-level", tshow i)] + $ if T.null ident then d else selfClosingTag "text:bookmark-start" [ ("text:name", ident) ] <> d <> @@ -198,18 +200,19 @@ inQuotes :: QuoteType -> Doc Text -> Doc Text inQuotes SingleQuote s = char '\8216' <> s <> char '\8217' inQuotes DoubleQuote s = char '\8220' <> s <> char '\8221' -handleSpaces :: String -> Doc Text -handleSpaces s - | ( ' ':_) <- s = genTag s - | ('\t':x) <- s = selfClosingTag "text:tab" [] <> rm x - | otherwise = rm s - where - genTag = span (==' ') >>> tag . length *** rm >>> uncurry (<>) - tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", show n)] - rm ( ' ':xs) = char ' ' <> genTag xs - rm ('\t':xs) = selfClosingTag "text:tab" [] <> genTag xs - rm ( x:xs) = char x <> rm xs - rm [] = empty +handleSpaces :: Text -> Doc Text +handleSpaces s = case T.uncons s of + Just (' ', _) -> genTag s + Just ('\t',x) -> selfClosingTag "text:tab" [] <> rm x + _ -> rm s + where + genTag = T.span (==' ') >>> tag . T.length *** rm >>> uncurry (<>) + tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", tshow n)] + rm t = case T.uncons t of + Just ( ' ',xs) -> char ' ' <> genTag xs + Just ('\t',xs) -> selfClosingTag "text:tab" [] <> genTag xs + Just ( x,xs) -> char x <> rm xs + Nothing -> empty -- | Convert Pandoc document to string in OpenDocument format. writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -234,7 +237,7 @@ writeOpenDocument opts (Pandoc meta blocks) = do map snd (sortBy (flip (comparing fst)) ( Map.elems (stTextStyles s))) listStyle (n,l) = inTags True "text:list-style" - [("style:name", "L" ++ show n)] (vcat l) + [("style:name", "L" <> tshow n)] (vcat l) let listStyles = map listStyle (stListStyles s) let automaticStyles = vcat $ reverse $ styles ++ listStyles let context = defField "body" body @@ -247,17 +250,17 @@ writeOpenDocument opts (Pandoc meta blocks) = do Just tpl -> renderTemplate tpl context withParagraphStyle :: PandocMonad m - => WriterOptions -> String -> [Block] -> OD m (Doc Text) + => WriterOptions -> Text -> [Block] -> OD m (Doc Text) withParagraphStyle o s (b:bs) | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l | otherwise = go =<< blockToOpenDocument o b where go i = (<>) i <$> withParagraphStyle o s bs withParagraphStyle _ _ [] = return empty -inPreformattedTags :: PandocMonad m => String -> OD m (Doc Text) +inPreformattedTags :: PandocMonad m => Text -> OD m (Doc Text) inPreformattedTags s = do n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")] - return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s + return . inParagraphTagsWithStyle ("P" <> tshow n) . handleSpaces $ s orderedListToOpenDocument :: PandocMonad m => WriterOptions -> Int -> [[Block]] -> OD m (Doc Text) @@ -269,7 +272,7 @@ orderedItemToOpenDocument :: PandocMonad m => WriterOptions -> Int -> [Block] -> OD m (Doc Text) orderedItemToOpenDocument o n bs = vcat <$> mapM go bs where go (OrderedList a l) = newLevel a l - go (Para l) = inParagraphTagsWithStyle ("P" ++ show n) <$> + go (Para l) = inParagraphTagsWithStyle ("P" <> tshow n) <$> inlinesToOpenDocument o l go b = blockToOpenDocument o b newLevel a l = do @@ -300,11 +303,11 @@ bulletListToOpenDocument o b = do ln <- (+) 1 . length <$> gets stListStyles (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln modify $ \s -> s { stListStyles = ns : stListStyles s } - is <- listItemsToOpenDocument ("P" ++ show pn) o b - return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is + is <- listItemsToOpenDocument ("P" <> tshow pn) o b + return $ inTags True "text:list" [("text:style-name", "L" <> tshow ln)] is listItemsToOpenDocument :: PandocMonad m - => String -> WriterOptions -> [[Block]] -> OD m (Doc Text) + => Text -> WriterOptions -> [[Block]] -> OD m (Doc Text) listItemsToOpenDocument s o is = vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is @@ -326,7 +329,7 @@ inBlockQuote o i (b:bs) ni <- paraStyle [("style:parent-style-name","Quotations")] go =<< inBlockQuote o ni (map plainToPara l) - | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l + | Para l <- b = go =<< inParagraphTagsWithStyle ("P" <> tshow i) <$> inlinesToOpenDocument o l | otherwise = go =<< blockToOpenDocument o b where go block = ($$) block <$> inBlockQuote o i bs inBlockQuote _ _ [] = resetIndent >> return empty @@ -341,7 +344,7 @@ blockToOpenDocument o bs | Plain b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b - | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs + | Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] <- bs = figure attr c s t | Para b <- bs = if null b && not (isEnabled Ext_empty_paragraphs o) @@ -362,7 +365,7 @@ blockToOpenDocument o bs | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]) | RawBlock f s <- bs = if f == Format "opendocument" - then return $ text s + then return $ text $ T.unpack s else do report $ BlockNotRendered bs return empty @@ -373,21 +376,21 @@ blockToOpenDocument o bs r <- vcat <$> mapM (deflistItemToOpenDocument o) b setInDefinitionList False return r - preformatted s = (flush . vcat) <$> mapM (inPreformattedTags . escapeStringForXML) (lines s) + preformatted s = (flush . vcat) <$> mapM (inPreformattedTags . escapeStringForXML) (T.lines s) mkBlockQuote b = do increaseIndent i <- paraStyle [("style:parent-style-name","Quotations")] inBlockQuote o i (map plainToPara b) orderedList a b = do (ln,pn) <- newOrderedListStyle (isTightList b) a - inTags True "text:list" [ ("text:style-name", "L" ++ show ln)] + inTags True "text:list" [ ("text:style-name", "L" <> tshow ln)] <$> orderedListToOpenDocument o pn b table c a w h r = do tn <- length <$> gets stTableStyles pn <- length <$> gets stParaStyles let genIds = map chr [65..] - name = "Table" ++ show (tn + 1) + name = "Table" <> tshow (tn + 1) columnIds = zip genIds w - mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name ++ "." ++ [fst n])] + mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name <> "." <> T.singleton (fst n))] columns = map mkColumn columnIds paraHStyles = paraTableStyles "Heading" pn a paraStyles = paraTableStyles "Contents" (pn + length (newPara paraHStyles)) a @@ -434,36 +437,36 @@ numberedFigureCaption caption = do capterm <- translateTerm Term.Figure return $ numberedCaption "FigureCaption" capterm "Illustration" id' caption -numberedCaption :: String -> String -> String -> Int -> Doc Text -> Doc Text +numberedCaption :: Text -> Text -> Text -> Int -> Doc Text -> Doc Text numberedCaption style term name num caption = - let t = text term + let t = text $ T.unpack term r = num - 1 - s = inTags False "text:sequence" [ ("text:ref-name", "ref" ++ name ++ show r), + s = inTags False "text:sequence" [ ("text:ref-name", "ref" <> name <> tshow r), ("text:name", name), - ("text:formula", "ooow:" ++ name ++ "+1"), + ("text:formula", "ooow:" <> name <> "+1"), ("style:num-format", "1") ] $ text $ show num c = text ": " in inParagraphTagsWithStyle style $ hcat [ t, text " ", s, c, caption ] -unNumberedCaption :: Monad m => String -> Doc Text -> OD m (Doc Text) +unNumberedCaption :: Monad m => Text -> Doc Text -> OD m (Doc Text) unNumberedCaption style caption = return $ inParagraphTagsWithStyle style caption colHeadsToOpenDocument :: PandocMonad m - => WriterOptions -> [String] -> [[Block]] + => WriterOptions -> [Text] -> [[Block]] -> OD m (Doc Text) colHeadsToOpenDocument o ns hs = inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o "TableHeaderRowCell") (zip ns hs) tableRowToOpenDocument :: PandocMonad m - => WriterOptions -> [String] -> [[Block]] + => WriterOptions -> [Text] -> [[Block]] -> OD m (Doc Text) tableRowToOpenDocument o ns cs = inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o "TableRowCell") (zip ns cs) tableItemToOpenDocument :: PandocMonad m - => WriterOptions -> String -> (String,[Block]) + => WriterOptions -> Text -> (Text,[Block]) -> OD m (Doc Text) tableItemToOpenDocument o s (n,i) = let a = [ ("table:style-name" , s ) @@ -520,7 +523,7 @@ inlineToOpenDocument o ils inlinesToOpenDocument o Cite _ l -> inlinesToOpenDocument o l RawInline f s -> if f == Format "opendocument" - then return $ text s + then return $ text $ T.unpack s else do report $ InlineNotRendered ils return empty @@ -544,7 +547,7 @@ inlineToOpenDocument o ils getDims (("rel-height", w):xs) = ("style:rel-height", w) : getDims xs getDims (_:xs) = getDims xs return $ inTags False "draw:frame" - (("draw:name", "img" ++ show id') : getDims kvs) $ + (("draw:name", "img" <> tshow id') : getDims kvs) $ selfClosingTag "draw:image" [ ("xlink:href" , s ) , ("xlink:type" , "simple") , ("xlink:show" , "embed" ) @@ -552,7 +555,7 @@ inlineToOpenDocument o ils mkNote l = do n <- length <$> gets stNotes let footNote t = inTags False "text:note" - [ ("text:id" , "ftn" ++ show n) + [ ("text:id" , "ftn" <> tshow n) , ("text:note-class", "footnote" )] $ inTagsSimple "text:note-citation" (text . show $ n + 1) <> inTagsSimple "text:note-body" t @@ -563,10 +566,10 @@ inlineToOpenDocument o ils bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc Text])) bulletListStyle l = do let doStyles i = inTags True "text:list-level-style-bullet" - [ ("text:level" , show (i + 1) ) - , ("text:style-name" , "Bullet_20_Symbols") - , ("style:num-suffix", "." ) - , ("text:bullet-char", [bulletList !! i] ) + [ ("text:level" , tshow (i + 1)) + , ("text:style-name" , "Bullet_20_Symbols" ) + , ("style:num-suffix", "." ) + , ("text:bullet-char", T.singleton (bulletList !! i)) ] (listLevelStyle (1 + i)) bulletList = map chr $ cycle [8226,9702,9642] listElStyle = map doStyles [0..9] @@ -587,16 +590,16 @@ orderedListLevelStyle (s,n, d) (l,ls) = LowerRoman -> "i" _ -> "1" listStyle = inTags True "text:list-level-style-number" - ([ ("text:level" , show $ 1 + length ls ) + ([ ("text:level" , tshow $ 1 + length ls ) , ("text:style-name" , "Numbering_20_Symbols") , ("style:num-format", format ) - , ("text:start-value", show s ) + , ("text:start-value", tshow s ) ] ++ suffix) (listLevelStyle (1 + length ls)) in (l, ls ++ [listStyle]) listLevelStyle :: Int -> Doc Text listLevelStyle i = - let indent = show (0.25 + (0.25 * fromIntegral i :: Double)) in + let indent = tshow (0.25 + (0.25 * fromIntegral i :: Double)) in inTags True "style:list-level-properties" [ ("text:list-level-position-and-space-mode", "label-alignment") @@ -604,27 +607,27 @@ listLevelStyle i = ] $ selfClosingTag "style:list-level-label-alignment" [ ("text:label-followed-by", "listtab") - , ("text:list-tab-stop-position", indent ++ "in") + , ("text:list-tab-stop-position", indent <> "in") , ("fo:text-indent", "-0.25in") - , ("fo:margin-left", indent ++ "in") + , ("fo:margin-left", indent <> "in") ] tableStyle :: Int -> [(Char,Double)] -> Doc Text tableStyle num wcs = - let tableId = "Table" ++ show (num + 1) + let tableId = "Table" <> tshow (num + 1) table = inTags True "style:style" [("style:name", tableId) ,("style:family", "table")] $ selfClosingTag "style:table-properties" [("table:align" , "center")] colStyle (c,0) = selfClosingTag "style:style" - [ ("style:name" , tableId ++ "." ++ [c]) + [ ("style:name" , tableId <> "." <> T.singleton c) , ("style:family", "table-column" )] colStyle (c,w) = inTags True "style:style" - [ ("style:name" , tableId ++ "." ++ [c]) + [ ("style:name" , tableId <> "." <> T.singleton c) , ("style:family", "table-column" )] $ selfClosingTag "style:table-column-properties" - [("style:rel-column-width", printf "%d*" (floor $ w * 65535 :: Integer))] + [("style:rel-column-width", T.pack $ printf "%d*" (floor $ w * 65535 :: Integer))] headerRowCellStyle = inTags True "style:style" [ ("style:name" , "TableHeaderRowCell") , ("style:family", "table-cell" )] $ @@ -641,15 +644,15 @@ tableStyle num wcs = columnStyles = map colStyle wcs in cellStyles $$ table $$ vcat columnStyles -paraStyle :: PandocMonad m => [(String,String)] -> OD m Int +paraStyle :: PandocMonad m => [(Text,Text)] -> OD m Int paraStyle attrs = do pn <- (+) 1 . length <$> gets stParaStyles i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara b <- gets stInDefinition t <- gets stTight - let styleAttr = [ ("style:name" , "P" ++ show pn) + let styleAttr = [ ("style:name" , "P" <> tshow pn) , ("style:family" , "paragraph" )] - indentVal = flip (++) "in" . show $ if b then max 0.5 i else i + indentVal = flip (<>) "in" . tshow $ if b then max 0.5 i else i tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] @@ -659,30 +662,30 @@ paraStyle attrs = do , ("fo:text-indent" , "0in" ) , ("style:auto-text-indent" , "false" )] else [] - attributes = indent ++ tight + attributes = indent <> tight paraProps = if null attributes then mempty else selfClosingTag "style:paragraph-properties" attributes - addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps + addParaStyle $ inTags True "style:style" (styleAttr <> attrs) paraProps return pn paraListStyle :: PandocMonad m => Int -> OD m Int paraListStyle l = paraStyle [("style:parent-style-name","Text_20_body") - ,("style:list-style-name", "L" ++ show l )] + ,("style:list-style-name", "L" <> tshow l)] -paraTableStyles :: String -> Int -> [Alignment] -> [(String, Doc Text)] +paraTableStyles :: Text -> Int -> [Alignment] -> [(Text, Doc Text)] paraTableStyles _ _ [] = [] paraTableStyles t s (a:xs) | AlignRight <- a = ( pName s, res s "end" ) : paraTableStyles t (s + 1) xs | AlignCenter <- a = ( pName s, res s "center") : paraTableStyles t (s + 1) xs - | otherwise = ("Table_20_" ++ t, empty ) : paraTableStyles t s xs - where pName sn = "P" ++ show (sn + 1) + | otherwise = ("Table_20_" <> t, empty ) : paraTableStyles t s xs + where pName sn = "P" <> tshow (sn + 1) res sn x = inTags True "style:style" [ ("style:name" , pName sn ) , ("style:family" , "paragraph" ) - , ("style:parent-style-name", "Table_20_" ++ t)] $ + , ("style:parent-style-name", "Table_20_" <> t)] $ selfClosingTag "style:paragraph-properties" [ ("fo:text-align", x) , ("style:justify-single-word", "false")] @@ -697,9 +700,9 @@ data TextStyle = Italic | Language Lang deriving ( Eq,Ord ) -textStyleAttr :: Map.Map String String +textStyleAttr :: Map.Map Text Text -> TextStyle - -> Map.Map String String + -> Map.Map Text Text textStyleAttr m s | Italic <- s = Map.insert "fo:font-style" "italic" . Map.insert "style:font-style-asian" "italic" . diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 3c4f1b237..e21d3f8c2 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Org - Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com> + Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com> 2010-2019 John MacFarlane <jgm@berkeley.edu> 2016-2019 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> License : GNU GPL, version 2 or above @@ -18,9 +18,10 @@ Org-Mode: <http://orgmode.org> module Text.Pandoc.Writers.Org (writeOrg) where import Prelude import Control.Monad.State.Strict -import Data.Char (isAlphaNum, toLower) -import Data.List (intersect, intersperse, isPrefixOf, partition, transpose) +import Data.Char (isAlphaNum) +import Data.List (intersect, intersperse, partition, transpose) import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging @@ -82,7 +83,7 @@ noteToOrg num note = do return $ hang (length marker) (text marker) contents -- | Escape special characters for Org. -escapeString :: String -> String +escapeString :: Text -> Text escapeString = escapeStringUsing $ [ ('\x2014',"---") , ('\x2013',"--") @@ -101,10 +102,10 @@ blockToOrg :: PandocMonad m blockToOrg Null = return empty blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do contents <- blockListToOrg bs - let drawerNameTag = ":" <> text cls <> ":" + let drawerNameTag = ":" <> literal cls <> ":" let keys = vcat $ map (\(k,v) -> - ":" <> text k <> ":" - <> space <> text v) kvs + ":" <> literal k <> ":" + <> space <> literal v) kvs let drawerEndTag = text ":END:" return $ drawerNameTag $$ cr $$ keys $$ blankline $$ contents $$ @@ -115,28 +116,29 @@ blockToOrg (Div (ident, classes, kv) bs) = do -- if one class looks like the name of a greater block then output as such: -- The ID, if present, is added via the #+NAME keyword; other classes and -- key-value pairs are kept as #+ATTR_HTML attributes. - let isGreaterBlockClass = (`elem` ["center", "quote"]) . map toLower + let isGreaterBlockClass = (`elem` ["center", "quote"]) . T.toLower (blockTypeCand, classes') = partition isGreaterBlockClass classes return $ case blockTypeCand of (blockType:classes'') -> blankline $$ attrHtml (ident, classes'' <> classes', kv) $$ - "#+BEGIN_" <> text blockType $$ contents $$ - "#+END_" <> text blockType $$ blankline + "#+BEGIN_" <> literal blockType $$ contents $$ + "#+END_" <> literal blockType $$ blankline _ -> -- fallback with id: add id as an anchor if present, discard classes and -- key-value pairs, unwrap the content. - let contents' = if not (null ident) - then "<<" <> text ident <> ">>" $$ contents + let contents' = if not (T.null ident) + then "<<" <> literal ident <> ">>" $$ contents else contents in blankline $$ contents' $$ blankline blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure -blockToOrg (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return empty - else ("#+CAPTION: " <>) `fmap` inlineListToOrg txt - img <- inlineToOrg (Image attr txt (src,tit)) - return $ capt $$ img $$ blankline +blockToOrg (Para [Image attr txt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt = do + capt <- if null txt + then return empty + else ("#+CAPTION: " <>) `fmap` inlineListToOrg txt + img <- inlineToOrg (Image attr txt (src,tit)) + return $ capt $$ img $$ blankline blockToOrg (Para inlines) = do contents <- inlineListToOrg inlines return $ contents <> blankline @@ -153,9 +155,9 @@ blockToOrg (LineBlock lns) = do nest 2 contents $$ "#+END_VERSE" <> blankline blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ - nest 2 (text str) $$ "#+END_HTML" $$ blankline + nest 2 (literal str) $$ "#+END_HTML" $$ blankline blockToOrg b@(RawBlock f str) - | isRawFormat f = return $ text str + | isRawFormat f = return $ literal str | otherwise = do report $ BlockNotRendered b return empty @@ -168,17 +170,17 @@ blockToOrg (Header level attr inlines) = do else cr <> nest (level + 1) (propertiesDrawer attr) return $ headerStr <> " " <> contents <> drawerStr <> blankline blockToOrg (CodeBlock (_,classes,kvs) str) = do - let startnum = maybe "" (\x -> ' ' : trimr x) $ lookup "startFrom" kvs + let startnum = maybe "" (\x -> " " <> trimr x) $ lookup "startFrom" kvs let numberlines = if "numberLines" `elem` classes then if "continuedSourceBlock" `elem` classes - then " +n" ++ startnum - else " -n" ++ startnum + then " +n" <> startnum + else " -n" <> startnum else "" let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers let (beg, end) = case at of - [] -> ("#+BEGIN_EXAMPLE" ++ numberlines, "#+END_EXAMPLE") - (x:_) -> ("#+BEGIN_SRC " ++ x ++ numberlines, "#+END_SRC") - return $ text beg $$ nest 2 (text str) $$ text end $$ blankline + [] -> ("#+BEGIN_EXAMPLE" <> numberlines, "#+END_EXAMPLE") + (x:_) -> ("#+BEGIN_SRC " <> x <> numberlines, "#+END_SRC") + return $ literal beg $$ nest 2 (literal str) $$ text end $$ blankline blockToOrg (BlockQuote blocks) = do contents <- blockListToOrg blocks return $ blankline $$ "#+BEGIN_QUOTE" $$ @@ -225,9 +227,9 @@ blockToOrg (OrderedList (start, _, delim) items) = do x -> x let markers = take (length items) $ orderedListMarkers (start, Decimal, delim') - let maxMarkerLength = maximum $ map length markers - let markers' = map (\m -> let s = maxMarkerLength - length m - in m ++ replicate s ' ') markers + let maxMarkerLength = maximum $ map T.length markers + let markers' = map (\m -> let s = maxMarkerLength - T.length m + in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToOrg markers' items -- ensure that sublists have preceding blank line return $ blankline $$ @@ -249,12 +251,12 @@ bulletListItemToOrg items = do -- | Convert ordered list item (a list of blocks) to Org. orderedListItemToOrg :: PandocMonad m - => String -- ^ marker for list item + => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> Org m (Doc Text) orderedListItemToOrg marker items = do contents <- blockListToOrg items - return $ hang (length marker + 1) (text marker <> space) contents $$ + return $ hang (T.length marker + 1) (literal marker <> space) contents $$ if endsWithPlain items then cr else blankline @@ -276,25 +278,25 @@ propertiesDrawer (ident, classes, kv) = let drawerStart = text ":PROPERTIES:" drawerEnd = text ":END:" - kv' = if classes == mempty then kv else ("CLASS", unwords classes):kv + kv' = if classes == mempty then kv else ("CLASS", T.unwords classes):kv kv'' = if ident == mempty then kv' else ("CUSTOM_ID", ident):kv' properties = vcat $ map kvToOrgProperty kv'' in drawerStart <> cr <> properties <> cr <> drawerEnd where - kvToOrgProperty :: (String, String) -> Doc Text + kvToOrgProperty :: (Text, Text) -> Doc Text kvToOrgProperty (key, value) = - text ":" <> text key <> text ": " <> text value <> cr + text ":" <> literal key <> text ": " <> literal value <> cr attrHtml :: Attr -> Doc Text attrHtml ("" , [] , []) = mempty attrHtml (ident, classes, kvs) = let - name = if null ident then mempty else "#+NAME: " <> text ident <> cr + name = if T.null ident then mempty else "#+NAME: " <> literal ident <> cr keyword = "#+ATTR_HTML" - classKv = ("class", unwords classes) + classKv = ("class", T.unwords classes) kvStrings = map (\(k,v) -> ":" <> k <> " " <> v) (classKv:kvs) - in name <> keyword <> ": " <> text (unwords kvStrings) <> cr + in name <> keyword <> ": " <> literal (T.unwords kvStrings) <> cr -- | Convert list of Pandoc block elements to Org. blockListToOrg :: PandocMonad m @@ -322,7 +324,7 @@ inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst) -- | Convert Pandoc inline element to Org. inlineToOrg :: PandocMonad m => Inline -> Org m (Doc Text) inlineToOrg (Span (uid, [], []) []) = - return $ "<<" <> text uid <> ">>" + return $ "<<" <> literal uid <> ">>" inlineToOrg (Span _ lst) = inlineListToOrg lst inlineToOrg (Emph lst) = do @@ -348,15 +350,15 @@ inlineToOrg (Quoted DoubleQuote lst) = do contents <- inlineListToOrg lst return $ "\"" <> contents <> "\"" inlineToOrg (Cite _ lst) = inlineListToOrg lst -inlineToOrg (Code _ str) = return $ "=" <> text str <> "=" -inlineToOrg (Str str) = return . text $ escapeString str +inlineToOrg (Code _ str) = return $ "=" <> literal str <> "=" +inlineToOrg (Str str) = return . literal $ escapeString str inlineToOrg (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath - then "$" <> text str <> "$" - else "$$" <> text str <> "$$" + then "$" <> literal str <> "$" + else "$$" <> literal str <> "$$" inlineToOrg il@(RawInline f str) - | isRawFormat f = return $ text str + | isRawFormat f = return $ literal str | otherwise = do report $ InlineNotRendered il return empty @@ -371,39 +373,38 @@ inlineToOrg SoftBreak = do inlineToOrg (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink - return $ "[[" <> text (orgPath x) <> "]]" + return $ "[[" <> literal (orgPath x) <> "]]" _ -> do contents <- inlineListToOrg txt - return $ "[[" <> text (orgPath src) <> "][" <> contents <> "]]" + return $ "[[" <> literal (orgPath src) <> "][" <> contents <> "]]" inlineToOrg (Image _ _ (source, _)) = - return $ "[[" <> text (orgPath source) <> "]]" + return $ "[[" <> literal (orgPath source) <> "]]" inlineToOrg (Note contents) = do -- add to notes in state notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } - let ref = show $ length notes + 1 - return $ "[fn:" <> text ref <> "]" + let ref = tshow $ length notes + 1 + return $ "[fn:" <> literal ref <> "]" -orgPath :: String -> String -orgPath src = - case src of - [] -> mempty -- wiki link - ('#':_) -> src -- internal link - _ | isUrl src -> src - _ | isFilePath src -> src - _ -> "file:" <> src - where - isFilePath :: String -> Bool - isFilePath cs = any (`isPrefixOf` cs) ["/", "./", "../", "file:"] +orgPath :: Text -> Text +orgPath src = case T.uncons src of + Nothing -> "" -- wiki link + Just ('#', _) -> src -- internal link + _ | isUrl src -> src + _ | isFilePath src -> src + _ -> "file:" <> src + where + isFilePath :: Text -> Bool + isFilePath cs = any (`T.isPrefixOf` cs) ["/", "./", "../", "file:"] - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) + isUrl :: Text -> Bool + isUrl cs = + let (scheme, path) = T.break (== ':') cs + in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme + && not (T.null path) -- | Translate from pandoc's programming language identifiers to those used by -- org-mode. -pandocLangToOrg :: String -> String +pandocLangToOrg :: Text -> Text pandocLangToOrg cs = case cs of "c" -> "C" @@ -414,7 +415,7 @@ pandocLangToOrg cs = _ -> cs -- | List of language identifiers recognized by org-mode. -orgLangIdentifiers :: [String] +orgLangIdentifiers :: [Text] orgLangIdentifiers = [ "asymptote", "awk", "C", "C++", "clojure", "css", "d", "ditaa", "dot" , "calc", "emacs-lisp", "fortran", "gnuplot", "haskell", "java", "js" diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 58f230a9d..344a5564a 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -136,7 +136,7 @@ data MediaInfo = MediaInfo { mInfoFilePath :: FilePath , mInfoLocalId :: Int , mInfoGlobalId :: Int , mInfoMimeType :: Maybe MimeType - , mInfoExt :: Maybe String + , mInfoExt :: Maybe T.Text , mInfoCaption :: Bool } deriving (Show, Eq) @@ -159,16 +159,20 @@ runP env st p = evalStateT (runReaderT p env) st -------------------------------------------------------------------- -monospaceFont :: Monad m => P m String +findAttrText :: QName -> Element -> Maybe T.Text +findAttrText n = fmap T.pack . findAttr n + +monospaceFont :: Monad m => P m T.Text monospaceFont = do vars <- writerVariables <$> asks envOpts case lookupContext "monofont" vars of - Just s -> return (T.unpack s) + Just s -> return s Nothing -> return "Courier" +-- Kept as string for XML.Light fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)] fontSizeAttributes RunProps { rPropForceSize = Just sz } = - return [("sz", (show $ sz * 100))] + return [("sz", show $ sz * 100)] fontSizeAttributes _ = return [] copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive @@ -177,7 +181,8 @@ copyFileToArchive arch fp = do distArchive <- asks envDistArchive case findEntryByPath fp refArchive `mplus` findEntryByPath fp distArchive of Nothing -> throwError $ PandocSomeError - $ fp ++ " missing in reference file" + $ T.pack + $ fp <> " missing in reference file" Just e -> return $ addEntryToArchive e arch alwaysInheritedPatterns :: [Pattern] @@ -196,7 +201,7 @@ alwaysInheritedPatterns = -- We only look for these under special conditions contingentInheritedPatterns :: Presentation -> [Pattern] -contingentInheritedPatterns pres = [] ++ +contingentInheritedPatterns pres = [] <> if presHasSpeakerNotes pres then map compile [ "ppt/notesMasters/notesMaster*.xml" , "ppt/notesMasters/_rels/notesMaster*.xml.rels" @@ -207,7 +212,7 @@ contingentInheritedPatterns pres = [] ++ inheritedPatterns :: Presentation -> [Pattern] inheritedPatterns pres = - alwaysInheritedPatterns ++ contingentInheritedPatterns pres + alwaysInheritedPatterns <> contingentInheritedPatterns pres patternToFilePaths :: PandocMonad m => Pattern -> P m [FilePath] patternToFilePaths pat = do @@ -248,8 +253,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do unless (null missingFiles) (throwError $ PandocSomeError $ - "The following required files are missing:\n" ++ - (unlines $ map (" " ++) missingFiles) + "The following required files are missing:\n" <> + (T.unlines $ map (T.pack . (" " <>)) missingFiles) ) newArch' <- foldM copyFileToArchive emptyArchive filePaths @@ -276,11 +281,11 @@ presentationToArchiveP p@(Presentation docProps slides) = do contentTypesEntry <- presentationToContentTypes p >>= contentTypesToEntry -- fold everything into our inherited archive and return it. return $ foldr addEntryToArchive newArch' $ - slideEntries ++ - slideRelEntries ++ - spkNotesEntries ++ - spkNotesRelEntries ++ - mediaEntries ++ + slideEntries <> + slideRelEntries <> + spkNotesEntries <> + spkNotesRelEntries <> + mediaEntries <> [contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry, presEntry, presRelsEntry, viewPropsEntry] @@ -352,11 +357,11 @@ getLayout layout = do distArchive <- asks envDistArchive parseXml refArchive distArchive layoutpath -shapeHasId :: NameSpaces -> String -> Element -> Bool +shapeHasId :: NameSpaces -> T.Text -> Element -> Bool shapeHasId ns ident element | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr - , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr = + , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr = nm == ident | otherwise = False @@ -397,7 +402,7 @@ getShapeDimensions ns element | otherwise = Nothing -getMasterShapeDimensionsById :: String +getMasterShapeDimensionsById :: T.Text -> Element -> Maybe ((Integer, Integer), (Integer, Integer)) getMasterShapeDimensionsById ident master = do @@ -422,7 +427,7 @@ getContentShapeSize ns layout master Nothing -> do let mbSz = findChild (elemName ns "p" "nvSpPr") sp >>= findChild (elemName ns "p" "cNvPr") >>= - findAttr (QName "id" Nothing Nothing) >>= + findAttrText (QName "id" Nothing Nothing) >>= flip getMasterShapeDimensionsById master case mbSz of Just sz' -> return sz' @@ -436,7 +441,7 @@ getContentShapeSize _ _ _ = throwError $ buildSpTree :: NameSpaces -> Element -> [Element] -> Element buildSpTree ns spTreeElem newShapes = emptySpTreeElem { elContent = newContent } - where newContent = elContent emptySpTreeElem ++ map Elem newShapes + where newContent = elContent emptySpTreeElem <> map Elem newShapes emptySpTreeElem = spTreeElem { elContent = filter fn (elContent spTreeElem) } fn :: Content -> Bool fn (Elem e) = isElem ns "p" "nvGrpSpPr" e || @@ -506,8 +511,8 @@ registerMedia fp caption = do [] -> 0 ids -> maximum ids - (imgBytes, mbMt) <- P.fetchItem fp - let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ '.':x)) + (imgBytes, mbMt) <- P.fetchItem $ T.pack fp + let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ "." <> x)) <|> case imageType imgBytes of Just Png -> Just ".png" @@ -546,11 +551,11 @@ registerMedia fp caption = do makeMediaEntry :: PandocMonad m => MediaInfo -> P m Entry makeMediaEntry mInfo = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$> asks envUTCTime - (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo) + (imgBytes, _) <- P.fetchItem (T.pack $ mInfoFilePath mInfo) let ext = case mInfoExt mInfo of Just e -> e Nothing -> "" - let fp = "ppt/media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext + let fp = "ppt/media/image" <> (show $ mInfoGlobalId mInfo) <> T.unpack ext return $ toEntry fp epochtime $ BL.fromStrict imgBytes makeMediaEntries :: PandocMonad m => P m [Entry] @@ -642,7 +647,7 @@ createCaption contentShapeDimensions paraElements = do elements <- mapM paragraphToElement [para] let ((x, y), (cx, cy)) = contentShapeDimensions let txBody = mknode "p:txBody" [] $ - [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> elements return $ mknode "p:sp" [] [ mknode "p:nvSpPr" [] [ mknode "p:cNvPr" [("id","1"), ("name","TextBox 3")] () @@ -675,7 +680,7 @@ makePicElements layout picProps mInfo alt = do (pageWidth, pageHeight) <- asks envPresentationSize -- hasHeader <- asks envSlideHasHeader let hasCaption = mInfoCaption mInfo - (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo) + (imgBytes, _) <- P.fetchItem (T.pack $ mInfoFilePath mInfo) let (pxX, pxY) = case imageSize opts imgBytes of Right sz -> sizeInPixels $ sz Left _ -> sizeInPixels $ def @@ -707,14 +712,14 @@ makePicElements layout picProps mInfo alt = do cNvPr <- case picPropLink picProps of Just link -> do idNum <- registerLink link return $ mknode "p:cNvPr" cNvPrAttr $ - mknode "a:hlinkClick" [("r:id", "rId" ++ show idNum)] () + mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] () Nothing -> return $ mknode "p:cNvPr" cNvPrAttr () let nvPicPr = mknode "p:nvPicPr" [] [ cNvPr , cNvPicPr , mknode "p:nvPr" [] ()] let blipFill = mknode "p:blipFill" [] - [ mknode "a:blip" [("r:embed", "rId" ++ (show $ mInfoLocalId mInfo))] () + [ mknode "a:blip" [("r:embed", "rId" <> (show $ mInfoLocalId mInfo))] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] let xfrm = mknode "a:xfrm" [] @@ -746,23 +751,23 @@ paraElemToElements :: PandocMonad m => ParaElem -> P m [Element] paraElemToElements Break = return [mknode "a:br" [] ()] paraElemToElements (Run rpr s) = do sizeAttrs <- fontSizeAttributes rpr - let attrs = sizeAttrs ++ - (if rPropBold rpr then [("b", "1")] else []) ++ - (if rPropItalics rpr then [("i", "1")] else []) ++ - (if rPropUnderline rpr then [("u", "sng")] else []) ++ + let attrs = sizeAttrs <> + (if rPropBold rpr then [("b", "1")] else []) <> + (if rPropItalics rpr then [("i", "1")] else []) <> + (if rPropUnderline rpr then [("u", "sng")] else []) <> (case rStrikethrough rpr of Just NoStrike -> [("strike", "noStrike")] Just SingleStrike -> [("strike", "sngStrike")] Just DoubleStrike -> [("strike", "dblStrike")] - Nothing -> []) ++ + Nothing -> []) <> (case rBaseline rpr of Just n -> [("baseline", show n)] - Nothing -> []) ++ + Nothing -> []) <> (case rCap rpr of Just NoCapitals -> [("cap", "none")] Just SmallCapitals -> [("cap", "small")] Just AllCapitals -> [("cap", "all")] - Nothing -> []) ++ + Nothing -> []) <> [] linkProps <- case rLink rpr of Just link -> do @@ -773,14 +778,14 @@ paraElemToElements (Run rpr s) = do return $ case link of InternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" ++ show idNum) + [ ("r:id", "rId" <> show idNum) , ("action", "ppaction://hlinksldjump") ] in [mknode "a:hlinkClick" linkAttrs ()] -- external ExternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" ++ show idNum) + [ ("r:id", "rId" <> show idNum) ] in [mknode "a:hlinkClick" linkAttrs ()] Nothing -> return [] @@ -794,11 +799,11 @@ paraElemToElements (Run rpr s) = do Nothing -> [] codeFont <- monospaceFont let codeContents = if rPropCode rpr - then [mknode "a:latin" [("typeface", codeFont)] ()] + then [mknode "a:latin" [("typeface", T.unpack codeFont)] ()] else [] - let propContents = linkProps ++ colorContents ++ codeContents + let propContents = linkProps <> colorContents <> codeContents return [mknode "a:r" [] [ mknode "a:rPr" attrs $ propContents - , mknode "a:t" [] s + , mknode "a:t" [] $ T.unpack s ]] paraElemToElements (MathElem mathType texStr) = do res <- convertMath writeOMML mathType (unTeXString texStr) @@ -839,29 +844,29 @@ surroundWithMathAlternate element = paragraphToElement :: PandocMonad m => Paragraph -> P m Element paragraphToElement par = do let - attrs = [("lvl", show $ pPropLevel $ paraProps par)] ++ + attrs = [("lvl", show $ pPropLevel $ paraProps par)] <> (case pPropMarginLeft (paraProps par) of Just px -> [("marL", show $ pixelsToEmu px)] Nothing -> [] - ) ++ + ) <> (case pPropIndent (paraProps par) of Just px -> [("indent", show $ pixelsToEmu px)] Nothing -> [] - ) ++ + ) <> (case pPropAlign (paraProps par) of Just AlgnLeft -> [("algn", "l")] Just AlgnRight -> [("algn", "r")] Just AlgnCenter -> [("algn", "ctr")] Nothing -> [] ) - props = [] ++ + props = [] <> (case pPropSpaceBefore $ paraProps par of Just px -> [mknode "a:spcBef" [] [ mknode "a:spcPts" [("val", show $ 100 * px)] () ] ] Nothing -> [] - ) ++ + ) <> (case pPropBullet $ paraProps par of Just Bullet -> [] Just (AutoNumbering attrs') -> @@ -869,7 +874,7 @@ paragraphToElement par = do Nothing -> [mknode "a:buNone" [] ()] ) paras <- concat <$> mapM paraElemToElements (paraElems par) - return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] ++ paras + return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] <> paras shapeToElement :: PandocMonad m => Element -> Shape -> P m Element shapeToElement layout (TextBox paras) @@ -879,7 +884,7 @@ shapeToElement layout (TextBox paras) sp <- getContentShape ns spTree elements <- mapM paragraphToElement paras let txBody = mknode "p:txBody" [] $ - [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> elements emptySpPr = mknode "p:spPr" [] () return $ surroundWithMathAlternate $ @@ -933,19 +938,19 @@ graphicFrameToElements layout tbls caption = do [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] () , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] () ] - ] ++ elements + ] <> elements if (not $ null caption) then do capElt <- createCaption ((x, y), (cx, cytmp)) caption return [graphicFrameElts, capElt] else return [graphicFrameElts] -getDefaultTableStyle :: PandocMonad m => P m (Maybe String) +getDefaultTableStyle :: PandocMonad m => P m (Maybe T.Text) getDefaultTableStyle = do refArchive <- asks envRefArchive distArchive <- asks envDistArchive tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml" - return $ findAttr (QName "def" Nothing Nothing) tblStyleLst + return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do @@ -970,7 +975,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do [mknode "a:txBody" [] $ ([ mknode "a:bodyPr" [] () , mknode "a:lstStyle" [] ()] - ++ elements')] + <> elements')] headers' <- mapM cellToOpenXML hdrCells rows' <- mapM (mapM cellToOpenXML) rows let borderProps = mknode "a:tcPr" [] () @@ -978,7 +983,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do let mkcell border contents = mknode "a:tc" [] $ (if null contents then emptyCell - else contents) ++ [ borderProps | border ] + else contents) <> [ borderProps | border ] let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells let mkgridcol w = mknode "a:gridCol" @@ -991,7 +996,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do , ("bandRow", if tblPrBandRow tblPr then "1" else "0") ] (case mbDefTblStyle of Nothing -> [] - Just sty -> [mknode "a:tableStyleId" [] sty]) + Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty]) return $ mknode "a:graphic" [] $ [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")] $ @@ -1001,7 +1006,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do then [] else map mkgridcol colWidths) ] - ++ [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' + <> [ mkrow True headers' | hasHeader ] <> map (mkrow False) rows' ] ] @@ -1009,7 +1014,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do -- We get the shape by placeholder type. If there is NO type, it -- defaults to a content placeholder. -data PHType = PHType String | ObjType +data PHType = PHType T.Text | ObjType deriving (Show, Eq) findPHType :: NameSpaces -> Element -> PHType -> Bool @@ -1024,7 +1029,7 @@ findPHType ns spElem phType -- if it's a named PHType, we want to check that the attribute -- value matches. Just phElem | (PHType tp) <- phType -> - case findAttr (QName "type" Nothing Nothing) phElem of + case findAttrText (QName "type" Nothing Nothing) phElem of Just tp' -> tp == tp' Nothing -> False -- if it's an ObjType, we want to check that there is NO @@ -1063,7 +1068,7 @@ nonBodyTextToElement layout phTypes paraElements let hdrPara = Paragraph def paraElements element <- paragraphToElement hdrPara let txBody = mknode "p:txBody" [] $ - [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> [element] return $ replaceNamedChildren ns "p" "txBody" [txBody] sp -- XXX: TODO @@ -1081,7 +1086,7 @@ contentToElement layout hdrShape shapes contentElements <- local (\env -> env {envContentType = NormalContent}) (shapesToElements layout shapes) - return $ buildSpTree ns spTree (hdrShapeElements ++ contentElements) + return $ buildSpTree ns spTree (hdrShapeElements <> contentElements) contentToElement _ _ _ = return $ mknode "p:sp" [] () twoColumnToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> [Shape] -> P m Element @@ -1101,7 +1106,7 @@ twoColumnToElement layout hdrShape shapesL shapesR (shapesToElements layout shapesR) -- let contentElementsL' = map (setIdx ns "1") contentElementsL -- contentElementsR' = map (setIdx ns "2") contentElementsR - return $ buildSpTree ns spTree (hdrShapeElements ++ contentElementsL ++ contentElementsR) + return $ buildSpTree ns spTree (hdrShapeElements <> contentElementsL <> contentElementsR) twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] () @@ -1133,7 +1138,7 @@ metadataToElement layout titleElems subtitleElems authorsElems dateElems dateShapeElements <- if null dateElems then return [] else sequence [nonBodyTextToElement layout [PHType "dt"] dateElems] - return $ buildSpTree ns spTree (titleShapeElements ++ subtitleShapeElements ++ dateShapeElements) + return $ buildSpTree ns spTree (titleShapeElements <> subtitleShapeElements <> dateShapeElements) metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] () slideToElement :: PandocMonad m => Slide -> P m Element @@ -1186,7 +1191,7 @@ getNotesMaster = do distArchive <- asks envDistArchive parseXml refArchive distArchive "ppt/notesMasters/notesMaster1.xml" -getSlideNumberFieldId :: PandocMonad m => Element -> P m String +getSlideNumberFieldId :: PandocMonad m => Element -> P m T.Text getSlideNumberFieldId notesMaster | ns <- elemToNameSpaces notesMaster , Just cSld <- findChild (elemName ns "p" "cSld") notesMaster @@ -1195,7 +1200,7 @@ getSlideNumberFieldId notesMaster , Just txBody <- findChild (elemName ns "p" "txBody") sp , Just p <- findChild (elemName ns "a" "p") txBody , Just fld <- findChild (elemName ns "a" "fld") p - , Just fldId <- findAttr (QName "id" Nothing Nothing) fld = + , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld = return fldId | otherwise = throwError $ PandocSomeError $ @@ -1236,7 +1241,7 @@ speakerNotesBody :: PandocMonad m => [Paragraph] -> P m Element speakerNotesBody paras = do elements <- mapM paragraphToElement $ spaceParas $ map removeParaLinks paras let txBody = mknode "p:txBody" [] $ - [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> elements return $ mknode "p:sp" [] $ [ mknode "p:nvSpPr" [] $ @@ -1252,7 +1257,7 @@ speakerNotesBody paras = do , txBody ] -speakerNotesSlideNumber :: Int -> String -> Element +speakerNotesSlideNumber :: Int -> T.Text -> Element speakerNotesSlideNumber pgNum fieldId = mknode "p:sp" [] $ [ mknode "p:nvSpPr" [] $ @@ -1273,7 +1278,7 @@ speakerNotesSlideNumber pgNum fieldId = [ mknode "a:bodyPr" [] () , mknode "a:lstStyle" [] () , mknode "a:p" [] $ - [ mknode "a:fld" [ ("id", fieldId) + [ mknode "a:fld" [ ("id", T.unpack fieldId) , ("type", "slidenum") ] [ mknode "a:rPr" [("lang", "en-US")] () @@ -1329,24 +1334,24 @@ getSlideIdNum sldId = do Just n -> return n Nothing -> throwError $ PandocShouldNeverHappenError $ - "Slide Id " ++ (show sldId) ++ " not found." + "Slide Id " <> T.pack (show sldId) <> " not found." slideNum :: PandocMonad m => Slide -> P m Int slideNum slide = getSlideIdNum $ slideId slide idNumToFilePath :: Int -> FilePath -idNumToFilePath idNum = "slide" ++ (show $ idNum) ++ ".xml" +idNumToFilePath idNum = "slide" <> (show $ idNum) <> ".xml" slideToFilePath :: PandocMonad m => Slide -> P m FilePath slideToFilePath slide = do idNum <- slideNum slide - return $ "slide" ++ (show $ idNum) ++ ".xml" + return $ "slide" <> (show $ idNum) <> ".xml" -slideToRelId :: PandocMonad m => Slide -> P m String +slideToRelId :: PandocMonad m => Slide -> P m T.Text slideToRelId slide = do n <- slideNum slide offset <- asks envSlideIdOffset - return $ "rId" ++ (show $ n + offset) + return $ "rId" <> T.pack (show $ n + offset) data Relationship = Relationship { relId :: Int @@ -1362,7 +1367,7 @@ elementToRel element num <- case reads numStr :: [(Int, String)] of (n, _) : _ -> Just n [] -> Nothing - type' <- findAttr (QName "Type" Nothing Nothing) element + type' <- findAttrText (QName "Type" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element return $ Relationship num type' target | otherwise = Nothing @@ -1372,7 +1377,7 @@ slideToPresRel slide = do idNum <- slideNum slide n <- asks envSlideIdOffset let rId = idNum + n - fp = "slides/" ++ idNumToFilePath idNum + fp = "slides/" <> idNumToFilePath idNum return $ Relationship { relId = rId , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" , relTarget = fp @@ -1397,7 +1402,7 @@ presentationToRels pres@(Presentation _ slides) = do , relTarget = "notesMasters/notesMaster1.xml" }] else [] - insertedRels = mySlideRels ++ notesMasterRels + insertedRels = mySlideRels <> notesMasterRels rels <- getRels -- we remove the slide rels and the notesmaster (if it's -- there). We'll put these back in ourselves, if necessary. @@ -1427,7 +1432,7 @@ presentationToRels pres@(Presentation _ slides) = do relsWeKeep' = map (\r -> r{relId = modifyRelNum $ relId r}) relsWeKeep - return $ insertedRels ++ relsWeKeep' + return $ insertedRels <> relsWeKeep' -- We make this ourselves, in case there's a thumbnail in the one from -- the template. @@ -1455,8 +1460,8 @@ topLevelRelsEntry :: PandocMonad m => P m Entry topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels relToElement :: Relationship -> Element -relToElement rel = mknode "Relationship" [ ("Id", "rId" ++ (show $ relId rel)) - , ("Type", relType rel) +relToElement rel = mknode "Relationship" [ ("Id", "rId" <> (show $ relId rel)) + , ("Type", T.unpack $ relType rel) , ("Target", relTarget rel) ] () relsToElement :: [Relationship] -> Element @@ -1479,7 +1484,7 @@ slideToEntry slide = do idNum <- slideNum slide local (\env -> env{envCurSlideId = idNum}) $ do element <- slideToElement slide - elemToEntry ("ppt/slides/" ++ idNumToFilePath idNum) element + elemToEntry ("ppt/slides/" <> idNumToFilePath idNum) element slideToSpeakerNotesEntry :: PandocMonad m => Slide -> P m (Maybe Entry) slideToSpeakerNotesEntry slide = do @@ -1492,7 +1497,7 @@ slideToSpeakerNotesEntry slide = do Just element | Just notesIdNum <- mbNotesIdNum -> Just <$> elemToEntry - ("ppt/notesSlides/notesSlide" ++ show notesIdNum ++ ".xml") + ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml") element _ -> return Nothing @@ -1505,7 +1510,7 @@ slideToSpeakerNotesRelElement slide@(Slide _ _ _) = do [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] [ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "../slides/slide" ++ show idNum ++ ".xml") + , ("Target", "../slides/slide" <> show idNum <> ".xml") ] () , mknode "Relationship" [ ("Id", "rId1") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") @@ -1524,7 +1529,7 @@ slideToSpeakerNotesRelEntry slide = do Just element | Just notesIdNum <- mbNotesIdNum -> Just <$> elemToEntry - ("ppt/notesSlides/_rels/notesSlide" ++ show notesIdNum ++ ".xml.rels") + ("ppt/notesSlides/_rels/notesSlide" <> show notesIdNum <> ".xml.rels") element _ -> return Nothing @@ -1532,21 +1537,21 @@ slideToSlideRelEntry :: PandocMonad m => Slide -> P m Entry slideToSlideRelEntry slide = do idNum <- slideNum slide element <- slideToSlideRelElement slide - elemToEntry ("ppt/slides/_rels/" ++ idNumToFilePath idNum ++ ".rels") element + elemToEntry ("ppt/slides/_rels/" <> idNumToFilePath idNum <> ".rels") element linkRelElement :: PandocMonad m => Int -> LinkTarget -> P m Element linkRelElement rIdNum (InternalTarget targetId) = do targetIdNum <- getSlideIdNum targetId return $ - mknode "Relationship" [ ("Id", "rId" ++ show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> show rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "slide" ++ show targetIdNum ++ ".xml") + , ("Target", "slide" <> show targetIdNum <> ".xml") ] () linkRelElement rIdNum (ExternalTarget (url, _)) = do return $ - mknode "Relationship" [ ("Id", "rId" ++ show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> show rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink") - , ("Target", url) + , ("Target", T.unpack url) , ("TargetMode", "External") ] () @@ -1559,9 +1564,9 @@ mediaRelElement mInfo = Just e -> e Nothing -> "" in - mknode "Relationship" [ ("Id", "rId" ++ (show $ mInfoLocalId mInfo)) + mknode "Relationship" [ ("Id", "rId" <> (show $ mInfoLocalId mInfo)) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image") - , ("Target", "../media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext) + , ("Target", "../media/image" <> (show $ mInfoGlobalId mInfo) <> T.unpack ext) ] () speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element) @@ -1571,7 +1576,7 @@ speakerNotesSlideRelElement slide = do return $ case M.lookup idNum mp of Nothing -> Nothing Just n -> - let target = "../notesSlides/notesSlide" ++ show n ++ ".xml" + let target = "../notesSlides/notesSlide" <> show n <> ".xml" in Just $ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide") @@ -1605,14 +1610,14 @@ slideToSlideRelElement slide = do ([mknode "Relationship" [ ("Id", "rId1") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout") , ("Target", target)] () - ] ++ speakerNotesRels ++ linkRels ++ mediaRels) + ] <> speakerNotesRels <> linkRels <> mediaRels) slideToSldIdElement :: PandocMonad m => Slide -> P m Element slideToSldIdElement slide = do n <- slideNum slide let id' = show $ n + 255 rId <- slideToRelId slide - return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] () + return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] () presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element presentationToSldIdLst (Presentation _ slides) = do @@ -1637,7 +1642,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do notesMasterElem = mknode "p:notesMasterIdLst" [] [ mknode "p:NotesMasterId" - [("r:id", "rId" ++ show notesMasterRId)] + [("r:id", "rId" <> show notesMasterRId)] () ] @@ -1683,7 +1688,7 @@ docPropsElement :: PandocMonad m => DocProps -> P m Element docPropsElement docProps = do utctime <- asks envUTCTime let keywords = case dcKeywords docProps of - Just xs -> intercalate ", " xs + Just xs -> T.intercalate ", " xs Nothing -> "" return $ mknode "cp:coreProperties" @@ -1692,16 +1697,16 @@ docPropsElement docProps = do ,("xmlns:dcterms","http://purl.org/dc/terms/") ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] - $ (mknode "dc:title" [] $ fromMaybe "" $ dcTitle docProps) - : (mknode "dc:creator" [] $ fromMaybe "" $ dcCreator docProps) - : (mknode "cp:keywords" [] keywords) + $ (mknode "dc:title" [] $ maybe "" T.unpack $ dcTitle docProps) + : (mknode "dc:creator" [] $ maybe "" T.unpack $ dcCreator docProps) + : (mknode "cp:keywords" [] $ T.unpack keywords) : (if isNothing (dcSubject docProps) then [] else - [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps]) - ++ (if isNothing (dcDescription docProps) then [] else - [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps]) - ++ (if isNothing (cpCategory docProps) then [] else - [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps]) - ++ (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x + [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps]) + <> (if isNothing (dcDescription docProps) then [] else + [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps]) + <> (if isNothing (cpCategory docProps) then [] else + [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps]) + <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) @@ -1715,7 +1720,7 @@ docCustomPropsElement docProps = do let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") ,("pid", show pid) - ,("name", k)] $ mknode "vt:lpwstr" [] v + ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v) return $ mknode "Properties" [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties") ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes") @@ -1745,15 +1750,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml" defaultContentTypeToElem :: DefaultContentType -> Element defaultContentTypeToElem dct = mknode "Default" - [("Extension", defContentTypesExt dct), - ("ContentType", defContentTypesType dct)] + [("Extension", T.unpack $ defContentTypesExt dct), + ("ContentType", T.unpack $ defContentTypesType dct)] () overrideContentTypeToElem :: OverrideContentType -> Element overrideContentTypeToElem oct = mknode "Override" [("PartName", overrideContentTypesPart oct), - ("ContentType", overrideContentTypesType oct)] + ("ContentType", T.unpack $ overrideContentTypesType oct)] () contentTypesToElement :: ContentTypes -> Element @@ -1761,11 +1766,11 @@ contentTypesToElement ct = let ns = "http://schemas.openxmlformats.org/package/2006/content-types" in mknode "Types" [("xmlns", ns)] $ - (map defaultContentTypeToElem $ contentTypesDefaults ct) ++ + (map defaultContentTypeToElem $ contentTypesDefaults ct) <> (map overrideContentTypeToElem $ contentTypesOverrides ct) data DefaultContentType = DefaultContentType - { defContentTypesExt :: String + { defContentTypesExt :: T.Text , defContentTypesType:: MimeType } deriving (Show, Eq) @@ -1785,12 +1790,12 @@ contentTypesToEntry :: PandocMonad m => ContentTypes -> P m Entry contentTypesToEntry ct = elemToEntry "[Content_Types].xml" $ contentTypesToElement ct pathToOverride :: FilePath -> Maybe OverrideContentType -pathToOverride fp = OverrideContentType ("/" ++ fp) <$> (getContentType fp) +pathToOverride fp = OverrideContentType ("/" <> fp) <$> (getContentType fp) mediaFileContentType :: FilePath -> Maybe DefaultContentType mediaFileContentType fp = case takeExtension fp of '.' : ext -> Just $ - DefaultContentType { defContentTypesExt = ext + DefaultContentType { defContentTypesExt = T.pack ext , defContentTypesType = case getMimeType fp of Just mt -> mt @@ -1800,7 +1805,8 @@ mediaFileContentType fp = case takeExtension fp of mediaContentType :: MediaInfo -> Maybe DefaultContentType mediaContentType mInfo - | Just ('.' : ext) <- mInfoExt mInfo = + | Just t <- mInfoExt mInfo + , Just ('.', ext) <- T.uncons t = Just $ DefaultContentType { defContentTypesExt = ext , defContentTypesType = case mInfoMimeType mInfo of @@ -1813,7 +1819,7 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath] getSpeakerNotesFilePaths = do mp <- asks envSpeakerNotesIdMap let notesIdNums = M.elems mp - return $ map (\n -> "ppt/notesSlides/notesSlide" ++ show n ++ ".xml") notesIdNums + return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes presentationToContentTypes p@(Presentation _ slides) = do @@ -1824,7 +1830,7 @@ presentationToContentTypes p@(Presentation _ slides) = do , DefaultContentType "rels" "application/vnd.openxmlformats-package.relationships+xml" ] mediaDefaults = nub $ - (mapMaybe mediaContentType $ mediaInfos) ++ + (mapMaybe mediaContentType $ mediaInfos) <> (mapMaybe mediaFileContentType $ mediaFps) inheritedOverrides = mapMaybe pathToOverride filePaths @@ -1835,55 +1841,56 @@ presentationToContentTypes p@(Presentation _ slides) = do ] relativePaths <- mapM slideToFilePath slides let slideOverrides = mapMaybe - (\fp -> pathToOverride $ "ppt/slides/" ++ fp) + (\fp -> pathToOverride $ "ppt/slides/" <> fp) relativePaths speakerNotesOverrides <- (mapMaybe pathToOverride) <$> getSpeakerNotesFilePaths return $ ContentTypes - (defaults ++ mediaDefaults) - (inheritedOverrides ++ createdOverrides ++ slideOverrides ++ speakerNotesOverrides) + (defaults <> mediaDefaults) + (inheritedOverrides <> createdOverrides <> slideOverrides <> speakerNotesOverrides) -presML :: String +presML :: T.Text presML = "application/vnd.openxmlformats-officedocument.presentationml" -noPresML :: String +noPresML :: T.Text noPresML = "application/vnd.openxmlformats-officedocument" getContentType :: FilePath -> Maybe MimeType getContentType fp - | fp == "ppt/presentation.xml" = Just $ presML ++ ".presentation.main+xml" - | fp == "ppt/presProps.xml" = Just $ presML ++ ".presProps+xml" - | fp == "ppt/viewProps.xml" = Just $ presML ++ ".viewProps+xml" - | fp == "ppt/tableStyles.xml" = Just $ presML ++ ".tableStyles+xml" + | fp == "ppt/presentation.xml" = Just $ presML <> ".presentation.main+xml" + | fp == "ppt/presProps.xml" = Just $ presML <> ".presProps+xml" + | fp == "ppt/viewProps.xml" = Just $ presML <> ".viewProps+xml" + | fp == "ppt/tableStyles.xml" = Just $ presML <> ".tableStyles+xml" | fp == "docProps/core.xml" = Just $ "application/vnd.openxmlformats-package.core-properties+xml" | fp == "docProps/custom.xml" = Just $ "application/vnd.openxmlformats-officedocument.custom-properties+xml" - | fp == "docProps/app.xml" = Just $ noPresML ++ ".extended-properties+xml" + | fp == "docProps/app.xml" = Just $ noPresML <> ".extended-properties+xml" | "ppt" : "slideMasters" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ presML ++ ".slideMaster+xml" + Just $ presML <> ".slideMaster+xml" | "ppt" : "slides" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ presML ++ ".slide+xml" + Just $ presML <> ".slide+xml" | "ppt" : "notesMasters" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ presML ++ ".notesMaster+xml" + Just $ presML <> ".notesMaster+xml" | "ppt" : "notesSlides" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ presML ++ ".notesSlide+xml" + Just $ presML <> ".notesSlide+xml" | "ppt" : "theme" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ noPresML ++ ".theme+xml" + Just $ noPresML <> ".theme+xml" | "ppt" : "slideLayouts" : _ : [] <- splitDirectories fp= - Just $ presML ++ ".slideLayout+xml" + Just $ presML <> ".slideLayout+xml" | otherwise = Nothing +-- Kept as String for XML.Light autoNumAttrs :: ListAttributes -> [(String, String)] autoNumAttrs (startNum, numStyle, numDelim) = - numAttr ++ typeAttr + numAttr <> typeAttr where numAttr = if startNum == 1 then [] else [("startAt", show startNum)] - typeAttr = [("type", typeString ++ delimString)] + typeAttr = [("type", typeString <> delimString)] typeString = case numStyle of Decimal -> "arabic" UpperAlpha -> "alphaUc" diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index 8667c79f4..75ce0dd4e 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -1,5 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | Module : Text.Pandoc.Writers.Powerpoint.Presentation @@ -54,6 +56,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Walk import Data.Time (UTCTime) import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element" +import Text.Pandoc.Shared (tshow) import Text.Pandoc.Writers.Shared (lookupMetaInlines, lookupMetaBlocks , lookupMetaString, toTableOfContents) import qualified Data.Map as M @@ -93,7 +96,7 @@ instance Default WriterEnv where data WriterState = WriterState { stNoteIds :: M.Map Int [Block] -- associate anchors with slide id - , stAnchorMap :: M.Map String SlideId + , stAnchorMap :: M.Map T.Text SlideId , stSlideIdSet :: S.Set SlideId , stLog :: [LogMessage] , stSpeakerNotes :: SpeakerNotes @@ -123,17 +126,17 @@ reservedSlideIds = S.fromList [ metadataSlideId , endNotesSlideId ] -uniqueSlideId' :: Integer -> S.Set SlideId -> String -> SlideId +uniqueSlideId' :: Integer -> S.Set SlideId -> T.Text -> SlideId uniqueSlideId' n idSet s = - let s' = if n == 0 then s else s ++ "-" ++ show n + let s' = if n == 0 then s else s <> "-" <> tshow n in if SlideId s' `S.member` idSet then uniqueSlideId' (n+1) idSet s else SlideId s' -uniqueSlideId :: S.Set SlideId -> String -> SlideId +uniqueSlideId :: S.Set SlideId -> T.Text -> SlideId uniqueSlideId = uniqueSlideId' 0 -runUniqueSlideId :: String -> Pres SlideId +runUniqueSlideId :: T.Text -> Pres SlideId runUniqueSlideId s = do idSet <- gets stSlideIdSet let sldId = uniqueSlideId idSet s @@ -159,14 +162,14 @@ type Pixels = Integer data Presentation = Presentation DocProps [Slide] deriving (Show) -data DocProps = DocProps { dcTitle :: Maybe String - , dcSubject :: Maybe String - , dcCreator :: Maybe String - , dcKeywords :: Maybe [String] - , dcDescription :: Maybe String - , cpCategory :: Maybe String +data DocProps = DocProps { dcTitle :: Maybe T.Text + , dcSubject :: Maybe T.Text + , dcCreator :: Maybe T.Text + , dcKeywords :: Maybe [T.Text] + , dcDescription :: Maybe T.Text + , cpCategory :: Maybe T.Text , dcCreated :: Maybe UTCTime - , customProperties :: Maybe [(String, String)] + , customProperties :: Maybe [(T.Text, T.Text)] } deriving (Show, Eq) @@ -175,7 +178,7 @@ data Slide = Slide { slideId :: SlideId , slideSpeakerNotes :: SpeakerNotes } deriving (Show, Eq) -newtype SlideId = SlideId String +newtype SlideId = SlideId T.Text deriving (Show, Eq, Ord) -- In theory you could have anything on a notes slide but it seems @@ -197,7 +200,7 @@ data Layout = MetadataSlide [ParaElem] [ParaElem] [[ParaElem]] [ParaElem] data Shape = Pic PicProps FilePath [ParaElem] | GraphicFrame [Graphic] [ParaElem] | TextBox [Paragraph] - | RawOOXMLShape String + | RawOOXMLShape T.Text deriving (Show, Eq) type Cell = [Paragraph] @@ -240,17 +243,17 @@ instance Default ParaProps where , pPropIndent = Just 0 } -newtype TeXString = TeXString {unTeXString :: String} +newtype TeXString = TeXString {unTeXString :: T.Text} deriving (Eq, Show) data ParaElem = Break - | Run RunProps String + | Run RunProps T.Text -- It would be more elegant to have native TeXMath -- Expressions here, but this allows us to use -- `convertmath` from T.P.Writers.Math. Will perhaps -- revisit in the future. | MathElem MathType TeXString - | RawOOXMLParaElem String + | RawOOXMLParaElem T.Text deriving (Show, Eq) data Strikethrough = NoStrike | SingleStrike | DoubleStrike @@ -259,9 +262,9 @@ data Strikethrough = NoStrike | SingleStrike | DoubleStrike data Capitals = NoCapitals | SmallCapitals | AllCapitals deriving (Show, Eq) -type URL = String +type URL = T.Text -data LinkTarget = ExternalTarget (URL, String) +data LinkTarget = ExternalTarget (URL, T.Text) | InternalTarget SlideId deriving (Show, Eq) @@ -360,7 +363,7 @@ inlineToParElems (Note blks) = do curNoteId = maxNoteId + 1 modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes } local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $ - inlineToParElems $ Superscript [Str $ show curNoteId] + inlineToParElems $ Superscript [Str $ tshow curNoteId] inlineToParElems (Span (_, ["underline"], _) ils) = local (\r -> r{envRunProps = (envRunProps r){rPropUnderline=True}}) $ inlinesToParElems ils @@ -389,11 +392,11 @@ isListType (BulletList _) = True isListType (DefinitionList _) = True isListType _ = False -registerAnchorId :: String -> Pres () +registerAnchorId :: T.Text -> Pres () registerAnchorId anchor = do anchorMap <- gets stAnchorMap sldId <- asks envCurSlideId - unless (null anchor) $ + unless (T.null anchor) $ modify $ \st -> st {stAnchorMap = M.insert anchor sldId anchorMap} -- Currently hardcoded, until I figure out how to make it dynamic. @@ -531,11 +534,11 @@ withAttr _ sp = sp blockToShape :: Block -> Pres Shape blockToShape (Plain ils) = blockToShape (Para ils) blockToShape (Para (il:_)) | Image attr ils (url, _) <- il = - (withAttr attr . Pic def url) <$> inlinesToParElems ils + (withAttr attr . Pic def (T.unpack url)) <$> inlinesToParElems ils blockToShape (Para (il:_)) | Link _ (il':_) target <- il , Image attr ils (url, _) <- il' = - (withAttr attr . Pic def{picPropLink = Just $ ExternalTarget target} url) <$> - inlinesToParElems ils + (withAttr attr . Pic def{picPropLink = Just $ ExternalTarget target} (T.unpack url)) + <$> inlinesToParElems ils blockToShape (Table caption algn _ hdrCells rows) = do caption' <- inlinesToParElems caption hdrCells' <- rowToParagraphs algn hdrCells @@ -711,7 +714,7 @@ blocksToSlide blks = do makeNoteEntry :: Int -> [Block] -> [Block] makeNoteEntry n blks = - let enum = Str (show n ++ ".") + let enum = Str (tshow n <> ".") in case blks of (Para ils : blks') -> (Para $ enum : Space : ils) : blks' @@ -786,7 +789,7 @@ combineParaElems' (Just pElem') (pElem : pElems) | Run rPr' s' <- pElem' , Run rPr s <- pElem , rPr == rPr' = - combineParaElems' (Just $ Run rPr' $ s' ++ s) pElems + combineParaElems' (Just $ Run rPr' $ s' <> s) pElems | otherwise = pElem' : combineParaElems' (Just pElem) pElems @@ -831,7 +834,8 @@ applyToSlide f slide = do replaceAnchor :: ParaElem -> Pres ParaElem replaceAnchor (Run rProps s) - | Just (ExternalTarget ('#':anchor, _)) <- rLink rProps = do + | Just (ExternalTarget (T.uncons -> Just ('#', anchor), _)) <- rLink rProps + = do anchorMap <- gets stAnchorMap -- If the anchor is not in the anchormap, we just remove the -- link. @@ -843,9 +847,9 @@ replaceAnchor pe = return pe emptyParaElem :: ParaElem -> Bool emptyParaElem (Run _ s) = - null $ Shared.trim s + T.null $ Shared.trim s emptyParaElem (MathElem _ ts) = - null $ Shared.trim $ unTeXString ts + T.null $ Shared.trim $ unTeXString ts emptyParaElem _ = False emptyParagraph :: Paragraph -> Bool @@ -900,7 +904,7 @@ blocksToPresentationSlides blks = do -- slide later blksLst <- splitBlocks blks' bodySlideIds <- mapM - (\n -> runUniqueSlideId $ "BodySlide" ++ show n) + (\n -> runUniqueSlideId $ "BodySlide" <> tshow n) (take (length blksLst) [1..] :: [Integer]) bodyslides <- mapM (\(bs, ident) -> @@ -935,11 +939,11 @@ metaToDocProps meta = authors = case map Shared.stringify $ docAuthors meta of [] -> Nothing - ss -> Just $ intercalate "; " ss + ss -> Just $ T.intercalate "; " ss description = case map Shared.stringify $ lookupMetaBlocks "description" meta of [] -> Nothing - ss -> Just $ intercalate "_x000d_\n" ss + ss -> Just $ T.intercalate "_x000d_\n" ss customProperties' = case [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta) , k `notElem` (["title", "author", "keywords", "description" @@ -987,7 +991,7 @@ formatToken sty (tokType, txt) = Just tokSty -> applyTokStyToRunProps tokSty rProps Nothing -> rProps in - Run rProps' $ T.unpack txt + Run rProps' txt formatSourceLine :: Style -> FormatOptions -> SourceLine -> [ParaElem] formatSourceLine sty _ srcLn = map (formatToken sty) srcLn diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index efe86e73b..5f035ee1f 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.RST Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -16,8 +17,8 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html> module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Prelude import Control.Monad.State.Strict -import Data.Char (isSpace, toLower) -import Data.List (isPrefixOf, stripPrefix, transpose, intersperse) +import Data.Char (isSpace) +import Data.List (transpose, intersperse) import Data.Maybe (fromMaybe) import qualified Data.Text as T import Data.Text (Text) @@ -38,7 +39,7 @@ type Refs = [([Inline], Target)] data WriterState = WriterState { stNotes :: [[Block]] , stLinks :: Refs - , stImages :: [([Inline], (Attr, String, String, Maybe String))] + , stImages :: [([Inline], (Attr, Text, Text, Maybe Text))] , stHasMath :: Bool , stHasRawTeX :: Bool , stOptions :: WriterOptions @@ -81,7 +82,7 @@ pandocToRST (Pandoc meta blocks) = do let main = vsep [body, notes, refs, pics] let context = defField "body" main $ defField "toc" (writerTableOfContents opts) - $ defField "toc-depth" (T.pack $ show $ writerTOCDepth opts) + $ defField "toc-depth" (tshow $ writerTOCDepth opts) $ defField "number-sections" (writerNumberSections opts) $ defField "math" hasMath $ defField "titleblock" (render Nothing title :: Text) @@ -105,13 +106,13 @@ refsToRST :: PandocMonad m => Refs -> RST m (Doc Text) refsToRST refs = mapM keyToRST refs >>= return . vcat -- | Return RST representation of a reference key. -keyToRST :: PandocMonad m => ([Inline], (String, String)) -> RST m (Doc Text) +keyToRST :: PandocMonad m => ([Inline], (Text, Text)) -> RST m (Doc Text) keyToRST (label, (src, _)) = do label' <- inlineListToRST label let label'' = if (==':') `T.any` (render Nothing label' :: Text) then char '`' <> label' <> char '`' else label' - return $ nowrap $ ".. _" <> label'' <> ": " <> text src + return $ nowrap $ ".. _" <> label'' <> ": " <> literal src -- | Return RST representation of notes. notesToRST :: PandocMonad m => [[Block]] -> RST m (Doc Text) @@ -128,13 +129,13 @@ noteToRST num note = do -- | Return RST representation of picture reference table. pictRefsToRST :: PandocMonad m - => [([Inline], (Attr, String, String, Maybe String))] + => [([Inline], (Attr, Text, Text, Maybe Text))] -> RST m (Doc Text) pictRefsToRST refs = mapM pictToRST refs >>= return . vcat -- | Return RST representation of a picture substitution reference. pictToRST :: PandocMonad m - => ([Inline], (Attr, String, String, Maybe String)) + => ([Inline], (Attr, Text, Text, Maybe Text)) -> RST m (Doc Text) pictToRST (label, (attr, src, _, mbtarget)) = do label' <- inlineListToRST label @@ -145,32 +146,32 @@ pictToRST (label, (attr, src, _, mbtarget)) = do ["align-right"] -> ":align: right" ["align-left"] -> ":align: left" ["align-center"] -> ":align: center" - _ -> ":class: " <> text (unwords cls) + _ -> ":class: " <> literal (T.unwords cls) return $ nowrap - $ ".. |" <> label' <> "| image:: " <> text src $$ hang 3 empty (classes $$ dims) + $ ".. |" <> label' <> "| image:: " <> literal src $$ hang 3 empty (classes $$ dims) $$ case mbtarget of Nothing -> empty - Just t -> " :target: " <> text t + Just t -> " :target: " <> literal t -- | Escape special characters for RST. -escapeString :: WriterOptions -> String -> String -escapeString = escapeString' True +escapeText :: WriterOptions -> Text -> Text +escapeText o = T.pack . escapeString' True o . T.unpack -- This ought to be parser where escapeString' _ _ [] = [] escapeString' firstChar opts (c:cs) = case c of - _ | c `elem` ['\\','`','*','_','|'] && - (firstChar || null cs) -> '\\':c:escapeString' False opts cs + _ | c `elemText` "\\`*_|" && + (firstChar || null cs) -> '\\':c:escapeString' False opts cs '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString' False opts cs - '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs - '-' | isEnabled Ext_smart opts -> - case cs of - '-':_ -> '\\':'-':escapeString' False opts cs - _ -> '-':escapeString' False opts cs - '.' | isEnabled Ext_smart opts -> - case cs of - '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest - _ -> '.':escapeString' False opts cs + '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs + '-' | isEnabled Ext_smart opts -> + case cs of + '-':_ -> '\\':'-':escapeString' False opts cs + _ -> '-':escapeString' False opts cs + '.' | isEnabled Ext_smart opts -> + case cs of + '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest + _ -> '.':escapeString' False opts cs _ -> c : escapeString' False opts cs titleToRST :: PandocMonad m => [Inline] -> [Inline] -> RST m (Doc Text) @@ -186,7 +187,7 @@ bordered contents c = then border $$ contents $$ border else empty where len = offset contents - border = text (replicate len c) + border = literal (T.replicate len $ T.singleton c) -- | Convert Pandoc block element to RST. blockToRST :: PandocMonad m @@ -203,30 +204,30 @@ blockToRST (Div (ident,classes,_kvs) bs) = do let admonition = case classes of (cl:_) | cl `elem` admonitions - -> ".. " <> text cl <> "::" + -> ".. " <> literal cl <> "::" cls -> ".. container::" <> space <> - text (unwords (filter (/= "container") cls)) + literal (T.unwords (filter (/= "container") cls)) return $ blankline $$ admonition $$ - (if null ident + (if T.null ident then blankline - else " :name: " <> text ident $$ blankline) $$ + else " :name: " <> literal ident $$ blankline) $$ nest 3 contents $$ blankline blockToRST (Plain inlines) = inlineListToRST inlines -- title beginning with fig: indicates that the image is a figure -blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToRST (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do capt <- inlineListToRST txt dims <- imageDimsToRST attr - let fig = "figure:: " <> text src - alt = ":alt: " <> if null tit then capt else text tit + let fig = "figure:: " <> literal src + alt = ":alt: " <> if T.null tit then capt else literal tit (_,cls,_) = attr classes = case cls of [] -> empty ["align-right"] -> ":align: right" ["align-left"] -> ":align: left" ["align-center"] -> ":align: center" - _ -> ":figclass: " <> text (unwords cls) + _ -> ":figclass: " <> literal (T.unwords cls) return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline blockToRST (Para inlines) | LineBreak `elem` inlines = @@ -237,11 +238,11 @@ blockToRST (Para inlines) blockToRST (LineBlock lns) = linesToLineBlock lns blockToRST (RawBlock f@(Format f') str) - | f == "rst" = return $ text str + | f == "rst" = return $ literal str | f == "tex" = blockToRST (RawBlock (Format "latex") str) | otherwise = return $ blankline <> ".. raw:: " <> - text (map toLower f') $+$ - nest 3 (text str) $$ blankline + literal (T.toLower f') $+$ + nest 3 (literal str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToRST (Header level (name,classes,_) inlines) = do @@ -254,33 +255,33 @@ blockToRST (Header level (name,classes,_) inlines) = do if isTopLevel then do let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate (offset contents) headerChar - let anchor | null name || name == autoId = empty - | otherwise = ".. _" <> text name <> ":" $$ blankline + let border = literal $ T.replicate (offset contents) $ T.singleton headerChar + let anchor | T.null name || name == autoId = empty + | otherwise = ".. _" <> literal name <> ":" $$ blankline return $ nowrap $ anchor $$ contents $$ border $$ blankline else do let rub = "rubric:: " <> contents - let name' | null name = empty - | otherwise = ":name: " <> text name - let cls | null classes = empty - | otherwise = ":class: " <> text (unwords classes) + let name' | T.null name = empty + | otherwise = ":name: " <> literal name + let cls | null classes = empty + | otherwise = ":class: " <> literal (T.unwords classes) return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline blockToRST (CodeBlock (_,classes,kvs) str) = do opts <- gets stOptions - let startnum = maybe "" (\x -> " " <> text x) $ lookup "startFrom" kvs + let startnum = maybe "" (\x -> " " <> literal x) $ lookup "startFrom" kvs let numberlines = if "numberLines" `elem` classes then " :number-lines:" <> startnum else empty if "haskell" `elem` classes && "literate" `elem` classes && isEnabled Ext_literate_haskell opts - then return $ prefixed "> " (text str) $$ blankline + then return $ prefixed "> " (literal str) $$ blankline else return $ (case [c | c <- classes, c `notElem` ["sourceCode","literate","numberLines", "number-lines","example"]] of [] -> "::" - (lang:_) -> (".. code:: " <> text lang) $$ numberlines) - $+$ nest 3 (text str) $$ blankline + (lang:_) -> (".. code:: " <> literal lang) $$ numberlines) + $+$ nest 3 (literal str) $$ blankline blockToRST (BlockQuote blocks) = do contents <- blockListToRST blocks return $ nest 3 contents <> blankline @@ -314,9 +315,9 @@ blockToRST (OrderedList (start, style', delim) items) = do then replicate (length items) "#." else take (length items) $ orderedListMarkers (start, style', delim) - let maxMarkerLength = maximum $ map length markers - let markers' = map (\m -> let s = maxMarkerLength - length m - in m ++ replicate s ' ') markers + let maxMarkerLength = maximum $ map T.length markers + let markers' = map (\m -> let s = maxMarkerLength - T.length m + in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToRST markers' items -- ensure that sublists have preceding blank line return $ blankline $$ @@ -338,13 +339,13 @@ bulletListItemToRST items = do -- | Convert ordered list item (a list of blocks) to RST. orderedListItemToRST :: PandocMonad m - => String -- ^ marker for list item + => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> RST m (Doc Text) orderedListItemToRST marker items = do contents <- blockListToRST items - let marker' = marker ++ " " - return $ hang (length marker') (text marker') contents $$ + let marker' = marker <> " " + return $ hang (T.length marker') (literal marker') contents $$ if endsWithPlain items then cr else blankline @@ -364,7 +365,7 @@ linesToLineBlock :: PandocMonad m => [[Inline]] -> RST m (Doc Text) linesToLineBlock inlineLines = do lns <- mapM inlineListToRST inlineLines return $ - vcat (map (hang 2 (text "| ")) lns) <> blankline + vcat (map (hang 2 (literal "| ")) lns) <> blankline -- | Convert list of Pandoc block elements to RST. blockListToRST' :: PandocMonad m @@ -376,13 +377,13 @@ blockListToRST' topLevel blocks = do let fixBlocks (b1:b2@(BlockQuote _):bs) | toClose b1 = b1 : commentSep : b2 : fixBlocks bs where - toClose Plain{} = False - toClose Header{} = False - toClose LineBlock{} = False - toClose HorizontalRule = False - toClose (Para [Image _ _ (_,'f':'i':'g':':':_)]) = True - toClose Para{} = False - toClose _ = True + toClose Plain{} = False + toClose Header{} = False + toClose LineBlock{} = False + toClose HorizontalRule = False + toClose (Para [Image _ _ (_,t)]) = "fig:" `T.isPrefixOf` t + toClose Para{} = False + toClose _ = True commentSep = RawBlock "rst" "..\n\n" fixBlocks (b:bs) = b : fixBlocks bs fixBlocks [] = [] @@ -438,26 +439,30 @@ transformInlines = insertBS . transformNested :: [Inline] -> [Inline] transformNested = map (mapNested stripLeadingTrailingSpace) surroundComplex :: Inline -> Inline -> Bool - surroundComplex (Str s@(_:_)) (Str s'@(_:_)) = - case (last s, head s') of - ('\'','\'') -> True - ('"','"') -> True - ('<','>') -> True - ('[',']') -> True - ('{','}') -> True - _ -> False + surroundComplex (Str s) (Str s') + | Just (_, c) <- T.unsnoc s + , Just (c', _) <- T.uncons s' + = case (c, c') of + ('\'','\'') -> True + ('"','"') -> True + ('<','>') -> True + ('[',']') -> True + ('{','}') -> True + _ -> False surroundComplex _ _ = False okAfterComplex :: Inline -> Bool okAfterComplex Space = True okAfterComplex SoftBreak = True okAfterComplex LineBreak = True - okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String) + okAfterComplex (Str (T.uncons -> Just (c,_))) + = isSpace c || c `elemText` "-.,:;!?\\/'\")]}>–—" okAfterComplex _ = False okBeforeComplex :: Inline -> Bool okBeforeComplex Space = True okBeforeComplex SoftBreak = True okBeforeComplex LineBreak = True - okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String) + okBeforeComplex (Str (T.uncons -> Just (c,_))) + = isSpace c || c `elemText` "-:/'\"<([{–—" okBeforeComplex _ = False isComplex :: Inline -> Bool isComplex (Emph _) = True @@ -563,7 +568,7 @@ inlineToRST (Span (_,_,kvs) ils) = do contents <- writeInlines ils return $ case lookup "role" kvs of - Just role -> ":" <> text role <> ":`" <> contents <> "`" + Just role -> ":" <> literal role <> ":`" <> contents <> "`" Nothing -> contents inlineToRST (Emph lst) = do contents <- writeInlines lst @@ -596,7 +601,7 @@ inlineToRST (Quoted DoubleQuote lst) = do inlineToRST (Cite _ lst) = writeInlines lst inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do - return $ ":" <> text role <> ":`" <> text str <> "`" + return $ ":" <> literal role <> ":`" <> literal str <> "`" inlineToRST (Code _ str) = do opts <- gets stOptions -- we trim the string because the delimiters must adjoin a @@ -604,28 +609,28 @@ inlineToRST (Code _ str) = do -- we use :literal: when the code contains backticks, since -- :literal: allows backslash-escapes; see #3974 return $ - if '`' `elem` str - then ":literal:`" <> text (escapeString opts (trim str)) <> "`" - else "``" <> text (trim str) <> "``" + if '`' `elemText` str + then ":literal:`" <> literal (escapeText opts (trim str)) <> "`" + else "``" <> literal (trim str) <> "``" inlineToRST (Str str) = do opts <- gets stOptions - return $ text $ + return $ literal $ (if isEnabled Ext_smart opts then unsmartify opts - else id) $ escapeString opts str + else id) $ escapeText opts str inlineToRST (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath - then ":math:`" <> text str <> "`" - else if '\n' `elem` str + then ":math:`" <> literal str <> "`" + else if '\n' `elemText` str then blankline $$ ".. math::" $$ - blankline $$ nest 3 (text str) $$ blankline - else blankline $$ (".. math:: " <> text str) $$ blankline + blankline $$ nest 3 (literal str) $$ blankline + else blankline $$ (".. math:: " <> literal str) $$ blankline inlineToRST il@(RawInline f x) - | f == "rst" = return $ text x + | f == "rst" = return $ literal x | f == "latex" || f == "tex" = do modify $ \st -> st{ stHasRawTeX = True } - return $ ":raw-latex:`" <> text x <> "`" + return $ ":raw-latex:`" <> literal x <> "`" | otherwise = empty <$ report (InlineNotRendered il) inlineToRST LineBreak = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space @@ -638,11 +643,11 @@ inlineToRST SoftBreak = do -- autolink inlineToRST (Link _ [Str str] (src, _)) | isURI src && - if "mailto:" `isPrefixOf` src - then src == escapeURI ("mailto:" ++ str) + if "mailto:" `T.isPrefixOf` src + then src == escapeURI ("mailto:" <> str) else src == escapeURI str = do - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) - return $ text srcSuffix + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) + return $ literal srcSuffix inlineToRST (Link _ [Image attr alt (imgsrc,imgtit)] (src, _tit)) = do label <- registerImage attr alt (imgsrc,imgtit) (Just src) return $ "|" <> label <> "|" @@ -656,11 +661,11 @@ inlineToRST (Link _ txt (src, tit)) = do if src == src' && tit == tit' then return $ "`" <> linktext <> "`_" else - return $ "`" <> linktext <> " <" <> text src <> ">`__" + return $ "`" <> linktext <> " <" <> literal src <> ">`__" Nothing -> do modify $ \st -> st { stLinks = (txt,(src,tit)):refs } return $ "`" <> linktext <> "`_" - else return $ "`" <> linktext <> " <" <> text src <> ">`__" + else return $ "`" <> linktext <> " <" <> literal src <> ">`__" inlineToRST (Image attr alternate (source, tit)) = do label <- registerImage attr alternate (source,tit) Nothing return $ "|" <> label <> "|" @@ -671,7 +676,7 @@ inlineToRST (Note contents) = do let ref = show $ length notes + 1 return $ " [" <> text ref <> "]_" -registerImage :: PandocMonad m => Attr -> [Inline] -> Target -> Maybe String -> RST m (Doc Text) +registerImage :: PandocMonad m => Attr -> [Inline] -> Target -> Maybe Text -> RST m (Doc Text) registerImage attr alt (src,tit) mbtarget = do pics <- gets stImages txt <- case lookup alt pics of @@ -679,7 +684,7 @@ registerImage attr alt (src,tit) mbtarget = do -> return alt _ -> do let alt' = if null alt || alt == [Str ""] - then [Str $ "image" ++ show (length pics)] + then [Str $ "image" <> tshow (length pics)] else alt modify $ \st -> st { stImages = (alt', (attr,src,tit, mbtarget)):stImages st } @@ -689,9 +694,9 @@ registerImage attr alt (src,tit) mbtarget = do imageDimsToRST :: PandocMonad m => Attr -> RST m (Doc Text) imageDimsToRST attr = do let (ident, _, _) = attr - name = if null ident + name = if T.null ident then empty - else ":name: " <> text ident + else ":name: " <> literal ident showDim dir = let cols d = ":" <> text (show dir) <> ": " <> text (show d) in case dimension dir attr of Just (Percent a) -> @@ -711,7 +716,7 @@ simpleTable :: PandocMonad m simpleTable opts blocksToDoc headers rows = do -- can't have empty cells in first column: let fixEmpties (d:ds) = if isEmpty d - then text "\\ " : ds + then literal "\\ " : ds else d : ds fixEmpties [] = [] headerDocs <- if all null headers @@ -722,7 +727,7 @@ simpleTable opts blocksToDoc headers rows = do numChars xs = maximum . map offset $ xs let colWidths = map numChars $ transpose (headerDocs : rowDocs) let toRow = mconcat . intersperse (lblock 1 " ") . zipWith lblock colWidths - let hline = nowrap $ hsep (map (\n -> text (replicate n '=')) colWidths) + let hline = nowrap $ hsep (map (\n -> literal (T.replicate n "=")) colWidths) let hdr = if all null headers then mempty else hline $$ toRow headerDocs diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 366b4cdcd..08f0df0f8 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.RTF Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -18,7 +19,6 @@ import Control.Monad.Except (catchError, throwError) import Control.Monad import qualified Data.ByteString as B import Data.Char (chr, isDigit, ord) -import Data.List (intercalate, isSuffixOf) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T @@ -46,28 +46,28 @@ rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError case result of (imgdata, Just mime) | mime == "image/jpeg" || mime == "image/png" -> do - let bytes = map (printf "%02x") $ B.unpack imgdata + let bytes = map (T.pack . printf "%02x") $ B.unpack imgdata filetype <- case mime of "image/jpeg" -> return "\\jpegblip" "image/png" -> return "\\pngblip" _ -> throwError $ PandocShouldNeverHappenError $ - "Unknown file type " ++ mime + "Unknown file type " <> mime sizeSpec <- case imageSize opts imgdata of Left msg -> do report $ CouldNotDetermineImageSize src msg return "" - Right sz -> return $ "\\picw" ++ show xpx ++ - "\\pich" ++ show ypx ++ - "\\picwgoal" ++ show (floor (xpt * 20) :: Integer) - ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer) + Right sz -> return $ "\\picw" <> tshow xpx <> + "\\pich" <> tshow ypx <> + "\\picwgoal" <> tshow (floor (xpt * 20) :: Integer) + <> "\\pichgoal" <> tshow (floor (ypt * 20) :: Integer) -- twip = 1/1440in = 1/20pt where (xpx, ypx) = sizeInPixels sz (xpt, ypt) = desiredSizeInPoints opts attr sz - let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++ - concat bytes ++ "}" + let raw = "{\\pict" <> filetype <> sizeSpec <> "\\bin " <> + T.concat bytes <> "}" if B.null imgdata then do report $ CouldNotFetchResource src "image contained no data" @@ -80,7 +80,7 @@ rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError report $ CouldNotDetermineMimeType src return x) (\e -> do - report $ CouldNotFetchResource src (show e) + report $ CouldNotFetchResource src $ tshow e return x) rtfEmbedImage _ x = return x @@ -98,12 +98,12 @@ writeRTF options doc = do . M.adjust toPlain "date" $ metamap metadata <- metaToContext options - (fmap (literal . T.pack . concat) . + (fmap (literal . T.concat) . mapM (blockToRTF 0 AlignDefault)) - (fmap (literal . T.pack) . inlinesToRTF) + (fmap literal . inlinesToRTF) meta' - body <- T.pack <$> blocksToRTF 0 AlignDefault blocks - toc <- T.pack <$> blocksToRTF 0 AlignDefault + body <- blocksToRTF 0 AlignDefault blocks + toc <- blocksToRTF 0 AlignDefault [toTableOfContents options $ filter isHeaderBlock blocks] let context = defField "body" body $ defField "spacer" spacer @@ -122,25 +122,24 @@ writeRTF options doc = do _ -> body <> T.singleton '\n' -- | Convert unicode characters (> 127) into rich text format representation. -handleUnicode :: String -> String -handleUnicode [] = [] -handleUnicode (c:cs) = +handleUnicode :: Text -> Text +handleUnicode = T.concatMap $ \c -> if ord c > 127 then if surrogate c then let x = ord c - 0x10000 (q, r) = x `divMod` 0x400 upper = q + 0xd800 lower = r + 0xDC00 - in enc (chr upper) ++ enc (chr lower) ++ handleUnicode cs - else enc c ++ handleUnicode cs - else c:handleUnicode cs + in enc (chr upper) <> enc (chr lower) + else enc c + else T.singleton c where surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff) || (0xe000 <= ord x && ord x <= 0xffff) ) - enc x = '\\':'u':show (ord x) ++ "?" + enc x = "\\u" <> tshow (ord x) <> "?" -- | Escape special characters. -escapeSpecial :: String -> String +escapeSpecial :: Text -> Text escapeSpecial = escapeStringUsing $ [ ('\t',"\\tab ") , ('\8216',"\\u8216'") @@ -149,47 +148,47 @@ escapeSpecial = escapeStringUsing $ , ('\8221',"\\u8221\"") , ('\8211',"\\u8211-") , ('\8212',"\\u8212-") - ] ++ backslashEscapes "{\\}" + ] <> backslashEscapes "{\\}" -- | Escape strings as needed for rich text format. -stringToRTF :: String -> String +stringToRTF :: Text -> Text stringToRTF = handleUnicode . escapeSpecial -- | Escape things as needed for code block in RTF. -codeStringToRTF :: String -> String -codeStringToRTF str = intercalate "\\line\n" $ lines (stringToRTF str) +codeStringToRTF :: Text -> Text +codeStringToRTF str = T.intercalate "\\line\n" $ T.lines (stringToRTF str) -- | Make a paragraph with first-line indent, block indent, and space after. rtfParSpaced :: Int -- ^ space after (in twips) -> Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String + -> Text -- ^ string with content + -> Text rtfParSpaced spaceAfter indent firstLineIndent alignment content = let alignString = case alignment of AlignLeft -> "\\ql " AlignRight -> "\\qr " AlignCenter -> "\\qc " AlignDefault -> "\\ql " - in "{\\pard " ++ alignString ++ - "\\f0 \\sa" ++ show spaceAfter ++ " \\li" ++ show indent ++ - " \\fi" ++ show firstLineIndent ++ " " ++ content ++ "\\par}\n" + in "{\\pard " <> alignString <> + "\\f0 \\sa" <> tshow spaceAfter <> " \\li" <> T.pack (show indent) <> + " \\fi" <> tshow firstLineIndent <> " " <> content <> "\\par}\n" -- | Default paragraph. rtfPar :: Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String + -> Text -- ^ string with content + -> Text rtfPar = rtfParSpaced 180 -- | Compact paragraph (e.g. for compact list items). rtfCompact :: Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String + -> Text -- ^ string with content + -> Text rtfCompact = rtfParSpaced 0 -- number of twips to indent @@ -200,13 +199,13 @@ listIncrement :: Int listIncrement = 360 -- | Returns appropriate bullet list marker for indent level. -bulletMarker :: Int -> String +bulletMarker :: Int -> Text bulletMarker indent = case indent `mod` 720 of 0 -> "\\bullet " _ -> "\\endash " -- | Returns appropriate (list of) ordered list markers for indent level. -orderedMarkers :: Int -> ListAttributes -> [String] +orderedMarkers :: Int -> ListAttributes -> [Text] orderedMarkers indent (start, style, delim) = if style == DefaultStyle && delim == DefaultDelim then case indent `mod` 720 of @@ -218,15 +217,15 @@ blocksToRTF :: PandocMonad m => Int -> Alignment -> [Block] - -> m String -blocksToRTF indent align = fmap concat . mapM (blockToRTF indent align) + -> m Text +blocksToRTF indent align = fmap T.concat . mapM (blockToRTF indent align) -- | Convert Pandoc block element to RTF. blockToRTF :: PandocMonad m => Int -- ^ indent level -> Alignment -- ^ alignment -> Block -- ^ block to convert - -> m String + -> m Text blockToRTF _ _ Null = return "" blockToRTF indent alignment (Div _ bs) = blocksToRTF indent alignment bs @@ -239,139 +238,143 @@ blockToRTF indent alignment (LineBlock lns) = blockToRTF indent alignment (BlockQuote lst) = blocksToRTF (indent + indentIncrement) alignment lst blockToRTF indent _ (CodeBlock _ str) = - return $ rtfPar indent 0 AlignLeft ("\\f1 " ++ codeStringToRTF str) + return $ rtfPar indent 0 AlignLeft ("\\f1 " <> codeStringToRTF str) blockToRTF _ _ b@(RawBlock f str) | f == Format "rtf" = return str | otherwise = do report $ BlockNotRendered b return "" -blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . concat) <$> +blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . T.concat) <$> mapM (listItemToRTF alignment indent (bulletMarker indent)) lst blockToRTF indent alignment (OrderedList attribs lst) = - (spaceAtEnd . concat) <$> + (spaceAtEnd . T.concat) <$> zipWithM (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . concat) <$> +blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . T.concat) <$> mapM (definitionListItemToRTF alignment indent) lst blockToRTF indent _ HorizontalRule = return $ rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" blockToRTF indent alignment (Header level _ lst) = do contents <- inlinesToRTF lst return $ rtfPar indent 0 alignment $ - "\\b \\fs" ++ show (40 - (level * 4)) ++ " " ++ contents + "\\b \\fs" <> tshow (40 - (level * 4)) <> " " <> contents blockToRTF indent alignment (Table caption aligns sizes headers rows) = do caption' <- inlinesToRTF caption header' <- if all null headers then return "" else tableRowToRTF True indent aligns sizes headers - rows' <- concat <$> mapM (tableRowToRTF False indent aligns sizes) rows - return $ header' ++ rows' ++ rtfPar indent 0 alignment caption' + rows' <- T.concat <$> mapM (tableRowToRTF False indent aligns sizes) rows + return $ header' <> rows' <> rtfPar indent 0 alignment caption' tableRowToRTF :: PandocMonad m - => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m String + => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m Text tableRowToRTF header indent aligns sizes' cols = do let totalTwips = 6 * 1440 -- 6 inches let sizes = if all (== 0) sizes' then replicate (length cols) (1.0 / fromIntegral (length cols)) else sizes' - columns <- concat <$> + columns <- T.concat <$> zipWithM (tableItemToRTF indent) aligns cols let rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) (0 :: Integer) sizes let cellDefs = map (\edge -> (if header then "\\clbrdrb\\brdrs" - else "") ++ "\\cellx" ++ show edge) + else "") <> "\\cellx" <> tshow edge) rightEdges - let start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ + let start = "{\n\\trowd \\trgaph120\n" <> T.concat cellDefs <> "\n" <> "\\trkeep\\intbl\n{\n" let end = "}\n\\intbl\\row}\n" - return $ start ++ columns ++ end + return $ start <> columns <> end -tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m String +tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m Text tableItemToRTF indent alignment item = do contents <- blocksToRTF indent alignment item - return $ "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" + return $ "{" <> T.replace "\\pard" "\\pard\\intbl" contents <> "\\cell}\n" -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. -spaceAtEnd :: String -> String -spaceAtEnd str = - if "\\par}\n" `isSuffixOf` str - then take (length str - 6) str ++ "\\sa180\\par}\n" - else str +spaceAtEnd :: Text -> Text +spaceAtEnd str = maybe str (<> "\\sa180\\par}\n") $ T.stripSuffix "\\par}\n" str -- | Convert list item (list of blocks) to RTF. listItemToRTF :: PandocMonad m => Alignment -- ^ alignment -> Int -- ^ indent level - -> String -- ^ list start marker + -> Text -- ^ list start marker -> [Block] -- ^ list item (list of blocks) - -> m String + -> m Text listItemToRTF alignment indent marker [] = return $ rtfCompact (indent + listIncrement) (negate listIncrement) alignment - (marker ++ "\\tx" ++ show listIncrement ++ "\\tab ") + (marker <> "\\tx" <> tshow listIncrement <> "\\tab ") listItemToRTF alignment indent marker (listFirst:listRest) = do let f = blockToRTF (indent + listIncrement) alignment first <- f listFirst rest <- mapM f listRest - let listMarker = "\\fi" ++ show (negate listIncrement) ++ " " ++ marker ++ - "\\tx" ++ show listIncrement ++ "\\tab" - let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = - listMarker ++ dropWhile isDigit xs - insertListMarker ('\\':'f':'i':d:xs) | isDigit d = - listMarker ++ dropWhile isDigit xs - insertListMarker (x:xs) = - x : insertListMarker xs - insertListMarker [] = [] + let listMarker = "\\fi" <> tshow (negate listIncrement) <> " " <> marker <> + "\\tx" <> tshow listIncrement <> "\\tab" + -- Find the first occurrence of \\fi or \\fi-, then replace it and the following + -- digits with the list marker. + let insertListMarker t = case popDigit $ optionDash $ T.drop 3 suff of + Just suff' -> pref <> listMarker <> T.dropWhile isDigit suff' + Nothing -> t + where + (pref, suff) = T.breakOn "\\fi" t + optionDash x = case T.uncons x of + Just ('-', xs) -> xs + _ -> x + popDigit x + | Just (d, xs) <- T.uncons x + , isDigit d = Just xs + | otherwise = Nothing -- insert the list marker into the (processed) first block - return $ insertListMarker first ++ concat rest + return $ insertListMarker first <> T.concat rest -- | Convert definition list item (label, list of blocks) to RTF. definitionListItemToRTF :: PandocMonad m => Alignment -- ^ alignment -> Int -- ^ indent level -> ([Inline],[[Block]]) -- ^ list item (list of blocks) - -> m String + -> m Text definitionListItemToRTF alignment indent (label, defs) = do labelText <- blockToRTF indent alignment (Plain label) itemsText <- blocksToRTF (indent + listIncrement) alignment (concat defs) - return $ labelText ++ itemsText + return $ labelText <> itemsText -- | Convert list of inline items to RTF. inlinesToRTF :: PandocMonad m => [Inline] -- ^ list of inlines to convert - -> m String -inlinesToRTF lst = concat <$> mapM inlineToRTF lst + -> m Text +inlinesToRTF lst = T.concat <$> mapM inlineToRTF lst -- | Convert inline item to RTF. inlineToRTF :: PandocMonad m => Inline -- ^ inline to convert - -> m String + -> m Text inlineToRTF (Span _ lst) = inlinesToRTF lst inlineToRTF (Emph lst) = do contents <- inlinesToRTF lst - return $ "{\\i " ++ contents ++ "}" + return $ "{\\i " <> contents <> "}" inlineToRTF (Strong lst) = do contents <- inlinesToRTF lst - return $ "{\\b " ++ contents ++ "}" + return $ "{\\b " <> contents <> "}" inlineToRTF (Strikeout lst) = do contents <- inlinesToRTF lst - return $ "{\\strike " ++ contents ++ "}" + return $ "{\\strike " <> contents <> "}" inlineToRTF (Superscript lst) = do contents <- inlinesToRTF lst - return $ "{\\super " ++ contents ++ "}" + return $ "{\\super " <> contents <> "}" inlineToRTF (Subscript lst) = do contents <- inlinesToRTF lst - return $ "{\\sub " ++ contents ++ "}" + return $ "{\\sub " <> contents <> "}" inlineToRTF (SmallCaps lst) = do contents <- inlinesToRTF lst - return $ "{\\scaps " ++ contents ++ "}" + return $ "{\\scaps " <> contents <> "}" inlineToRTF (Quoted SingleQuote lst) = do contents <- inlinesToRTF lst - return $ "\\u8216'" ++ contents ++ "\\u8217'" + return $ "\\u8216'" <> contents <> "\\u8217'" inlineToRTF (Quoted DoubleQuote lst) = do contents <- inlinesToRTF lst - return $ "\\u8220\"" ++ contents ++ "\\u8221\"" -inlineToRTF (Code _ str) = return $ "{\\f1 " ++ codeStringToRTF str ++ "}" + return $ "\\u8220\"" <> contents <> "\\u8221\"" +inlineToRTF (Code _ str) = return $ "{\\f1 " <> codeStringToRTF str <> "}" inlineToRTF (Str str) = return $ stringToRTF str inlineToRTF (Math t str) = texMathToInlines t str >>= inlinesToRTF inlineToRTF (Cite _ lst) = inlinesToRTF lst @@ -385,11 +388,11 @@ inlineToRTF SoftBreak = return " " inlineToRTF Space = return " " inlineToRTF (Link _ text (src, _)) = do contents <- inlinesToRTF text - return $ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ codeStringToRTF src ++ - "\"}}{\\fldrslt{\\ul\n" ++ contents ++ "\n}}}\n" + return $ "{\\field{\\*\\fldinst{HYPERLINK \"" <> codeStringToRTF src <> + "\"}}{\\fldrslt{\\ul\n" <> contents <> "\n}}}\n" inlineToRTF (Image _ _ (source, _)) = - return $ "{\\cf1 [image: " ++ source ++ "]\\cf0}" + return $ "{\\cf1 [image: " <> source <> "]\\cf0}" inlineToRTF (Note contents) = do - body <- concat <$> mapM (blockToRTF 0 AlignDefault) contents - return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - body ++ "}" + body <- T.concat <$> mapM (blockToRTF 0 AlignDefault) contents + return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " <> + body <> "}" diff --git a/src/Text/Pandoc/Writers/Roff.hs b/src/Text/Pandoc/Writers/Roff.hs index 4dadb1073..2718b3f13 100644 --- a/src/Text/Pandoc/Writers/Roff.hs +++ b/src/Text/Pandoc/Writers/Roff.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Roff Copyright : Copyright (C) 2007-2019 John MacFarlane @@ -24,6 +25,8 @@ import Prelude import Data.Char (ord, isAscii) import Control.Monad.State.Strict import qualified Data.Map as Map +import Data.Text (Text) +import qualified Data.Text as Text import Data.String import Data.Maybe (fromMaybe, isJust, catMaybes) import Text.Pandoc.Class (PandocMonad) @@ -66,36 +69,38 @@ data EscapeMode = AllowUTF8 -- ^ use preferred man escapes | AsciiOnly -- ^ escape everything deriving Show -combiningAccentsMap :: Map.Map Char String +combiningAccentsMap :: Map.Map Char Text combiningAccentsMap = Map.fromList combiningAccents -essentialEscapes :: Map.Map Char String +essentialEscapes :: Map.Map Char Text essentialEscapes = Map.fromList standardEscapes -- | Escape special characters for roff. -escapeString :: EscapeMode -> String -> String -escapeString _ [] = [] -escapeString escapeMode ('\n':'.':xs) = - '\n':'\\':'&':'.':escapeString escapeMode xs -escapeString escapeMode (x:xs) = - case Map.lookup x essentialEscapes of - Just s -> s ++ escapeString escapeMode xs - Nothing - | isAscii x -> x : escapeString escapeMode xs - | otherwise -> - case escapeMode of - AllowUTF8 -> x : escapeString escapeMode xs - AsciiOnly -> - let accents = catMaybes $ takeWhile isJust - (map (\c -> Map.lookup c combiningAccentsMap) xs) - rest = drop (length accents) xs - s = case Map.lookup x characterCodeMap of - Just t -> "\\[" <> unwords (t:accents) <> "]" - Nothing -> "\\[" <> unwords - (printf "u%04X" (ord x) : accents) <> "]" - in s ++ escapeString escapeMode rest +escapeString :: EscapeMode -> Text -> Text +escapeString e = Text.concat . escapeString' e . Text.unpack + where + escapeString' _ [] = [] + escapeString' escapeMode ('\n':'.':xs) = + "\n\\&." : escapeString' escapeMode xs + escapeString' escapeMode (x:xs) = + case Map.lookup x essentialEscapes of + Just s -> s : escapeString' escapeMode xs + Nothing + | isAscii x -> Text.singleton x : escapeString' escapeMode xs + | otherwise -> + case escapeMode of + AllowUTF8 -> Text.singleton x : escapeString' escapeMode xs + AsciiOnly -> + let accents = catMaybes $ takeWhile isJust + (map (\c -> Map.lookup c combiningAccentsMap) xs) + rest = drop (length accents) xs + s = case Map.lookup x characterCodeMap of + Just t -> "\\[" <> Text.unwords (t:accents) <> "]" + Nothing -> "\\[" <> Text.unwords + (Text.pack (printf "u%04X" (ord x)) : accents) <> "]" + in s : escapeString' escapeMode rest -characterCodeMap :: Map.Map Char String +characterCodeMap :: Map.Map Char Text characterCodeMap = Map.fromList characterCodes fontChange :: (HasChars a, IsString a, PandocMonad m) => MS m (Doc a) diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 359a1bb3c..9aa19c2d9 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -83,11 +83,8 @@ metaToContext' :: (Monad m, TemplateTarget a) -> ([Inline] -> m (Doc a)) -> Meta -> m (Context a) -metaToContext' blockWriter inlineWriter (Meta metamap) = do - renderedMap <- mapM (metaValueToVal blockWriter inlineWriter) metamap - return $ Context - $ M.foldrWithKey (\k v x -> M.insert (T.pack k) v x) mempty - $ renderedMap +metaToContext' blockWriter inlineWriter (Meta metamap) = + Context <$> mapM (metaValueToVal blockWriter inlineWriter) metamap -- | Add variables to a template Context, replacing any existing values. addVariablesToContext :: TemplateTarget a @@ -109,8 +106,7 @@ metaValueToVal :: (Monad m, TemplateTarget a) -> MetaValue -> m (Val a) metaValueToVal blockWriter inlineWriter (MetaMap metamap) = - MapVal . Context . M.mapKeys T.pack <$> - mapM (metaValueToVal blockWriter inlineWriter) metamap + MapVal . Context <$> mapM (metaValueToVal blockWriter inlineWriter) metamap metaValueToVal blockWriter inlineWriter (MetaList xs) = ListVal <$> mapM (metaValueToVal blockWriter inlineWriter) xs metaValueToVal _ _ (MetaBool True) = return $ SimpleVal "true" @@ -122,15 +118,15 @@ metaValueToVal _ inlineWriter (MetaInlines is) = SimpleVal <$> inlineWriter is -- | Retrieve a field value from a template context. -getField :: FromContext a b => String -> Context a -> Maybe b -getField field (Context m) = M.lookup (T.pack field) m >>= fromVal +getField :: FromContext a b => T.Text -> Context a -> Maybe b +getField field (Context m) = M.lookup field m >>= fromVal -- | Set a field of a template context. If the field already has a value, -- convert it into a list with the new value appended to the old value(s). -- This is a utility function to be used in preparing template contexts. -setField :: ToContext a b => String -> b -> Context a -> Context a +setField :: ToContext a b => T.Text -> b -> Context a -> Context a setField field val (Context m) = - Context $ M.insertWith combine (T.pack field) (toVal val) m + Context $ M.insertWith combine field (toVal val) m where combine newval (ListVal xs) = ListVal (xs ++ [newval]) combine newval x = ListVal [x, newval] @@ -138,31 +134,31 @@ setField field val (Context m) = -- | Reset a field of a template context. If the field already has a -- value, the new value replaces it. -- This is a utility function to be used in preparing template contexts. -resetField :: ToContext a b => String -> b -> Context a -> Context a +resetField :: ToContext a b => T.Text -> b -> Context a -> Context a resetField field val (Context m) = - Context (M.insert (T.pack field) (toVal val) m) + Context (M.insert field (toVal val) m) -- | Set a field of a template context if it currently has no value. -- If it has a value, do nothing. -- This is a utility function to be used in preparing template contexts. -defField :: ToContext a b => String -> b -> Context a -> Context a +defField :: ToContext a b => T.Text -> b -> Context a -> Context a defField field val (Context m) = - Context (M.insertWith f (T.pack field) (toVal val) m) + Context (M.insertWith f field (toVal val) m) where f _newval oldval = oldval -- Produce an HTML tag with the given pandoc attributes. -tagWithAttrs :: HasChars a => String -> Attr -> Doc a +tagWithAttrs :: HasChars a => T.Text -> Attr -> Doc a tagWithAttrs tag (ident,classes,kvs) = hsep - ["<" <> text tag - ,if null ident + ["<" <> text (T.unpack tag) + ,if T.null ident then empty - else "id=" <> doubleQuotes (text ident) + else "id=" <> doubleQuotes (text $ T.unpack ident) ,if null classes then empty - else "class=" <> doubleQuotes (text (unwords classes)) - ,hsep (map (\(k,v) -> text k <> "=" <> - doubleQuotes (text (escapeStringForXML v))) kvs) + else "class=" <> doubleQuotes (text $ T.unpack (T.unwords classes)) + ,hsep (map (\(k,v) -> text (T.unpack k) <> "=" <> + doubleQuotes (text $ T.unpack (escapeStringForXML v))) kvs) ] <> ">" isDisplayMath :: Inline -> Bool @@ -198,20 +194,20 @@ fixDisplayMath (Para lst) not (isDisplayMath x || isDisplayMath y)) lst fixDisplayMath x = x -unsmartify :: WriterOptions -> String -> String -unsmartify opts ('\8217':xs) = '\'' : unsmartify opts xs -unsmartify opts ('\8230':xs) = "..." ++ unsmartify opts xs -unsmartify opts ('\8211':xs) - | isEnabled Ext_old_dashes opts = '-' : unsmartify opts xs - | otherwise = "--" ++ unsmartify opts xs -unsmartify opts ('\8212':xs) - | isEnabled Ext_old_dashes opts = "--" ++ unsmartify opts xs - | otherwise = "---" ++ unsmartify opts xs -unsmartify opts ('\8220':xs) = '"' : unsmartify opts xs -unsmartify opts ('\8221':xs) = '"' : unsmartify opts xs -unsmartify opts ('\8216':xs) = '\'' : unsmartify opts xs -unsmartify opts (x:xs) = x : unsmartify opts xs -unsmartify _ [] = [] +unsmartify :: WriterOptions -> T.Text -> T.Text +unsmartify opts = T.concatMap $ \c -> case c of + '\8217' -> "'" + '\8230' -> "..." + '\8211' + | isEnabled Ext_old_dashes opts -> "-" + | otherwise -> "--" + '\8212' + | isEnabled Ext_old_dashes opts -> "--" + | otherwise -> "---" + '\8220' -> "\"" + '\8221' -> "\"" + '\8216' -> "'" + _ -> T.singleton c gridTable :: (Monad m, HasChars a) => WriterOptions @@ -315,22 +311,20 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do body $$ border '-' (repeat AlignDefault) widthsInChars - - -- | Retrieve the metadata value for a given @key@ -- and convert to Bool. -lookupMetaBool :: String -> Meta -> Bool +lookupMetaBool :: T.Text -> Meta -> Bool lookupMetaBool key meta = case lookupMeta key meta of - Just (MetaBlocks _) -> True - Just (MetaInlines _) -> True - Just (MetaString (_:_)) -> True - Just (MetaBool True) -> True - _ -> False + Just (MetaBlocks _) -> True + Just (MetaInlines _) -> True + Just (MetaString x) -> not (T.null x) + Just (MetaBool True) -> True + _ -> False -- | Retrieve the metadata value for a given @key@ -- and extract blocks. -lookupMetaBlocks :: String -> Meta -> [Block] +lookupMetaBlocks :: T.Text -> Meta -> [Block] lookupMetaBlocks key meta = case lookupMeta key meta of Just (MetaBlocks bs) -> bs @@ -340,7 +334,7 @@ lookupMetaBlocks key meta = -- | Retrieve the metadata value for a given @key@ -- and extract inlines. -lookupMetaInlines :: String -> Meta -> [Inline] +lookupMetaInlines :: T.Text -> Meta -> [Inline] lookupMetaInlines key meta = case lookupMeta key meta of Just (MetaString s) -> [Str s] @@ -351,16 +345,15 @@ lookupMetaInlines key meta = -- | Retrieve the metadata value for a given @key@ -- and convert to String. -lookupMetaString :: String -> Meta -> String +lookupMetaString :: T.Text -> Meta -> T.Text lookupMetaString key meta = case lookupMeta key meta of Just (MetaString s) -> s Just (MetaInlines ils) -> stringify ils Just (MetaBlocks bs) -> stringify bs - Just (MetaBool b) -> show b + Just (MetaBool b) -> T.pack (show b) _ -> "" - toSuperscript :: Char -> Maybe Char toSuperscript '1' = Just '\x00B9' toSuperscript '2' = Just '\x00B2' @@ -406,14 +399,14 @@ sectionToListItem opts (Div (ident,_,_) , lev < writerTOCDepth opts] where num = fromMaybe "" $ lookup "number" kvs - addNumber = if null num + addNumber = if T.null num then id else (Span ("",["toc-section-number"],[]) [Str num] :) . (Space :) headerText' = addNumber $ walk (deLink . deNote) ils - headerLink = if null ident + headerLink = if T.null ident then headerText' - else [Link nullAttr headerText' ('#':ident, "")] + else [Link nullAttr headerText' ("#" <> ident, "")] listContents = filter (not . null) $ map (sectionToListItem opts) subsecs sectionToListItem _ _ = [] diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index b9b5aaa85..78f7b2cad 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -14,9 +14,8 @@ Conversion of 'Pandoc' documents to Docbook XML. -} module Text.Pandoc.Writers.TEI (writeTEI) where import Prelude -import Data.Char (toLower) -import Data.List (isPrefixOf, stripPrefix) import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (languages, languagesByExtension) @@ -89,13 +88,13 @@ listItemToTEI :: PandocMonad m => WriterOptions -> [Block] -> m (Doc Text) listItemToTEI opts item = inTagsIndented "item" <$> blocksToTEI opts (map plainToPara item) -imageToTEI :: PandocMonad m => WriterOptions -> Attr -> String -> m (Doc Text) +imageToTEI :: PandocMonad m => WriterOptions -> Attr -> Text -> m (Doc Text) imageToTEI opts attr src = return $ selfClosingTag "graphic" $ ("url", src) : idFromAttr opts attr ++ dims where dims = go Width "width" ++ go Height "height" go dir dstr = case dimension dir attr of - Just a -> [(dstr, show a)] + Just a -> [(dstr, tshow a)] Nothing -> [] -- | Convert a Pandoc block element to TEI. @@ -111,7 +110,7 @@ blockToTEI opts (Div attr@(_,"section":_,_) (Header lvl _ ils : xs)) = divType = case lvl of n | n == -1 -> "part" | n == 0 -> "chapter" - | n >= 1 && n <= 5 -> "level" ++ show n + | n >= 1 && n <= 5 -> "level" <> tshow n | otherwise -> "section" titleContents <- inlinesToTEI opts ils contents <- blocksToTEI opts xs' @@ -150,15 +149,15 @@ blockToTEI opts (LineBlock lns) = blockToTEI opts (BlockQuote blocks) = inTagsIndented "quote" <$> blocksToTEI opts blocks blockToTEI _ (CodeBlock (_,classes,_) str) = - return $ text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <> - flush (text (escapeStringForXML str) <> cr <> text "</ab>") + return $ literal ("<ab type='codeblock " <> lang <> "'>") <> cr <> + flush (literal (escapeStringForXML str) <> cr <> text "</ab>") where lang = if null langs then "" else escapeStringForXML (head langs) - isLang l = map toLower l `elem` map (map toLower) languages + isLang l = T.toLower l `elem` map T.toLower languages langsFrom s = if isLang s then [s] - else languagesByExtension . map toLower $ s + else languagesByExtension . T.toLower $ s langs = concatMap langsFrom classes blockToTEI opts (BulletList lst) = do let attribs = [("type", "unordered")] @@ -178,13 +177,13 @@ blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = do else do fi <- blocksToTEI opts $ map plainToPara first re <- listItemsToTEI opts rest - return $ inTags True "item" [("n",show start)] fi $$ re + return $ inTags True "item" [("n",tshow start)] fi $$ re return $ inTags True "list" attribs items blockToTEI opts (DefinitionList lst) = do let attribs = [("type", "definition")] inTags True "list" attribs <$> deflistItemsToTEI opts lst blockToTEI _ b@(RawBlock f str) - | f == "tei" = return $ text str + | f == "tei" = return $ literal str -- raw TEI block (should such a thing exist). | otherwise = do report $ BlockNotRendered b @@ -230,7 +229,7 @@ inlinesToTEI opts lst = hcat <$> mapM (inlineToTEI opts) lst -- | Convert an inline element to TEI. inlineToTEI :: PandocMonad m => WriterOptions -> Inline -> m (Doc Text) -inlineToTEI _ (Str str) = return $ text $ escapeStringForXML str +inlineToTEI _ (Str str) = return $ literal $ escapeStringForXML str inlineToTEI opts (Emph lst) = inTags False "hi" [("rendition","simple:italic")] <$> inlinesToTEI opts lst inlineToTEI opts (Strong lst) = @@ -254,16 +253,16 @@ inlineToTEI opts (Cite _ lst) = inlineToTEI opts (Span _ ils) = inlinesToTEI opts ils inlineToTEI _ (Code _ str) = return $ - inTags False "seg" [("type","code")] $ text (escapeStringForXML str) + inTags False "seg" [("type","code")] $ literal (escapeStringForXML str) -- Distinguish display from inline math by wrapping the former in a "figure." inlineToTEI _ (Math t str) = return $ case t of InlineMath -> inTags False "formula" [("notation","TeX")] $ - text str + literal str DisplayMath -> inTags True "figure" [("type","math")] $ - inTags False "formula" [("notation","TeX")] $ text str + inTags False "formula" [("notation","TeX")] $ literal str -inlineToTEI _ il@(RawInline f x) | f == "tei" = return $ text x +inlineToTEI _ il@(RawInline f x) | f == "tei" = return $ literal x | otherwise = empty <$ report (InlineNotRendered il) inlineToTEI _ LineBreak = return $ selfClosingTag "lb" [] @@ -273,8 +272,8 @@ inlineToTEI _ Space = inlineToTEI _ SoftBreak = return space inlineToTEI opts (Link attr txt (src, _)) - | Just email <- stripPrefix "mailto:" src = do - let emailLink = text $ + | Just email <- T.stripPrefix "mailto:" src = do + let emailLink = literal $ escapeStringForXML email case txt of [Str s] | escapeURI s == email -> @@ -283,17 +282,17 @@ inlineToTEI opts (Link attr txt (src, _)) linktext <- inlinesToTEI opts txt return $ linktext <+> char '(' <> emailLink <> char ')' | otherwise = - (if "#" `isPrefixOf` src - then inTags False "ref" $ ("target", drop 1 src) + (if "#" `T.isPrefixOf` src + then inTags False "ref" $ ("target", T.drop 1 src) : idFromAttr opts attr else inTags False "ref" $ ("target", src) : idFromAttr opts attr ) <$> inlinesToTEI opts txt inlineToTEI opts (Image attr description (src, tit)) = do - let titleDoc = if null tit + let titleDoc = if T.null tit then empty else inTags False "figDesc" [] - (text $ escapeStringForXML tit) + (literal $ escapeStringForXML tit) imageDesc <- if null description then return empty else inTags False "head" [] @@ -303,8 +302,8 @@ inlineToTEI opts (Image attr description (src, tit)) = do inlineToTEI opts (Note contents) = inTagsIndented "note" <$> blocksToTEI opts contents -idFromAttr :: WriterOptions -> Attr -> [(String, String)] +idFromAttr :: WriterOptions -> Attr -> [(Text, Text)] idFromAttr opts (id',_,_) = - if null id' + if T.null id' then [] - else [("xml:id", writerIdentifierPrefix opts ++ id')] + else [("xml:id", writerIdentifierPrefix opts <> id')] diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 5c5eb7fd3..387858fd3 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -39,7 +39,7 @@ import Text.Printf (printf) data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout , stEscapeComma :: Bool -- in a context where we need @comma - , stIdentifiers :: Set.Set String -- header ids used already + , stIdentifiers :: Set.Set Text -- header ids used already , stOptions :: WriterOptions -- writer options } @@ -85,7 +85,7 @@ pandocToTexinfo options (Pandoc meta blocks) = do Just tpl -> renderTemplate tpl context -- | Escape things as needed for Texinfo. -stringToTexinfo :: String -> String +stringToTexinfo :: Text -> Text stringToTexinfo = escapeStringUsing texinfoEscapes where texinfoEscapes = [ ('{', "@{") , ('}', "@}") @@ -106,8 +106,8 @@ escapeCommas parser = do return res -- | Puts contents into Texinfo command. -inCmd :: String -> Doc Text -> Doc Text -inCmd cmd contents = char '@' <> text cmd <> braces contents +inCmd :: Text -> Doc Text -> Doc Text +inCmd cmd contents = char '@' <> literal cmd <> braces contents -- | Convert Pandoc block element to Texinfo. blockToTexinfo :: PandocMonad m @@ -122,13 +122,14 @@ blockToTexinfo (Plain lst) = inlineListToTexinfo lst -- title beginning with fig: indicates that the image is a figure -blockToTexinfo (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return empty - else (\c -> text "@caption" <> braces c) `fmap` - inlineListToTexinfo txt - img <- inlineToTexinfo (Image attr txt (src,tit)) - return $ text "@float" $$ img $$ capt $$ text "@end float" +blockToTexinfo (Para [Image attr txt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt = do + capt <- if null txt + then return empty + else (\c -> text "@caption" <> braces c) `fmap` + inlineListToTexinfo txt + img <- inlineToTexinfo (Image attr txt (src,tit)) + return $ text "@float" $$ img $$ capt $$ text "@end float" blockToTexinfo (Para lst) = inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo @@ -145,13 +146,13 @@ blockToTexinfo (BlockQuote lst) = do blockToTexinfo (CodeBlock _ str) = return $ blankline $$ text "@verbatim" $$ - flush (text str) $$ + flush (literal str) $$ text "@end verbatim" <> blankline blockToTexinfo b@(RawBlock f str) - | f == "texinfo" = return $ text str + | f == "texinfo" = return $ literal str | f == "latex" || f == "tex" = - return $ text "@tex" $$ text str $$ text "@end tex" + return $ text "@tex" $$ literal str $$ text "@end tex" | otherwise = do report $ BlockNotRendered b return empty @@ -211,18 +212,18 @@ blockToTexinfo (Header level (ident,_,_) lst) txt <- inlineListToTexinfo lst idsUsed <- gets stIdentifiers opts <- gets stOptions - let id' = if null ident + let id' = if T.null ident then uniqueIdent (writerExtensions opts) lst idsUsed else ident modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } sec <- seccmd level return $ if (level > 0) && (level <= 4) then blankline <> text "@node " <> node $$ - text sec <> txt $$ - text "@anchor" <> braces (text $ '#':id') + literal sec <> txt $$ + text "@anchor" <> braces (literal $ "#" <> id') else txt where - seccmd :: PandocMonad m => Int -> TI m String + seccmd :: PandocMonad m => Int -> TI m Text seccmd 1 = return "@chapter " seccmd 2 = return "@section " seccmd 3 = return "@subsection " @@ -266,13 +267,13 @@ tableRowToTexinfo :: PandocMonad m tableRowToTexinfo = tableAnyRowToTexinfo "@item " tableAnyRowToTexinfo :: PandocMonad m - => String + => Text -> [Alignment] -> [[Block]] -> TI m (Doc Text) tableAnyRowToTexinfo itemtype aligns cols = zipWithM alignedBlock aligns cols >>= - return . (text itemtype $$) . foldl (\row item -> row $$ + return . (literal itemtype $$) . foldl (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty alignedBlock :: PandocMonad m @@ -375,8 +376,8 @@ inlineListToTexinfo lst = hcat <$> mapM inlineToTexinfo lst inlineListForNode :: PandocMonad m => [Inline] -- ^ Inlines to convert -> TI m (Doc Text) -inlineListForNode = return . text . stringToTexinfo . - filter (not . disallowedInNode) . stringify +inlineListForNode = return . literal . stringToTexinfo . + T.filter (not . disallowedInNode) . stringify -- periods, commas, colons, and parentheses are disallowed in node names disallowedInNode :: Char -> Bool @@ -413,7 +414,7 @@ inlineToTexinfo (SmallCaps lst) = inCmd "sc" <$> inlineListToTexinfo lst inlineToTexinfo (Code _ str) = - return $ text $ "@code{" ++ stringToTexinfo str ++ "}" + return $ literal $ "@code{" <> stringToTexinfo str <> "}" inlineToTexinfo (Quoted SingleQuote lst) = do contents <- inlineListToTexinfo lst @@ -425,12 +426,12 @@ inlineToTexinfo (Quoted DoubleQuote lst) = do inlineToTexinfo (Cite _ lst) = inlineListToTexinfo lst -inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) -inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str +inlineToTexinfo (Str str) = return $ literal (stringToTexinfo str) +inlineToTexinfo (Math _ str) = return $ inCmd "math" $ literal str inlineToTexinfo il@(RawInline f str) | f == "latex" || f == "tex" = - return $ text "@tex" $$ text str $$ text "@end tex" - | f == "texinfo" = return $ text str + return $ text "@tex" $$ literal str $$ text "@end tex" + | f == "texinfo" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty @@ -443,35 +444,36 @@ inlineToTexinfo SoftBreak = do WrapPreserve -> return cr inlineToTexinfo Space = return space -inlineToTexinfo (Link _ txt (src@('#':_), _)) = do - contents <- escapeCommas $ inlineListToTexinfo txt - return $ text "@ref" <> - braces (text (stringToTexinfo src) <> text "," <> contents) -inlineToTexinfo (Link _ txt (src, _)) = - case txt of - [Str x] | escapeURI x == src -> -- autolink - return $ text $ "@url{" ++ x ++ "}" - _ -> do contents <- escapeCommas $ inlineListToTexinfo txt - let src1 = stringToTexinfo src - return $ text ("@uref{" ++ src1 ++ ",") <> contents <> - char '}' +inlineToTexinfo (Link _ txt (src, _)) + | Just ('#', _) <- T.uncons src = do + contents <- escapeCommas $ inlineListToTexinfo txt + return $ text "@ref" <> + braces (literal (stringToTexinfo src) <> text "," <> contents) + | otherwise = case txt of + [Str x] | escapeURI x == src -> -- autolink + return $ literal $ "@url{" <> x <> "}" + _ -> do + contents <- escapeCommas $ inlineListToTexinfo txt + let src1 = stringToTexinfo src + return $ literal ("@uref{" <> src1 <> ",") <> contents <> + char '}' inlineToTexinfo (Image attr alternate (source, _)) = do content <- escapeCommas $ inlineListToTexinfo alternate opts <- gets stOptions let showDim dim = case dimension dim attr of - (Just (Pixel a)) -> showInInch opts (Pixel a) ++ "in" + (Just (Pixel a)) -> showInInch opts (Pixel a) <> "in" (Just (Percent _)) -> "" - (Just d) -> show d + (Just d) -> tshow d Nothing -> "" - return $ text ("@image{" ++ base ++ ',':(showDim Width) ++ ',':(showDim Height) ++ ",") - <> content <> text "," <> text (ext ++ "}") + return $ literal ("@image{" <> base <> "," <> showDim Width <> "," <> showDim Height <> ",") + <> content <> text "," <> literal (ext <> "}") where - ext = drop 1 $ takeExtension source' - base = dropExtension source' + ext = T.drop 1 $ T.pack $ takeExtension source' + base = T.pack $ dropExtension source' source' = if isURI source - then source - else unEscapeString source + then T.unpack source + else unEscapeString $ T.unpack source inlineToTexinfo (Note contents) = do contents' <- blockListToTexinfo contents diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 1a7c386e0..c0c5727d7 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Textile Copyright : Copyright (C) 2010-2019 John MacFarlane @@ -16,8 +18,8 @@ module Text.Pandoc.Writers.Textile ( writeTextile ) where import Prelude import Control.Monad.State.Strict import Data.Char (isSpace) -import Data.List (intercalate) -import Data.Text (Text, pack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -30,10 +32,10 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.XML (escapeStringForXML) data WriterState = WriterState { - stNotes :: [String] -- Footnotes - , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" - , stStartNum :: Maybe Int -- Start number if first list item - , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list + stNotes :: [Text] -- Footnotes + , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" + , stStartNum :: Maybe Int -- Start number if first list item + , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list } type TW = StateT WriterState @@ -52,11 +54,11 @@ pandocToTextile :: PandocMonad m => WriterOptions -> Pandoc -> TW m Text pandocToTextile opts (Pandoc meta blocks) = do metadata <- metaToContext opts - (fmap (literal . pack) . blockListToTextile opts) - (fmap (literal . pack) . inlineListToTextile opts) meta + (fmap literal . blockListToTextile opts) + (fmap literal . inlineListToTextile opts) meta body <- blockListToTextile opts blocks - notes <- gets $ unlines . reverse . stNotes - let main = pack $ body ++ if null notes then "" else "\n\n" ++ notes + notes <- gets $ T.unlines . reverse . stNotes + let main = body <> if T.null notes then "" else "\n\n" <> notes let context = defField "body" main metadata return $ case writerTemplate opts of @@ -72,7 +74,7 @@ withUseTags action = do return result -- | Escape one character as needed for Textile. -escapeCharForTextile :: Char -> String +escapeCharForTextile :: Char -> Text escapeCharForTextile x = case x of '&' -> "&" '<' -> "<" @@ -88,17 +90,17 @@ escapeCharForTextile x = case x of '\x2013' -> " - " '\x2019' -> "'" '\x2026' -> "..." - c -> [c] + c -> T.singleton c -- | Escape string as needed for Textile. -escapeStringForTextile :: String -> String -escapeStringForTextile = concatMap escapeCharForTextile +escapeTextForTextile :: Text -> Text +escapeTextForTextile = T.concatMap escapeCharForTextile -- | Convert Pandoc block element to Textile. blockToTextile :: PandocMonad m => WriterOptions -- ^ Options -> Block -- ^ Block element - -> TW m String + -> TW m Text blockToTextile _ Null = return "" @@ -106,24 +108,24 @@ blockToTextile opts (Div attr bs) = do let startTag = render Nothing $ tagWithAttrs "div" attr let endTag = "</div>" contents <- blockListToTextile opts bs - return $ startTag ++ "\n\n" ++ contents ++ "\n\n" ++ endTag ++ "\n" + return $ startTag <> "\n\n" <> contents <> "\n\n" <> endTag <> "\n" blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines -- title beginning with fig: indicates that the image is a figure -blockToTextile opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToTextile opts (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do capt <- blockToTextile opts (Para txt) im <- inlineToTextile opts (Image attr txt (src,tit)) - return $ im ++ "\n" ++ capt + return $ im <> "\n" <> capt blockToTextile opts (Para inlines) = do useTags <- gets stUseTags listLevel <- gets stListLevel contents <- inlineListToTextile opts inlines return $ if useTags - then "<p>" ++ contents ++ "</p>" - else contents ++ if null listLevel then "\n" else "" + then "<p>" <> contents <> "</p>" + else contents <> if null listLevel then "\n" else "" blockToTextile opts (LineBlock lns) = blockToTextile opts $ linesToPara lns @@ -138,41 +140,41 @@ blockToTextile _ HorizontalRule = return "<hr />\n" blockToTextile opts (Header level (ident,classes,keyvals) inlines) = do contents <- inlineListToTextile opts inlines - let identAttr = if null ident then "" else '#':ident - let attribs = if null identAttr && null classes + let identAttr = if T.null ident then "" else "#" <> ident + let attribs = if T.null identAttr && null classes then "" - else "(" ++ unwords classes ++ identAttr ++ ")" - let lang = maybe "" (\x -> "[" ++ x ++ "]") $ lookup "lang" keyvals - let styles = maybe "" (\x -> "{" ++ x ++ "}") $ lookup "style" keyvals - let prefix = 'h' : show level ++ attribs ++ styles ++ lang ++ ". " - return $ prefix ++ contents ++ "\n" - -blockToTextile _ (CodeBlock (_,classes,_) str) | any (all isSpace) (lines str) = - return $ "<pre" ++ classes' ++ ">\n" ++ escapeStringForXML str ++ + else "(" <> T.unwords classes <> identAttr <> ")" + let lang = maybe "" (\x -> "[" <> x <> "]") $ lookup "lang" keyvals + let styles = maybe "" (\x -> "{" <> x <> "}") $ lookup "style" keyvals + let prefix = "h" <> tshow level <> attribs <> styles <> lang <> ". " + return $ prefix <> contents <> "\n" + +blockToTextile _ (CodeBlock (_,classes,_) str) | any (T.all isSpace) (T.lines str) = + return $ "<pre" <> classes' <> ">\n" <> escapeStringForXML str <> "\n</pre>\n" where classes' = if null classes then "" - else " class=\"" ++ unwords classes ++ "\"" + else " class=\"" <> T.unwords classes <> "\"" blockToTextile _ (CodeBlock (_,classes,_) str) = - return $ "bc" ++ classes' ++ ". " ++ str ++ "\n\n" + return $ "bc" <> classes' <> ". " <> str <> "\n\n" where classes' = if null classes then "" - else "(" ++ unwords classes ++ ")" + else "(" <> T.unwords classes <> ")" blockToTextile opts (BlockQuote bs@[Para _]) = do contents <- blockListToTextile opts bs - return $ "bq. " ++ contents ++ "\n\n" + return $ "bq. " <> contents <> "\n\n" blockToTextile opts (BlockQuote blocks) = do contents <- blockListToTextile opts blocks - return $ "<blockquote>\n\n" ++ contents ++ "\n</blockquote>\n" + return $ "<blockquote>\n\n" <> contents <> "\n</blockquote>\n" blockToTextile opts (Table [] aligns widths headers rows') | all (==0) widths = do - hs <- mapM (liftM (("_. " ++) . stripTrailingNewlines) . blockListToTextile opts) headers - let cellsToRow cells = "|" ++ intercalate "|" cells ++ "|" - let header = if all null headers then "" else cellsToRow hs ++ "\n" + hs <- mapM (liftM (("_. " <>) . stripTrailingNewlines) . blockListToTextile opts) headers + let cellsToRow cells = "|" <> T.intercalate "|" cells <> "|" + let header = if all null headers then "" else cellsToRow hs <> "\n" let blocksToCell (align, bs) = do contents <- stripTrailingNewlines <$> blockListToTextile opts bs let alignMarker = case align of @@ -180,32 +182,32 @@ blockToTextile opts (Table [] aligns widths headers rows') | AlignRight -> ">. " AlignCenter -> "=. " AlignDefault -> "" - return $ alignMarker ++ contents + return $ alignMarker <> contents let rowToCells = mapM blocksToCell . zip aligns bs <- mapM rowToCells rows' - let body = unlines $ map cellsToRow bs - return $ header ++ body + let body = T.unlines $ map cellsToRow bs + return $ header <> body blockToTextile opts (Table capt aligns widths headers rows') = do - let alignStrings = map alignmentToString aligns + let alignStrings = map alignmentToText aligns captionDoc <- if null capt then return "" else do c <- inlineListToTextile opts capt - return $ "<caption>" ++ c ++ "</caption>\n" - let percent w = show (truncate (100*w) :: Integer) ++ "%" + return $ "<caption>" <> c <> "</caption>\n" + let percent w = tshow (truncate (100*w) :: Integer) <> "%" let coltags = if all (== 0.0) widths then "" - else unlines $ map - (\w -> "<col width=\"" ++ percent w ++ "\" />") widths + else T.unlines $ map + (\w -> "<col width=\"" <> percent w <> "\" />") widths head' <- if all null headers then return "" else do hs <- tableRowToTextile opts alignStrings 0 headers - return $ "<thead>\n" ++ hs ++ "\n</thead>\n" + return $ "<thead>\n" <> hs <> "\n</thead>\n" body' <- zipWithM (tableRowToTextile opts alignStrings) [1..] rows' - return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++ - "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n" + return $ "<table>\n" <> captionDoc <> coltags <> head' <> + "<tbody>\n" <> T.unlines body' <> "</tbody>\n</table>\n" blockToTextile opts x@(BulletList items) = do oldUseTags <- gets stUseTags @@ -213,13 +215,13 @@ blockToTextile opts x@(BulletList items) = do if useTags then do contents <- withUseTags $ mapM (listItemToTextile opts) items - return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n" + return $ "<ul>\n" <> vcat contents <> "\n</ul>\n" else do - modify $ \s -> s { stListLevel = stListLevel s ++ "*" } + modify $ \s -> s { stListLevel = stListLevel s <> "*" } level <- gets $ length . stListLevel contents <- mapM (listItemToTextile opts) items modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents ++ (if level > 1 then "" else "\n") + return $ vcat contents <> (if level > 1 then "" else "\n") blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do oldUseTags <- gets stUseTags @@ -227,10 +229,10 @@ blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do if useTags then do contents <- withUseTags $ mapM (listItemToTextile opts) items - return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ + return $ "<ol" <> listAttribsToString attribs <> ">\n" <> vcat contents <> "\n</ol>\n" else do - modify $ \s -> s { stListLevel = stListLevel s ++ "#" + modify $ \s -> s { stListLevel = stListLevel s <> "#" , stStartNum = if start > 1 then Just start else Nothing } @@ -238,52 +240,52 @@ blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do contents <- mapM (listItemToTextile opts) items modify $ \s -> s { stListLevel = init (stListLevel s), stStartNum = Nothing } - return $ vcat contents ++ (if level > 1 then "" else "\n") + return $ vcat contents <> (if level > 1 then "" else "\n") blockToTextile opts (DefinitionList items) = do contents <- withUseTags $ mapM (definitionListItemToTextile opts) items - return $ "<dl>\n" ++ vcat contents ++ "\n</dl>\n" + return $ "<dl>\n" <> vcat contents <> "\n</dl>\n" -- Auxiliary functions for lists: -- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String +listAttribsToString :: ListAttributes -> Text listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle + let numstyle' = camelCaseToHyphenated $ tshow numstyle in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ + then " start=\"" <> tshow startnum <> "\"" + else "") <> (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + then " style=\"list-style-type: " <> numstyle' <> ";\"" else "") -- | Convert bullet or ordered list item (list of blocks) to Textile. listItemToTextile :: PandocMonad m - => WriterOptions -> [Block] -> TW m String + => WriterOptions -> [Block] -> TW m Text listItemToTextile opts items = do contents <- blockListToTextile opts items useTags <- gets stUseTags if useTags - then return $ "<li>" ++ contents ++ "</li>" + then return $ "<li>" <> contents <> "</li>" else do marker <- gets stListLevel mbstart <- gets stStartNum case mbstart of Just n -> do modify $ \s -> s{ stStartNum = Nothing } - return $ marker ++ show n ++ " " ++ contents - Nothing -> return $ marker ++ " " ++ contents + return $ T.pack marker <> tshow n <> " " <> contents + Nothing -> return $ T.pack marker <> " " <> contents -- | Convert definition list item (label, list of blocks) to Textile. definitionListItemToTextile :: PandocMonad m => WriterOptions -> ([Inline],[[Block]]) - -> TW m String + -> TW m Text definitionListItemToTextile opts (label, items) = do labelText <- inlineListToTextile opts label contents <- mapM (blockListToTextile opts) items - return $ "<dt>" ++ labelText ++ "</dt>\n" ++ - intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents) + return $ "<dt>" <> labelText <> "</dt>\n" <> + T.intercalate "\n" (map (\d -> "<dd>" <> d <> "</dd>") contents) -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. isSimpleList :: Block -> Bool @@ -318,18 +320,18 @@ isPlainOrPara (Para _) = True isPlainOrPara _ = False -- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat = intercalate "\n" +vcat :: [Text] -> Text +vcat = T.intercalate "\n" -- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki, -- and Textile writers, and should be abstracted out.) tableRowToTextile :: PandocMonad m => WriterOptions - -> [String] + -> [Text] -> Int -> [[Block]] - -> TW m String + -> TW m Text tableRowToTextile opts alignStrings rownum cols' = do let celltype = if rownum == 0 then "th" else "td" let rowclass = case rownum of @@ -339,10 +341,10 @@ tableRowToTextile opts alignStrings rownum cols' = do cols'' <- zipWithM (\alignment item -> tableItemToTextile opts celltype alignment item) alignStrings cols' - return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" + return $ "<tr class=\"" <> rowclass <> "\">\n" <> T.unlines cols'' <> "</tr>" -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of +alignmentToText :: Alignment -> Text +alignmentToText alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" @@ -350,13 +352,13 @@ alignmentToString alignment = case alignment of tableItemToTextile :: PandocMonad m => WriterOptions - -> String - -> String + -> Text + -> Text -> [Block] - -> TW m String + -> TW m Text tableItemToTextile opts celltype align' item = do - let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ - x ++ "</" ++ celltype ++ ">" + let mkcell x = "<" <> celltype <> " align=\"" <> align' <> "\">" <> + x <> "</" <> celltype <> ">" contents <- blockListToTextile opts item return $ mkcell contents @@ -364,73 +366,73 @@ tableItemToTextile opts celltype align' item = do blockListToTextile :: PandocMonad m => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> TW m String + -> TW m Text blockListToTextile opts blocks = vcat <$> mapM (blockToTextile opts) blocks -- | Convert list of Pandoc inline elements to Textile. inlineListToTextile :: PandocMonad m - => WriterOptions -> [Inline] -> TW m String + => WriterOptions -> [Inline] -> TW m Text inlineListToTextile opts lst = - concat <$> mapM (inlineToTextile opts) lst + T.concat <$> mapM (inlineToTextile opts) lst -- | Convert Pandoc inline element to Textile. -inlineToTextile :: PandocMonad m => WriterOptions -> Inline -> TW m String +inlineToTextile :: PandocMonad m => WriterOptions -> Inline -> TW m Text inlineToTextile opts (Span _ lst) = inlineListToTextile opts lst inlineToTextile opts (Emph lst) = do contents <- inlineListToTextile opts lst - return $ if '_' `elem` contents - then "<em>" ++ contents ++ "</em>" - else "_" ++ contents ++ "_" + return $ if '_' `elemText` contents + then "<em>" <> contents <> "</em>" + else "_" <> contents <> "_" inlineToTextile opts (Strong lst) = do contents <- inlineListToTextile opts lst - return $ if '*' `elem` contents - then "<strong>" ++ contents ++ "</strong>" - else "*" ++ contents ++ "*" + return $ if '*' `elemText` contents + then "<strong>" <> contents <> "</strong>" + else "*" <> contents <> "*" inlineToTextile opts (Strikeout lst) = do contents <- inlineListToTextile opts lst - return $ if '-' `elem` contents - then "<del>" ++ contents ++ "</del>" - else "-" ++ contents ++ "-" + return $ if '-' `elemText` contents + then "<del>" <> contents <> "</del>" + else "-" <> contents <> "-" inlineToTextile opts (Superscript lst) = do contents <- inlineListToTextile opts lst - return $ if '^' `elem` contents - then "<sup>" ++ contents ++ "</sup>" - else "[^" ++ contents ++ "^]" + return $ if '^' `elemText` contents + then "<sup>" <> contents <> "</sup>" + else "[^" <> contents <> "^]" inlineToTextile opts (Subscript lst) = do contents <- inlineListToTextile opts lst - return $ if '~' `elem` contents - then "<sub>" ++ contents ++ "</sub>" - else "[~" ++ contents ++ "~]" + return $ if '~' `elemText` contents + then "<sub>" <> contents <> "</sub>" + else "[~" <> contents <> "~]" inlineToTextile opts (SmallCaps lst) = inlineListToTextile opts lst inlineToTextile opts (Quoted SingleQuote lst) = do contents <- inlineListToTextile opts lst - return $ "'" ++ contents ++ "'" + return $ "'" <> contents <> "'" inlineToTextile opts (Quoted DoubleQuote lst) = do contents <- inlineListToTextile opts lst - return $ "\"" ++ contents ++ "\"" + return $ "\"" <> contents <> "\"" inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst inlineToTextile _ (Code _ str) = - return $ if '@' `elem` str - then "<tt>" ++ escapeStringForXML str ++ "</tt>" - else "@" ++ str ++ "@" + return $ if '@' `elemText` str + then "<tt>" <> escapeStringForXML str <> "</tt>" + else "@" <> str <> "@" -inlineToTextile _ (Str str) = return $ escapeStringForTextile str +inlineToTextile _ (Str str) = return $ escapeTextForTextile str inlineToTextile _ (Math _ str) = - return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</span>" + return $ "<span class=\"math\">" <> escapeStringForXML str <> "</span>" inlineToTextile opts il@(RawInline f str) | f == Format "html" || f == Format "textile" = return str @@ -455,36 +457,36 @@ inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do _ -> inlineListToTextile opts txt let classes = if null cls || cls == ["uri"] && label == "$" then "" - else "(" ++ unwords cls ++ ")" - return $ "\"" ++ classes ++ label ++ "\":" ++ src + else "(" <> T.unwords cls <> ")" + return $ "\"" <> classes <> label <> "\":" <> src inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do alt' <- inlineListToTextile opts alt - let txt = if null tit - then if null alt' + let txt = if T.null tit + then if T.null alt' then "" - else "(" ++ alt' ++ ")" - else "(" ++ tit ++ ")" + else "(" <> alt' <> ")" + else "(" <> tit <> ")" classes = if null cls then "" - else "(" ++ unwords cls ++ ")" - showDim dir = let toCss str = Just $ show dir ++ ":" ++ str ++ ";" + else "(" <> T.unwords cls <> ")" + showDim dir = let toCss str = Just $ tshow dir <> ":" <> str <> ";" in case dimension dir attr of - Just (Percent a) -> toCss $ show (Percent a) - Just dim -> toCss $ showInPixel opts dim ++ "px" + Just (Percent a) -> toCss $ tshow (Percent a) + Just dim -> toCss $ showInPixel opts dim <> "px" Nothing -> Nothing styles = case (showDim Width, showDim Height) of - (Just w, Just h) -> "{" ++ w ++ h ++ "}" - (Just w, Nothing) -> "{" ++ w ++ "height:auto;}" - (Nothing, Just h) -> "{" ++ "width:auto;" ++ h ++ "}" + (Just w, Just h) -> "{" <> w <> h <> "}" + (Just w, Nothing) -> "{" <> w <> "height:auto;}" + (Nothing, Just h) -> "{" <> "width:auto;" <> h <> "}" (Nothing, Nothing) -> "" - return $ "!" ++ classes ++ styles ++ source ++ txt ++ "!" + return $ "!" <> classes <> styles <> source <> txt <> "!" inlineToTextile opts (Note contents) = do curNotes <- gets stNotes let newnum = length curNotes + 1 contents' <- blockListToTextile opts contents - let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n" + let thisnote = "fn" <> tshow newnum <> ". " <> contents' <> "\n" modify $ \s -> s { stNotes = thisnote : curNotes } - return $ "[" ++ show newnum ++ "]" + return $ "[" <> tshow newnum <> "]" -- note - may not work for notes with multiple blocks diff --git a/src/Text/Pandoc/Writers/XWiki.hs b/src/Text/Pandoc/Writers/XWiki.hs index e6cd0b086..7afe845c7 100644 --- a/src/Text/Pandoc/Writers/XWiki.hs +++ b/src/Text/Pandoc/Writers/XWiki.hs @@ -38,12 +38,12 @@ import Prelude import Control.Monad.Reader (ReaderT, asks, local, runReaderT) import qualified Data.Set as Set import qualified Data.Text as Text -import Data.Text (Text, intercalate, pack, replace, split) +import Data.Text (Text, intercalate, replace, split) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared (escapeURI, isURI, linesToPara) +import Text.Pandoc.Shared import Text.Pandoc.Writers.MediaWiki (highlightingLangs) data WriterState = WriterState { @@ -65,10 +65,10 @@ vcat = intercalate "\n" -- If an id is provided, we can generate an anchor using the id macro -- https://extensions.xwiki.org/xwiki/bin/view/Extension/Id%20Macro -genAnchor :: String -> Text -genAnchor id' = if null id' +genAnchor :: Text -> Text +genAnchor id' = if Text.null id' then "" - else pack $ "{{id name=\"" ++ id' ++ "\" /}}" + else "{{id name=\"" <> id' <> "\" /}}" blockListToXWiki :: PandocMonad m => [Block] -> XWikiReader m Text blockListToXWiki blocks = @@ -93,7 +93,7 @@ blockToXWiki (LineBlock lns) = blockToXWiki $ linesToPara lns blockToXWiki b@(RawBlock f str) - | f == Format "xwiki" = return $ pack str + | f == Format "xwiki" = return str | otherwise = "" <$ report (BlockNotRendered b) blockToXWiki HorizontalRule = return "\n----\n" @@ -140,7 +140,7 @@ tableCellXWiki :: PandocMonad m => Bool -> [Block] -> XWikiReader m Text tableCellXWiki isHeader cell = do contents <- blockListToXWiki cell let isMultiline = (length . split (== '\n')) contents > 1 - let contents' = intercalate contents $ if isMultiline then [pack "(((", pack ")))"] else [mempty, mempty] + let contents' = intercalate contents $ if isMultiline then ["(((", ")))"] else [mempty, mempty] let cellBorder = if isHeader then "|=" else "|" return $ cellBorder <> contents' @@ -151,7 +151,7 @@ inlineListToXWiki lst = inlineToXWiki :: PandocMonad m => Inline -> XWikiReader m Text -inlineToXWiki (Str str) = return $ escapeXWikiString $ pack str +inlineToXWiki (Str str) = return $ escapeXWikiString str inlineToXWiki Space = return " " @@ -193,39 +193,37 @@ inlineToXWiki (Quoted DoubleQuote lst) = do contents <- inlineListToXWiki lst return $ "“" <> contents <> "”" -inlineToXWiki (Code (_,classes,_) contents') = do +inlineToXWiki (Code (_,classes,_) contents) = do let at = Set.fromList classes `Set.intersection` highlightingLangs - let contents = pack contents' return $ case Set.toList at of [] -> "{{code}}" <> contents <> "{{/code}}" - (l:_) -> "{{code language=\"" <> (pack l) <> "\"}}" <> contents <> "{{/code}}" + (l:_) -> "{{code language=\"" <> l <> "\"}}" <> contents <> "{{/code}}" inlineToXWiki (Cite _ lst) = inlineListToXWiki lst -- FIXME: optionally support this (plugin?) -inlineToXWiki (Math _ str) = return $ "{{formula}}" <> (pack str) <> "{{/formula}}" +inlineToXWiki (Math _ str) = return $ "{{formula}}" <> str <> "{{/formula}}" inlineToXWiki il@(RawInline frmt str) - | frmt == Format "xwiki" = return $ pack str + | frmt == Format "xwiki" = return str | otherwise = "" <$ report (InlineNotRendered il) -- TODO: Handle anchors inlineToXWiki (Link (id', _, _) txt (src, _)) = do label <- inlineListToXWiki txt case txt of - [Str s] | isURI src && escapeURI s == src -> return $ (pack src) <> (genAnchor id') - _ -> return $ "[[" <> label <> ">>" <> (pack src) <> "]]" <> (genAnchor id') + [Str s] | isURI src && escapeURI s == src -> return $ src <> (genAnchor id') + _ -> return $ "[[" <> label <> ">>" <> src <> "]]" <> (genAnchor id') inlineToXWiki (Image _ alt (source, tit)) = do alt' <- inlineListToXWiki alt let - titText = pack tit params = intercalate " " $ filter (not . Text.null) [ if Text.null alt' then "" else "alt=\"" <> alt' <> "\"", - if Text.null titText then "" else "title=\"" <> titText <> "\"" + if Text.null tit then "" else "title=\"" <> tit <> "\"" ] - return $ "[[image:" <> (pack source) <> (if Text.null params then "" else "||" <> params) <> "]]" + return $ "[[image:" <> source <> (if Text.null params then "" else "||" <> params) <> "]]" inlineToXWiki (Note contents) = do contents' <- blockListToXWiki contents diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index e1bc40351..7f7821fe2 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ZimWiki Copyright : Copyright (C) 2008-2019 John MacFarlane, 2017-2019 Alex Ivkin @@ -18,11 +20,12 @@ import Prelude import Control.Monad (zipWithM) import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) -import Data.List (intercalate, isInfixOf, isPrefixOf, transpose) +import Data.List (transpose) import qualified Data.Map as Map import Text.DocLayout (render, literal) import Data.Maybe (fromMaybe) -import Data.Text (Text, breakOnAll, pack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -30,13 +33,12 @@ import Text.Pandoc.Logging import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) -import Text.Pandoc.Shared (escapeURI, isURI, linesToPara, removeFormatting, - substitute, trimr) +import Text.Pandoc.Shared (escapeURI, isURI, linesToPara, removeFormatting, trimr) import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared (defField, metaToContext) data WriterState = WriterState { - stIndent :: String, -- Indent after the marker at the beginning of list items + stIndent :: Text, -- Indent after the marker at the beginning of list items stInTable :: Bool, -- Inside a table stInLink :: Bool -- Inside a link description } @@ -54,10 +56,10 @@ writeZimWiki opts document = evalStateT (pandocToZimWiki opts document) def pandocToZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> ZW m Text pandocToZimWiki opts (Pandoc meta blocks) = do metadata <- metaToContext opts - (fmap (literal . pack . trimr) . blockListToZimWiki opts) - (fmap (literal . pack . trimr) . inlineListToZimWiki opts) + (fmap (literal . trimr) . blockListToZimWiki opts) + (fmap (literal . trimr) . inlineListToZimWiki opts) meta - main <- pack <$> blockListToZimWiki opts blocks + main <- blockListToZimWiki opts blocks --let header = "Content-Type: text/x-zim-wiki\nWiki-Format: zim 0.4\n" let context = defField "body" main $ defField "toc" (writerTableOfContents opts) metadata @@ -67,39 +69,39 @@ pandocToZimWiki opts (Pandoc meta blocks) = do Nothing -> main -- | Escape special characters for ZimWiki. -escapeString :: String -> String -escapeString = substitute "__" "''__''" . - substitute "**" "''**''" . - substitute "~~" "''~~''" . - substitute "//" "''//''" +escapeText :: Text -> Text +escapeText = T.replace "__" "''__''" . + T.replace "**" "''**''" . + T.replace "~~" "''~~''" . + T.replace "//" "''//''" -- | Convert Pandoc block element to ZimWiki. -blockToZimWiki :: PandocMonad m => WriterOptions -> Block -> ZW m String +blockToZimWiki :: PandocMonad m => WriterOptions -> Block -> ZW m Text blockToZimWiki _ Null = return "" blockToZimWiki opts (Div _attrs bs) = do contents <- blockListToZimWiki opts bs - return $ contents ++ "\n" + return $ contents <> "\n" blockToZimWiki opts (Plain inlines) = inlineListToZimWiki opts inlines -- title beginning with fig: indicates that the image is a figure -- ZimWiki doesn't support captions - so combine together alt and caption into alt -blockToZimWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToZimWiki opts (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do capt <- if null txt then return "" - else (" " ++) `fmap` inlineListToZimWiki opts txt + else (" " <>) `fmap` inlineListToZimWiki opts txt let opt = if null txt then "" - else "|" ++ if null tit then capt else tit ++ capt - return $ "{{" ++ src ++ imageDims opts attr ++ opt ++ "}}\n" + else "|" <> if T.null tit then capt else tit <> capt + return $ "{{" <> src <> imageDims opts attr <> opt <> "}}\n" blockToZimWiki opts (Para inlines) = do indent <- gets stIndent -- useTags <- gets stUseTags contents <- inlineListToZimWiki opts inlines - return $ contents ++ if null indent then "\n" else "" + return $ contents <> if T.null indent then "\n" else "" blockToZimWiki opts (LineBlock lns) = blockToZimWiki opts $ linesToPara lns @@ -115,63 +117,63 @@ blockToZimWiki _ HorizontalRule = return "\n----\n" blockToZimWiki opts (Header level _ inlines) = do contents <- inlineListToZimWiki opts $ removeFormatting inlines -- emphasis, links etc. not allowed in headers - let eqs = replicate ( 7 - level ) '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" + let eqs = T.replicate ( 7 - level ) "=" + return $ eqs <> " " <> contents <> " " <> eqs <> "\n" blockToZimWiki _ (CodeBlock (_,classes,_) str) = do -- Remap languages into the gtksourceview2 convention that ZimWiki source code plugin is using let langal = [("javascript", "js"), ("bash", "sh"), ("winbatch", "dosbatch")] let langmap = Map.fromList langal return $ case classes of - [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- turn no lang block into a quote block - (x:_) -> "{{{code: lang=\"" ++ - fromMaybe x (Map.lookup x langmap) ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec + [] -> "'''\n" <> cleanupCode str <> "\n'''\n" -- turn no lang block into a quote block + (x:_) -> "{{{code: lang=\"" <> + fromMaybe x (Map.lookup x langmap) <> "\" linenumbers=\"True\"\n" <> str <> "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec blockToZimWiki opts (BlockQuote blocks) = do contents <- blockListToZimWiki opts blocks - return $ unlines $ map ("> " ++) $ lines contents + return $ T.unlines $ map ("> " <>) $ T.lines contents blockToZimWiki opts (Table capt aligns _ headers rows) = do captionDoc <- if null capt then return "" else do c <- inlineListToZimWiki opts capt - return $ "" ++ c ++ "\n" + return $ "" <> c <> "\n" headers' <- if all null headers then zipWithM (tableItemToZimWiki opts) aligns (head rows) else mapM (inlineListToZimWiki opts . removeFormatting)headers -- emphasis, links etc. are not allowed in table headers rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows - let widths = map (maximum . map length) $ transpose (headers':rows') + let widths = map (maximum . map T.length) $ transpose (headers':rows') let padTo (width, al) s = - case width - length s of + case width - T.length s of x | x > 0 -> if al == AlignLeft || al == AlignDefault - then s ++ replicate x ' ' + then s <> T.replicate x " " else if al == AlignRight - then replicate x ' ' ++ s - else replicate (x `div` 2) ' ' ++ - s ++ replicate (x - x `div` 2) ' ' + then T.replicate x " " <> s + else T.replicate (x `div` 2) " " <> + s <> T.replicate (x - x `div` 2) " " | otherwise -> s let borderCell (width, al) _ - | al == AlignLeft = ":"++ replicate (width-1) '-' - | al == AlignDefault = replicate width '-' - | al == AlignRight = replicate (width-1) '-' ++ ":" - | otherwise = ":" ++ replicate (width-2) '-' ++ ":" - let underheader = "|" ++ intercalate "|" (zipWith borderCell (zip widths aligns) headers') ++ "|" - let renderRow cells = "|" ++ intercalate "|" (zipWith padTo (zip widths aligns) cells) ++ "|" - return $ captionDoc ++ - (if null headers' then "" else renderRow headers' ++ "\n") ++ underheader ++ "\n" ++ - unlines (map renderRow rows') + | al == AlignLeft = ":"<> T.replicate (width-1) "-" + | al == AlignDefault = T.replicate width "-" + | al == AlignRight = T.replicate (width-1) "-" <> ":" + | otherwise = ":" <> T.replicate (width-2) "-" <> ":" + let underheader = "|" <> T.intercalate "|" (zipWith borderCell (zip widths aligns) headers') <> "|" + let renderRow cells = "|" <> T.intercalate "|" (zipWith padTo (zip widths aligns) cells) <> "|" + return $ captionDoc <> + (if null headers' then "" else renderRow headers' <> "\n") <> underheader <> "\n" <> + T.unlines (map renderRow rows') blockToZimWiki opts (BulletList items) = do contents <- mapM (listItemToZimWiki opts) items indent <- gets stIndent - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" blockToZimWiki opts (OrderedList _ items) = do contents <- zipWithM (orderedListItemToZimWiki opts) [1..] items indent <- gets stIndent - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" blockToZimWiki opts (DefinitionList items) = do contents <- mapM (definitionListItemToZimWiki opts) items @@ -180,71 +182,71 @@ blockToZimWiki opts (DefinitionList items) = do definitionListItemToZimWiki :: PandocMonad m => WriterOptions -> ([Inline],[[Block]]) - -> ZW m String + -> ZW m Text definitionListItemToZimWiki opts (label, items) = do labelText <- inlineListToZimWiki opts label contents <- mapM (blockListToZimWiki opts) items indent <- gets stIndent - return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents + return $ indent <> "* **" <> labelText <> "** " <> T.concat contents -- Auxiliary functions for lists: -indentFromHTML :: PandocMonad m => WriterOptions -> String -> ZW m String +indentFromHTML :: PandocMonad m => WriterOptions -> Text -> ZW m Text indentFromHTML _ str = do indent <- gets stIndent - if "<li>" `isInfixOf` str + if "<li>" `T.isInfixOf` str then return indent - else if "</li>" `isInfixOf` str + else if "</li>" `T.isInfixOf` str then return "\n" - else if "<li value=" `isInfixOf` str + else if "<li value=" `T.isInfixOf` str then return "" - else if "<ol>" `isInfixOf` str + else if "<ol>" `T.isInfixOf` str then do let olcount=countSubStrs "<ol>" str - modify $ \s -> s { stIndent = stIndent s ++ - replicate olcount '\t' } + modify $ \s -> s { stIndent = stIndent s <> + T.replicate olcount "\t" } return "" - else if "</ol>" `isInfixOf` str + else if "</ol>" `T.isInfixOf` str then do let olcount=countSubStrs "/<ol>" str - modify $ \s -> s{ stIndent = drop olcount (stIndent s) } + modify $ \s -> s{ stIndent = T.drop olcount (stIndent s) } return "" else return "" -countSubStrs :: String -> String -> Int -countSubStrs sub str = length $ breakOnAll (pack sub) (pack str) +countSubStrs :: Text -> Text -> Int +countSubStrs sub str = length $ T.breakOnAll sub str -cleanupCode :: String -> String -cleanupCode = substitute "<nowiki>" "" . substitute "</nowiki>" "" +cleanupCode :: Text -> Text +cleanupCode = T.replace "<nowiki>" "" . T.replace "</nowiki>" "" -vcat :: [String] -> String -vcat = intercalate "\n" +vcat :: [Text] -> Text +vcat = T.intercalate "\n" -- | Convert bullet list item (list of blocks) to ZimWiki. -listItemToZimWiki :: PandocMonad m => WriterOptions -> [Block] -> ZW m String +listItemToZimWiki :: PandocMonad m => WriterOptions -> [Block] -> ZW m Text listItemToZimWiki opts items = do indent <- gets stIndent - modify $ \s -> s { stIndent = indent ++ "\t" } + modify $ \s -> s { stIndent = indent <> "\t" } contents <- blockListToZimWiki opts items modify $ \s -> s{ stIndent = indent } - return $ indent ++ "* " ++ contents + return $ indent <> "* " <> contents -- | Convert ordered list item (list of blocks) to ZimWiki. orderedListItemToZimWiki :: PandocMonad m - => WriterOptions -> Int -> [Block] -> ZW m String + => WriterOptions -> Int -> [Block] -> ZW m Text orderedListItemToZimWiki opts itemnum items = do indent <- gets stIndent - modify $ \s -> s { stIndent = indent ++ "\t" } + modify $ \s -> s { stIndent = indent <> "\t" } contents <- blockListToZimWiki opts items modify $ \s -> s{ stIndent = indent } - return $ indent ++ show itemnum ++ ". " ++ contents + return $ indent <> T.pack (show itemnum) <> ". " <> contents -- Auxiliary functions for tables: tableItemToZimWiki :: PandocMonad m - => WriterOptions -> Alignment -> [Block] -> ZW m String + => WriterOptions -> Alignment -> [Block] -> ZW m Text tableItemToZimWiki opts align' item = do let mkcell x = (if align' == AlignRight || align' == AlignCenter then " " - else "") ++ x ++ + else "") <> x <> (if align' == AlignLeft || align' == AlignCenter then " " else "") @@ -255,45 +257,45 @@ tableItemToZimWiki opts align' item = do -- | Convert list of Pandoc block elements to ZimWiki. blockListToZimWiki :: PandocMonad m - => WriterOptions -> [Block] -> ZW m String + => WriterOptions -> [Block] -> ZW m Text blockListToZimWiki opts blocks = vcat <$> mapM (blockToZimWiki opts) blocks -- | Convert list of Pandoc inline elements to ZimWiki. inlineListToZimWiki :: PandocMonad m - => WriterOptions -> [Inline] -> ZW m String -inlineListToZimWiki opts lst = concat <$> mapM (inlineToZimWiki opts) lst + => WriterOptions -> [Inline] -> ZW m Text +inlineListToZimWiki opts lst = T.concat <$> mapM (inlineToZimWiki opts) lst -- | Convert Pandoc inline element to ZimWiki. inlineToZimWiki :: PandocMonad m - => WriterOptions -> Inline -> ZW m String + => WriterOptions -> Inline -> ZW m Text inlineToZimWiki opts (Emph lst) = do contents <- inlineListToZimWiki opts lst - return $ "//" ++ contents ++ "//" + return $ "//" <> contents <> "//" inlineToZimWiki opts (Strong lst) = do contents <- inlineListToZimWiki opts lst - return $ "**" ++ contents ++ "**" + return $ "**" <> contents <> "**" inlineToZimWiki opts (Strikeout lst) = do contents <- inlineListToZimWiki opts lst - return $ "~~" ++ contents ++ "~~" + return $ "~~" <> contents <> "~~" inlineToZimWiki opts (Superscript lst) = do contents <- inlineListToZimWiki opts lst - return $ "^{" ++ contents ++ "}" + return $ "^{" <> contents <> "}" inlineToZimWiki opts (Subscript lst) = do contents <- inlineListToZimWiki opts lst - return $ "_{" ++ contents ++ "}" + return $ "_{" <> contents <> "}" inlineToZimWiki opts (Quoted SingleQuote lst) = do contents <- inlineListToZimWiki opts lst - return $ "\8216" ++ contents ++ "\8217" + return $ "\8216" <> contents <> "\8217" inlineToZimWiki opts (Quoted DoubleQuote lst) = do contents <- inlineListToZimWiki opts lst - return $ "\8220" ++ contents ++ "\8221" + return $ "\8220" <> contents <> "\8221" inlineToZimWiki opts (Span _attrs ils) = inlineListToZimWiki opts ils @@ -301,24 +303,24 @@ inlineToZimWiki opts (SmallCaps lst) = inlineListToZimWiki opts lst inlineToZimWiki opts (Cite _ lst) = inlineListToZimWiki opts lst -inlineToZimWiki _ (Code _ str) = return $ "''" ++ str ++ "''" +inlineToZimWiki _ (Code _ str) = return $ "''" <> str <> "''" inlineToZimWiki _ (Str str) = do inTable <- gets stInTable inLink <- gets stInLink if inTable - then return $ substitute "|" "\\|" . escapeString $ str + then return $ T.replace "|" "\\|" . escapeText $ str else if inLink then return str - else return $ escapeString str + else return $ escapeText str -inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped +inlineToZimWiki _ (Math mathType str) = return $ delim <> str <> delim -- note: str should NOT be escaped where delim = case mathType of DisplayMath -> "$$" InlineMath -> "$" --- | f == Format "html" = return $ "<html>" ++ str ++ "</html>" +-- | f == Format "html" = return $ "<html>" <> str <> "</html>" inlineToZimWiki opts il@(RawInline f str) | f == Format "zimwiki" = return str | f == Format "html" = indentFromHTML opts str @@ -347,38 +349,39 @@ inlineToZimWiki opts (Link _ txt (src, _)) = do modify $ \s -> s { stInLink = False } let label'= if inTable then "" -- no label is allowed in a table - else "|"++label + else "|"<>label case txt of - [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">" + [Str s] | "mailto:" `T.isPrefixOf` src -> return $ "<" <> s <> ">" | escapeURI s == src -> return src _ -> if isURI src - then return $ "[[" ++ src ++ label' ++ "]]" - else return $ "[[" ++ src' ++ label' ++ "]]" - where src' = case src of - '/':xs -> xs -- with leading / it's a - _ -> src -- link to a help page + then return $ "[[" <> src <> label' <> "]]" + else return $ "[[" <> src' <> label' <> "]]" + where + -- with leading / it's a link to a help page + src' = fromMaybe src $ T.stripPrefix "/" src + inlineToZimWiki opts (Image attr alt (source, tit)) = do alt' <- inlineListToZimWiki opts alt inTable <- gets stInTable let txt = case (tit, alt, inTable) of ("",[], _) -> "" - ("", _, False ) -> "|" ++ alt' - (_ , _, False ) -> "|" ++ tit + ("", _, False ) -> "|" <> alt' + (_ , _, False ) -> "|" <> tit (_ , _, True ) -> "" - return $ "{{" ++ source ++ imageDims opts attr ++ txt ++ "}}" + return $ "{{" <> source <> imageDims opts attr <> txt <> "}}" inlineToZimWiki opts (Note contents) = do -- no concept of notes in zim wiki, use a text block contents' <- blockListToZimWiki opts contents - return $ " **{Note:** " ++ trimr contents' ++ "**}**" + return $ " **{Note:** " <> trimr contents' <> "**}**" -imageDims :: WriterOptions -> Attr -> String +imageDims :: WriterOptions -> Attr -> Text imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) where toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing checkPct maybeDim = maybeDim - go (Just w) Nothing = "?" ++ w - go (Just w) (Just h) = "?" ++ w ++ "x" ++ h - go Nothing (Just h) = "?0x" ++ h + go (Just w) Nothing = "?" <> w + go (Just w) (Just h) = "?" <> w <> "x" <> h + go Nothing (Just h) = "?0x" <> h go Nothing Nothing = "" diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index f0cdf8302..21f6d4d46 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.XML Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -31,17 +33,17 @@ import qualified Data.Map as M import Data.String -- | Escape one character as needed for XML. -escapeCharForXML :: Char -> String +escapeCharForXML :: Char -> Text escapeCharForXML x = case x of '&' -> "&" '<' -> "<" '>' -> ">" '"' -> """ - c -> [c] + c -> T.singleton c -- | Escape string as needed for XML. Entity references are not preserved. -escapeStringForXML :: String -> String -escapeStringForXML = concatMap escapeCharForXML . filter isLegalXMLChar +escapeStringForXML :: Text -> Text +escapeStringForXML = T.concatMap escapeCharForXML . T.filter isLegalXMLChar where isLegalXMLChar c = c == '\t' || c == '\n' || c == '\r' || (c >= '\x20' && c <= '\xD7FF') || (c >= '\xE000' && c <= '\xFFFD') || @@ -49,44 +51,43 @@ escapeStringForXML = concatMap escapeCharForXML . filter isLegalXMLChar -- see https://www.w3.org/TR/xml/#charsets -- | Escape newline characters as -escapeNls :: String -> String -escapeNls (x:xs) - | x == '\n' = " " ++ escapeNls xs - | otherwise = x : escapeNls xs -escapeNls [] = [] +escapeNls :: Text -> Text +escapeNls = T.concatMap $ \x -> case x of + '\n' -> " " + c -> T.singleton c -- | Return a text object with a string of formatted XML attributes. -attributeList :: (HasChars a, IsString a) => [(String, String)] -> Doc a +attributeList :: (HasChars a, IsString a) => [(Text, Text)] -> Doc a attributeList = hcat . map - (\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++ - escapeNls (escapeStringForXML b) ++ "\"")) + (\(a, b) -> text (T.unpack $ " " <> escapeStringForXML a <> "=\"" <> + escapeNls (escapeStringForXML b) <> "\"")) -- | Put the supplied contents between start and end tags of tagType, -- with specified attributes and (if specified) indentation. -inTags:: (HasChars a, IsString a) - => Bool -> String -> [(String, String)] -> Doc a -> Doc a +inTags :: (HasChars a, IsString a) + => Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a inTags isIndented tagType attribs contents = - let openTag = char '<' <> text tagType <> attributeList attribs <> + let openTag = char '<' <> text (T.unpack tagType) <> attributeList attribs <> char '>' - closeTag = text "</" <> text tagType <> char '>' + closeTag = text "</" <> text (T.unpack tagType) <> char '>' in if isIndented then openTag $$ nest 2 contents $$ closeTag else openTag <> contents <> closeTag -- | Return a self-closing tag of tagType with specified attributes selfClosingTag :: (HasChars a, IsString a) - => String -> [(String, String)] -> Doc a + => Text -> [(Text, Text)] -> Doc a selfClosingTag tagType attribs = - char '<' <> text tagType <> attributeList attribs <> text " />" + char '<' <> text (T.unpack tagType) <> attributeList attribs <> text " />" -- | Put the supplied contents between start and end tags of tagType. inTagsSimple :: (HasChars a, IsString a) - => String -> Doc a -> Doc a + => Text -> Doc a -> Doc a inTagsSimple tagType = inTags False tagType [] -- | Put the supplied contents in indented block btw start and end tags. inTagsIndented :: (HasChars a, IsString a) - => String -> Doc a -> Doc a + => Text -> Doc a -> Doc a inTagsIndented tagType = inTags True tagType [] -- | Escape all non-ascii characters using numerical entities. @@ -118,18 +119,21 @@ html5EntityMap = foldr go mempty htmlEntities -- Unescapes XML entities -fromEntities :: String -> String -fromEntities ('&':xs) = - case lookupEntity ent' of - Just c -> c ++ fromEntities rest - Nothing -> '&' : fromEntities xs - where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of - (zs,';':ys) -> (zs,ys) - (zs, ys) -> (zs,ys) - ent' = case ent of - '#':'X':ys -> '#':'x':ys -- workaround tagsoup bug - '#':_ -> ent - _ -> ent ++ ";" - -fromEntities (x:xs) = x : fromEntities xs -fromEntities [] = [] +fromEntities :: Text -> Text +fromEntities = T.pack . fromEntities' + +fromEntities' :: Text -> String +fromEntities' (T.uncons -> Just ('&', xs)) = + case lookupEntity $ T.unpack ent' of + Just c -> c <> fromEntities' rest + Nothing -> "&" <> fromEntities' xs + where (ent, rest) = case T.break (\c -> isSpace c || c == ';') xs of + (zs,T.uncons -> Just (';',ys)) -> (zs,ys) + (zs, ys) -> (zs,ys) + ent' + | Just ys <- T.stripPrefix "#X" ent = "#x" <> ys -- workaround tagsoup bug + | Just ('#', _) <- T.uncons ent = ent + | otherwise = ent <> ";" +fromEntities' t = case T.uncons t of + Just (x, xs) -> x : fromEntities' xs + Nothing -> "" |