aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Shared.hs
diff options
context:
space:
mode:
authordespresc <christian.j.j.despres@gmail.com>2019-11-04 16:12:37 -0500
committerJohn MacFarlane <jgm@berkeley.edu>2019-11-12 16:03:45 -0800
commit90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch)
tree4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Shared.hs
parentd3966372f5049eea56213b069fc4d70d8af9144c (diff)
downloadpandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884. + Use pandoc-types 1.20 and texmath 0.12. + Text is now used instead of String, with a few exceptions. + In the MediaBag module, some of the types using Strings were switched to use FilePath instead (not Text). + In the Parsing module, new parsers `manyChar`, `many1Char`, `manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`, `mantyUntilChar` have been added: these are like their unsuffixed counterparts but pack some or all of their output. + `glob` in Text.Pandoc.Class still takes String since it seems to be intended as an interface to Glob, which uses strings. It seems to be used only once in the package, in the EPUB writer, so that is not hard to change.
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r--src/Text/Pandoc/Shared.hs293
1 files changed, 175 insertions, 118 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 797a0a0b0..926116e23 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -8,6 +8,7 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Shared
Copyright : Copyright (C) 2006-2019 John MacFarlane
@@ -22,14 +23,20 @@ Utility functions and definitions used by the various Pandoc modules.
module Text.Pandoc.Shared (
-- * List processing
splitBy,
+ splitTextBy,
splitByIndices,
splitStringByIndices,
+ splitTextByIndices,
substitute,
ordNub,
-- * Text processing
ToString (..),
+ ToText (..),
+ tshow,
backslashEscapes,
escapeStringUsing,
+ elemText,
+ notElemText,
stripTrailingNewlines,
trim,
triml,
@@ -37,6 +44,7 @@ module Text.Pandoc.Shared (
trimMath,
stripFirstAndLast,
camelCaseToHyphenated,
+ camelCaseStrToHyphenated,
toRomanNumeral,
escapeURI,
tabFilter,
@@ -90,6 +98,7 @@ module Text.Pandoc.Shared (
defaultBlocksSeparator,
-- * Safe read
safeRead,
+ safeStrRead,
-- * User data directory
defaultUserDataDirs,
-- * Version
@@ -133,8 +142,8 @@ import Text.DocLayout (charWidth)
import Text.Pandoc.Walk
-- | Version number of pandoc library.
-pandocVersion :: String
-pandocVersion = showVersion version
+pandocVersion :: T.Text
+pandocVersion = T.pack $ showVersion version
--
-- List processing
@@ -148,6 +157,13 @@ splitBy isSep lst =
rest' = dropWhile isSep rest
in first:splitBy isSep rest'
+splitTextBy :: (Char -> Bool) -> T.Text -> [T.Text]
+splitTextBy isSep t
+ | T.null t = []
+ | otherwise = let (first, rest) = T.break isSep t
+ rest' = T.dropWhile isSep rest
+ in first : splitTextBy isSep rest'
+
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x) xs) rest
@@ -160,6 +176,9 @@ splitStringByIndices (x:xs) lst =
let (first, rest) = splitAt' x lst in
first : splitStringByIndices (map (\y -> y - x) xs) rest
+splitTextByIndices :: [Int] -> T.Text -> [T.Text]
+splitTextByIndices ns = fmap T.pack . splitStringByIndices ns . T.unpack
+
splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' _ [] = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
@@ -195,89 +214,115 @@ instance ToString String where
instance ToString T.Text where
toString = T.unpack
+class ToText a where
+ toText :: a -> T.Text
+
+instance ToText String where
+ toText = T.pack
+
+instance ToText T.Text where
+ toText = id
+
+tshow :: Show a => a -> T.Text
+tshow = T.pack . show
+
-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char] -- ^ list of special characters to escape
- -> [(Char, String)]
-backslashEscapes = map (\ch -> (ch, ['\\',ch]))
+ -> [(Char, T.Text)]
+backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
-- | Escape a string of characters, using an association list of
-- characters and strings.
-escapeStringUsing :: [(Char, String)] -> String -> String
-escapeStringUsing _ [] = ""
-escapeStringUsing escapeTable (x:xs) =
- case lookup x escapeTable of
- Just str -> str ++ rest
- Nothing -> x:rest
- where rest = escapeStringUsing escapeTable xs
+escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
+escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
+
+-- | @True@ exactly when the @Char@ appears in the @Text@.
+elemText :: Char -> T.Text -> Bool
+elemText c = T.any (== c)
+
+-- | @True@ exactly when the @Char@ does not appear in the @Text@.
+notElemText :: Char -> T.Text -> Bool
+notElemText c = T.all (/= c)
-- | Strip trailing newlines from string.
-stripTrailingNewlines :: String -> String
-stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
+stripTrailingNewlines :: T.Text -> T.Text
+stripTrailingNewlines = T.dropWhileEnd (== '\n')
-- | Remove leading and trailing space (including newlines) from string.
-trim :: String -> String
-trim = triml . trimr
+trim :: T.Text -> T.Text
+trim = T.dropAround (`elemText` " \r\n\t")
-- | Remove leading space (including newlines) from string.
-triml :: String -> String
-triml = dropWhile (`elem` " \r\n\t")
+triml :: T.Text -> T.Text
+triml = T.dropWhile (`elemText` " \r\n\t")
-- | Remove trailing space (including newlines) from string.
-trimr :: String -> String
-trimr = reverse . triml . reverse
+trimr :: T.Text -> T.Text
+trimr = T.dropWhileEnd (`elemText` " \r\n\t")
-- | Trim leading space and trailing space unless after \.
-trimMath :: String -> String
-trimMath = triml . reverse . stripspace . reverse
+trimMath :: T.Text -> T.Text
+trimMath = triml . T.reverse . stripBeginSpace . T.reverse -- no Text.spanEnd
where
- stripspace (c1:c2:cs)
- | c1 `elem` [' ','\t','\n','\r']
- , c2 /= '\\' = stripspace (c2:cs)
- stripspace cs = cs
+ stripBeginSpace t
+ | T.null pref = t
+ | Just ('\\', _) <- T.uncons suff = T.cons (T.last pref) suff
+ | otherwise = suff
+ where
+ (pref, suff) = T.span (`elemText` " \t\n\r") t
-- | Strip leading and trailing characters from string
-stripFirstAndLast :: String -> String
-stripFirstAndLast str =
- drop 1 $ take (length str - 1) str
+stripFirstAndLast :: T.Text -> T.Text
+stripFirstAndLast t = case T.uncons t of
+ Just (_, t') -> case T.unsnoc t' of
+ Just (t'', _) -> t''
+ _ -> t'
+ _ -> ""
-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
-camelCaseToHyphenated :: String -> String
-camelCaseToHyphenated [] = ""
-camelCaseToHyphenated (a:b:rest)
+camelCaseToHyphenated :: T.Text -> T.Text
+camelCaseToHyphenated = T.pack . camelCaseStrToHyphenated . T.unpack
+
+-- This may not work as expected on general Unicode, if it contains
+-- letters with a longer lower case form than upper case. I don't know
+-- what the camel case practices of affected scripts are, though.
+camelCaseStrToHyphenated :: String -> String
+camelCaseStrToHyphenated [] = ""
+camelCaseStrToHyphenated (a:b:rest)
| isLower a
- , isUpper b = a:'-':toLower b:camelCaseToHyphenated rest
+ , isUpper b = a:'-':toLower b:camelCaseStrToHyphenated rest
-- handle ABCDef = abc-def
-camelCaseToHyphenated (a:b:c:rest)
+camelCaseStrToHyphenated (a:b:c:rest)
| isUpper a
, isUpper b
- , isLower c = toLower a:'-':toLower b:camelCaseToHyphenated (c:rest)
-camelCaseToHyphenated (a:rest) = toLower a:camelCaseToHyphenated rest
+ , isLower c = toLower a:'-':toLower b:camelCaseStrToHyphenated (c:rest)
+camelCaseStrToHyphenated (a:rest) = toLower a:camelCaseStrToHyphenated rest
-- | Convert number < 4000 to uppercase roman numeral.
-toRomanNumeral :: Int -> String
+toRomanNumeral :: Int -> T.Text
toRomanNumeral x
| x >= 4000 || x < 0 = "?"
- | x >= 1000 = "M" ++ toRomanNumeral (x - 1000)
- | x >= 900 = "CM" ++ toRomanNumeral (x - 900)
- | x >= 500 = "D" ++ toRomanNumeral (x - 500)
- | x >= 400 = "CD" ++ toRomanNumeral (x - 400)
- | x >= 100 = "C" ++ toRomanNumeral (x - 100)
- | x >= 90 = "XC" ++ toRomanNumeral (x - 90)
- | x >= 50 = "L" ++ toRomanNumeral (x - 50)
- | x >= 40 = "XL" ++ toRomanNumeral (x - 40)
- | x >= 10 = "X" ++ toRomanNumeral (x - 10)
+ | x >= 1000 = "M" <> toRomanNumeral (x - 1000)
+ | x >= 900 = "CM" <> toRomanNumeral (x - 900)
+ | x >= 500 = "D" <> toRomanNumeral (x - 500)
+ | x >= 400 = "CD" <> toRomanNumeral (x - 400)
+ | x >= 100 = "C" <> toRomanNumeral (x - 100)
+ | x >= 90 = "XC" <> toRomanNumeral (x - 90)
+ | x >= 50 = "L" <> toRomanNumeral (x - 50)
+ | x >= 40 = "XL" <> toRomanNumeral (x - 40)
+ | x >= 10 = "X" <> toRomanNumeral (x - 10)
| x == 9 = "IX"
- | x >= 5 = "V" ++ toRomanNumeral (x - 5)
+ | x >= 5 = "V" <> toRomanNumeral (x - 5)
| x == 4 = "IV"
- | x >= 1 = "I" ++ toRomanNumeral (x - 1)
+ | x >= 1 = "I" <> toRomanNumeral (x - 1)
| otherwise = ""
-- | Escape whitespace and some punctuation characters in URI.
-escapeURI :: String -> String
-escapeURI = escapeURIString (not . needsEscaping)
- where needsEscaping c = isSpace c || c `elem`
- ['<','>','|','"','{','}','[',']','^', '`']
+escapeURI :: T.Text -> T.Text
+escapeURI = T.pack . escapeURIString (not . needsEscaping) . T.unpack
+ where needsEscaping c = isSpace c || c `elemText` "<>|\"{}[]^`"
+
-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
tabFilter :: Int -- ^ Tab stop
@@ -304,8 +349,11 @@ crFilter = T.filter (/= '\r')
-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
-- or equal to 1583, but MS Word only accepts dates starting 1601).
-normalizeDate :: String -> Maybe String
-normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
+normalizeDate :: T.Text -> Maybe T.Text
+normalizeDate = fmap T.pack . normalizeDate' . T.unpack
+
+normalizeDate' :: String -> Maybe String
+normalizeDate' s = fmap (formatTime defaultTimeLocale "%F")
(msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
where rejectBadYear day = case toGregorian day of
(y, _, _) | y >= 1601 && y <= 9999 -> Just day
@@ -321,26 +369,26 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
-- | Generate infinite lazy list of markers for an ordered list,
-- depending on list attributes.
-orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
+orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [T.Text]
orderedListMarkers (start, numstyle, numdelim) =
- let singleton c = [c]
- nums = case numstyle of
- DefaultStyle -> map show [start..]
- Example -> map show [start..]
- Decimal -> map show [start..]
+ let nums = case numstyle of
+ DefaultStyle -> map tshow [start..]
+ Example -> map tshow [start..]
+ Decimal -> map tshow [start..]
UpperAlpha -> drop (start - 1) $ cycle $
- map singleton ['A'..'Z']
+ map T.singleton ['A'..'Z']
LowerAlpha -> drop (start - 1) $ cycle $
- map singleton ['a'..'z']
+ map T.singleton ['a'..'z']
UpperRoman -> map toRomanNumeral [start..]
- LowerRoman -> map (map toLower . toRomanNumeral) [start..]
+ LowerRoman -> map (T.toLower . toRomanNumeral) [start..]
inDelim str = case numdelim of
- DefaultDelim -> str ++ "."
- Period -> str ++ "."
- OneParen -> str ++ ")"
- TwoParens -> "(" ++ str ++ ")"
+ DefaultDelim -> str <> "."
+ Period -> str <> "."
+ OneParen -> str <> ")"
+ TwoParens -> "(" <> str <> ")"
in map inDelim nums
+
-- | Extract the leading and trailing spaces from inside an inline element
-- and place them outside the element. SoftBreaks count as Spaces for
-- these purposes.
@@ -387,15 +435,16 @@ deQuote x = x
-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
-- labels).
-stringify :: Walkable Inline a => a -> String
+stringify :: Walkable Inline a => a -> T.Text
stringify = query go . walk (deNote . deQuote)
- where go :: Inline -> [Char]
+ where go :: Inline -> T.Text
go Space = " "
go SoftBreak = " "
go (Str x) = x
go (Code _ x) = x
go (Math _ x) = x
- go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
+ go (RawInline (Format "html") (T.unpack -> ('<':'b':'r':_)))
+ = " " -- see #2105
go LineBreak = " "
go _ = ""
@@ -407,7 +456,7 @@ stringify = query go . walk (deNote . deQuote)
capitalize :: Walkable Inline a => a -> a
capitalize = walk go
where go :: Inline -> Inline
- go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
+ go (Str s) = Str $ T.toUpper s
go x = x
-- | Change final list item from @Para@ to @Plain@ if the list contains
@@ -463,7 +512,7 @@ isPara _ = False
-- | Convert Pandoc inline list to plain text identifier. HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
-inlineListToIdentifier :: Extensions -> [Inline] -> String
+inlineListToIdentifier :: Extensions -> [Inline] -> T.Text
inlineListToIdentifier exts =
dropNonLetter . filterAscii . toIdent . stringify . walk unEmojify
where
@@ -476,23 +525,23 @@ inlineListToIdentifier exts =
unEmoji x = x
dropNonLetter
| extensionEnabled Ext_gfm_auto_identifiers exts = id
- | otherwise = dropWhile (not . isAlpha)
+ | otherwise = T.dropWhile (not . isAlpha)
filterAscii
| extensionEnabled Ext_ascii_identifiers exts
- = mapMaybe toAsciiChar
+ = T.pack . mapMaybe toAsciiChar . T.unpack
| otherwise = id
toIdent
| extensionEnabled Ext_gfm_auto_identifiers exts =
- filterPunct . spaceToDash . map toLower
- | otherwise = intercalate "-" . words . filterPunct . map toLower
- filterPunct = filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c)
+ filterPunct . spaceToDash . T.toLower
+ | otherwise = T.intercalate "-" . T.words . filterPunct . T.toLower
+ filterPunct = T.filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c)
isAllowedPunct c
| extensionEnabled Ext_gfm_auto_identifiers exts
= c == '-' || c == '_' ||
generalCategory c `elem` [NonSpacingMark, SpacingCombiningMark,
EnclosingMark, ConnectorPunctuation]
| otherwise = c == '_' || c == '-' || c == '.'
- spaceToDash = map (\c -> if isSpace c then '-' else c)
+ spaceToDash = T.map (\c -> if isSpace c then '-' else c)
-- | Put a list of Pandoc blocks into a hierarchical structure:
@@ -529,7 +578,7 @@ makeSections numbering mbBaseLevel bs =
-- don't touch number if already present
case lookup "number" kvs of
Nothing | numbering ->
- ("number", intercalate "." (map show newnum)) : kvs
+ ("number", T.intercalate "." (map tshow newnum)) : kvs
_ -> kvs)
return $
Div divattr (Header level' attr title' : sectionContents') : rest'
@@ -542,7 +591,7 @@ makeSections numbering mbBaseLevel bs =
let inner' =
case inner of
(Div (dident',dclasses',dkvs') zs@(Header{}:zs') : ws)
- | null dident ->
+ | T.null dident ->
Div (dident',dclasses' ++ dclasses,dkvs' ++ dkvs) zs : ws
| otherwise -> -- keep id on header so we don't lose anchor
Div (dident,dclasses ++ dclasses',dkvs ++ dkvs')
@@ -564,7 +613,7 @@ headerLtEq _ _ = False
-- | Generate a unique identifier from a list of inlines.
-- Second argument is a list of already used identifiers.
-uniqueIdent :: Extensions -> [Inline] -> Set.Set String -> String
+uniqueIdent :: Extensions -> [Inline] -> Set.Set T.Text -> T.Text
uniqueIdent exts title' usedIdents =
if baseIdent `Set.member` usedIdents
then case find (\x -> not $ numIdent x `Set.member` usedIdents)
@@ -577,7 +626,7 @@ uniqueIdent exts title' usedIdents =
baseIdent = case inlineListToIdentifier exts title' of
"" -> "section"
x -> x
- numIdent n = baseIdent ++ "-" ++ show n
+ numIdent n = baseIdent <> "-" <> tshow n
-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
@@ -664,7 +713,7 @@ handleTaskListItem handleInlines exts bls =
-- | Set a field of a 'Meta' object. If the field already has a value,
-- convert it into a list with the new value appended to the old value(s).
addMetaField :: ToMetaValue a
- => String
+ => T.Text
-> a
-> Meta
-> Meta
@@ -686,12 +735,16 @@ makeMeta title authors date =
-- | Remove soft breaks between East Asian characters.
eastAsianLineBreakFilter :: Pandoc -> Pandoc
eastAsianLineBreakFilter = bottomUp go
- where go (x:SoftBreak:y:zs) =
- case (stringify x, stringify y) of
- (xs@(_:_), c:_)
- | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
- _ -> x:SoftBreak:y:zs
- go xs = xs
+ where go (x:SoftBreak:y:zs)
+ | Just (_, b) <- T.unsnoc $ stringify x
+ , Just (c, _) <- T.uncons $ stringify y
+ , charWidth b == 2
+ , charWidth c == 2
+ = x:y:zs
+ | otherwise
+ = x:SoftBreak:y:zs
+ go xs
+ = xs
-- | Builder for underline.
-- This probably belongs in Builder.hs in pandoc-types.
@@ -702,27 +755,28 @@ underlineSpan = B.spanWith ("", ["underline"], [])
-- | Set of HTML elements that are represented as Span with a class equal as
-- the element tag itself.
htmlSpanLikeElements :: Set.Set T.Text
-htmlSpanLikeElements = Set.fromList [T.pack "kbd", T.pack "mark", T.pack "dfn"]
+htmlSpanLikeElements = Set.fromList ["kbd", "mark", "dfn"]
-- | Returns the first sentence in a list of inlines, and the rest.
breakSentence :: [Inline] -> ([Inline], [Inline])
breakSentence [] = ([],[])
breakSentence xs =
- let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
- isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
- isSentenceEndInline LineBreak = True
- isSentenceEndInline _ = False
+ let isSentenceEndInline (Str ys)
+ | Just (_, c) <- T.unsnoc ys = c == '.' || c == '?'
+ isSentenceEndInline LineBreak = True
+ isSentenceEndInline _ = False
(as, bs) = break isSentenceEndInline xs
in case bs of
- [] -> (as, [])
- [c] -> (as ++ [c], [])
- (c:Space:cs) -> (as ++ [c], cs)
- (c:SoftBreak:cs) -> (as ++ [c], cs)
- (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs)
- (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs)
- (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs)
- (c:cs) -> (as ++ [c] ++ ds, es)
- where (ds, es) = breakSentence cs
+ [] -> (as, [])
+ [c] -> (as ++ [c], [])
+ (c:Space:cs) -> (as ++ [c], cs)
+ (c:SoftBreak:cs) -> (as ++ [c], cs)
+ (Str ".":Str s@(T.uncons -> Just (')',_)):cs)
+ -> (as ++ [Str ".", Str s], cs)
+ (x@(Str (T.stripPrefix ".)" -> Just _)):cs) -> (as ++ [x], cs)
+ (LineBreak:x@(Str (T.uncons -> Just ('.',_))):cs) -> (as ++[LineBreak], x:cs)
+ (c:cs) -> (as ++ [c] ++ ds, es)
+ where (ds, es) = breakSentence cs
-- | Split a list of inlines into sentences.
splitSentences :: [Inline] -> [[Inline]]
@@ -763,10 +817,11 @@ filterIpynbOutput mode = walk go
removeANSI (CodeBlock attr code) =
CodeBlock attr (removeANSIEscapes code)
removeANSI x = x
- removeANSIEscapes [] = []
- removeANSIEscapes ('\x1b':'[':cs) =
- removeANSIEscapes (drop 1 $ dropWhile (/='m') cs)
- removeANSIEscapes (c:cs) = c : removeANSIEscapes cs
+ removeANSIEscapes t
+ | Just cs <- T.stripPrefix "\x1b[" t =
+ removeANSIEscapes $ T.drop 1 $ T.dropWhile (/='m') cs
+ | Just (c, cs) <- T.uncons t = T.cons c $ removeANSIEscapes cs
+ | otherwise = ""
go x = x
--
@@ -774,12 +829,12 @@ filterIpynbOutput mode = walk go
--
-- | Render HTML tags.
-renderTags' :: [Tag String] -> String
+renderTags' :: [Tag T.Text] -> T.Text
renderTags' = renderTagsOptions
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
"meta", "link"]
, optRawTag = matchTags ["script", "style"] }
- where matchTags tags = flip elem tags . map toLower
+ where matchTags tags = flip elem tags . T.toLower
--
-- File handling
@@ -826,8 +881,8 @@ collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
-- Convert the path part of a file: URI to a regular path.
-- On windows, @/c:/foo@ should be @c:/foo@.
-- On linux, @/foo@ should be @/foo@.
-uriPathToPath :: String -> FilePath
-uriPathToPath path =
+uriPathToPath :: T.Text -> FilePath
+uriPathToPath (T.unpack -> path) =
#ifdef _WINDOWS
case path of
'/':ps -> ps
@@ -853,7 +908,7 @@ filteredFilesFromArchive zf f =
-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
-- the unofficial schemes doi, javascript, isbn, pmid.
-schemes :: Set.Set String
+schemes :: Set.Set T.Text
schemes = Set.fromList
-- Official IANA schemes
[ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
@@ -905,11 +960,11 @@ schemes = Set.fromList
-- | Check if the string is a valid URL with a IANA or frequently used but
-- unofficial scheme (see @schemes@).
-isURI :: String -> Bool
-isURI = maybe False hasKnownScheme . parseURI
+isURI :: T.Text -> Bool
+isURI = maybe False hasKnownScheme . parseURI . T.unpack
where
- hasKnownScheme = (`Set.member` schemes) . map toLower .
- filter (/= ':') . uriScheme
+ hasKnownScheme = (`Set.member` schemes) . T.toLower .
+ T.filter (/= ':') . T.pack . uriScheme
---
--- Squash blocks into inlines
@@ -962,12 +1017,14 @@ defaultBlocksSeparator =
-- Safe read
--
-safeRead :: (MonadPlus m, Read a) => String -> m a
-safeRead s = case reads s of
+safeRead :: (MonadPlus m, Read a) => T.Text -> m a
+safeRead = safeStrRead . T.unpack
+
+safeStrRead :: (MonadPlus m, Read a) => String -> m a
+safeStrRead s = case reads s of
(d,x):_
| all isSpace x -> return d
_ -> mzero
-
--
-- User data directory
--