aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Shared.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r--src/Text/Pandoc/Shared.hs351
1 files changed, 254 insertions, 97 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 5b0d9b6b4..a91ca9115 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,5 +1,5 @@
{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
- FlexibleContexts, ScopedTypeVariables #-}
+ FlexibleContexts, ScopedTypeVariables, PatternGuards #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -55,7 +55,11 @@ module Text.Pandoc.Shared (
normalizeSpaces,
extractSpaces,
normalize,
+ normalizeInlines,
+ normalizeBlocks,
+ removeFormatting,
stringify,
+ capitalize,
compactify,
compactify',
compactify'DL,
@@ -74,17 +78,21 @@ module Text.Pandoc.Shared (
readDataFile,
readDataFileUTF8,
fetchItem,
+ fetchItem',
openURL,
+ collapseFilePath,
-- * Error handling
err,
warn,
-- * Safe read
- safeRead
+ safeRead,
+ -- * Temp directory
+ withTempDir
) where
import Text.Pandoc.Definition
import Text.Pandoc.Walk
-import Text.Pandoc.Generic
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
@@ -92,14 +100,15 @@ import System.Environment (getProgName)
import System.Exit (exitWith, ExitCode(..))
import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
-import Data.List ( find, isPrefixOf, intercalate )
+import Data.List ( find, stripPrefix, intercalate )
import qualified Data.Map as M
import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
- unEscapeString, parseURIReference )
+ unEscapeString, parseURIReference, isAllowedInURI )
import qualified Data.Set as Set
import System.Directory
+import System.FilePath (joinPath, splitDirectories)
import Text.Pandoc.MIME (getMimeType)
-import System.FilePath ( (</>), takeExtension, dropExtension )
+import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
@@ -108,6 +117,7 @@ import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
+import System.IO.Temp
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import qualified Data.ByteString as BS
@@ -115,6 +125,7 @@ import qualified Data.ByteString.Char8 as B8
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
+import qualified Data.Text as T (toUpper, pack, unpack)
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
@@ -174,9 +185,9 @@ substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst@(x:xs) =
- if target `isPrefixOf` lst
- then replacement ++ substitute target replacement (drop (length target) lst)
- else x : substitute target replacement xs
+ case stripPrefix target lst of
+ Just lst' -> replacement ++ substitute target replacement lst'
+ Nothing -> x : substitute target replacement xs
ordNub :: (Ord a) => [a] -> [a]
ordNub l = go Set.empty l
@@ -334,10 +345,10 @@ isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False
-- | Extract the leading and trailing spaces from inside an inline element
--- and place them outside the element.
+-- and place them outside the element.
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
-extractSpaces f is =
+extractSpaces f is =
let contents = B.unMany is
left = case viewl contents of
(Space :< _) -> B.space
@@ -350,72 +361,160 @@ extractSpaces f is =
-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
-- empty elements, etc.
-normalize :: (Eq a, Data a) => a -> a
-normalize = topDown removeEmptyBlocks .
- topDown consolidateInlines .
- bottomUp (removeEmptyInlines . removeTrailingInlineSpaces)
-
-removeEmptyBlocks :: [Block] -> [Block]
-removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs
-removeEmptyBlocks [] = []
-
-removeEmptyInlines :: [Inline] -> [Inline]
-removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs
-removeEmptyInlines (x : xs) = x : removeEmptyInlines xs
-removeEmptyInlines [] = []
-
-removeTrailingInlineSpaces :: [Inline] -> [Inline]
-removeTrailingInlineSpaces = reverse . removeLeadingInlineSpaces . reverse
-
-removeLeadingInlineSpaces :: [Inline] -> [Inline]
-removeLeadingInlineSpaces = dropWhile isSpaceOrEmpty
-
-consolidateInlines :: [Inline] -> [Inline]
-consolidateInlines (Str x : ys) =
+normalize :: Pandoc -> Pandoc
+normalize (Pandoc (Meta meta) blocks) =
+ Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
+ where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
+ go (MetaBlocks xs) = MetaBlocks $ normalizeBlocks xs
+ go (MetaList ms) = MetaList $ map go ms
+ go (MetaMap m) = MetaMap $ M.map go m
+ go x = x
+
+normalizeBlocks :: [Block] -> [Block]
+normalizeBlocks (Null : xs) = normalizeBlocks xs
+normalizeBlocks (Div attr bs : xs) =
+ Div attr (normalizeBlocks bs) : normalizeBlocks xs
+normalizeBlocks (BlockQuote bs : xs) =
+ case normalizeBlocks bs of
+ [] -> normalizeBlocks xs
+ bs' -> BlockQuote bs' : normalizeBlocks xs
+normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
+normalizeBlocks (BulletList items : xs) =
+ BulletList (map normalizeBlocks items) : normalizeBlocks xs
+normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
+normalizeBlocks (OrderedList attr items : xs) =
+ OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
+normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
+normalizeBlocks (DefinitionList items : xs) =
+ DefinitionList (map go items) : normalizeBlocks xs
+ where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
+normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
+normalizeBlocks (RawBlock f x : xs) =
+ case normalizeBlocks xs of
+ (RawBlock f' x' : rest) | f' == f ->
+ RawBlock f (x ++ ('\n':x')) : rest
+ rest -> RawBlock f x : rest
+normalizeBlocks (Para ils : xs) =
+ case normalizeInlines ils of
+ [] -> normalizeBlocks xs
+ ils' -> Para ils' : normalizeBlocks xs
+normalizeBlocks (Plain ils : xs) =
+ case normalizeInlines ils of
+ [] -> normalizeBlocks xs
+ ils' -> Plain ils' : normalizeBlocks xs
+normalizeBlocks (Header lev attr ils : xs) =
+ Header lev attr (normalizeInlines ils) : normalizeBlocks xs
+normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
+ Table (normalizeInlines capt) aligns widths
+ (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
+ : normalizeBlocks xs
+normalizeBlocks (x:xs) = x : normalizeBlocks xs
+normalizeBlocks [] = []
+
+normalizeInlines :: [Inline] -> [Inline]
+normalizeInlines (Str x : ys) =
case concat (x : map fromStr strs) of
- "" -> consolidateInlines rest
- n -> Str n : consolidateInlines rest
+ "" -> rest
+ n -> Str n : rest
where
- (strs, rest) = span isStr ys
+ (strs, rest) = span isStr $ normalizeInlines ys
isStr (Str _) = True
isStr _ = False
fromStr (Str z) = z
- fromStr _ = error "consolidateInlines - fromStr - not a Str"
-consolidateInlines (Space : ys) = Space : rest
+ fromStr _ = error "normalizeInlines - fromStr - not a Str"
+normalizeInlines (Space : ys) =
+ if null rest
+ then []
+ else Space : rest
where isSp Space = True
isSp _ = False
- rest = consolidateInlines $ dropWhile isSp ys
-consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
- Emph (xs ++ ys) : zs
-consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
- Strong (xs ++ ys) : zs
-consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $
- Subscript (xs ++ ys) : zs
-consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $
- Superscript (xs ++ ys) : zs
-consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $
- SmallCaps (xs ++ ys) : zs
-consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $
- Strikeout (xs ++ ys) : zs
-consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' =
- consolidateInlines $ RawInline f (x ++ y) : zs
-consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
- consolidateInlines $ Code a1 (x ++ y) : zs
-consolidateInlines (x : xs) = x : consolidateInlines xs
-consolidateInlines [] = []
+ rest = dropWhile isSp $ normalizeInlines ys
+normalizeInlines (Emph xs : zs) =
+ case normalizeInlines zs of
+ (Emph ys : rest) -> normalizeInlines $
+ Emph (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Emph xs' : rest
+normalizeInlines (Strong xs : zs) =
+ case normalizeInlines zs of
+ (Strong ys : rest) -> normalizeInlines $
+ Strong (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Strong xs' : rest
+normalizeInlines (Subscript xs : zs) =
+ case normalizeInlines zs of
+ (Subscript ys : rest) -> normalizeInlines $
+ Subscript (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Subscript xs' : rest
+normalizeInlines (Superscript xs : zs) =
+ case normalizeInlines zs of
+ (Superscript ys : rest) -> normalizeInlines $
+ Superscript (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Superscript xs' : rest
+normalizeInlines (SmallCaps xs : zs) =
+ case normalizeInlines zs of
+ (SmallCaps ys : rest) -> normalizeInlines $
+ SmallCaps (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> SmallCaps xs' : rest
+normalizeInlines (Strikeout xs : zs) =
+ case normalizeInlines zs of
+ (Strikeout ys : rest) -> normalizeInlines $
+ Strikeout (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Strikeout xs' : rest
+normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
+normalizeInlines (RawInline f xs : zs) =
+ case normalizeInlines zs of
+ (RawInline f' ys : rest) | f == f' -> normalizeInlines $
+ RawInline f (xs ++ ys) : rest
+ rest -> RawInline f xs : rest
+normalizeInlines (Code _ "" : ys) = normalizeInlines ys
+normalizeInlines (Code attr xs : zs) =
+ case normalizeInlines zs of
+ (Code attr' ys : rest) | attr == attr' -> normalizeInlines $
+ Code attr (xs ++ ys) : rest
+ rest -> Code attr xs : rest
+-- allow empty spans, they may carry identifiers etc.
+-- normalizeInlines (Span _ [] : ys) = normalizeInlines ys
+normalizeInlines (Span attr xs : zs) =
+ case normalizeInlines zs of
+ (Span attr' ys : rest) | attr == attr' -> normalizeInlines $
+ Span attr (normalizeInlines $ xs ++ ys) : rest
+ rest -> Span attr (normalizeInlines xs) : rest
+normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) :
+ normalizeInlines ys
+normalizeInlines (Quoted qt ils : ys) =
+ Quoted qt (normalizeInlines ils) : normalizeInlines ys
+normalizeInlines (Link ils t : ys) =
+ Link (normalizeInlines ils) t : normalizeInlines ys
+normalizeInlines (Image ils t : ys) =
+ Image (normalizeInlines ils) t : normalizeInlines ys
+normalizeInlines (Cite cs ils : ys) =
+ Cite cs (normalizeInlines ils) : normalizeInlines ys
+normalizeInlines (x : xs) = x : normalizeInlines xs
+normalizeInlines [] = []
+
+-- | Extract inlines, removing formatting.
+removeFormatting :: Walkable Inline a => a -> [Inline]
+removeFormatting = query go . walk deNote
+ where go :: Inline -> [Inline]
+ go (Str xs) = [Str xs]
+ go Space = [Space]
+ go (Code _ x) = [Str x]
+ go (Math _ x) = [Str x]
+ go LineBreak = [Space]
+ go _ = []
+ deNote (Note _) = Str ""
+ deNote x = x
-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
@@ -432,6 +531,17 @@ stringify = query go . walk deNote
deNote (Note _) = Str ""
deNote x = x
+-- | Bring all regular text in a pandoc structure to uppercase.
+--
+-- This function correctly handles cases where a lowercase character doesn't
+-- match to a single uppercase character – e.g. “Straße” would be converted
+-- to “STRASSE”, not “STRAßE”.
+capitalize :: Walkable Inline a => a -> a
+capitalize = walk go
+ where go :: Inline -> Inline
+ go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
+ go x = x
+
-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
@@ -463,20 +573,22 @@ compactify' items =
_ -> items
_ -> items
--- | Like @compactify'@, but akts on items of definition lists.
+-- | Like @compactify'@, but acts on items of definition lists.
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
compactify'DL items =
let defs = concatMap snd items
- defBlocks = reverse $ concatMap B.toList defs
- in case defBlocks of
- (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
- then let (t,ds) = last items
- lastDef = B.toList $ last ds
- ds' = init ds ++
- [B.fromList $ init lastDef ++ [Plain x]]
- in init items ++ [(t, ds')]
- else items
- _ -> items
+ in case reverse (concatMap B.toList defs) of
+ (Para x:xs)
+ | not (any isPara xs) ->
+ let (t,ds) = last items
+ lastDef = B.toList $ last ds
+ ds' = init ds ++
+ if null lastDef
+ then [B.fromList lastDef]
+ else [B.fromList $ init lastDef ++ [Plain x]]
+ in init items ++ [(t, ds')]
+ | otherwise -> items
+ _ -> items
isPara :: Block -> Bool
isPara (Para _) = True
@@ -669,28 +781,38 @@ readDataFileUTF8 userDir fname =
-- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
-fetchItem sourceURL s
- | isURI s = openURL s
- | otherwise =
- case sourceURL >>= parseURIReference of
- Just u -> case parseURIReference s of
- Just s' -> openURL $ show $
- s' `nonStrictRelativeTo` u
- Nothing -> openURL $ show u ++ "/" ++ s
- Nothing -> E.try readLocalFile
+fetchItem sourceURL s =
+ case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of
+ (_, s') | isURI s' -> openURL s'
+ (Just u, s') -> -- try fetching from relative path at source
+ case parseURIReference s' of
+ Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
+ Nothing -> openURL s' -- will throw error
+ (Nothing, _) -> E.try readLocalFile -- get from local file system
where readLocalFile = do
- let mime = case takeExtension s of
- ".gz" -> getMimeType $ dropExtension s
- x -> getMimeType x
- cont <- BS.readFile s
+ cont <- BS.readFile fp
return (cont, mime)
+ dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
+ fp = unEscapeString $ dropFragmentAndQuery s
+ mime = case takeExtension fp of
+ ".gz" -> getMimeType $ dropExtension fp
+ x -> getMimeType x
+ ensureEscaped = escapeURIString isAllowedInURI
+
+-- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
+fetchItem' :: MediaBag -> Maybe String -> String
+ -> IO (Either E.SomeException (BS.ByteString, Maybe String))
+fetchItem' media sourceURL s = do
+ case lookupMedia s media of
+ Nothing -> fetchItem sourceURL s
+ Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
openURL u
- | "data:" `isPrefixOf` u =
- let mime = takeWhile (/=',') $ drop 5 u
- contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
+ | Just u' <- stripPrefix "data:" u =
+ let mime = takeWhile (/=',') u'
+ contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'
in return $ Right (decodeLenient contents, Just mime)
#ifdef HTTP_CLIENT
| otherwise = withSocketsDo $ E.try $ do
@@ -734,6 +856,29 @@ warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+-- | Remove intermediate "." and ".." directories from a path.
+--
+-- @
+-- collapseFilePath "./foo" == "foo"
+-- collapseFilePath "/bar/../baz" == "/baz"
+-- collapseFilePath "/../baz" == "/../baz"
+-- collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
+-- collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
+-- collapseFilePath "parent/foo/.." == "parent"
+-- collapseFilePath "/parent/foo/../../bar" == "/bar"
+-- @
+collapseFilePath :: FilePath -> FilePath
+collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories
+ where
+ go rs "." = rs
+ go r@(p:rs) ".." = case p of
+ ".." -> ("..":r)
+ "/" -> ("..":r)
+ _ -> rs
+ go _ "/" = ["/"]
+ go rs x = x:rs
+
+
--
-- Safe read
--
@@ -743,3 +888,15 @@ safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
_ -> fail $ "Could not read `" ++ s ++ "'"
+
+--
+-- Temp directory
+--
+
+withTempDir :: String -> (FilePath -> IO a) -> IO a
+withTempDir =
+#ifdef _WINDOWS
+ withTempDirectory "."
+#else
+ withSystemTempDirectory
+#endif