aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Shared.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r--src/Text/Pandoc/Shared.hs184
1 files changed, 137 insertions, 47 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 09086da1f..5b0d9b6b4 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE DeriveDataTypeable, CPP #-}
+{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
+ FlexibleContexts, ScopedTypeVariables #-}
{-
-Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Shared
- Copyright : Copyright (C) 2006-2013 John MacFarlane
+ Copyright : Copyright (C) 2006-2014 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -34,6 +35,7 @@ module Text.Pandoc.Shared (
splitByIndices,
splitStringByIndices,
substitute,
+ ordNub,
-- * Text processing
backslashEscapes,
escapeStringUsing,
@@ -51,10 +53,12 @@ module Text.Pandoc.Shared (
-- * Pandoc block and inline list processing
orderedListMarkers,
normalizeSpaces,
+ extractSpaces,
normalize,
stringify,
compactify,
compactify',
+ compactify'DL,
Element (..),
hierarchicalize,
uniqueIdent,
@@ -79,8 +83,9 @@ module Text.Pandoc.Shared (
) where
import Text.Pandoc.Definition
+import Text.Pandoc.Walk
import Text.Pandoc.Generic
-import Text.Pandoc.Builder (Blocks, ToMetaValue(..))
+import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import System.Environment (getProgName)
@@ -89,12 +94,15 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate )
import qualified Data.Map as M
-import Network.URI ( escapeURIString, isAbsoluteURI, unEscapeString )
+import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
+ unEscapeString, parseURIReference )
+import qualified Data.Set as Set
import System.Directory
import Text.Pandoc.MIME (getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension )
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
+import qualified Control.Exception as E
import Control.Monad (msum, unless)
import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
@@ -103,8 +111,10 @@ import System.IO (stderr)
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import qualified Data.ByteString as BS
-import Data.ByteString.Lazy (toChunks)
import qualified Data.ByteString.Char8 as B8
+import Text.Pandoc.Compat.Monoid
+import Data.ByteString.Base64 (decodeLenient)
+import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
@@ -112,10 +122,16 @@ import System.FilePath ( joinPath, splitDirectories )
#else
import Paths_pandoc (getDataFileName)
#endif
-#ifdef HTTP_CONDUIT
-import Network.HTTP.Conduit (httpLbs, parseUrl, withManager,
- responseBody, responseHeaders)
+#ifdef HTTP_CLIENT
+import Data.ByteString.Lazy (toChunks)
+import Network.HTTP.Client (httpLbs, parseUrl, withManager,
+ responseBody, responseHeaders,
+ Request(port,host))
+import Network.HTTP.Client.Internal (addProxy)
+import Network.HTTP.Client.TLS (tlsManagerSettings)
+import System.Environment (getEnv)
import Network.HTTP.Types.Header ( hContentType)
+import Network (withSocketsDo)
#else
import Network.URI (parseURI)
import Network.HTTP (findHeader, rspBody,
@@ -162,6 +178,13 @@ substitute target replacement lst@(x:xs) =
then replacement ++ substitute target replacement (drop (length target) lst)
else x : substitute target replacement xs
+ordNub :: (Ord a) => [a] -> [a]
+ordNub l = go Set.empty l
+ where
+ go _ [] = []
+ go s (x:xs) = if x `Set.member` s then go s xs
+ else x : go (Set.insert x s) xs
+
--
-- Text processing
--
@@ -225,9 +248,9 @@ toRomanNumeral x =
_ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
_ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
_ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
- _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
+ _ | x == 9 -> "IX"
_ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
- _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
+ _ | x == 4 -> "IV"
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
_ -> ""
@@ -265,7 +288,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
(msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
where parsetimeWith = parseTime defaultTimeLocale
formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
- "%d %B %Y", "%b. %d, %Y", "%B %d, %Y"]
+ "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"]
--
-- Pandoc block and inline list processing
@@ -310,6 +333,20 @@ isSpaceOrEmpty Space = True
isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False
+-- | Extract the leading and trailing spaces from inside an inline element
+-- and place them outside the element.
+
+extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
+extractSpaces f is =
+ let contents = B.unMany is
+ left = case viewl contents of
+ (Space :< _) -> B.space
+ _ -> mempty
+ right = case viewr contents of
+ (_ :> Space) -> B.space
+ _ -> mempty in
+ (left <> f (B.trimInlines . B.Many $ contents) <> right)
+
-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
-- empty elements, etc.
@@ -380,9 +417,11 @@ consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
consolidateInlines (x : xs) = x : consolidateInlines xs
consolidateInlines [] = []
--- | Convert list of inlines to a string with formatting removed.
-stringify :: [Inline] -> String
-stringify = queryWith go
+-- | Convert pandoc structure to a string with formatting removed.
+-- Footnotes are skipped (since we don't want their contents in link
+-- labels).
+stringify :: Walkable Inline a => a -> String
+stringify = query go . walk deNote
where go :: Inline -> [Char]
go Space = " "
go (Str x) = x
@@ -390,6 +429,8 @@ stringify = queryWith go
go (Math _ x) = x
go LineBreak = " "
go _ = ""
+ deNote (Note _) = Str ""
+ deNote x = x
-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
@@ -422,6 +463,21 @@ compactify' items =
_ -> items
_ -> items
+-- | Like @compactify'@, but akts on items of definition lists.
+compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
+compactify'DL items =
+ let defs = concatMap snd items
+ defBlocks = reverse $ concatMap B.toList defs
+ in case defBlocks of
+ (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
+ then let (t,ds) = last items
+ lastDef = B.toList $ last ds
+ ds' = init ds ++
+ [B.fromList $ init lastDef ++ [Plain x]]
+ in init items ++ [(t, ds')]
+ else items
+ _ -> items
+
isPara :: Block -> Bool
isPara (Para _) = True
isPara _ = False
@@ -432,6 +488,29 @@ data Element = Blk Block
-- lvl num attributes label contents
deriving (Eq, Read, Show, Typeable, Data)
+instance Walkable Inline Element where
+ walk f (Blk x) = Blk (walk f x)
+ walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
+ walkM f (Blk x) = Blk `fmap` walkM f x
+ walkM f (Sec lev nums attr ils elts) = do
+ ils' <- walkM f ils
+ elts' <- walkM f elts
+ return $ Sec lev nums attr ils' elts'
+ query f (Blk x) = query f x
+ query f (Sec _ _ _ ils elts) = query f ils <> query f elts
+
+instance Walkable Block Element where
+ walk f (Blk x) = Blk (walk f x)
+ walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
+ walkM f (Blk x) = Blk `fmap` walkM f x
+ walkM f (Sec lev nums attr ils elts) = do
+ ils' <- walkM f ils
+ elts' <- walkM f elts
+ return $ Sec lev nums attr ils' elts'
+ query f (Blk x) = query f x
+ query f (Sec _ _ _ ils elts) = query f ils <> query f elts
+
+
-- | Convert Pandoc inline list to plain text identifier. HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
@@ -492,14 +571,14 @@ isHeaderBlock _ = False
-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
-headerShift n = bottomUp shift
+headerShift n = walk shift
where shift :: Block -> Block
shift (Header level attr inner) = Header (level + n) attr inner
shift x = x
-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
-isTightList = and . map firstIsPlain
+isTightList = all firstIsPlain
where firstIsPlain (Plain _ : _) = True
firstIsPlain _ = False
@@ -512,8 +591,10 @@ addMetaField :: ToMetaValue a
-> Meta
addMetaField key val (Meta meta) =
Meta $ M.insertWith combine key (toMetaValue val) meta
- where combine newval (MetaList xs) = MetaList (xs ++ [newval])
+ where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
combine newval x = MetaList [x, newval]
+ tolist (MetaList ys) = ys
+ tolist y = [y]
-- | Create 'Meta' from old-style title, authors, date. This is
-- provided to ease the transition from the old API.
@@ -531,14 +612,10 @@ makeMeta title authors date =
-- | Render HTML tags.
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions
- renderOptions{ optMinimize = \x ->
- let y = map toLower x
- in y == "hr" || y == "br" ||
- y == "img" || y == "meta" ||
- y == "link"
- , optRawTag = \x ->
- let y = map toLower x
- in y == "script" || y == "style" }
+ renderOptions{ optMinimize = matchTags ["hr", "br", "img",
+ "meta", "link"]
+ , optRawTag = matchTags ["script", "style"] }
+ where matchTags = \tags -> flip elem tags . map toLower
--
-- File handling
@@ -557,8 +634,7 @@ readDefaultDataFile :: FilePath -> IO BS.ByteString
readDefaultDataFile fname =
#ifdef EMBED_DATA_FILES
case lookup (makeCanonical fname) dataFiles of
- Nothing -> ioError $ userError
- $ "Data file `" ++ fname ++ "' does not exist"
+ Nothing -> err 97 $ "Could not find data file " ++ fname
Just contents -> return contents
where makeCanonical = joinPath . transformPathParts . splitDirectories
transformPathParts = reverse . foldl go []
@@ -566,7 +642,12 @@ readDefaultDataFile fname =
go (_:as) ".." = as
go as x = x : as
#else
- getDataFileName ("data" </> fname) >>= BS.readFile
+ getDataFileName ("data" </> fname) >>= checkExistence >>= BS.readFile
+ where checkExistence fn = do
+ exists <- doesFileExist fn
+ if exists
+ then return fn
+ else err 97 ("Could not find data file " ++ fname)
#endif
-- | Read file from specified user data directory or, if not found there, from
@@ -586,34 +667,45 @@ readDataFileUTF8 userDir fname =
-- | Fetch an image or other item from the local filesystem or the net.
-- Returns raw content and maybe mime type.
-fetchItem :: String -> String -> IO (BS.ByteString, Maybe String)
-fetchItem sourceDir s =
- case s of
- _ | isAbsoluteURI s -> openURL s
- | isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s
- | otherwise -> do
+fetchItem :: Maybe String -> String
+ -> IO (Either E.SomeException (BS.ByteString, Maybe String))
+fetchItem sourceURL s
+ | isURI s = openURL s
+ | otherwise =
+ case sourceURL >>= parseURIReference of
+ Just u -> case parseURIReference s of
+ Just s' -> openURL $ show $
+ s' `nonStrictRelativeTo` u
+ Nothing -> openURL $ show u ++ "/" ++ s
+ Nothing -> E.try readLocalFile
+ where readLocalFile = do
let mime = case takeExtension s of
- ".gz" -> getMimeType $ dropExtension s
- x -> getMimeType x
- let f = sourceDir </> s
- cont <- BS.readFile f
+ ".gz" -> getMimeType $ dropExtension s
+ x -> getMimeType x
+ cont <- BS.readFile s
return (cont, mime)
-- | Read from a URL and return raw data and maybe mime type.
-openURL :: String -> IO (BS.ByteString, Maybe String)
+openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
openURL u
| "data:" `isPrefixOf` u =
let mime = takeWhile (/=',') $ drop 5 u
contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
- in return (contents, Just mime)
-#ifdef HTTP_CONDUIT
- | otherwise = do
+ in return $ Right (decodeLenient contents, Just mime)
+#ifdef HTTP_CLIENT
+ | otherwise = withSocketsDo $ E.try $ do
req <- parseUrl u
- resp <- withManager $ httpLbs req
+ (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
+ let req' = case proxy of
+ Left _ -> req
+ Right pr -> case parseUrl pr of
+ Just r -> addProxy (host r) (port r) req
+ Nothing -> req
+ resp <- withManager tlsManagerSettings $ httpLbs req'
return (BS.concat $ toChunks $ responseBody resp,
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
#else
- | otherwise = getBodyAndMimeType `fmap` browse
+ | otherwise = E.try $ getBodyAndMimeType `fmap` browse
(do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
setOutHandler $ const (return ())
setAllowRedirects True
@@ -651,5 +743,3 @@ safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
_ -> fail $ "Could not read `" ++ s ++ "'"
-
-