diff options
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 883 |
1 files changed, 0 insertions, 883 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs deleted file mode 100644 index 268a5052e..000000000 --- a/src/Text/Pandoc/Shared.hs +++ /dev/null @@ -1,883 +0,0 @@ -{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, - FlexibleContexts, ScopedTypeVariables, PatternGuards, - ViewPatterns #-} -{- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Utility functions and definitions used by the various Pandoc modules. --} -module Text.Pandoc.Shared ( - -- * List processing - splitBy, - splitByIndices, - splitStringByIndices, - substitute, - ordNub, - -- * Text processing - backslashEscapes, - escapeStringUsing, - stripTrailingNewlines, - trim, - triml, - trimr, - stripFirstAndLast, - camelCaseToHyphenated, - toRomanNumeral, - escapeURI, - tabFilter, - -- * Date/time - normalizeDate, - -- * Pandoc block and inline list processing - orderedListMarkers, - normalizeSpaces, - extractSpaces, - removeFormatting, - deNote, - stringify, - capitalize, - compactify, - compactifyDL, - linesToPara, - Element (..), - hierarchicalize, - uniqueIdent, - inlineListToIdentifier, - isHeaderBlock, - headerShift, - isTightList, - addMetaField, - makeMeta, - -- * TagSoup HTML handling - renderTags', - -- * File handling - inDirectory, - getDefaultReferenceDocx, - getDefaultReferenceODT, - readDataFile, - readDataFileUTF8, - openURL, - collapseFilePath, - filteredFilesFromArchive, - -- * Error handling - err, - warn, - mapLeft, - -- * for squashing blocks - blocksToInlines, - -- * Safe read - safeRead, - -- * Temp directory - withTempDir, - -- * Version - pandocVersion - ) where - -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) -import qualified Text.Pandoc.Builder as B -import qualified Text.Pandoc.UTF8 as UTF8 -import System.Exit (exitWith, ExitCode(..)) -import Data.Char ( toLower, isLower, isUpper, isAlpha, - isLetter, isDigit, isSpace ) -import Data.List ( find, stripPrefix, intercalate ) -import Data.Maybe (mapMaybe) -import Data.Version ( showVersion ) -import qualified Data.Map as M -import Network.URI ( escapeURIString, unEscapeString ) -import qualified Data.Set as Set -import System.Directory -import System.FilePath (splitDirectories, isPathSeparator) -import qualified System.FilePath.Posix as Posix -import Text.Pandoc.MIME (MimeType) -import System.FilePath ( (</>) ) -import Data.Generics (Typeable, Data) -import qualified Control.Monad.State as S -import Control.Monad.Trans (MonadIO (..)) -import qualified Control.Exception as E -import Control.Monad (msum, unless, MonadPlus(..)) -import Text.Pandoc.Pretty (charWidth) -import Text.Pandoc.Compat.Time -import Data.Time.Clock.POSIX -import System.IO (stderr) -import System.IO.Temp -import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), - renderOptions) -import Data.Monoid ((<>)) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as B8 -import Data.ByteString.Base64 (decodeLenient) -import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) -import qualified Data.Text as T (toUpper, pack, unpack) -import Data.ByteString.Lazy (toChunks, fromChunks) -import qualified Data.ByteString.Lazy as BL -import Paths_pandoc (version) - -import Codec.Archive.Zip - -#ifdef EMBED_DATA_FILES -import Text.Pandoc.Data (dataFiles) -#else -import Paths_pandoc (getDataFileName) -#endif -#ifdef HTTP_CLIENT -import Network.HTTP.Client (httpLbs, responseBody, responseHeaders, - Request(port,host,requestHeaders)) -import Network.HTTP.Client (parseRequest) -import Network.HTTP.Client (newManager) -import Network.HTTP.Client.Internal (addProxy) -import Network.HTTP.Client.TLS (tlsManagerSettings) -import System.Environment (getEnv) -import Network.HTTP.Types.Header ( hContentType, hUserAgent) -import Network (withSocketsDo) -#else -import Network.URI (parseURI) -import Network.HTTP (findHeader, rspBody, - RequestMethod(..), HeaderName(..), mkRequest) -import Network.Browser (browse, setAllowRedirects, setOutHandler, request) -#endif - --- | Version number of pandoc library. -pandocVersion :: String -pandocVersion = showVersion version - --- --- List processing --- - --- | Split list by groups of one or more sep. -splitBy :: (a -> Bool) -> [a] -> [[a]] -splitBy _ [] = [] -splitBy isSep lst = - let (first, rest) = break isSep lst - rest' = dropWhile isSep rest - in first:(splitBy isSep rest') - -splitByIndices :: [Int] -> [a] -> [[a]] -splitByIndices [] lst = [lst] -splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest) - where (first, rest) = splitAt x lst - --- | Split string into chunks divided at specified indices. -splitStringByIndices :: [Int] -> [Char] -> [[Char]] -splitStringByIndices [] lst = [lst] -splitStringByIndices (x:xs) lst = - let (first, rest) = splitAt' x lst in - first : (splitStringByIndices (map (\y -> y - x) xs) rest) - -splitAt' :: Int -> [Char] -> ([Char],[Char]) -splitAt' _ [] = ([],[]) -splitAt' n xs | n <= 0 = ([],xs) -splitAt' n (x:xs) = (x:ys,zs) - where (ys,zs) = splitAt' (n - charWidth x) xs - --- | Replace each occurrence of one sublist in a list with another. -substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] -substitute _ _ [] = [] -substitute [] _ xs = xs -substitute target replacement lst@(x:xs) = - case stripPrefix target lst of - Just lst' -> replacement ++ substitute target replacement lst' - Nothing -> x : substitute target replacement xs - -ordNub :: (Ord a) => [a] -> [a] -ordNub l = go Set.empty l - where - go _ [] = [] - go s (x:xs) = if x `Set.member` s then go s xs - else x : go (Set.insert x s) xs - --- --- Text processing --- - --- | Returns an association list of backslash escapes for the --- designated characters. -backslashEscapes :: [Char] -- ^ list of special characters to escape - -> [(Char, String)] -backslashEscapes = map (\ch -> (ch, ['\\',ch])) - --- | Escape a string of characters, using an association list of --- characters and strings. -escapeStringUsing :: [(Char, String)] -> String -> String -escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) = - case (lookup x escapeTable) of - Just str -> str ++ rest - Nothing -> x:rest - where rest = escapeStringUsing escapeTable xs - --- | Strip trailing newlines from string. -stripTrailingNewlines :: String -> String -stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse - --- | Remove leading and trailing space (including newlines) from string. -trim :: String -> String -trim = triml . trimr - --- | Remove leading space (including newlines) from string. -triml :: String -> String -triml = dropWhile (`elem` " \r\n\t") - --- | Remove trailing space (including newlines) from string. -trimr :: String -> String -trimr = reverse . triml . reverse - --- | Strip leading and trailing characters from string -stripFirstAndLast :: String -> String -stripFirstAndLast str = - drop 1 $ take ((length str) - 1) str - --- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). -camelCaseToHyphenated :: String -> String -camelCaseToHyphenated [] = "" -camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = - a:'-':(toLower b):(camelCaseToHyphenated rest) -camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) - --- | Convert number < 4000 to uppercase roman numeral. -toRomanNumeral :: Int -> String -toRomanNumeral x - | x >= 4000 || x < 0 = "?" - | x >= 1000 = "M" ++ toRomanNumeral (x - 1000) - | x >= 900 = "CM" ++ toRomanNumeral (x - 900) - | x >= 500 = "D" ++ toRomanNumeral (x - 500) - | x >= 400 = "CD" ++ toRomanNumeral (x - 400) - | x >= 100 = "C" ++ toRomanNumeral (x - 100) - | x >= 90 = "XC" ++ toRomanNumeral (x - 90) - | x >= 50 = "L" ++ toRomanNumeral (x - 50) - | x >= 40 = "XL" ++ toRomanNumeral (x - 40) - | x >= 10 = "X" ++ toRomanNumeral (x - 10) - | x == 9 = "IX" - | x >= 5 = "V" ++ toRomanNumeral (x - 5) - | x == 4 = "IV" - | x >= 1 = "I" ++ toRomanNumeral (x - 1) - | otherwise = "" - --- | Escape whitespace and some punctuation characters in URI. -escapeURI :: String -> String -escapeURI = escapeURIString (not . needsEscaping) - where needsEscaping c = isSpace c || c `elem` - ['<','>','|','"','{','}','[',']','^', '`'] - - --- | Convert tabs to spaces and filter out DOS line endings. --- Tabs will be preserved if tab stop is set to 0. -tabFilter :: Int -- ^ Tab stop - -> String -- ^ Input - -> String -tabFilter tabStop = - let go _ [] = "" - go _ ('\n':xs) = '\n' : go tabStop xs - go _ ('\r':'\n':xs) = '\n' : go tabStop xs - go _ ('\r':xs) = '\n' : go tabStop xs - go spsToNextStop ('\t':xs) = - if tabStop == 0 - then '\t' : go tabStop xs - else replicate spsToNextStop ' ' ++ go tabStop xs - go 1 (x:xs) = - x : go tabStop xs - go spsToNextStop (x:xs) = - x : go (spsToNextStop - 1) xs - in go tabStop - --- --- Date/time --- - --- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We --- limit years to the range 1601-9999 (ISO 8601 accepts greater than --- or equal to 1583, but MS Word only accepts dates starting 1601). -normalizeDate :: String -> Maybe String -normalizeDate s = fmap (formatTime defaultTimeLocale "%F") - (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day) - where rejectBadYear day = case toGregorian day of - (y, _, _) | y >= 1601 && y <= 9999 -> Just day - _ -> Nothing - parsetimeWith = -#if MIN_VERSION_time(1,5,0) - parseTimeM True defaultTimeLocale -#else - parseTime defaultTimeLocale -#endif - formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", - "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", - "%Y%m%d", "%Y%m", "%Y"] - --- --- Pandoc block and inline list processing --- - --- | Generate infinite lazy list of markers for an ordered list, --- depending on list attributes. -orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] -orderedListMarkers (start, numstyle, numdelim) = - let singleton c = [c] - nums = case numstyle of - DefaultStyle -> map show [start..] - Example -> map show [start..] - Decimal -> map show [start..] - UpperAlpha -> drop (start - 1) $ cycle $ - map singleton ['A'..'Z'] - LowerAlpha -> drop (start - 1) $ cycle $ - map singleton ['a'..'z'] - UpperRoman -> map toRomanNumeral [start..] - LowerRoman -> map (map toLower . toRomanNumeral) [start..] - inDelim str = case numdelim of - DefaultDelim -> str ++ "." - Period -> str ++ "." - OneParen -> str ++ ")" - TwoParens -> "(" ++ str ++ ")" - in map inDelim nums - --- | Normalize a list of inline elements: remove leading and trailing --- @Space@ elements, collapse double @Space@s into singles, and --- remove empty Str elements. -normalizeSpaces :: [Inline] -> [Inline] -normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty - where cleanup [] = [] - cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of - [] -> [] - (x:xs) -> Space : x : cleanup xs - cleanup ((Str ""):rest) = cleanup rest - cleanup (x:rest) = x : cleanup rest - -isSpaceOrEmpty :: Inline -> Bool -isSpaceOrEmpty Space = True -isSpaceOrEmpty (Str "") = True -isSpaceOrEmpty _ = False - --- | Extract the leading and trailing spaces from inside an inline element --- and place them outside the element. SoftBreaks count as Spaces for --- these purposes. -extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines -extractSpaces f is = - let contents = B.unMany is - left = case viewl contents of - (Space :< _) -> B.space - (SoftBreak :< _) -> B.softbreak - _ -> mempty - right = case viewr contents of - (_ :> Space) -> B.space - (_ :> SoftBreak) -> B.softbreak - _ -> mempty in - (left <> f (B.trimInlines . B.Many $ contents) <> right) - --- | Extract inlines, removing formatting. -removeFormatting :: Walkable Inline a => a -> [Inline] -removeFormatting = query go . walk deNote - where go :: Inline -> [Inline] - go (Str xs) = [Str xs] - go Space = [Space] - go SoftBreak = [SoftBreak] - go (Code _ x) = [Str x] - go (Math _ x) = [Str x] - go LineBreak = [Space] - go _ = [] - -deNote :: Inline -> Inline -deNote (Note _) = Str "" -deNote x = x - --- | Convert pandoc structure to a string with formatting removed. --- Footnotes are skipped (since we don't want their contents in link --- labels). -stringify :: Walkable Inline a => a -> String -stringify = query go . walk deNote - where go :: Inline -> [Char] - go Space = " " - go SoftBreak = " " - go (Str x) = x - go (Code _ x) = x - go (Math _ x) = x - go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105 - go LineBreak = " " - go _ = "" - --- | Bring all regular text in a pandoc structure to uppercase. --- --- This function correctly handles cases where a lowercase character doesn't --- match to a single uppercase character – e.g. “Straße” would be converted --- to “STRASSE”, not “STRAßE”. -capitalize :: Walkable Inline a => a -> a -capitalize = walk go - where go :: Inline -> Inline - go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s) - go x = x - --- | Change final list item from @Para@ to @Plain@ if the list contains --- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather --- than @[Block]@. -compactify :: [Blocks] -- ^ List of list items (each a list of blocks) - -> [Blocks] -compactify [] = [] -compactify items = - let (others, final) = (init items, last items) - in case reverse (B.toList final) of - (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of - -- if this is only Para, change to Plain - [_] -> others ++ [B.fromList (reverse $ Plain a : xs)] - _ -> items - _ -> items - --- | Like @compactify@, but acts on items of definition lists. -compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] -compactifyDL items = - let defs = concatMap snd items - in case reverse (concatMap B.toList defs) of - (Para x:xs) - | not (any isPara xs) -> - let (t,ds) = last items - lastDef = B.toList $ last ds - ds' = init ds ++ - if null lastDef - then [B.fromList lastDef] - else [B.fromList $ init lastDef ++ [Plain x]] - in init items ++ [(t, ds')] - | otherwise -> items - _ -> items - --- | Combine a list of lines by adding hard linebreaks. -combineLines :: [[Inline]] -> [Inline] -combineLines = intercalate [LineBreak] - --- | Convert a list of lines into a paragraph with hard line breaks. This is --- useful e.g. for rudimentary support of LineBlock elements in writers. -linesToPara :: [[Inline]] -> Block -linesToPara = Para . combineLines - -isPara :: Block -> Bool -isPara (Para _) = True -isPara _ = False - --- | Data structure for defining hierarchical Pandoc documents -data Element = Blk Block - | Sec Int [Int] Attr [Inline] [Element] - -- lvl num attributes label contents - deriving (Eq, Read, Show, Typeable, Data) - -instance Walkable Inline Element where - walk f (Blk x) = Blk (walk f x) - walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts) - walkM f (Blk x) = Blk `fmap` walkM f x - walkM f (Sec lev nums attr ils elts) = do - ils' <- walkM f ils - elts' <- walkM f elts - return $ Sec lev nums attr ils' elts' - query f (Blk x) = query f x - query f (Sec _ _ _ ils elts) = query f ils <> query f elts - -instance Walkable Block Element where - walk f (Blk x) = Blk (walk f x) - walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts) - walkM f (Blk x) = Blk `fmap` walkM f x - walkM f (Sec lev nums attr ils elts) = do - ils' <- walkM f ils - elts' <- walkM f elts - return $ Sec lev nums attr ils' elts' - query f (Blk x) = query f x - query f (Sec _ _ _ ils elts) = query f ils <> query f elts - - --- | Convert Pandoc inline list to plain text identifier. HTML --- identifiers must start with a letter, and may contain only --- letters, digits, and the characters _-. -inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier = - dropWhile (not . isAlpha) . intercalate "-" . words . - map (nbspToSp . toLower) . - filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") . - stringify - where nbspToSp '\160' = ' ' - nbspToSp x = x - --- | Convert list of Pandoc blocks into (hierarchical) list of Elements -hierarchicalize :: [Block] -> [Element] -hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) [] - -hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element] -hierarchicalizeWithIds [] = return [] -hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do - lastnum <- S.get - let lastnum' = take level lastnum - let newnum = case length lastnum' of - x | "unnumbered" `elem` classes -> [] - | x >= level -> init lastnum' ++ [last lastnum' + 1] - | otherwise -> lastnum ++ - replicate (level - length lastnum - 1) 0 ++ [1] - unless (null newnum) $ S.put newnum - let (sectionContents, rest) = break (headerLtEq level) xs - sectionContents' <- hierarchicalizeWithIds sectionContents - rest' <- hierarchicalizeWithIds rest - return $ Sec level newnum attr title' sectionContents' : rest' -hierarchicalizeWithIds ((Div ("",["references"],[]) - (Header level (ident,classes,kvs) title' : xs)):ys) = - hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs) - title') : (xs ++ ys)) -hierarchicalizeWithIds (x:rest) = do - rest' <- hierarchicalizeWithIds rest - return $ (Blk x) : rest' - -headerLtEq :: Int -> Block -> Bool -headerLtEq level (Header l _ _) = l <= level -headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level -headerLtEq _ _ = False - --- | Generate a unique identifier from a list of inlines. --- Second argument is a list of already used identifiers. -uniqueIdent :: [Inline] -> Set.Set String -> String -uniqueIdent title' usedIdents - = let baseIdent = case inlineListToIdentifier title' of - "" -> "section" - x -> x - numIdent n = baseIdent ++ "-" ++ show n - in if baseIdent `Set.member` usedIdents - then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of - Just x -> numIdent x - Nothing -> baseIdent -- if we have more than 60,000, allow repeats - else baseIdent - --- | True if block is a Header block. -isHeaderBlock :: Block -> Bool -isHeaderBlock (Header _ _ _) = True -isHeaderBlock _ = False - --- | Shift header levels up or down. -headerShift :: Int -> Pandoc -> Pandoc -headerShift n = walk shift - where shift :: Block -> Block - shift (Header level attr inner) = Header (level + n) attr inner - shift x = x - --- | Detect if a list is tight. -isTightList :: [[Block]] -> Bool -isTightList = all firstIsPlain - where firstIsPlain (Plain _ : _) = True - firstIsPlain _ = False - --- | Set a field of a 'Meta' object. If the field already has a value, --- convert it into a list with the new value appended to the old value(s). -addMetaField :: ToMetaValue a - => String - -> a - -> Meta - -> Meta -addMetaField key val (Meta meta) = - Meta $ M.insertWith combine key (toMetaValue val) meta - where combine newval (MetaList xs) = MetaList (xs ++ tolist newval) - combine newval x = MetaList [x, newval] - tolist (MetaList ys) = ys - tolist y = [y] - --- | Create 'Meta' from old-style title, authors, date. This is --- provided to ease the transition from the old API. -makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta -makeMeta title authors date = - addMetaField "title" (B.fromList title) - $ addMetaField "author" (map B.fromList authors) - $ addMetaField "date" (B.fromList date) - $ nullMeta - --- --- TagSoup HTML handling --- - --- | Render HTML tags. -renderTags' :: [Tag String] -> String -renderTags' = renderTagsOptions - renderOptions{ optMinimize = matchTags ["hr", "br", "img", - "meta", "link"] - , optRawTag = matchTags ["script", "style"] } - where matchTags = \tags -> flip elem tags . map toLower - --- --- File handling --- - --- | Perform an IO action in a directory, returning to starting directory. -inDirectory :: FilePath -> IO a -> IO a -inDirectory path action = E.bracket - getCurrentDirectory - setCurrentDirectory - (const $ setCurrentDirectory path >> action) - -getDefaultReferenceDocx :: Maybe FilePath -> IO Archive -getDefaultReferenceDocx datadir = do - let paths = ["[Content_Types].xml", - "_rels/.rels", - "docProps/app.xml", - "docProps/core.xml", - "word/document.xml", - "word/fontTable.xml", - "word/footnotes.xml", - "word/numbering.xml", - "word/settings.xml", - "word/webSettings.xml", - "word/styles.xml", - "word/_rels/document.xml.rels", - "word/_rels/footnotes.xml.rels", - "word/theme/theme1.xml"] - let toLazy = fromChunks . (:[]) - let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$> - getCurrentTime - contents <- toLazy <$> readDataFile datadir - ("docx/" ++ path) - return $ toEntry path epochtime contents - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- doesFileExist (d </> "reference.docx") - if exists - then return (Just (d </> "reference.docx")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> BL.readFile arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - -getDefaultReferenceODT :: Maybe FilePath -> IO Archive -getDefaultReferenceODT datadir = do - let paths = ["mimetype", - "manifest.rdf", - "styles.xml", - "content.xml", - "meta.xml", - "settings.xml", - "Configurations2/accelerator/current.xml", - "Thumbnails/thumbnail.png", - "META-INF/manifest.xml"] - let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime - contents <- (fromChunks . (:[])) `fmap` - readDataFile datadir ("odt/" ++ path) - return $ toEntry path epochtime contents - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- doesFileExist (d </> "reference.odt") - if exists - then return (Just (d </> "reference.odt")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> BL.readFile arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - - -readDefaultDataFile :: FilePath -> IO BS.ByteString -readDefaultDataFile "reference.docx" = - (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing -readDefaultDataFile "reference.odt" = - (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing -readDefaultDataFile fname = -#ifdef EMBED_DATA_FILES - case lookup (makeCanonical fname) dataFiles of - Nothing -> err 97 $ "Could not find data file " ++ fname - Just contents -> return contents - where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories - transformPathParts = reverse . foldl go [] - go as "." = as - go (_:as) ".." = as - go as x = x : as -#else - getDataFileName fname' >>= checkExistence >>= BS.readFile - where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname - -checkExistence :: FilePath -> IO FilePath -checkExistence fn = do - exists <- doesFileExist fn - if exists - then return fn - else err 97 ("Could not find data file " ++ fn) -#endif - --- | Read file from specified user data directory or, if not found there, from --- Cabal data directory. -readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString -readDataFile Nothing fname = readDefaultDataFile fname -readDataFile (Just userDir) fname = do - exists <- doesFileExist (userDir </> fname) - if exists - then BS.readFile (userDir </> fname) - else readDefaultDataFile fname - --- | Same as 'readDataFile' but returns a String instead of a ByteString. -readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String -readDataFileUTF8 userDir fname = - UTF8.toString `fmap` readDataFile userDir fname - --- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) -openURL u - | Just u'' <- stripPrefix "data:" u = - let mime = takeWhile (/=',') u'' - contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'' - in return $ Right (decodeLenient contents, Just mime) -#ifdef HTTP_CLIENT - | otherwise = withSocketsDo $ E.try $ do - let parseReq = parseRequest - (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy" - (useragent :: Either E.SomeException String) <- E.try $ getEnv "USER_AGENT" - req <- parseReq u - req' <- case proxy of - Left _ -> return req - Right pr -> (parseReq pr >>= \r -> - return $ addProxy (host r) (port r) req) - `mplus` return req - req'' <- case useragent of - Left _ -> return req' - Right ua -> do - let headers = requestHeaders req' - let useragentheader = (hUserAgent, B8.pack ua) - let headers' = useragentheader:headers - return $ req' {requestHeaders = headers'} - resp <- newManager tlsManagerSettings >>= httpLbs req'' - return (BS.concat $ toChunks $ responseBody resp, - UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) -#else - | otherwise = E.try $ getBodyAndMimeType `fmap` browse - (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." - setOutHandler $ const (return ()) - setAllowRedirects True - request (getRequest' u')) - where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r) - getRequest' uriString = case parseURI uriString of - Nothing -> error ("Not a valid URL: " ++ - uriString) - Just v -> mkRequest GET v - u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI -#endif - --- --- Error reporting --- - -err :: MonadIO m => Int -> String -> m a -err exitCode msg = liftIO $ do - UTF8.hPutStrLn stderr msg - exitWith $ ExitFailure exitCode - return undefined - -warn :: MonadIO m => String -> m () -warn msg = liftIO $ do - UTF8.hPutStrLn stderr $ "[warning] " ++ msg - -mapLeft :: (a -> b) -> Either a c -> Either b c -mapLeft f (Left x) = Left (f x) -mapLeft _ (Right x) = Right x - --- | Remove intermediate "." and ".." directories from a path. --- --- > collapseFilePath "./foo" == "foo" --- > collapseFilePath "/bar/../baz" == "/baz" --- > collapseFilePath "/../baz" == "/../baz" --- > collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar" --- > collapseFilePath "parent/foo/baz/../../bar" == "parent/bar" --- > collapseFilePath "parent/foo/.." == "parent" --- > collapseFilePath "/parent/foo/../../bar" == "/bar" -collapseFilePath :: FilePath -> FilePath -collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories - where - go rs "." = rs - go r@(p:rs) ".." = case p of - ".." -> ("..":r) - (checkPathSeperator -> Just True) -> ("..":r) - _ -> rs - go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]] - go rs x = x:rs - isSingleton [] = Nothing - isSingleton [x] = Just x - isSingleton _ = Nothing - checkPathSeperator = fmap isPathSeparator . isSingleton - --- --- File selection from the archive --- -filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)] -filteredFilesFromArchive zf f = - mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf)) - where - fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString) - fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e) - ---- ---- Squash blocks into inlines ---- - -blockToInlines :: Block -> [Inline] -blockToInlines (Plain ils) = ils -blockToInlines (Para ils) = ils -blockToInlines (LineBlock lns) = combineLines lns -blockToInlines (CodeBlock attr str) = [Code attr str] -blockToInlines (RawBlock fmt str) = [RawInline fmt str] -blockToInlines (BlockQuote blks) = blocksToInlines blks -blockToInlines (OrderedList _ blkslst) = - concatMap blocksToInlines blkslst -blockToInlines (BulletList blkslst) = - concatMap blocksToInlines blkslst -blockToInlines (DefinitionList pairslst) = - concatMap f pairslst - where - f (ils, blkslst) = ils ++ - [Str ":", Space] ++ - (concatMap blocksToInlines blkslst) -blockToInlines (Header _ _ ils) = ils -blockToInlines (HorizontalRule) = [] -blockToInlines (Table _ _ _ headers rows) = - intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl - where - tbl = headers : rows -blockToInlines (Div _ blks) = blocksToInlines blks -blockToInlines Null = [] - -blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline] -blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks - -blocksToInlines :: [Block] -> [Inline] -blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space] - - --- --- Safe read --- - -safeRead :: (MonadPlus m, Read a) => String -> m a -safeRead s = case reads s of - (d,x):_ - | all isSpace x -> return d - _ -> mzero - --- --- Temp directory --- - -withTempDir :: String -> (FilePath -> IO a) -> IO a -withTempDir = -#ifdef _WINDOWS - withTempDirectory "." -#else - withSystemTempDirectory -#endif |