-- | Utility functions and definitions used by the various Pandoc modules. module Text.Pandoc.Shared ( -- * Text processing gsub, joinWithSep, tabsToSpaces, backslashEscape, escapePreservingRegex, endsWith, stripTrailingNewlines, removeLeadingTrailingSpace, removeLeadingSpace, removeTrailingSpace, -- * Parsing readWith, testStringWith, HeaderType (..), ParserContext (..), ParserState (..), defaultParserState, -- * Native format prettyprinting prettyPandoc, -- * Pandoc block list processing consolidateList, isNoteBlock, splitBySpace, normalizeSpaces, compactify, generateReference, WriterOptions (..), KeyTable, keyTable, lookupKeySrc, refsMatch, replaceReferenceLinks, replaceRefLinksBlockList ) where import Text.Pandoc.Definition import Text.ParserCombinators.Parsec import Text.Pandoc.HtmlEntities ( decodeEntities ) import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex ) import Char ( toLower ) import List ( find, groupBy ) -- | Parse a string with a given parser and state. readWith :: GenParser Char ParserState a -- ^ parser -> ParserState -- ^ initial state -> String -- ^ input string -> a readWith parser state input = case runParser parser state "source" input of Left err -> error $ "\nError:\n" ++ show err Right result -> result -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a) => GenParser Char ParserState a -> String -> IO () testStringWith parser str = putStrLn $ show $ readWith parser defaultParserState str -- | Parser state data HeaderType = SingleHeader Char -- ^ Single line of characters underneath | DoubleHeader Char -- ^ Lines of characters above and below deriving (Eq, Show) data ParserContext = BlockQuoteState -- ^ Used when running parser on contents of blockquote | ListItemState -- ^ Used when running parser on list item contents | NullState -- ^ Default state deriving (Eq, Show) data ParserState = ParserState { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML and LaTeX? stateParserContext :: ParserContext, -- ^ What are we parsing? stateKeyBlocks :: [Block], -- ^ List of reference key blocks stateKeysUsed :: [[Inline]], -- ^ List of references used so far stateNoteBlocks :: [Block], -- ^ List of note blocks stateTabStop :: Int, -- ^ Tab stop stateStandalone :: Bool, -- ^ If @True@, parse bibliographic info stateTitle :: [Inline], -- ^ Title of document stateAuthors :: [String], -- ^ Authors of document stateDate :: String, -- ^ Date of document stateHeaderTable :: [HeaderType] } -- ^ List of header types used, in what order (for reStructuredText only) deriving Show defaultParserState :: ParserState defaultParserState = ParserState { stateParseRaw = False, stateParserContext = NullState, stateKeyBlocks = [], stateKeysUsed = [], stateNoteBlocks = [], stateTabStop = 4, stateStandalone = False, stateTitle = [], stateAuthors = [], stateDate = [], stateHeaderTable = [] } -- | Consolidate @Str@s and @Space@s in an inline list into one big @Str@. -- Collapse adjacent @Space@s. consolidateList :: [Inline] -> [Inline] consolidateList ((Str a):(Str b):rest) = consolidateList ((Str (a ++ b)):rest) consolidateList ((Str a):Space:rest) = consolidateList ((Str (a ++ " ")):rest) consolidateList (Space:(Str a):rest) = consolidateList ((Str (" " ++ a)):rest) consolidateList (Space:Space:rest) = consolidateList ((Str " "):rest) consolidateList (inline:rest) = inline:(consolidateList rest) consolidateList [] = [] -- | Indent string as a block. indentBy :: Int -- ^ Number of spaces to indent the block -> Int -- ^ Number of spaces to indent first line, relative to block -> String -- ^ Contents of block to indent -> String indentBy num first [] = "" indentBy num first str = let (firstLine:restLines) = lines str firstLineIndent = num + first in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ (joinWithSep "\n" $ map (\line -> (replicate num ' ') ++ line) restLines) -- | Prettyprint list of Pandoc blocks elements. prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks -> [Block] -- ^ List of blocks -> String prettyBlockList indent [] = indentBy indent 0 "[]" prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" -- | Prettyprint Pandoc block element. prettyBlock :: Block -> String prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ (prettyBlockList 2 blocks) prettyBlock (Note ref blocks) = "Note " ++ (show ref) ++ "\n " ++ (prettyBlockList 2 blocks) prettyBlock (OrderedList blockLists) = "OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" prettyBlock (BulletList blockLists) = "BulletList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" prettyBlock block = show block -- | Prettyprint Pandoc document. prettyPandoc :: Pandoc -> String prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++ ")\n" ++ (prettyBlockList 0 blocks) -- | Convert tabs to spaces (with adjustable tab stop). tabsToSpaces :: Int -- ^ Tabstop -> String -- ^ String to convert -> String tabsToSpaces tabstop str = unlines (map (tabsInLine tabstop tabstop) (lines str)) -- | Convert tabs to spaces in one line. tabsInLine :: Int -- ^ Number of spaces to next tab stop -> Int -- ^ Tabstop -> String -- ^ Line to convert -> String tabsInLine num tabstop "" = "" tabsInLine num tabstop (c:cs) = let replacement = (if (c == '\t') then (replicate num ' ') else [c]) in let nextnumraw = (num - (length replacement)) in let nextnum = if (nextnumraw < 1) then (nextnumraw + tabstop) else nextnumraw in replacement ++ (tabsInLine nextnum tabstop cs) -- | Substitute string for every occurrence of regular expression. gsub :: String -- ^ Regular expression (as string) to substitute for -> String -- ^ String to substitute for the regex -> String -- ^ String to be substituted in -> String gsub regex replacement str = subRegex (mkRegex regex) str replacement -- | Escape designated characters with backslash. backslashEscape :: [Char] -- ^ list of special characters to escape -> String -- ^ string input -> String backslashEscape special [] = [] backslashEscape special (x:xs) = if x `elem` special then '\\':x:(backslashEscape special xs) else x:(backslashEscape special xs) -- | Escape string by applying a function, but don't touch anything that matches regex. escapePreservingRegex :: (String -> String) -- ^ Escaping function -> Regex -- ^ Regular expression -> String -- ^ String to be escaped -> String escapePreservingRegex escapeFunction regex str = case (matchRegexAll regex str) of Nothing -> escapeFunction str Just (before, matched, after, _) -> (escapeFunction before) ++ matched ++ (escapePreservingRegex escapeFunction regex after) -- | Returns @True@ if string ends with given character. endsWith :: Char -> [Char] -> Bool endsWith char [] = False endsWith char str = (char == last str) -- | Returns @True@ if block is a @Note@ block isNoteBlock :: Block -> Bool isNoteBlock (Note ref blocks) = True isNoteBlock _ = False -- | Joins a list of lists, separated by another list. joinWithSep :: [a] -- ^ List to use as separator -> [[a]] -- ^ Lists to join -> [a] joinWithSep sep [] = [] joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst -- | Strip trailing newlines from string. stripTrailingNewlines :: String -> String stripTrailingNewlines "" = "" stripTrailingNewlines str = if (last str) == '\n' then stripTrailingNewlines (init str) else str -- | Remove leading and trailing space (including newlines) from string. removeLeadingTrailingSpace :: String -> String removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace -- | Remove leading space (including newlines) from string. removeLeadingSpace :: String -> String removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') || (x == '\t')) -- | Remove trailing space (including newlines) from string. removeTrailingSpace :: String -> String removeTrailingSpace = reverse . removeLeadingSpace . reverse -- | Split list of inlines into groups separated by a space. splitBySpace :: [Inline] -> [[Inline]] splitBySpace lst = filter (\a -> (/= Space) (head a)) (groupBy (\a b -> (/= Space) a && (/= Space) b) lst) -- | Normalize a list of inline elements: remove leading and trailing -- @Space@ elements, and collapse double @Space@s into singles. normalizeSpaces :: [Inline] -> [Inline] normalizeSpaces [] = [] normalizeSpaces list = let removeDoubles [] = [] removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) removeDoubles (x:rest) = x:(removeDoubles rest) in let removeLeading [] = [] removeLeading lst = if ((head lst) == Space) then tail lst else lst in let removeTrailing [] = [] removeTrailing lst = if ((last lst) == Space) then init lst else lst in removeLeading $ removeTrailing $ removeDoubles list -- | Change final list item from @Para@ to @Plain@ if the list should be compact. compactify :: [[Block]] -- ^ List of list items (each a list of blocks) -> [[Block]] compactify [] = [] compactify items = let final = last items others = init items in case final of [Para a] -> if any containsPara others then items else others ++ [[Plain a]] otherwise -> items containsPara :: [Block] -> Bool containsPara [] = False containsPara ((Para a):rest) = True containsPara ((BulletList items):rest) = (any containsPara items) || (containsPara rest) containsPara ((OrderedList items):rest) = (any containsPara items) || (containsPara rest) containsPara (x:rest) = containsPara rest -- | Options for writers data WriterOptions = WriterOptions { writerStandalone :: Bool -- ^ If @True@, writer header and footer , writerTitlePrefix :: String -- ^ Prefix for HTML titles , writerHeader :: String -- ^ Header for the document , writerIncludeBefore :: String -- ^ String to include before the document body , writerIncludeAfter :: String -- ^ String to include after the document body , writerSmartypants :: Bool -- ^ If @True@, use smart quotes, dashes, and ellipses , writerS5 :: Bool -- ^ @True@ if we're writing S5 instead of normal HTML , writerIncremental :: Bool -- ^ If @True@, display S5 lists incrementally , writerNumberSections :: Bool -- ^ If @True@, number sections in LaTeX , writerTabStop :: Int } -- ^ Tabstop for conversion between spaces and tabs deriving Show -- -- Functions for constructing lists of reference keys -- -- | Returns @Just@ numerical key reference if there's already a key -- for the specified target in the list of blocks, otherwise @Nothing@. keyFoundIn :: [Block] -- ^ List of key blocks to search -> Target -- ^ Target to search for -> Maybe String keyFoundIn [] src = Nothing keyFoundIn ((Key [Str num] src1):rest) src = if (src1 == src) then Just num else keyFoundIn rest src keyFoundIn (_:rest) src = keyFoundIn rest src -- | Return next unique numerical key, given keyList nextUniqueKey :: [[Inline]] -> String nextUniqueKey keys = let nums = [1..10000] notAKey n = not (any (== [Str (show n)]) keys) in case (find notAKey nums) of Just x -> show x Nothing -> error "Could not find unique key for reference link" -- | Generate a reference for a URL (either an existing reference, if -- there is one, or a new one, if there isn't) and update parser state. generateReference :: String -- ^ URL -> String -- ^ Title -> GenParser tok ParserState Target generateReference url title = do let src = Src (decodeEntities url) (decodeEntities title) state <- getState let keyBlocks = stateKeyBlocks state let keysUsed = stateKeysUsed state case (keyFoundIn keyBlocks src) of Just num -> return (Ref [Str num]) Nothing -> do let nextNum = nextUniqueKey keysUsed updateState (\st -> st {stateKeyBlocks = (Key [Str nextNum] src):keyBlocks, stateKeysUsed = [Str nextNum]:keysUsed}) return (Ref [Str nextNum]) -- -- code to replace reference links with real links and remove unneeded key blocks -- type KeyTable = [([Inline], Target)] -- | Returns @True@ if block is a Key block isRefBlock :: Block -> Bool isRefBlock (Key _ _) = True isRefBlock _ = False -- | Returns a pair of a list of pairs of keys and associated sources, and a new -- list of blocks with the included key blocks deleted. keyTable :: [Block] -> (KeyTable, [Block]) keyTable [] = ([],[]) keyTable ((Key ref target):lst) = (((ref, target):table), rest) where (table, rest) = keyTable lst keyTable (Null:lst) = keyTable lst -- get rid of Nulls keyTable (Blank:lst) = keyTable lst -- get rid of Blanks keyTable ((BlockQuote blocks):lst) = ((table1 ++ table2), ((BlockQuote rest1):rest2)) where (table1, rest1) = keyTable blocks (table2, rest2) = keyTable lst keyTable ((Note ref blocks):lst) = ((table1 ++ table2), ((Note ref rest1):rest2)) where (table1, rest1) = keyTable blocks (table2, rest2) = keyTable lst keyTable ((OrderedList blockLists):lst) = ((table1 ++ table2), ((OrderedList rest1):rest2)) where results = map keyTable blockLists rest1 = map snd results table1 = concatMap fst results (table2, rest2) = keyTable lst keyTable ((BulletList blockLists):lst) = ((table1 ++ table2), ((BulletList rest1):rest2)) where results = map keyTable blockLists rest1 = map snd results table1 = concatMap fst results (table2, rest2) = keyTable lst keyTable (other:lst) = (table, (other:rest)) where (table, rest) = keyTable lst -- | Look up key in key table and return target object. lookupKeySrc :: KeyTable -- ^ Key table -> [Inline] -- ^ Key -> Maybe Target lookupKeySrc table key = case table of [] -> Nothing (k, src):rest -> if (refsMatch k key) then Just src else lookupKeySrc rest key -- | Returns @True@ if keys match (case insensitive). refsMatch :: [Inline] -> [Inline] -> Bool refsMatch ((Str x):restx) ((Str y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((Code x):restx) ((Code y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((TeX x):restx) ((TeX y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((NoteRef x):restx) ((NoteRef y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((Emph x):restx) ((Emph y):resty) = refsMatch x y && refsMatch restx resty refsMatch ((Strong x):restx) ((Strong y):resty) = refsMatch x y && refsMatch restx resty refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty refsMatch [] x = null x refsMatch x [] = null x -- | Replace reference links with explicit links in list of blocks, removing key blocks. replaceReferenceLinks :: [Block] -> [Block] replaceReferenceLinks blocks = let (keytable, purged) = keyTable blocks in replaceRefLinksBlockList keytable purged -- | Use key table to replace reference links with explicit links in a list of blocks replaceRefLinksBlockList :: KeyTable -> [Block] -> [Block] replaceRefLinksBlockList keytable lst = map (replaceRefLinksBlock keytable) lst -- | Use key table to replace reference links with explicit links in a block replaceRefLinksBlock :: KeyTable -> Block -> Block replaceRefLinksBlock keytable (Plain lst) = Plain (map (replaceRefLinksInline keytable) lst) replaceRefLinksBlock keytable (Para lst) = Para (map (replaceRefLinksInline keytable) lst) replaceRefLinksBlock keytable (Header lvl lst) = Header lvl (map (replaceRefLinksInline keytable) lst) replaceRefLinksBlock keytable (BlockQuote lst) = BlockQuote (map (replaceRefLinksBlock keytable) lst) replaceRefLinksBlock keytable (Note ref lst) = Note ref (map (replaceRefLinksBlock keytable) lst) replaceRefLinksBlock keytable (OrderedList lst) = OrderedList (map (replaceRefLinksBlockList keytable) lst) replaceRefLinksBlock keytable (BulletList lst) = BulletList (map (replaceRefLinksBlockList keytable) lst) replaceRefLinksBlock keytable other = other -- | Use key table to replace reference links with explicit links in an inline element. replaceRefLinksInline :: KeyTable -> Inline -> Inline replaceRefLinksInline keytable (Link text (Ref ref)) = (Link newText newRef) where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of Nothing -> (Ref ref) Just src -> src newText = map (replaceRefLinksInline keytable) text replaceRefLinksInline keytable (Image text (Ref ref)) = (Image newText newRef) where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of Nothing -> (Ref ref) Just src -> src newText = map (replaceRefLinksInline keytable) text replaceRefLinksInline keytable (Emph lst) = Emph (map (replaceRefLinksInline keytable) lst) replaceRefLinksInline keytable (Strong lst) = Strong (map (replaceRefLinksInline keytable) lst) replaceRefLinksInline keytable other = other