diff options
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 417 |
1 files changed, 417 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs new file mode 100644 index 000000000..dc58dd6b4 --- /dev/null +++ b/src/Text/Pandoc/Shared.hs @@ -0,0 +1,417 @@ +-- | Utility functions and definitions used by the various Pandoc modules. +module Text.Pandoc.Shared ( + -- * Text processing + gsub, + joinWithSep, + tabsToSpaces, + backslashEscape, + escapePreservingRegex, + endsWith, + stripTrailingNewlines, + removeLeadingTrailingSpace, + removeLeadingSpace, + removeTrailingSpace, + -- * Parsing + readWith, + testStringWith, + HeaderType (..), + ParserContext (..), + ParserState (..), + defaultParserState, + -- * Native format prettyprinting + prettyPandoc, + -- * Pandoc block list processing + consolidateList, + isNoteBlock, + splitBySpace, + normalizeSpaces, + compactify, + generateReference, + WriterOptions (..), + KeyTable, + keyTable, + lookupKeySrc, + refsMatch, + replaceReferenceLinks, + replaceRefLinksBlockList + ) where +import Text.Pandoc.Definition +import Text.ParserCombinators.Parsec +import Text.Pandoc.HtmlEntities ( decodeEntities ) +import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex ) +import Char ( toLower ) +import List ( find, groupBy ) + +-- | Parse a string with a given parser and state. +readWith :: GenParser Char ParserState a -- ^ parser + -> ParserState -- ^ initial state + -> String -- ^ input string + -> a +readWith parser state input = + case runParser parser state "source" input of + Left err -> error $ "\nError:\n" ++ show err + Right result -> result + +-- | Parse a string with @parser@ (for testing). +testStringWith :: (Show a) => + GenParser Char ParserState a + -> String + -> IO () +testStringWith parser str = putStrLn $ show $ readWith parser defaultParserState str + +-- | Parser state + +data HeaderType + = SingleHeader Char -- ^ Single line of characters underneath + | DoubleHeader Char -- ^ Lines of characters above and below + deriving (Eq, Show) + +data ParserContext + = BlockQuoteState -- ^ Used when running parser on contents of blockquote + | ListItemState -- ^ Used when running parser on list item contents + | NullState -- ^ Default state + deriving (Eq, Show) + +data ParserState = ParserState + { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML and LaTeX? + stateParserContext :: ParserContext, -- ^ What are we parsing? + stateKeyBlocks :: [Block], -- ^ List of reference key blocks + stateKeysUsed :: [[Inline]], -- ^ List of references used so far + stateNoteBlocks :: [Block], -- ^ List of note blocks + stateTabStop :: Int, -- ^ Tab stop + stateStandalone :: Bool, -- ^ If @True@, parse bibliographic info + stateTitle :: [Inline], -- ^ Title of document + stateAuthors :: [String], -- ^ Authors of document + stateDate :: String, -- ^ Date of document + stateHeaderTable :: [HeaderType] } -- ^ List of header types used, in what order (for reStructuredText only) + deriving Show + +defaultParserState :: ParserState +defaultParserState = + ParserState { stateParseRaw = False, + stateParserContext = NullState, + stateKeyBlocks = [], + stateKeysUsed = [], + stateNoteBlocks = [], + stateTabStop = 4, + stateStandalone = False, + stateTitle = [], + stateAuthors = [], + stateDate = [], + stateHeaderTable = [] } + +-- | Consolidate @Str@s and @Space@s in an inline list into one big @Str@. +-- Collapse adjacent @Space@s. +consolidateList :: [Inline] -> [Inline] +consolidateList ((Str a):(Str b):rest) = consolidateList ((Str (a ++ b)):rest) +consolidateList ((Str a):Space:rest) = consolidateList ((Str (a ++ " ")):rest) +consolidateList (Space:(Str a):rest) = consolidateList ((Str (" " ++ a)):rest) +consolidateList (Space:Space:rest) = consolidateList ((Str " "):rest) +consolidateList (inline:rest) = inline:(consolidateList rest) +consolidateList [] = [] + +-- | Indent string as a block. +indentBy :: Int -- ^ Number of spaces to indent the block + -> Int -- ^ Number of spaces to indent first line, relative to block + -> String -- ^ Contents of block to indent + -> String +indentBy num first [] = "" +indentBy num first str = + let (firstLine:restLines) = lines str + firstLineIndent = num + first in + (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ (joinWithSep "\n" $ map (\line -> (replicate num ' ') ++ line) restLines) + +-- | Prettyprint list of Pandoc blocks elements. +prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks + -> [Block] -- ^ List of blocks + -> String +prettyBlockList indent [] = indentBy indent 0 "[]" +prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" + +-- | Prettyprint Pandoc block element. +prettyBlock :: Block -> String +prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ (prettyBlockList 2 blocks) +prettyBlock (Note ref blocks) = "Note " ++ (show ref) ++ "\n " ++ (prettyBlockList 2 blocks) +prettyBlock (OrderedList blockLists) = "OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock (BulletList blockLists) = "BulletList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock block = show block + +-- | Prettyprint Pandoc document. +prettyPandoc :: Pandoc -> String +prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++ ")\n" ++ (prettyBlockList 0 blocks) + +-- | Convert tabs to spaces (with adjustable tab stop). +tabsToSpaces :: Int -- ^ Tabstop + -> String -- ^ String to convert + -> String +tabsToSpaces tabstop str = + unlines (map (tabsInLine tabstop tabstop) (lines str)) + +-- | Convert tabs to spaces in one line. +tabsInLine :: Int -- ^ Number of spaces to next tab stop + -> Int -- ^ Tabstop + -> String -- ^ Line to convert + -> String +tabsInLine num tabstop "" = "" +tabsInLine num tabstop (c:cs) = + let replacement = (if (c == '\t') then (replicate num ' ') else [c]) in + let nextnumraw = (num - (length replacement)) in + let nextnum = if (nextnumraw < 1) then (nextnumraw + tabstop) else nextnumraw in + replacement ++ (tabsInLine nextnum tabstop cs) + +-- | Substitute string for every occurrence of regular expression. +gsub :: String -- ^ Regular expression (as string) to substitute for + -> String -- ^ String to substitute for the regex + -> String -- ^ String to be substituted in + -> String +gsub regex replacement str = subRegex (mkRegex regex) str replacement + +-- | Escape designated characters with backslash. +backslashEscape :: [Char] -- ^ list of special characters to escape + -> String -- ^ string input + -> String +backslashEscape special [] = [] +backslashEscape special (x:xs) = if x `elem` special then + '\\':x:(backslashEscape special xs) + else + x:(backslashEscape special xs) + +-- | Escape string by applying a function, but don't touch anything that matches regex. +escapePreservingRegex :: (String -> String) -- ^ Escaping function + -> Regex -- ^ Regular expression + -> String -- ^ String to be escaped + -> String +escapePreservingRegex escapeFunction regex str = + case (matchRegexAll regex str) of + Nothing -> escapeFunction str + Just (before, matched, after, _) -> + (escapeFunction before) ++ matched ++ + (escapePreservingRegex escapeFunction regex after) + +-- | Returns @True@ if string ends with given character. +endsWith :: Char -> [Char] -> Bool +endsWith char [] = False +endsWith char str = (char == last str) + +-- | Returns @True@ if block is a @Note@ block +isNoteBlock :: Block -> Bool +isNoteBlock (Note ref blocks) = True +isNoteBlock _ = False + +-- | Joins a list of lists, separated by another list. +joinWithSep :: [a] -- ^ List to use as separator + -> [[a]] -- ^ Lists to join + -> [a] +joinWithSep sep [] = [] +joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst + +-- | Strip trailing newlines from string. +stripTrailingNewlines :: String -> String +stripTrailingNewlines "" = "" +stripTrailingNewlines str = + if (last str) == '\n' then + stripTrailingNewlines (init str) + else + str + +-- | Remove leading and trailing space (including newlines) from string. +removeLeadingTrailingSpace :: String -> String +removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace + +-- | Remove leading space (including newlines) from string. +removeLeadingSpace :: String -> String +removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') || (x == '\t')) + +-- | Remove trailing space (including newlines) from string. +removeTrailingSpace :: String -> String +removeTrailingSpace = reverse . removeLeadingSpace . reverse + +-- | Split list of inlines into groups separated by a space. +splitBySpace :: [Inline] -> [[Inline]] +splitBySpace lst = filter (\a -> (/= Space) (head a)) + (groupBy (\a b -> (/= Space) a && (/= Space) b) lst) + +-- | Normalize a list of inline elements: remove leading and trailing +-- @Space@ elements, and collapse double @Space@s into singles. +normalizeSpaces :: [Inline] -> [Inline] +normalizeSpaces [] = [] +normalizeSpaces list = + let removeDoubles [] = [] + removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) + removeDoubles (x:rest) = x:(removeDoubles rest) in + let removeLeading [] = [] + removeLeading lst = if ((head lst) == Space) then tail lst else lst in + let removeTrailing [] = [] + removeTrailing lst = if ((last lst) == Space) then init lst else lst in + removeLeading $ removeTrailing $ removeDoubles list + +-- | Change final list item from @Para@ to @Plain@ if the list should be compact. +compactify :: [[Block]] -- ^ List of list items (each a list of blocks) + -> [[Block]] +compactify [] = [] +compactify items = + let final = last items + others = init items in + case final of + [Para a] -> if any containsPara others then items else others ++ [[Plain a]] + otherwise -> items + +containsPara :: [Block] -> Bool +containsPara [] = False +containsPara ((Para a):rest) = True +containsPara ((BulletList items):rest) = (any containsPara items) || (containsPara rest) +containsPara ((OrderedList items):rest) = (any containsPara items) || (containsPara rest) +containsPara (x:rest) = containsPara rest + +-- | Options for writers +data WriterOptions = WriterOptions + { writerStandalone :: Bool -- ^ If @True@, writer header and footer + , writerTitlePrefix :: String -- ^ Prefix for HTML titles + , writerHeader :: String -- ^ Header for the document + , writerIncludeBefore :: String -- ^ String to include before the document body + , writerIncludeAfter :: String -- ^ String to include after the document body + , writerSmartypants :: Bool -- ^ If @True@, use smart quotes, dashes, and ellipses + , writerS5 :: Bool -- ^ @True@ if we're writing S5 instead of normal HTML + , writerIncremental :: Bool -- ^ If @True@, display S5 lists incrementally + , writerNumberSections :: Bool -- ^ If @True@, number sections in LaTeX + , writerTabStop :: Int } -- ^ Tabstop for conversion between spaces and tabs + deriving Show + +-- +-- Functions for constructing lists of reference keys +-- + +-- | Returns @Just@ numerical key reference if there's already a key +-- for the specified target in the list of blocks, otherwise @Nothing@. +keyFoundIn :: [Block] -- ^ List of key blocks to search + -> Target -- ^ Target to search for + -> Maybe String +keyFoundIn [] src = Nothing +keyFoundIn ((Key [Str num] src1):rest) src = if (src1 == src) then + Just num + else + keyFoundIn rest src +keyFoundIn (_:rest) src = keyFoundIn rest src + +-- | Return next unique numerical key, given keyList +nextUniqueKey :: [[Inline]] -> String +nextUniqueKey keys = + let nums = [1..10000] + notAKey n = not (any (== [Str (show n)]) keys) in + case (find notAKey nums) of + Just x -> show x + Nothing -> error "Could not find unique key for reference link" + +-- | Generate a reference for a URL (either an existing reference, if +-- there is one, or a new one, if there isn't) and update parser state. +generateReference :: String -- ^ URL + -> String -- ^ Title + -> GenParser tok ParserState Target +generateReference url title = do + let src = Src (decodeEntities url) (decodeEntities title) + state <- getState + let keyBlocks = stateKeyBlocks state + let keysUsed = stateKeysUsed state + case (keyFoundIn keyBlocks src) of + Just num -> return (Ref [Str num]) + Nothing -> do + let nextNum = nextUniqueKey keysUsed + updateState (\st -> st {stateKeyBlocks = (Key [Str nextNum] src):keyBlocks, + stateKeysUsed = [Str nextNum]:keysUsed}) + return (Ref [Str nextNum]) + +-- +-- code to replace reference links with real links and remove unneeded key blocks +-- + +type KeyTable = [([Inline], Target)] + +-- | Returns @True@ if block is a Key block +isRefBlock :: Block -> Bool +isRefBlock (Key _ _) = True +isRefBlock _ = False + +-- | Returns a pair of a list of pairs of keys and associated sources, and a new +-- list of blocks with the included key blocks deleted. +keyTable :: [Block] -> (KeyTable, [Block]) +keyTable [] = ([],[]) +keyTable ((Key ref target):lst) = (((ref, target):table), rest) + where (table, rest) = keyTable lst +keyTable (Null:lst) = keyTable lst -- get rid of Nulls +keyTable (Blank:lst) = keyTable lst -- get rid of Blanks +keyTable ((BlockQuote blocks):lst) = ((table1 ++ table2), ((BlockQuote rest1):rest2)) + where (table1, rest1) = keyTable blocks + (table2, rest2) = keyTable lst +keyTable ((Note ref blocks):lst) = ((table1 ++ table2), ((Note ref rest1):rest2)) + where (table1, rest1) = keyTable blocks + (table2, rest2) = keyTable lst +keyTable ((OrderedList blockLists):lst) = ((table1 ++ table2), ((OrderedList rest1):rest2)) + where results = map keyTable blockLists + rest1 = map snd results + table1 = concatMap fst results + (table2, rest2) = keyTable lst +keyTable ((BulletList blockLists):lst) = ((table1 ++ table2), ((BulletList rest1):rest2)) + where results = map keyTable blockLists + rest1 = map snd results + table1 = concatMap fst results + (table2, rest2) = keyTable lst +keyTable (other:lst) = (table, (other:rest)) + where (table, rest) = keyTable lst + +-- | Look up key in key table and return target object. +lookupKeySrc :: KeyTable -- ^ Key table + -> [Inline] -- ^ Key + -> Maybe Target +lookupKeySrc table key = case table of + [] -> Nothing + (k, src):rest -> if (refsMatch k key) then Just src else lookupKeySrc rest key + +-- | Returns @True@ if keys match (case insensitive). +refsMatch :: [Inline] -> [Inline] -> Bool +refsMatch ((Str x):restx) ((Str y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Code x):restx) ((Code y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((TeX x):restx) ((TeX y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((NoteRef x):restx) ((NoteRef y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Emph x):restx) ((Emph y):resty) = refsMatch x y && refsMatch restx resty +refsMatch ((Strong x):restx) ((Strong y):resty) = refsMatch x y && refsMatch restx resty +refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty +refsMatch [] x = null x +refsMatch x [] = null x + +-- | Replace reference links with explicit links in list of blocks, removing key blocks. +replaceReferenceLinks :: [Block] -> [Block] +replaceReferenceLinks blocks = + let (keytable, purged) = keyTable blocks in + replaceRefLinksBlockList keytable purged + +-- | Use key table to replace reference links with explicit links in a list of blocks +replaceRefLinksBlockList :: KeyTable -> [Block] -> [Block] +replaceRefLinksBlockList keytable lst = map (replaceRefLinksBlock keytable) lst + +-- | Use key table to replace reference links with explicit links in a block +replaceRefLinksBlock :: KeyTable -> Block -> Block +replaceRefLinksBlock keytable (Plain lst) = Plain (map (replaceRefLinksInline keytable) lst) +replaceRefLinksBlock keytable (Para lst) = Para (map (replaceRefLinksInline keytable) lst) +replaceRefLinksBlock keytable (Header lvl lst) = Header lvl (map (replaceRefLinksInline keytable) lst) +replaceRefLinksBlock keytable (BlockQuote lst) = BlockQuote (map (replaceRefLinksBlock keytable) lst) +replaceRefLinksBlock keytable (Note ref lst) = Note ref (map (replaceRefLinksBlock keytable) lst) +replaceRefLinksBlock keytable (OrderedList lst) = OrderedList (map (replaceRefLinksBlockList keytable) lst) +replaceRefLinksBlock keytable (BulletList lst) = BulletList (map (replaceRefLinksBlockList keytable) lst) +replaceRefLinksBlock keytable other = other + +-- | Use key table to replace reference links with explicit links in an inline element. +replaceRefLinksInline :: KeyTable -> Inline -> Inline +replaceRefLinksInline keytable (Link text (Ref ref)) = (Link newText newRef) + where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of + Nothing -> (Ref ref) + Just src -> src + newText = map (replaceRefLinksInline keytable) text +replaceRefLinksInline keytable (Image text (Ref ref)) = (Image newText newRef) + where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of + Nothing -> (Ref ref) + Just src -> src + newText = map (replaceRefLinksInline keytable) text +replaceRefLinksInline keytable (Emph lst) = Emph (map (replaceRefLinksInline keytable) lst) +replaceRefLinksInline keytable (Strong lst) = Strong (map (replaceRefLinksInline keytable) lst) +replaceRefLinksInline keytable other = other |