From 7deee9c874f6ee3606460052a5dc22b342166a19 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Fri, 23 Nov 2007 03:51:21 +0000 Subject: Reverted changes in r1086 (implicit section header references). This caused too much of a performance hit. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1093 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Readers/Markdown.hs | 167 ++++++++++++++++-------------------- src/Text/Pandoc/Shared.hs | 56 +----------- src/Text/Pandoc/Writers/HTML.hs | 46 ++++++++++ 3 files changed, 123 insertions(+), 146 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 7168cee4d..ded9f2136 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha @@ -27,8 +27,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( - readMarkdown +module Text.Pandoc.Readers.Markdown ( + readMarkdown ) where import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex ) @@ -37,9 +37,9 @@ import Data.Char ( isAlphaNum ) import Data.Maybe ( fromMaybe ) import Network.URI ( isURI ) import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) -import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, +import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType, htmlBlockElement ) @@ -69,14 +69,14 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" indentSpaces = try $ do state <- getState let tabStop = stateTabStop state - try (count tabStop (char ' ')) <|> + try (count tabStop (char ' ')) <|> (many (char ' ') >> string "\t") "indentation" nonindentSpaces = do state <- getState let tabStop = stateTabStop state sps <- many (char ' ') - if length sps < tabStop + if length sps < tabStop then return sps else unexpected "indented line" @@ -91,8 +91,8 @@ failUnlessSmart = do if stateSmart state then return () else fail "Smart typography feature" -- | Parse an inline Str element with a given content. -inlineString str = try $ do - (Str res) <- inline +inlineString str = try $ do + (Str res) <- inline if res == str then return res else fail $ "unexpected Str content" -- | Parse a sequence of inline elements between a string @@ -103,9 +103,9 @@ inlinesInBalanced opener closer = try $ do string opener result <- manyTill ( (do lookAhead (inlineString opener) -- because it might be a link... - bal <- inlinesInBalanced opener closer + bal <- inlinesInBalanced opener closer return $ [Str opener] ++ bal ++ [Str closer]) - <|> (count 1 inline)) + <|> (count 1 inline)) (try (string closer)) return $ concat result @@ -115,7 +115,7 @@ inlinesInBalanced opener closer = try $ do titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline -authorsLine = try $ do +authorsLine = try $ do char '%' skipSpaces authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") @@ -143,24 +143,15 @@ parseMarkdown = do startPos <- getPosition -- go through once just to get list of reference keys -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> headerReference <|> - lineClump) eof >>= return . concat + docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= + return . concat setInput docMinusKeys setPosition startPos st <- getState - -- get headers and construct implicit references unless strict - if stateStrict st - then return () - else do let oldkeys = stateKeys st - let headers = reverse $ stateHeaders st - let idents = uniqueIdentifiers headers - let implicitRefs = zipWith (\hd ident -> (hd, ("#" ++ ident, ""))) - headers idents - updateState $ \st -> st { stateKeys = oldkeys ++ implicitRefs } -- go through again for notes unless strict... if stateStrict st then return () - else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= + else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= return . concat st <- getState let reversedNotes = stateNotes st @@ -169,10 +160,10 @@ parseMarkdown = do setPosition startPos -- now parse it for real... (title, author, date) <- option ([],[],"") titleBlock - blocks <- parseBlocks + blocks <- parseBlocks return $ Pandoc (Meta title author date) $ filter (/= Null) blocks --- +-- -- initial pass for references and notes -- @@ -195,17 +186,7 @@ referenceKey = try $ do -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -headerReference = try $ do - failIfStrict - startPos <- getPosition - (Header level text) <- lookAhead $ atxHeader <|> setextHeader - st <- getState - let headers = stateHeaders st - updateState $ \st -> st { stateHeaders = text:headers } - endPos <- getPosition - lineClump -- return the raw header, because we need to parse it later - -referenceTitle = try $ do +referenceTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces tit <- (charsInBalanced '(' ')' >>= return . unwords . words) @@ -220,7 +201,7 @@ rawLine = do notFollowedBy blankline notFollowedBy' noteMarker contents <- many1 nonEndline - end <- option "" (newline >> optional indentSpaces >> return "\n") + end <- option "" (newline >> optional indentSpaces >> return "\n") return $ contents ++ end rawLines = many1 rawLine >>= return . concat @@ -249,7 +230,7 @@ noteBlock = try $ do parseBlocks = manyTill block eof -block = choice [ header +block = choice [ header , table , codeBlock , hrule @@ -304,7 +285,7 @@ hrule = try $ do indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") codeBlock = do - contents <- many1 (indentedLine <|> + contents <- many1 (indentedLine <|> try (do b <- blanklines l <- indentedLine return $ b ++ l)) @@ -319,7 +300,7 @@ emacsBoxQuote = try $ do failIfStrict string ",----" manyTill anyChar newline - raw <- manyTill + raw <- manyTill (try (char '|' >> optional (char ' ') >> manyTill anyChar newline)) (try (string "`----")) blanklines @@ -329,7 +310,7 @@ emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') emailBlockQuote = try $ do emailBlockQuoteStart - raw <- sepBy (many (nonEndline <|> + raw <- sepBy (many (nonEndline <|> (try (endline >> notFollowedBy emailBlockQuoteStart >> return '\n')))) (try (newline >> emailBlockQuoteStart)) @@ -337,12 +318,12 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote = do +blockQuote = do raw <- emailBlockQuote <|> emacsBoxQuote -- parse the extracted block, which may contain various block elements: contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" return $ BlockQuote contents - + -- -- list blocks -- @@ -377,7 +358,7 @@ orderedListStart style delim = try $ do then do many1 digit char '.' return 1 - else orderedListMarker style delim + else orderedListMarker style delim if delim == Period && (style == UpperAlpha || (style == UpperRoman && num `elem` [1, 5, 10, 50, 100, 500, 1000])) then char '\t' <|> (spaceChar >> spaceChar) @@ -401,7 +382,7 @@ rawListItem start = try $ do blanks <- many blankline return $ concat result ++ blanks --- continuation of a list item - indented and separated by blankline +-- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations listContinuation start = try $ do @@ -417,7 +398,7 @@ listContinuationLine start = try $ do result <- manyTill anyChar newline return $ result ++ "\n" -listItem start = try $ do +listItem start = try $ do first <- rawListItem start continuations <- many (listContinuation start) -- parsing with ListItemState forces markers at beginning of lines to @@ -437,7 +418,7 @@ orderedList = try $ do items <- many1 (listItem (orderedListStart style delim)) return $ OrderedList (start, style, delim) $ compactify items -bulletList = many1 (listItem bulletListStart) >>= +bulletList = many1 (listItem bulletListStart) >>= return . BulletList . compactify -- definition lists @@ -478,7 +459,7 @@ definitionList = do -- paragraph block -- -para = try $ do +para = try $ do result <- many1 inline newline blanklines <|> do st <- getState @@ -487,9 +468,9 @@ para = try $ do else lookAhead emacsBoxQuote >> return "" return $ Para $ normalizeSpaces result -plain = many1 inline >>= return . Plain . normalizeSpaces +plain = many1 inline >>= return . Plain . normalizeSpaces --- +-- -- raw html -- @@ -506,25 +487,25 @@ htmlBlock = do else rawHtmlBlocks -- True if tag is self-closing -isSelfClosing tag = +isSelfClosing tag = isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag strictHtmlBlock = try $ do - tag <- anyHtmlBlockTag + tag <- anyHtmlBlockTag let tag' = extractTagType tag - if isSelfClosing tag || tag' == "hr" + if isSelfClosing tag || tag' == "hr" then return tag - else do contents <- many (notFollowedBy' (htmlEndTag tag') >> + else do contents <- many (notFollowedBy' (htmlEndTag tag') >> (htmlElement <|> (count 1 anyChar))) end <- htmlEndTag tag' return $ tag ++ concat contents ++ end rawHtmlBlocks = do - htmlBlocks <- many1 rawHtmlBlock + htmlBlocks <- many1 rawHtmlBlock let combined = concatMap (\(RawHtml str) -> str) htmlBlocks let combined' = if not (null combined) && last combined == '\n' - then init combined -- strip extra newline - else combined + then init combined -- strip extra newline + else combined return $ RawHtml combined' -- @@ -535,7 +516,7 @@ rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment -- -- Tables --- +-- -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. @@ -544,7 +525,7 @@ dashedLine ch = do sp <- many spaceChar return $ (length dashes, length $ dashes ++ sp) --- Parse a table header with dashed lines of '-' preceded by +-- Parse a table header with dashed lines of '-' preceded by -- one line of text. simpleTableHeader = try $ do rawContent <- anyLine @@ -567,7 +548,7 @@ tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ + return $ map removeLeadingTrailingSpace $ tail $ splitByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). @@ -584,8 +565,8 @@ multilineRow indices = do widthsFromIndices :: Int -- Number of columns on terminal -> [Int] -- Indices -> [Float] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = +widthsFromIndices _ [] = [] +widthsFromIndices numColumns indices = let lengths = zipWith (-) indices (0:indices) totLength = sum lengths quotient = if totLength > numColumns @@ -624,14 +605,14 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines multilineTable = tableWith multilineTableHeader multilineRow tableFooter multilineTableHeader = try $ do - tableSep + tableSep rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) initSp <- nonindentSpaces dashes <- many1 (dashedLine '-') newline let (lengths, lines) = unzip dashes let indices = scanl (+) (length initSp) lines - let rawHeadsList = transpose $ map + let rawHeadsList = transpose $ map (\ln -> tail $ splitByIndices (init indices) ln) rawContent let rawHeads = map (joinWithSep " ") rawHeadsList @@ -644,7 +625,7 @@ multilineTableHeader = try $ do alignType :: [String] -> Int -> Alignment alignType [] len = AlignDefault alignType strLst len = - let str = head $ sortBy (comparing length) $ + let str = head $ sortBy (comparing length) $ map removeTrailingSpace strLst leftSpace = if null str then False else (str !! 0) `elem` " \t" rightSpace = length str < len || (str !! (len - 1)) `elem` " \t" @@ -656,7 +637,7 @@ alignType strLst len = table = failIfStrict >> (simpleTable <|> multilineTable) "table" --- +-- -- inline -- @@ -686,7 +667,7 @@ inline = choice [ str escapedChar = do char '\\' state <- getState - result <- option '\\' $ if stateStrict state + result <- option '\\' $ if stateStrict state then oneOf "\\`*_{}[]()>#+-.!~" else satisfy (not . isAlphaNum) return $ Str [result] @@ -700,17 +681,17 @@ ltSign = do specialCharsMinusLt = filter (/= '<') specialChars -symbol = do +symbol = do result <- oneOf specialCharsMinusLt return $ Str [result] -- parses inline code, between n `s and n `s -code = try $ do +code = try $ do starts <- many1 (char '`') skipSpaces result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> - (char '\n' >> return " ")) - (try (skipSpaces >> count (length starts) (char '`') >> + (char '\n' >> return " ")) + (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) return $ Code $ removeLeadingTrailingSpace $ concat result @@ -726,30 +707,30 @@ math = try $ do return $ TeX ("$" ++ (joinWithSep " " words) ++ "$") emph = ((enclosed (char '*') (char '*') inline) <|> - (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>= + (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>= return . Emph . normalizeSpaces -strong = ((enclosed (string "**") (try $ string "**") inline) <|> +strong = ((enclosed (string "**") (try $ string "**") inline) <|> (enclosed (string "__") (try $ string "__") inline)) >>= return . Strong . normalizeSpaces strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= return . Strikeout . normalizeSpaces -superscript = failIfStrict >> enclosed (char '^') (char '^') +superscript = failIfStrict >> enclosed (char '^') (char '^') (notFollowedBy' whitespace >> inline) >>= -- may not contain Space return . Superscript subscript = failIfStrict >> enclosed (char '~') (char '~') (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Subscript + return . Subscript -smartPunctuation = failUnlessSmart >> +smartPunctuation = failUnlessSmart >> choice [ quoted, apostrophe, dash, ellipses ] apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe -quoted = doubleQuoted <|> singleQuoted +quoted = doubleQuoted <|> singleQuoted withQuoteContext context parser = do oldState <- getState @@ -765,7 +746,7 @@ singleQuoted = try $ do withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= return . Quoted SingleQuote . normalizeSpaces -doubleQuoted = try $ do +doubleQuoted = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= return . Quoted DoubleQuote . normalizeSpaces @@ -776,13 +757,13 @@ failIfInQuoteContext context = do then fail "already inside quotes" else return () -singleQuoteStart = do +singleQuoteStart = do failIfInQuoteContext InSingleQuote - char '\8216' <|> - (try $ do char '\'' + char '\8216' <|> + (try $ do char '\'' notFollowedBy (oneOf ")!],.;:-? \t\n") notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) + satisfy (not . isAlphaNum))) -- possess/contraction return '\'') @@ -826,13 +807,13 @@ endline = try $ do newline notFollowedBy blankline st <- getState - if stateStrict st + if stateStrict st then do notFollowedBy emailBlockQuoteStart notFollowedBy (char '#') -- atx header - else return () + else return () -- parse potential list-starts differently if in a list: if stateParserContext st == ListItemState - then notFollowedBy' (bulletListStart <|> + then notFollowedBy' (bulletListStart <|> (anyOrderedListStart >> return ())) else return () return Space @@ -846,7 +827,7 @@ reference = notFollowedBy' (string "[^") >> -- footnote reference inlinesInBalanced "[" "]" >>= (return . normalizeSpaces) -- source for a link, with optional title -source = try $ do +source = try $ do char '(' optional (char '<') src <- many (noneOf ")> \t\n") @@ -856,7 +837,7 @@ source = try $ do char ')' return (removeTrailingSpace src, tit) -linkTitle = try $ do +linkTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces delim <- char '\'' <|> char '"' @@ -871,13 +852,13 @@ link = try $ do -- a link like [this][ref] or [this][] or [this] referenceLink label = do - ref <- option [] (try (optional (char ' ') >> + ref <- option [] (try (optional (char ' ') >> optional (newline >> skipSpaces) >> reference)) let ref' = if null ref then label else ref state <- getState case lookupKeySrc (stateKeys state) ref' of - Nothing -> fail "no corresponding key" - Just target -> return target + Nothing -> fail "no corresponding key" + Just target -> return target emailAddress = try $ do name <- many1 (alphaNum <|> char '+') @@ -898,7 +879,7 @@ autoLink = try $ do char '>' let src' = if "mailto:" `isPrefixOf` src then drop 7 src - else src + else src st <- getState return $ if stateStrict st then Link [Str src'] (src, "") @@ -929,7 +910,7 @@ rawLaTeXInline' = failIfStrict >> rawLaTeXInline rawHtmlInline' = do st <- getState result <- choice $ if stateStrict st - then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] + then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] else [htmlBlockElement, anyHtmlInlineTag] return $ HtmlInline result diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 5fb1306af..626679025 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -34,7 +34,6 @@ module Text.Pandoc.Shared ( substitute, joinWithSep, -- * Text processing - isPunctuation, backslashEscapes, escapeStringUsing, stripTrailingNewlines, @@ -92,7 +91,6 @@ module Text.Pandoc.Shared ( Element (..), hierarchicalize, isHeaderBlock, - uniqueIdentifiers, -- * Writer options WriterOptions (..), defaultWriterOptions @@ -104,7 +102,7 @@ import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty ) import qualified Text.PrettyPrint.HughesPJ as PP import Text.Pandoc.CharacterReferences ( characterReference ) import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) -import Data.List ( find, isPrefixOf, intersperse ) +import Data.List ( find, isPrefixOf ) import Control.Monad ( join ) -- @@ -146,15 +144,6 @@ joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst -- Text processing -- --- | True if character is a punctuation character (unicode). -isPunctuation :: Char -> Bool -isPunctuation c = - let c' = ord c - in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || - c' >= 0xE000 && c' <= 0xE0FF - then True - else False - -- | Returns an association list of backslash escapes for the -- designated characters. backslashEscapes :: [Char] -- ^ list of special characters to escape @@ -580,8 +569,7 @@ data ParserState = ParserState stateStrict :: Bool, -- ^ Use strict markdown syntax? stateSmart :: Bool, -- ^ Use smart typography? stateColumns :: Int, -- ^ Number of columns in terminal - stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateHeaders :: [[Inline]] -- ^ List of header texts used + stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used } deriving Show @@ -600,8 +588,7 @@ defaultParserState = stateStrict = False, stateSmart = False, stateColumns = 80, - stateHeaderTable = [], - stateHeaders = [] } + stateHeaderTable = [] } data HeaderType = SingleHeader Char -- ^ Single line of characters underneath @@ -803,43 +790,6 @@ isHeaderBlock :: Block -> Bool isHeaderBlock (Header _ _) = True isHeaderBlock _ = False --- | Convert Pandoc inline list to plain text identifier. -inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier [] = "" -inlineListToIdentifier (x:xs) = - xAsText ++ inlineListToIdentifier xs - where xAsText = case x of - Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ - concat $ intersperse "-" $ words $ map toLower s - Emph lst -> inlineListToIdentifier lst - Strikeout lst -> inlineListToIdentifier lst - Superscript lst -> inlineListToIdentifier lst - Subscript lst -> inlineListToIdentifier lst - Strong lst -> inlineListToIdentifier lst - Quoted _ lst -> inlineListToIdentifier lst - Code s -> s - Space -> "-" - EmDash -> "-" - EnDash -> "-" - Apostrophe -> "" - Ellipses -> "" - LineBreak -> "-" - TeX _ -> "" - HtmlInline _ -> "" - Link lst _ -> inlineListToIdentifier lst - Image lst _ -> inlineListToIdentifier lst - Note _ -> "" - --- | Return unique identifiers for list of inline lists. -uniqueIdentifiers :: [[Inline]] -> [String] -uniqueIdentifiers ls = - let addIdentifier (nonuniqueIds, uniqueIds) l = - let new = inlineListToIdentifier l - matches = length $ filter (== new) nonuniqueIds - new' = new ++ if matches > 0 then ("-" ++ show matches) else "" - in (new:nonuniqueIds, new':uniqueIds) - in reverse $ snd $ foldl addIdentifier ([],[]) ls - -- -- Writer options -- diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 56ca5ca48..7ec95d8ef 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -193,6 +193,15 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar . decodeCharacterReferences +-- | True if character is a punctuation character (unicode). +isPunctuation :: Char -> Bool +isPunctuation c = + let c' = ord c + in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || + c' >= 0xE000 && c' <= 0xE0FF + then True + else False + -- | Add CSS for document header. addToCSS :: String -> State WriterState () addToCSS item = do @@ -200,6 +209,43 @@ addToCSS item = do let current = stCSS st put $ st {stCSS = S.insert item current} +-- | Convert Pandoc inline list to plain text identifier. +inlineListToIdentifier :: [Inline] -> String +inlineListToIdentifier [] = "" +inlineListToIdentifier (x:xs) = + xAsText ++ inlineListToIdentifier xs + where xAsText = case x of + Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ + concat $ intersperse "-" $ words $ map toLower s + Emph lst -> inlineListToIdentifier lst + Strikeout lst -> inlineListToIdentifier lst + Superscript lst -> inlineListToIdentifier lst + Subscript lst -> inlineListToIdentifier lst + Strong lst -> inlineListToIdentifier lst + Quoted _ lst -> inlineListToIdentifier lst + Code s -> s + Space -> "-" + EmDash -> "-" + EnDash -> "-" + Apostrophe -> "" + Ellipses -> "" + LineBreak -> "-" + TeX _ -> "" + HtmlInline _ -> "" + Link lst _ -> inlineListToIdentifier lst + Image lst _ -> inlineListToIdentifier lst + Note _ -> "" + +-- | Return unique identifiers for list of inline lists. +uniqueIdentifiers :: [[Inline]] -> [String] +uniqueIdentifiers ls = + let addIdentifier (nonuniqueIds, uniqueIds) l = + let new = inlineListToIdentifier l + matches = length $ filter (== new) nonuniqueIds + new' = new ++ if matches > 0 then ("-" ++ show matches) else "" + in (new:nonuniqueIds, new':uniqueIds) + in reverse $ snd $ foldl addIdentifier ([],[]) ls + -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> State WriterState Html blockToHtml opts Null = return $ noHtml -- cgit v1.2.3