diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-07-08 17:33:03 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-07-08 17:33:03 +0000 |
commit | 2d4a22d0be7e2da288c4af43250d6d3c607ccf8b (patch) | |
tree | 38ee9a5f375338c7f06a82adfbc05d07cf827371 /src | |
parent | 497a1b781584963f0c1cb442ef79bdcb0e5258d2 (diff) | |
download | pandoc-2d4a22d0be7e2da288c4af43250d6d3c607ccf8b.tar.gz |
Regularized the scheme for unique header identifiers in HTML writer:
- punctuation is now all removed (except -)
- spaces are turned into -
- all lowercase
This scheme should be fairly predictable.
Updated tests accordingly.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@655 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 18 |
1 files changed, 14 insertions, 4 deletions
```diff
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 54d120879..c3ed92f5c 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -34,7 +34,7 @@ import Text.Pandoc.Entities (decodeEntities)
 import Text.Regex ( mkRegex, matchRegex )
 import Numeric ( showHex )
 import Data.Char ( ord, toLower )
-import Data.List ( isPrefixOf, partition )
+import Data.List ( isPrefixOf, partition, intersperse )
 import Control.Monad.State
 import Text.XHtml.Strict
 
@@ -169,22 +169,32 @@ obfuscateChar char =
 obfuscateString :: String -> String
 obfuscateString = (concatMap obfuscateChar) . decodeEntities
 
+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+  let c' = ord c in
+  if (c `elem` "!\"'()*,-./:;<>?[\\]`{|}~") || (c' >= 0x2000 && c' <= 0x206F) ||
+     (c' >= 0xE000 && c' <= 0xE0FF)
+  then True
+  else False
+
 -- | Convert Pandoc inline list to plain text identifier.
 inlineListToIdentifier :: [Inline] -> String
 inlineListToIdentifier [] = ""
 inlineListToIdentifier (x:xs) =
   xAsText ++ inlineListToIdentifier xs where
     xAsText = case x of
-      Str s -> map toLower s
+      Str s -> filter (\c -> (c == '-') || not (isPunctuation c)) $
+               concat $ intersperse "-" $ words $ map toLower s
       Emph lst -> inlineListToIdentifier lst
       Strong lst -> inlineListToIdentifier lst
       Quoted _ lst -> inlineListToIdentifier lst
       Code s -> s
       Space -> "-"
-      EmDash -> "--"
+      EmDash -> "-"
       EnDash -> "-"
       Apostrophe -> ""
-      Ellipses -> "..."
+      Ellipses -> ""
       LineBreak -> "-"
       TeX _ -> ""
       HtmlInline _ -> ""
```