aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-07-08 17:33:03 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-07-08 17:33:03 +0000
commit2d4a22d0be7e2da288c4af43250d6d3c607ccf8b (patch)
tree38ee9a5f375338c7f06a82adfbc05d07cf827371 /src
parent497a1b781584963f0c1cb442ef79bdcb0e5258d2 (diff)
downloadpandoc-2d4a22d0be7e2da288c4af43250d6d3c607ccf8b.tar.gz
Regularized the scheme for unique header identifiers in HTML writer:
- punctuation is now all removed (except -) - spaces are turned into - - all lowercase This scheme should be fairly predictable. Updated tests accordingly. git-svn-id: https://pandoc.googlecode.com/svn/trunk@655 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs18
1 files changed, 14 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 54d120879..c3ed92f5c 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -34,7 +34,7 @@ import Text.Pandoc.Entities (decodeEntities)
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
-import Data.List ( isPrefixOf, partition )
+import Data.List ( isPrefixOf, partition, intersperse )
import Control.Monad.State
import Text.XHtml.Strict
@@ -169,22 +169,32 @@ obfuscateChar char =
obfuscateString :: String -> String
obfuscateString = (concatMap obfuscateChar) . decodeEntities
+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+ let c' = ord c in
+ if (c `elem` "!\"'()*,-./:;<>?[\\]`{|}~") || (c' >= 0x2000 && c' <= 0x206F) ||
+ (c' >= 0xE000 && c' <= 0xE0FF)
+ then True
+ else False
+
-- | Convert Pandoc inline list to plain text identifier.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier [] = ""
inlineListToIdentifier (x:xs) =
xAsText ++ inlineListToIdentifier xs
where xAsText = case x of
- Str s -> map toLower s
+ Str s -> filter (\c -> (c == '-') || not (isPunctuation c)) $
+ concat $ intersperse "-" $ words $ map toLower s
Emph lst -> inlineListToIdentifier lst
Strong lst -> inlineListToIdentifier lst
Quoted _ lst -> inlineListToIdentifier lst
Code s -> s
Space -> "-"
- EmDash -> "--"
+ EmDash -> "-"
EnDash -> "-"
Apostrophe -> ""
- Ellipses -> "..."
+ Ellipses -> ""
LineBreak -> "-"
TeX _ -> ""
HtmlInline _ -> ""