Regularized the scheme for unique header identifiers in HTML writer:

- punctuation is now all removed (except -)
- spaces are turned into -
- all lowercase

This scheme should be fairly predictable. Updated tests accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@655 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-07-08 17:33:03 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-07-08 17:33:03 +0000
commit: 2d4a22d0be7e2da288c4af43250d6d3c607ccf8b (patch)
tree: 38ee9a5f375338c7f06a82adfbc05d07cf827371 /src
parent: 497a1b781584963f0c1cb442ef79bdcb0e5258d2 (diff)
download: pandoc-2d4a22d0be7e2da288c4af43250d6d3c607ccf8b.tar.gz
1 files changed, 14 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 54d120879..c3ed92f5c 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -34,7 +34,7 @@ import Text.Pandoc.Entities (decodeEntities)
 import Text.Regex ( mkRegex, matchRegex )
 import Numeric ( showHex )
 import Data.Char ( ord, toLower )
-import Data.List ( isPrefixOf, partition )
+import Data.List ( isPrefixOf, partition, intersperse )
 import Control.Monad.State
 import Text.XHtml.Strict
 
@@ -169,22 +169,32 @@ obfuscateChar char =
 obfuscateString :: String -> String
 obfuscateString = (concatMap obfuscateChar) . decodeEntities
 
+-- | True if the character is in a fixed list of ASCII punctuation marks or its code point lies in 0x2000-0x206F or 0xE000-0xE0FF; other characters (e.g. '#', '_') are not counted.
+isPunctuation :: Char -> Bool
+isPunctuation c =
+  let c' = ord c in
+  if (c `elem` "!\"'()*,-./:;<>?[\\]`{|}~") || (c' >= 0x2000 && c' <= 0x206F) ||
+     (c' >= 0xE000 && c' <= 0xE0FF)
+     then True
+     else False
+
 -- | Convert Pandoc inline list to plain text identifier.
 inlineListToIdentifier :: [Inline] -> String
 inlineListToIdentifier [] = ""
 inlineListToIdentifier (x:xs) = 
   xAsText ++ inlineListToIdentifier xs
   where xAsText = case x of
-                       Str s        -> map toLower s
+                       Str s        -> filter (\c -> (c == '-') || not (isPunctuation c)) $
+                                       concat $ intersperse "-" $ words $ map toLower s
                        Emph lst     -> inlineListToIdentifier lst
                        Strong lst   -> inlineListToIdentifier lst
                        Quoted _ lst -> inlineListToIdentifier lst
                        Code s       -> s
                        Space        -> "-"
-                       EmDash       -> "--"
+                       EmDash       -> "-"
                        EnDash       -> "-"
                        Apostrophe   -> ""
-                       Ellipses     -> "..."
+                       Ellipses     -> ""
                        LineBreak    -> "-"
                        TeX _        -> ""
                        HtmlInline _ -> ""
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-07-08 17:33:03 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-07-08 17:33:03 +0000
commit	2d4a22d0be7e2da288c4af43250d6d3c607ccf8b (patch)
tree	38ee9a5f375338c7f06a82adfbc05d07cf827371 /src
parent	497a1b781584963f0c1cb442ef79bdcb0e5258d2 (diff)
download	pandoc-2d4a22d0be7e2da288c4af43250d6d3c607ccf8b.tar.gz