From 2d4a22d0be7e2da288c4af43250d6d3c607ccf8b Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sun, 8 Jul 2007 17:33:03 +0000 Subject: Regularized the scheme for unique header identifiers in HTML writer: - punctuation is now all removed (except -) - spaces are turned into - - all lowercase This scheme should be fairly predictable. Updated tests accordingly. git-svn-id: https://pandoc.googlecode.com/svn/trunk@655 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Writers/HTML.hs | 18 ++++++++++++++---- tests/s5.basic.html | 6 +++--- tests/s5.fancy.html | 6 +++--- tests/writer.html | 2 +- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 54d120879..c3ed92f5c 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Entities (decodeEntities) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) -import Data.List ( isPrefixOf, partition ) +import Data.List ( isPrefixOf, partition, intersperse ) import Control.Monad.State import Text.XHtml.Strict @@ -169,22 +169,32 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = (concatMap obfuscateChar) . decodeEntities +-- | True if character is a punctuation character (unicode). +isPunctuation :: Char -> Bool +isPunctuation c = + let c' = ord c in + if (c `elem` "!\"'()*,-./:;<>?[\\]`{|}~") || (c' >= 0x2000 && c' <= 0x206F) || + (c' >= 0xE000 && c' <= 0xE0FF) + then True + else False + -- | Convert Pandoc inline list to plain text identifier. inlineListToIdentifier :: [Inline] -> String inlineListToIdentifier [] = "" inlineListToIdentifier (x:xs) = xAsText ++ inlineListToIdentifier xs where xAsText = case x of - Str s -> map toLower s + Str s -> filter (\c -> (c == '-') || not (isPunctuation c)) $ + concat $ intersperse "-" $ words $ map toLower s Emph lst -> inlineListToIdentifier lst Strong lst -> inlineListToIdentifier lst Quoted _ lst -> inlineListToIdentifier lst Code s -> s Space -> "-" - EmDash -> "--" + EmDash -> "-" EnDash -> "-" Apostrophe -> "" - Ellipses -> "..." + Ellipses -> "" LineBreak -> "-" TeX _ -> "" HtmlInline _ -> "" diff --git a/tests/s5.basic.html b/tests/s5.basic.html index f0dde094e..c6b0d3d6e 100644 --- a/tests/s5.basic.html +++ b/tests/s5.basic.html @@ -749,7 +749,7 @@ window.onresize = function(){setTimeout('fontScale()', 50);}