aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-07-08 17:33:03 +0000
committer fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-07-08 17:33:03 +0000
commit 2d4a22d0be7e2da288c4af43250d6d3c607ccf8b (patch)
tree 38ee9a5f375338c7f06a82adfbc05d07cf827371
parent 497a1b781584963f0c1cb442ef79bdcb0e5258d2 (diff)
download pandoc-2d4a22d0be7e2da288c4af43250d6d3c607ccf8b.tar.gz
Regularized the scheme for unique header identifiers in HTML writer:

- punctuation is now all removed (except -)
- spaces are turned into -
- all lowercase

This scheme should be fairly predictable. Updated tests accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@655 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r-- src/Text/Pandoc/Writers/HTML.hs 18
-rw-r--r-- tests/s5.basic.html 6
-rw-r--r-- tests/s5.fancy.html 6
-rw-r--r-- tests/writer.html 2
4 files changed, 21 insertions, 11 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 54d120879..c3ed92f5c 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -34,7 +34,7 @@ import Text.Pandoc.Entities (decodeEntities)
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
-import Data.List ( isPrefixOf, partition )
+import Data.List ( isPrefixOf, partition, intersperse )
import Control.Monad.State
import Text.XHtml.Strict
@@ -169,22 +169,32 @@ obfuscateChar char =
obfuscateString :: String -> String
obfuscateString = (concatMap obfuscateChar) . decodeEntities
+-- | True if the character counts as punctuation for identifier generation:
+-- either one of a fixed set of ASCII punctuation marks, or a character whose
+-- code point lies in U+2000..U+206F or U+E000..U+E0FF (both inclusive).
+--
+-- NOTE(review): U+E000..U+E0FF falls inside the Unicode Private Use Area;
+-- confirm that this second range is the one intended.
+isPunctuation :: Char -> Bool
+isPunctuation c =
+  c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" ||
+  (code >= 0x2000 && code <= 0x206F) ||
+  (code >= 0xE000 && code <= 0xE0FF)
+  where code = ord c  -- numeric code point, used for the range checks
+
-- | Convert Pandoc inline list to plain text identifier.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier [] = ""
inlineListToIdentifier (x:xs) =
xAsText ++ inlineListToIdentifier xs
where xAsText = case x of
- Str s -> map toLower s
+ Str s -> filter (\c -> (c == '-') || not (isPunctuation c)) $
+ concat $ intersperse "-" $ words $ map toLower s
Emph lst -> inlineListToIdentifier lst
Strong lst -> inlineListToIdentifier lst
Quoted _ lst -> inlineListToIdentifier lst
Code s -> s
Space -> "-"
- EmDash -> "--"
+ EmDash -> "-"
EnDash -> "-"
Apostrophe -> ""
- Ellipses -> "..."
+ Ellipses -> ""
LineBreak -> "-"
TeX _ -> ""
HtmlInline _ -> ""
diff --git a/tests/s5.basic.html b/tests/s5.basic.html
index f0dde094e..c6b0d3d6e 100644
--- a/tests/s5.basic.html
+++ b/tests/s5.basic.html
@@ -749,7 +749,7 @@ window.onresize = function(){setTimeout('fontScale()', 50);}</script>
<div id="currentSlide"></div>
<div id="header"></div>
<div id="footer">
-<h1 id="july 15, 2006"
+<h1 id="july-15-2006"
>July 15, 2006</h1
><h2 id="my-s5-document"
>My S5 Document</h2
@@ -760,9 +760,9 @@ window.onresize = function(){setTimeout('fontScale()', 50);}</script>
<div class="slide">
<h1 id="my-s5-document-1"
>My S5 Document</h1
- ><h3 id="sam smith, jen jones"
+ ><h3 id="sam-smith-jen-jones"
>Sam Smith, Jen Jones</h3
- ><h4 id="july 15, 2006-1"
+ ><h4 id="july-15-2006-1"
>July 15, 2006</h4
></div>
<div class="slide">
diff --git a/tests/s5.fancy.html b/tests/s5.fancy.html
index d3f445d22..3d84b2a14 100644
--- a/tests/s5.fancy.html
+++ b/tests/s5.fancy.html
@@ -1661,7 +1661,7 @@ else
<div id="currentSlide"></div>
<div id="header"></div>
<div id="footer">
-<h1 id="july 15, 2006"
+<h1 id="july-15-2006"
>July 15, 2006</h1
><h2 id="my-s5-document"
>My S5 Document</h2
@@ -1672,9 +1672,9 @@ else
<div class="slide">
<h1 id="my-s5-document-1"
>My S5 Document</h1
- ><h3 id="sam smith, jen jones"
+ ><h3 id="sam-smith-jen-jones"
>Sam Smith, Jen Jones</h3
- ><h4 id="july 15, 2006-1"
+ ><h4 id="july-15-2006-1"
>July 15, 2006</h4
></div>
<div class="slide">
diff --git a/tests/writer.html b/tests/writer.html
index a77196578..7e6ebd103 100644
--- a/tests/writer.html
+++ b/tests/writer.html
@@ -636,7 +636,7 @@ Blah
>&lt;html&gt;</code
>.</p
><hr
- /><h1 id="smart-quotes,-ellipses,-dashes"
+ /><h1 id="smart-quotes-ellipses-dashes"
>Smart quotes, ellipses, dashes</h1
><p
>&ldquo;Hello,&rdquo; said the spider. &ldquo;&lsquo;Shelob&rsquo; is my name.&rdquo;</p