diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-07-08 17:33:03 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-07-08 17:33:03 +0000 |
commit | 2d4a22d0be7e2da288c4af43250d6d3c607ccf8b (patch) | |
tree | 38ee9a5f375338c7f06a82adfbc05d07cf827371 | |
parent | 497a1b781584963f0c1cb442ef79bdcb0e5258d2 (diff) | |
download | pandoc-2d4a22d0be7e2da288c4af43250d6d3c607ccf8b.tar.gz |
Regularized the scheme for unique header identifiers in the HTML writer:
- all punctuation is now removed (except `-`)
- spaces are turned into `-`
- text is converted to all lowercase
This scheme should be fairly predictable.
Updated tests accordingly.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@655 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 18 | ||||
-rw-r--r-- | tests/s5.basic.html | 6 | ||||
-rw-r--r-- | tests/s5.fancy.html | 6 | ||||
-rw-r--r-- | tests/writer.html | 2 |
4 files changed, 21 insertions, 11 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 54d120879..c3ed92f5c 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Entities (decodeEntities) import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) -import Data.List ( isPrefixOf, partition ) +import Data.List ( isPrefixOf, partition, intersperse ) import Control.Monad.State import Text.XHtml.Strict @@ -169,22 +169,32 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = (concatMap obfuscateChar) . decodeEntities +-- | True if character is a punctuation character (unicode). +isPunctuation :: Char -> Bool +isPunctuation c = + let c' = ord c in + if (c `elem` "!\"'()*,-./:;<>?[\\]`{|}~") || (c' >= 0x2000 && c' <= 0x206F) || + (c' >= 0xE000 && c' <= 0xE0FF) + then True + else False + -- | Convert Pandoc inline list to plain text identifier. inlineListToIdentifier :: [Inline] -> String inlineListToIdentifier [] = "" inlineListToIdentifier (x:xs) = xAsText ++ inlineListToIdentifier xs where xAsText = case x of - Str s -> map toLower s + Str s -> filter (\c -> (c == '-') || not (isPunctuation c)) $ + concat $ intersperse "-" $ words $ map toLower s Emph lst -> inlineListToIdentifier lst Strong lst -> inlineListToIdentifier lst Quoted _ lst -> inlineListToIdentifier lst Code s -> s Space -> "-" - EmDash -> "--" + EmDash -> "-" EnDash -> "-" Apostrophe -> "" - Ellipses -> "..." 
+ Ellipses -> "" LineBreak -> "-" TeX _ -> "" HtmlInline _ -> "" diff --git a/tests/s5.basic.html b/tests/s5.basic.html index f0dde094e..c6b0d3d6e 100644 --- a/tests/s5.basic.html +++ b/tests/s5.basic.html @@ -749,7 +749,7 @@ window.onresize = function(){setTimeout('fontScale()', 50);}</script> <div id="currentSlide"></div> <div id="header"></div> <div id="footer"> -<h1 id="july 15, 2006" +<h1 id="july-15-2006" >July 15, 2006</h1 ><h2 id="my-s5-document" >My S5 Document</h2 @@ -760,9 +760,9 @@ window.onresize = function(){setTimeout('fontScale()', 50);}</script> <div class="slide"> <h1 id="my-s5-document-1" >My S5 Document</h1 - ><h3 id="sam smith, jen jones" + ><h3 id="sam-smith-jen-jones" >Sam Smith, Jen Jones</h3 - ><h4 id="july 15, 2006-1" + ><h4 id="july-15-2006-1" >July 15, 2006</h4 ></div> <div class="slide"> diff --git a/tests/s5.fancy.html b/tests/s5.fancy.html index d3f445d22..3d84b2a14 100644 --- a/tests/s5.fancy.html +++ b/tests/s5.fancy.html @@ -1661,7 +1661,7 @@ else <div id="currentSlide"></div> <div id="header"></div> <div id="footer"> -<h1 id="july 15, 2006" +<h1 id="july-15-2006" >July 15, 2006</h1 ><h2 id="my-s5-document" >My S5 Document</h2 @@ -1672,9 +1672,9 @@ else <div class="slide"> <h1 id="my-s5-document-1" >My S5 Document</h1 - ><h3 id="sam smith, jen jones" + ><h3 id="sam-smith-jen-jones" >Sam Smith, Jen Jones</h3 - ><h4 id="july 15, 2006-1" + ><h4 id="july-15-2006-1" >July 15, 2006</h4 ></div> <div class="slide"> diff --git a/tests/writer.html b/tests/writer.html index a77196578..7e6ebd103 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -636,7 +636,7 @@ Blah ><html></code >.</p ><hr - /><h1 id="smart-quotes,-ellipses,-dashes" + /><h1 id="smart-quotes-ellipses-dashes" >Smart quotes, ellipses, dashes</h1 ><p >“Hello,” said the spider. “‘Shelob’ is my name.”</p |