aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-12-21 19:25:54 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-12-21 19:25:54 +0000
commit: 48f2cc5600bd26c60ffa1d5531ba2d9aeead129d (patch)
tree: 029a2cc36b8f3200c4f2752feecae657adec225e
parent: 0681d1d3e7ccf0cddfd452957c2d792df014064b (diff)
download: pandoc-48f2cc5600bd26c60ffa1d5531ba2d9aeead129d.tar.gz
Modified rules for HTML header identifiers to ensure legal identifiers.
+ Modified inlineListToIdentifier and uniqueIdentifiers in HTML writer to ensure that identifiers begin with an alphabetic character. + The new rules are described in README. + Resolves Issue #33. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1150 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r-- README 19
-rw-r--r-- Text/Pandoc/Writers/HTML.hs 29
2 files changed, 29 insertions, 19 deletions
diff --git a/README b/README
index a8fd9e673..cd6b30cda 100644
--- a/README
+++ b/README
@@ -790,12 +790,19 @@ derive the identifier from the header text,
- Remove all punctuation, except dashes and hyphens.
- Replace all spaces, dashes, newlines, and hyphens with hyphens.
- Convert all alphabetic characters to lowercase.
-
-Thus, for example, a heading 'Header identifiers in HTML' will get
-the identifier `header-identifiers-in-html`, a heading
-'*Dogs*?--in *my* house?' will get the identifier `dogs--in-my-house`,
-and a heading '[HTML], [S5], or [RTF]?' will get the identifier
-`html-s5-or-rtf`.
+ - Remove everything up to the first letter (identifiers may
+ not begin with a number or punctuation mark).
+ - If nothing is left after this, use the identifier `section`.
+
+Thus, for example,
+
+ Header Identifier
+ ------------------------------------- ---------------------------
+ Header identifiers in HTML `header-identifiers-in-html`
+ *Dogs*?--in *my* house? `dogs--in-my-house`
+ [HTML], [S5], or [RTF]? `html-s5-or-rtf`
+ 3. Applications `applications`
+ 33 `section`
These rules should, in most cases, allow one to determine the identifier
from the header text. The exception is when several headers have the
diff --git a/Text/Pandoc/Writers/HTML.hs b/Text/Pandoc/Writers/HTML.hs
index 660bf652e..0061420d0 100644
--- a/Text/Pandoc/Writers/HTML.hs
+++ b/Text/Pandoc/Writers/HTML.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Shared
import Text.Pandoc.Readers.TeXMath
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
-import Data.Char ( ord, toLower )
+import Data.Char ( ord, toLower, isAlpha )
import Data.List ( isPrefixOf, intersperse )
import qualified Data.Set as S
import Control.Monad.State
@@ -215,18 +215,20 @@ addToCSS item = do
-- | Convert Pandoc inline list to plain text identifier.
inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier [] = ""
-inlineListToIdentifier (x:xs) =
- xAsText ++ inlineListToIdentifier xs
+inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
+
+inlineListToIdentifier' [] = ""
+inlineListToIdentifier' (x:xs) =
+ xAsText ++ inlineListToIdentifier' xs
where xAsText = case x of
Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
concat $ intersperse "-" $ words $ map toLower s
- Emph lst -> inlineListToIdentifier lst
- Strikeout lst -> inlineListToIdentifier lst
- Superscript lst -> inlineListToIdentifier lst
- Subscript lst -> inlineListToIdentifier lst
- Strong lst -> inlineListToIdentifier lst
- Quoted _ lst -> inlineListToIdentifier lst
+ Emph lst -> inlineListToIdentifier' lst
+ Strikeout lst -> inlineListToIdentifier' lst
+ Superscript lst -> inlineListToIdentifier' lst
+ Subscript lst -> inlineListToIdentifier' lst
+ Strong lst -> inlineListToIdentifier' lst
+ Quoted _ lst -> inlineListToIdentifier' lst
Code s -> s
Space -> "-"
EmDash -> "-"
@@ -237,8 +239,8 @@ inlineListToIdentifier (x:xs) =
Math _ -> ""
TeX _ -> ""
HtmlInline _ -> ""
- Link lst _ -> inlineListToIdentifier lst
- Image lst _ -> inlineListToIdentifier lst
+ Link lst _ -> inlineListToIdentifier' lst
+ Image lst _ -> inlineListToIdentifier' lst
Note _ -> ""
-- | Return unique identifiers for list of inline lists.
@@ -247,7 +249,8 @@ uniqueIdentifiers ls =
let addIdentifier (nonuniqueIds, uniqueIds) l =
let new = inlineListToIdentifier l
matches = length $ filter (== new) nonuniqueIds
- new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
+ new' = (if null new then "section" else new) ++
+ if matches > 0 then ("-" ++ show matches) else ""
in (new:nonuniqueIds, new':uniqueIds)
in reverse $ snd $ foldl addIdentifier ([],[]) ls