diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-12-21 19:25:54 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-12-21 19:25:54 +0000 |
commit | 48f2cc5600bd26c60ffa1d5531ba2d9aeead129d (patch) | |
tree | 029a2cc36b8f3200c4f2752feecae657adec225e | |
parent | 0681d1d3e7ccf0cddfd452957c2d792df014064b (diff) | |
download | pandoc-48f2cc5600bd26c60ffa1d5531ba2d9aeead129d.tar.gz |
Modified rules for HTML header identifiers to ensure legal identifiers.
+ Modified inlineListToIdentifier and uniqueIdentifiers in HTML writer
to ensure that identifiers begin with an alphabetic character.
+ The new rules are described in README.
+ Resolves Issue #33.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1150 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r-- | README | 19 | ||||
-rw-r--r-- | Text/Pandoc/Writers/HTML.hs | 29 |
2 files changed, 29 insertions, 19 deletions
@@ -790,12 +790,19 @@ derive the identifier from the header text, - Remove all punctuation, except dashes and hyphens. - Replace all spaces, dashes, newlines, and hyphens with hyphens. - Convert all alphabetic characters to lowercase. - -Thus, for example, a heading 'Header identifiers in HTML' will get -the identifier `header-identifiers-in-html`, a heading -'*Dogs*?--in *my* house?' will get the identifier `dogs--in-my-house`, -and a heading '[HTML], [S5], or [RTF]?' will get the identifier -`html-s5-or-rtf`. + - Remove everything up to the first letter (identifiers may + not begin with a number or punctuation mark). + - If nothing is left after this, use the identifier `section`. + +Thus, for example, + + Header Identifier + ------------------------------------- --------------------------- + Header identifiers in HTML `header-identifiers-in-html` + *Dogs*?--in *my* house? `dogs--in-my-house` + [HTML], [S5], or [RTF]? `html-s5-or-rtf` + 3. Applications `applications` + 33 `section` These rules should, in most cases, allow one to determine the identifier from the header text. The exception is when several headers have the diff --git a/Text/Pandoc/Writers/HTML.hs b/Text/Pandoc/Writers/HTML.hs index 660bf652e..0061420d0 100644 --- a/Text/Pandoc/Writers/HTML.hs +++ b/Text/Pandoc/Writers/HTML.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Readers.TeXMath import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) -import Data.Char ( ord, toLower ) +import Data.Char ( ord, toLower, isAlpha ) import Data.List ( isPrefixOf, intersperse ) import qualified Data.Set as S import Control.Monad.State @@ -215,18 +215,20 @@ addToCSS item = do -- | Convert Pandoc inline list to plain text identifier. inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier [] = "" -inlineListToIdentifier (x:xs) = - xAsText ++ inlineListToIdentifier xs +inlineListToIdentifier = dropWhile (not . isAlpha) . 
inlineListToIdentifier' + +inlineListToIdentifier' [] = "" +inlineListToIdentifier' (x:xs) = + xAsText ++ inlineListToIdentifier' xs where xAsText = case x of Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ concat $ intersperse "-" $ words $ map toLower s - Emph lst -> inlineListToIdentifier lst - Strikeout lst -> inlineListToIdentifier lst - Superscript lst -> inlineListToIdentifier lst - Subscript lst -> inlineListToIdentifier lst - Strong lst -> inlineListToIdentifier lst - Quoted _ lst -> inlineListToIdentifier lst + Emph lst -> inlineListToIdentifier' lst + Strikeout lst -> inlineListToIdentifier' lst + Superscript lst -> inlineListToIdentifier' lst + Subscript lst -> inlineListToIdentifier' lst + Strong lst -> inlineListToIdentifier' lst + Quoted _ lst -> inlineListToIdentifier' lst Code s -> s Space -> "-" EmDash -> "-" @@ -237,8 +239,8 @@ inlineListToIdentifier (x:xs) = Math _ -> "" TeX _ -> "" HtmlInline _ -> "" - Link lst _ -> inlineListToIdentifier lst - Image lst _ -> inlineListToIdentifier lst + Link lst _ -> inlineListToIdentifier' lst + Image lst _ -> inlineListToIdentifier' lst Note _ -> "" -- | Return unique identifiers for list of inline lists. @@ -247,7 +249,8 @@ uniqueIdentifiers ls = let addIdentifier (nonuniqueIds, uniqueIds) l = let new = inlineListToIdentifier l matches = length $ filter (== new) nonuniqueIds - new' = new ++ if matches > 0 then ("-" ++ show matches) else "" + new' = (if null new then "section" else new) ++ + if matches > 0 then ("-" ++ show matches) else "" in (new:nonuniqueIds, new':uniqueIds) in reverse $ snd $ foldl addIdentifier ([],[]) ls |