aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Shared.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2010-03-28 22:29:31 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2010-03-28 22:29:31 -0700
commitc5c4e19c99003e64665d244bb40ffa0eff93d269 (patch)
treea6c4644667a1af55caa2ceb3ac65cfdbbafb3476 /src/Text/Pandoc/Shared.hs
parentbe832b3676f01873e38bfd380a1d03f3c06c6d1e (diff)
downloadpandoc-c5c4e19c99003e64665d244bb40ffa0eff93d269.tar.gz
Shared: Fixed uniqueIdent so it behaves as described in README.
Previously some characters that are illegal in HTML identifiers, such as '<', were being allowed in header identifiers. The logic has now been fixed. Thanks to Xyne for reporting.
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r--src/Text/Pandoc/Shared.hs67
1 files changed, 34 insertions, 33 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 54d3f9a43..26aff4250 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -117,7 +117,7 @@ import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text,
import qualified Text.PrettyPrint.HughesPJ as PP
import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, isAscii,
- isPunctuation )
+ isLetter, isDigit )
import Data.List ( find, isPrefixOf, intercalate )
import Network.URI ( parseURI, URI (..), isAllowedInURI, escapeURIString, unEscapeString )
import Codec.Binary.UTF8.String ( encodeString, decodeString )
@@ -914,38 +914,37 @@ data Element = Blk Block
-- lvl num ident label contents
deriving (Eq, Read, Show, Typeable, Data)
--- | Convert Pandoc inline list to plain text identifier.
+-- | Convert Pandoc inline list to plain text identifier. HTML
+-- identifiers must start with a letter, and may contain only
+-- letters, digits, and the characters _-:.
inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
-
-inlineListToIdentifier' :: [Inline] -> [Char]
-inlineListToIdentifier' [] = ""
-inlineListToIdentifier' (x:xs) =
- xAsText ++ inlineListToIdentifier' xs
- where xAsText = case x of
- Str s -> filter (\c -> c `elem` "_-." || not (isPunctuation c)) $
- intercalate "-" $ words $ map toLower s
- Emph lst -> inlineListToIdentifier' lst
- Strikeout lst -> inlineListToIdentifier' lst
- Superscript lst -> inlineListToIdentifier' lst
- SmallCaps lst -> inlineListToIdentifier' lst
- Subscript lst -> inlineListToIdentifier' lst
- Strong lst -> inlineListToIdentifier' lst
- Quoted _ lst -> inlineListToIdentifier' lst
- Cite _ lst -> inlineListToIdentifier' lst
- Code s -> s
- Space -> "-"
- EmDash -> "-"
- EnDash -> "-"
- Apostrophe -> ""
- Ellipses -> ""
- LineBreak -> "-"
- Math _ _ -> ""
- TeX _ -> ""
- HtmlInline _ -> ""
- Link lst _ -> inlineListToIdentifier' lst
- Image lst _ -> inlineListToIdentifier' lst
- Note _ -> ""
+inlineListToIdentifier =
+ dropWhile (not . isAlpha) . intercalate "-" . words . map toLower .
+ filter (\c -> isLetter c || isDigit c || c `elem` "_-:. ") .
+ concatMap extractText
+ where extractText x = case x of
+ Str s -> s
+ Emph lst -> concatMap extractText lst
+ Strikeout lst -> concatMap extractText lst
+ Superscript lst -> concatMap extractText lst
+ SmallCaps lst -> concatMap extractText lst
+ Subscript lst -> concatMap extractText lst
+ Strong lst -> concatMap extractText lst
+ Quoted _ lst -> concatMap extractText lst
+ Cite _ lst -> concatMap extractText lst
+ Code s -> s
+ Space -> " "
+ EmDash -> "---"
+ EnDash -> "--"
+ Apostrophe -> ""
+ Ellipses -> "..."
+ LineBreak -> " "
+ Math _ s -> s
+ TeX _ -> ""
+ HtmlInline _ -> ""
+ Link lst _ -> concatMap extractText lst
+ Image lst _ -> concatMap extractText lst
+ Note _ -> ""
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
@@ -977,7 +976,9 @@ headerLtEq _ _ = False
-- Second argument is a list of already used identifiers.
uniqueIdent :: [Inline] -> [String] -> String
uniqueIdent title' usedIdents =
- let baseIdent = inlineListToIdentifier title'
+ let baseIdent = case inlineListToIdentifier title' of
+ "" -> "section"
+ x -> x
numIdent n = baseIdent ++ "-" ++ show n
in if baseIdent `elem` usedIdents
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of