From 7deee9c874f6ee3606460052a5dc22b342166a19 Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Fri, 23 Nov 2007 03:51:21 +0000
Subject: Reverted changes in r1086 (implicit section header references). This
 caused too much of a performance hit.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1093 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 src/Text/Pandoc/Readers/Markdown.hs | 167 ++++++++++++++++--------------------
 src/Text/Pandoc/Shared.hs           |  56 +-----------
 src/Text/Pandoc/Writers/HTML.hs     |  46 ++++++++++
 3 files changed, 123 insertions(+), 146 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 7168cee4d..ded9f2136 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 {- |
    Module      : Text.Pandoc.Readers.Markdown
    Copyright   : Copyright (C) 2006-7 John MacFarlane
-   License     : GNU GPL, version 2 or above
+   License     : GNU GPL, version 2 or above 
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
@@ -27,8 +27,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Conversion of markdown-formatted plain text to 'Pandoc' document.
 -}
-module Text.Pandoc.Readers.Markdown (
-                                     readMarkdown
+module Text.Pandoc.Readers.Markdown ( 
+                                     readMarkdown 
                                     ) where
 
 import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex )
@@ -37,9 +37,9 @@ import Data.Char ( isAlphaNum )
 import Data.Maybe ( fromMaybe )
 import Network.URI ( isURI )
 import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared 
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
-import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
+import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, 
                                   anyHtmlInlineTag, anyHtmlTag,
                                   anyHtmlEndTag, htmlEndTag, extractTagType,
                                   htmlBlockElement )
@@ -69,14 +69,14 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221"
 indentSpaces = try $ do
   state <- getState
   let tabStop = stateTabStop state
-  try (count tabStop (char ' ')) <|>
+  try (count tabStop (char ' ')) <|> 
     (many (char ' ') >> string "\t") <?> "indentation"
 
 nonindentSpaces = do
   state <- getState
   let tabStop = stateTabStop state
   sps <- many (char ' ')
-  if length sps < tabStop
+  if length sps < tabStop 
      then return sps
      else unexpected "indented line"
 
@@ -91,8 +91,8 @@ failUnlessSmart = do
   if stateSmart state then return () else fail "Smart typography feature"
 
 -- | Parse an inline Str element with a given content.
-inlineString str = try $ do
-  (Str res) <- inline
+inlineString str = try $ do 
+  (Str res) <- inline 
   if res == str then return res else fail $ "unexpected Str content"
 
 -- | Parse a sequence of inline elements between a string
@@ -103,9 +103,9 @@ inlinesInBalanced opener closer = try $ do
   string opener
   result <- manyTill ( (do lookAhead (inlineString opener)
                            -- because it might be a link...
-                           bal <- inlinesInBalanced opener closer
+                           bal <- inlinesInBalanced opener closer 
                            return $ [Str opener] ++ bal ++ [Str closer])
-                       <|> (count 1 inline))
+                       <|> (count 1 inline)) 
                      (try (string closer))
   return $ concat result
 
@@ -115,7 +115,7 @@ inlinesInBalanced opener closer = try $ do
 
 titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
 
-authorsLine = try $ do
+authorsLine = try $ do 
   char '%'
   skipSpaces
   authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
@@ -143,24 +143,15 @@ parseMarkdown = do
   startPos <- getPosition
   -- go through once just to get list of reference keys
   -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- manyTill (referenceKey <|> headerReference <|>
-                            lineClump) eof >>= return . concat
+  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= 
+                  return . concat
   setInput docMinusKeys
   setPosition startPos
   st <- getState
-  -- get headers and construct implicit references unless strict
-  if stateStrict st
-     then return ()
-     else do let oldkeys = stateKeys st
-             let headers = reverse $ stateHeaders st
-             let idents = uniqueIdentifiers headers
-             let implicitRefs = zipWith (\hd ident -> (hd, ("#" ++ ident, "")))
-                                headers idents
-             updateState $ \st -> st { stateKeys = oldkeys ++ implicitRefs }
   -- go through again for notes unless strict...
   if stateStrict st
      then return ()
-     else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
+     else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= 
                               return . concat
              st <- getState
              let reversedNotes = stateNotes st
@@ -169,10 +160,10 @@ parseMarkdown = do
              setPosition startPos
   -- now parse it for real...
   (title, author, date) <- option ([],[],"") titleBlock
-  blocks <- parseBlocks
+  blocks <- parseBlocks 
   return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
 
---
+-- 
 -- initial pass for references and notes
 --
 
@@ -195,17 +186,7 @@ referenceKey = try $ do
   -- return blanks so line count isn't affected
   return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
 
-headerReference = try $ do
-  failIfStrict
-  startPos <- getPosition
-  (Header level text) <- lookAhead $ atxHeader <|> setextHeader
-  st <- getState
-  let headers = stateHeaders st
-  updateState $ \st -> st { stateHeaders = text:headers }
-  endPos <- getPosition
-  lineClump  -- return the raw header, because we need to parse it later
-
-referenceTitle = try $ do
+referenceTitle = try $ do 
   (many1 spaceChar >> option '\n' newline) <|> newline
   skipSpaces
   tit <-    (charsInBalanced '(' ')' >>= return . unwords . words)
@@ -220,7 +201,7 @@ rawLine = do
   notFollowedBy blankline
   notFollowedBy' noteMarker
   contents <- many1 nonEndline
-  end <- option "" (newline >> optional indentSpaces >> return "\n")
+  end <- option "" (newline >> optional indentSpaces >> return "\n") 
   return $ contents ++ end
 
 rawLines = many1 rawLine >>= return . concat
@@ -249,7 +230,7 @@ noteBlock = try $ do
 
 parseBlocks = manyTill block eof
 
-block = choice [ header
+block = choice [ header 
                , table
                , codeBlock
                , hrule
@@ -304,7 +285,7 @@ hrule = try $ do
 indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
 
 codeBlock = do
-  contents <- many1 (indentedLine <|>
+  contents <- many1 (indentedLine <|> 
                      try (do b <- blanklines
                              l <- indentedLine
                              return $ b ++ l))
@@ -319,7 +300,7 @@ emacsBoxQuote = try $ do
   failIfStrict
   string ",----"
   manyTill anyChar newline
-  raw <- manyTill
+  raw <- manyTill 
          (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
          (try (string "`----"))
   blanklines
@@ -329,7 +310,7 @@ emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
 
 emailBlockQuote = try $ do
   emailBlockQuoteStart
-  raw <- sepBy (many (nonEndline <|>
+  raw <- sepBy (many (nonEndline <|> 
                       (try (endline >> notFollowedBy emailBlockQuoteStart >>
                        return '\n'))))
                (try (newline >> emailBlockQuoteStart))
@@ -337,12 +318,12 @@ emailBlockQuote = try $ do
   optional blanklines
   return raw
 
-blockQuote = do
+blockQuote = do 
   raw <- emailBlockQuote <|> emacsBoxQuote
   -- parse the extracted block, which may contain various block elements:
   contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
   return $ BlockQuote contents
-
+ 
 --
 -- list blocks
 --
@@ -377,7 +358,7 @@ orderedListStart style delim = try $ do
             then do many1 digit
                     char '.'
                     return 1
-            else orderedListMarker style delim
+            else orderedListMarker style delim 
   if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
      num `elem` [1, 5, 10, 50, 100, 500, 1000]))
      then char '\t' <|> (spaceChar >> spaceChar)
@@ -401,7 +382,7 @@ rawListItem start = try $ do
   blanks <- many blankline
   return $ concat result ++ blanks
 
--- continuation of a list item - indented and separated by blankline
+-- continuation of a list item - indented and separated by blankline 
 -- or (in compact lists) endline.
 -- note: nested lists are parsed as continuations
 listContinuation start = try $ do
@@ -417,7 +398,7 @@ listContinuationLine start = try $ do
   result <- manyTill anyChar newline
   return $ result ++ "\n"
 
-listItem start = try $ do
+listItem start = try $ do 
   first <- rawListItem start
   continuations <- many (listContinuation start)
   -- parsing with ListItemState forces markers at beginning of lines to
@@ -437,7 +418,7 @@ orderedList = try $ do
   items <- many1 (listItem (orderedListStart style delim))
   return $ OrderedList (start, style, delim) $ compactify items
 
-bulletList = many1 (listItem bulletListStart) >>=
+bulletList = many1 (listItem bulletListStart) >>= 
              return . BulletList . compactify
 
 -- definition lists
@@ -478,7 +459,7 @@ definitionList = do
 -- paragraph block
 --
 
-para = try $ do
+para = try $ do 
   result <- many1 inline
   newline
   blanklines <|> do st <- getState
@@ -487,9 +468,9 @@ para = try $ do
                        else lookAhead emacsBoxQuote >> return ""
   return $ Para $ normalizeSpaces result
 
-plain = many1 inline >>= return . Plain . normalizeSpaces
+plain = many1 inline >>= return . Plain . normalizeSpaces 
 
---
+-- 
 -- raw html
 --
 
@@ -506,25 +487,25 @@ htmlBlock = do
     else rawHtmlBlocks
 
 -- True if tag is self-closing
-isSelfClosing tag =
+isSelfClosing tag = 
   isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
 
 strictHtmlBlock = try $ do
-  tag <- anyHtmlBlockTag
+  tag <- anyHtmlBlockTag 
   let tag' = extractTagType tag
-  if isSelfClosing tag || tag' == "hr"
+  if isSelfClosing tag || tag' == "hr" 
      then return tag
-     else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
+     else do contents <- many (notFollowedBy' (htmlEndTag tag') >> 
                                (htmlElement <|> (count 1 anyChar)))
              end <- htmlEndTag tag'
              return $ tag ++ concat contents ++ end
 
 rawHtmlBlocks = do
-  htmlBlocks <- many1 rawHtmlBlock
+  htmlBlocks <- many1 rawHtmlBlock    
   let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
   let combined' = if not (null combined) && last combined == '\n'
-                     then init combined  -- strip extra newline
-                     else combined
+                     then init combined  -- strip extra newline 
+                     else combined 
   return $ RawHtml combined'
 
 --
@@ -535,7 +516,7 @@ rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
 
 --
 -- Tables
---
+-- 
 
 -- Parse a dashed line with optional trailing spaces; return its length
 -- and the length including trailing space.
@@ -544,7 +525,7 @@ dashedLine ch = do
   sp     <- many spaceChar
   return $ (length dashes, length $ dashes ++ sp)
 
--- Parse a table header with dashed lines of '-' preceded by
+-- Parse a table header with dashed lines of '-' preceded by 
 -- one line of text.
 simpleTableHeader = try $ do
   rawContent  <- anyLine
@@ -567,7 +548,7 @@ tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
 rawTableLine indices = do
   notFollowedBy' (blanklines <|> tableFooter)
   line <- many1Till anyChar newline
-  return $ map removeLeadingTrailingSpace $ tail $
+  return $ map removeLeadingTrailingSpace $ tail $ 
            splitByIndices (init indices) line
 
 -- Parse a table line and return a list of lists of blocks (columns).
@@ -584,8 +565,8 @@ multilineRow indices = do
 widthsFromIndices :: Int     -- Number of columns on terminal
                   -> [Int]   -- Indices
                   -> [Float] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
+widthsFromIndices _ [] = []  
+widthsFromIndices numColumns indices = 
   let lengths = zipWith (-) indices (0:indices)
       totLength = sum lengths
       quotient = if totLength > numColumns
@@ -624,14 +605,14 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines
 multilineTable = tableWith multilineTableHeader multilineRow tableFooter
 
 multilineTableHeader = try $ do
-  tableSep
+  tableSep 
   rawContent  <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
   initSp      <- nonindentSpaces
   dashes      <- many1 (dashedLine '-')
   newline
   let (lengths, lines) = unzip dashes
   let indices  = scanl (+) (length initSp) lines
-  let rawHeadsList = transpose $ map
+  let rawHeadsList = transpose $ map 
                      (\ln -> tail $ splitByIndices (init indices) ln)
                      rawContent
   let rawHeads = map (joinWithSep " ") rawHeadsList
@@ -644,7 +625,7 @@ multilineTableHeader = try $ do
 alignType :: [String] -> Int -> Alignment
 alignType []  len = AlignDefault
 alignType strLst len =
-  let str        = head $ sortBy (comparing length) $
+  let str        = head $ sortBy (comparing length) $ 
                           map removeTrailingSpace strLst
       leftSpace  = if null str then False else (str !! 0) `elem` " \t"
       rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
@@ -656,7 +637,7 @@ alignType strLst len =
 
 table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
 
---
+-- 
 -- inline
 --
 
@@ -686,7 +667,7 @@ inline = choice [ str
 escapedChar = do
   char '\\'
   state <- getState
-  result <- option '\\' $ if stateStrict state
+  result <- option '\\' $ if stateStrict state 
                              then oneOf "\\`*_{}[]()>#+-.!~"
                              else satisfy (not . isAlphaNum)
   return $ Str [result]
@@ -700,17 +681,17 @@ ltSign = do
 
 specialCharsMinusLt = filter (/= '<') specialChars
 
-symbol = do
+symbol = do 
   result <- oneOf specialCharsMinusLt
   return $ Str [result]
 
 -- parses inline code, between n `s and n `s
-code = try $ do
+code = try $ do 
   starts <- many1 (char '`')
   skipSpaces
   result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
-                       (char '\n' >> return " "))
-                      (try (skipSpaces >> count (length starts) (char '`') >>
+                       (char '\n' >> return " ")) 
+                      (try (skipSpaces >> count (length starts) (char '`') >> 
                       notFollowedBy (char '`')))
   return $ Code $ removeLeadingTrailingSpace $ concat result
 
@@ -726,30 +707,30 @@ math = try $ do
   return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
 
 emph = ((enclosed (char '*') (char '*') inline) <|>
-        (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>=
+        (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>= 
         return . Emph . normalizeSpaces
 
-strong = ((enclosed (string "**") (try $ string "**") inline) <|>
+strong = ((enclosed (string "**") (try $ string "**") inline) <|> 
           (enclosed (string "__") (try $ string "__") inline)) >>=
          return . Strong . normalizeSpaces
 
 strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
             return . Strikeout . normalizeSpaces
 
-superscript = failIfStrict >> enclosed (char '^') (char '^')
+superscript = failIfStrict >> enclosed (char '^') (char '^') 
               (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
               return . Superscript
 
 subscript = failIfStrict >> enclosed (char '~') (char '~')
             (notFollowedBy' whitespace >> inline) >>=  -- may not contain Space
-            return . Subscript
+            return . Subscript 
 
-smartPunctuation = failUnlessSmart >>
+smartPunctuation = failUnlessSmart >> 
                    choice [ quoted, apostrophe, dash, ellipses ]
 
 apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
 
-quoted = doubleQuoted <|> singleQuoted
+quoted = doubleQuoted <|> singleQuoted 
 
 withQuoteContext context parser = do
   oldState <- getState
@@ -765,7 +746,7 @@ singleQuoted = try $ do
   withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
     return . Quoted SingleQuote . normalizeSpaces
 
-doubleQuoted = try $ do
+doubleQuoted = try $ do 
   doubleQuoteStart
   withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
     return . Quoted DoubleQuote . normalizeSpaces
@@ -776,13 +757,13 @@ failIfInQuoteContext context = do
      then fail "already inside quotes"
      else return ()
 
-singleQuoteStart = do
+singleQuoteStart = do 
   failIfInQuoteContext InSingleQuote
-  char '\8216' <|>
-     (try $ do char '\''
+  char '\8216' <|> 
+     (try $ do char '\''  
                notFollowedBy (oneOf ")!],.;:-? \t\n")
                notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
-                                   satisfy (not . isAlphaNum)))
+                                   satisfy (not . isAlphaNum))) 
                                    -- possess/contraction
                return '\'')
 
@@ -826,13 +807,13 @@ endline = try $ do
   newline
   notFollowedBy blankline
   st <- getState
-  if stateStrict st
+  if stateStrict st 
     then do notFollowedBy emailBlockQuoteStart
             notFollowedBy (char '#')  -- atx header
-    else return ()
+    else return () 
   -- parse potential list-starts differently if in a list:
   if stateParserContext st == ListItemState
-     then notFollowedBy' (bulletListStart <|>
+     then notFollowedBy' (bulletListStart <|> 
                           (anyOrderedListStart >> return ()))
      else return ()
   return Space
@@ -846,7 +827,7 @@ reference = notFollowedBy' (string "[^") >>  -- footnote reference
             inlinesInBalanced "[" "]" >>= (return . normalizeSpaces)
 
 -- source for a link, with optional title
-source = try $ do
+source = try $ do 
   char '('
   optional (char '<')
   src <- many (noneOf ")> \t\n")
@@ -856,7 +837,7 @@ source = try $ do
   char ')'
   return (removeTrailingSpace src, tit)
 
-linkTitle = try $ do
+linkTitle = try $ do 
   (many1 spaceChar >> option '\n' newline) <|> newline
   skipSpaces
   delim <- char '\'' <|> char '"'
@@ -871,13 +852,13 @@ link = try $ do
 
 -- a link like [this][ref] or [this][] or [this]
 referenceLink label = do
-  ref <- option [] (try (optional (char ' ') >>
+  ref <- option [] (try (optional (char ' ') >> 
                          optional (newline >> skipSpaces) >> reference))
   let ref' = if null ref then label else ref
   state <- getState
   case lookupKeySrc (stateKeys state) ref' of
-     Nothing     -> fail "no corresponding key"
-     Just target -> return target
+     Nothing     -> fail "no corresponding key" 
+     Just target -> return target 
 
 emailAddress = try $ do
   name <- many1 (alphaNum <|> char '+')
@@ -898,7 +879,7 @@ autoLink = try $ do
   char '>'
   let src' = if "mailto:" `isPrefixOf` src
                 then drop 7 src
-                else src
+                else src 
   st <- getState
   return $ if stateStrict st
               then Link [Str src'] (src, "")
@@ -929,7 +910,7 @@ rawLaTeXInline' = failIfStrict >> rawLaTeXInline
 rawHtmlInline' = do
   st <- getState
   result <- choice $ if stateStrict st
-                        then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
+                        then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] 
                         else [htmlBlockElement, anyHtmlInlineTag]
   return $ HtmlInline result
 
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 5fb1306af..626679025 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -34,7 +34,6 @@ module Text.Pandoc.Shared (
                      substitute,
                      joinWithSep,
                      -- * Text processing
-                     isPunctuation,
                      backslashEscapes,
                      escapeStringUsing,
                      stripTrailingNewlines,
@@ -92,7 +91,6 @@ module Text.Pandoc.Shared (
                      Element (..),
                      hierarchicalize,
                      isHeaderBlock,
-                     uniqueIdentifiers,
                      -- * Writer options
                      WriterOptions (..),
                      defaultWriterOptions
@@ -104,7 +102,7 @@ import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty )
 import qualified Text.PrettyPrint.HughesPJ as PP
 import Text.Pandoc.CharacterReferences ( characterReference )
 import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
-import Data.List ( find, isPrefixOf, intersperse )
+import Data.List ( find, isPrefixOf )
 import Control.Monad ( join )
 
 --
@@ -146,15 +144,6 @@ joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
 -- Text processing
 --
 
--- | True if character is a punctuation character (unicode).
-isPunctuation :: Char -> Bool
-isPunctuation c =
-  let c' = ord c
-  in  if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
-         c' >= 0xE000 && c' <= 0xE0FF
-         then True
-         else False
-
 -- | Returns an association list of backslash escapes for the
 -- designated characters.
 backslashEscapes :: [Char]    -- ^ list of special characters to escape
@@ -580,8 +569,7 @@ data ParserState = ParserState
       stateStrict          :: Bool,          -- ^ Use strict markdown syntax?
       stateSmart           :: Bool,          -- ^ Use smart typography?
       stateColumns         :: Int,           -- ^ Number of columns in terminal
-      stateHeaderTable     :: [HeaderType],  -- ^ Ordered list of header types used
-      stateHeaders         :: [[Inline]]     -- ^ List of header texts used
+      stateHeaderTable     :: [HeaderType]   -- ^ Ordered list of header types used
     }
     deriving Show
 
@@ -600,8 +588,7 @@ defaultParserState =
                   stateStrict          = False,
                   stateSmart           = False,
                   stateColumns         = 80,
-                  stateHeaderTable     = [],
-                  stateHeaders         = [] }
+                  stateHeaderTable     = [] }
 
 data HeaderType 
     = SingleHeader Char  -- ^ Single line of characters underneath
@@ -803,43 +790,6 @@ isHeaderBlock :: Block -> Bool
 isHeaderBlock (Header _ _) = True
 isHeaderBlock _ = False
 
--- | Convert Pandoc inline list to plain text identifier.
-inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier [] = ""
-inlineListToIdentifier (x:xs) = 
-  xAsText ++ inlineListToIdentifier xs
-  where xAsText = case x of
-          Str s          -> filter (\c -> c == '-' || not (isPunctuation c)) $
-                            concat $ intersperse "-" $ words $ map toLower s
-          Emph lst       -> inlineListToIdentifier lst
-          Strikeout lst  -> inlineListToIdentifier lst
-          Superscript lst -> inlineListToIdentifier lst
-          Subscript lst  -> inlineListToIdentifier lst
-          Strong lst     -> inlineListToIdentifier lst
-          Quoted _ lst   -> inlineListToIdentifier lst
-          Code s         -> s
-          Space          -> "-"
-          EmDash         -> "-"
-          EnDash         -> "-"
-          Apostrophe     -> ""
-          Ellipses       -> ""
-          LineBreak      -> "-"
-          TeX _          -> ""
-          HtmlInline _   -> ""
-          Link lst _     -> inlineListToIdentifier lst
-          Image lst _    -> inlineListToIdentifier lst
-          Note _         -> ""
-
--- | Return unique identifiers for list of inline lists.
-uniqueIdentifiers :: [[Inline]] -> [String]
-uniqueIdentifiers ls =
-  let addIdentifier (nonuniqueIds, uniqueIds) l =
-        let new = inlineListToIdentifier l
-            matches = length $ filter (== new) nonuniqueIds
-            new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
-        in  (new:nonuniqueIds, new':uniqueIds)
-  in  reverse $ snd $ foldl addIdentifier ([],[]) ls
-
 --
 -- Writer options
 --
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 56ca5ca48..7ec95d8ef 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -193,6 +193,15 @@ obfuscateChar char =
 obfuscateString :: String -> String
 obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
 
+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+  let c' = ord c
+  in  if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
+         c' >= 0xE000 && c' <= 0xE0FF
+         then True
+         else False
+
 -- | Add CSS for document header.
 addToCSS :: String -> State WriterState ()
 addToCSS item = do
@@ -200,6 +209,43 @@ addToCSS item = do
   let current = stCSS st
   put $ st {stCSS = S.insert item current}
 
+-- | Convert Pandoc inline list to plain text identifier.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier [] = ""
+inlineListToIdentifier (x:xs) = 
+  xAsText ++ inlineListToIdentifier xs
+  where xAsText = case x of
+          Str s          -> filter (\c -> c == '-' || not (isPunctuation c)) $
+                            concat $ intersperse "-" $ words $ map toLower s
+          Emph lst       -> inlineListToIdentifier lst
+          Strikeout lst  -> inlineListToIdentifier lst
+          Superscript lst -> inlineListToIdentifier lst
+          Subscript lst  -> inlineListToIdentifier lst
+          Strong lst     -> inlineListToIdentifier lst
+          Quoted _ lst   -> inlineListToIdentifier lst
+          Code s         -> s
+          Space          -> "-"
+          EmDash         -> "-"
+          EnDash         -> "-"
+          Apostrophe     -> ""
+          Ellipses       -> ""
+          LineBreak      -> "-"
+          TeX _          -> ""
+          HtmlInline _   -> ""
+          Link lst _     -> inlineListToIdentifier lst
+          Image lst _    -> inlineListToIdentifier lst
+          Note _         -> ""
+
+-- | Return unique identifiers for list of inline lists.
+uniqueIdentifiers :: [[Inline]] -> [String]
+uniqueIdentifiers ls =
+  let addIdentifier (nonuniqueIds, uniqueIds) l =
+        let new = inlineListToIdentifier l
+            matches = length $ filter (== new) nonuniqueIds
+            new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
+        in  (new:nonuniqueIds, new':uniqueIds)
+  in  reverse $ snd $ foldl addIdentifier ([],[]) ls
+
 -- | Convert Pandoc block element to HTML.
 blockToHtml :: WriterOptions -> Block -> State WriterState Html
 blockToHtml opts Null = return $ noHtml 
-- 
cgit v1.2.3