Implemented implicit reference-style links to section headers in markdown.

For example, if you have a header '# Supported architectures', you can link to it with '[Supported architectures]'. If there are multiple headers with this label, the link will point to the first of them. Implicit references are always overridden by explicitly specified references. Addresses Issue #20. + Moved isPunctuation, uniqueIdentifiers, and inlineListToIdentifier from Text.Pandoc.Writers.HTML to Text.Pandoc.Shared. + Added stHeaders to ParserState. This holds a list of header texts used in the document, and is used to construct implicit header references. + In Text.Pandoc.Readers.Markdown, added call to headerReference parser in initial parsing pass, constructing a list of section header labels. This is then passed to uniqueIdentifiers to produce identifiers, and a list of implicit references is constructed. This is added to the end of the explicitly specified references, so it will be overridden by explicitly specified references. All of this processing is skipped if --strict was specified. + Modified documentation in README. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1086 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-11-22 17:14:21 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-11-22 17:14:21 +0000
commit: f7b705b44cfff881d12501eb8061ebdf20627632 (patch)
tree: b786427e010f0529c41a8055b6f4769e4023f9b7 /src/Text/Pandoc/Shared.hs
parent: 8d334b84cc51b16a6f010cd775f22b8072de9e7a (diff)
download: pandoc-f7b705b44cfff881d12501eb8061ebdf20627632.tar.gz
1 files changed, 53 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index ce07ccd24..e0e93c189 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -34,6 +34,7 @@ module Text.Pandoc.Shared (
                      substitute,
                      joinWithSep,
                      -- * Text processing
+                     isPunctuation,
                      backslashEscapes,
                      escapeStringUsing,
                      stripTrailingNewlines,
@@ -91,6 +92,7 @@ module Text.Pandoc.Shared (
                      Element (..),
                      hierarchicalize,
                      isHeaderBlock,
+                     uniqueIdentifiers,
                      -- * Writer options
                      WriterOptions (..),
                      defaultWriterOptions
@@ -102,7 +104,7 @@ import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty )
 import qualified Text.PrettyPrint.HughesPJ as PP
 import Text.Pandoc.CharacterReferences ( characterReference )
 import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
-import Data.List ( find, isPrefixOf )
+import Data.List ( find, isPrefixOf, intersperse )
 import Control.Monad ( join )
 
 --
@@ -144,6 +146,15 @@ joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
 -- Text processing
 --
 
+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+  let c' = ord c
+  in  if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
+         c' >= 0xE000 && c' <= 0xE0FF
+         then True
+         else False
+
 -- | Returns an association list of backslash escapes for the
 -- designated characters.
 backslashEscapes :: [Char]    -- ^ list of special characters to escape
@@ -566,7 +577,8 @@ data ParserState = ParserState
       stateStrict          :: Bool,          -- ^ Use strict markdown syntax?
       stateSmart           :: Bool,          -- ^ Use smart typography?
       stateColumns         :: Int,           -- ^ Number of columns in terminal
-      stateHeaderTable     :: [HeaderType]   -- ^ Ordered list of header types used
+      stateHeaderTable     :: [HeaderType],  -- ^ Ordered list of header types used
+      stateHeaders         :: [[Inline]]     -- ^ List of header texts used
     }
     deriving Show
 
@@ -585,7 +597,8 @@ defaultParserState =
                   stateStrict          = False,
                   stateSmart           = False,
                   stateColumns         = 80,
-                  stateHeaderTable     = [] }
+                  stateHeaderTable     = [],
+                  stateHeaders         = [] }
 
 data HeaderType 
     = SingleHeader Char  -- ^ Single line of characters underneath
@@ -787,6 +800,43 @@ isHeaderBlock :: Block -> Bool
 isHeaderBlock (Header _ _) = True
 isHeaderBlock _ = False
 
+-- | Convert Pandoc inline list to plain text identifier.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier [] = ""
+inlineListToIdentifier (x:xs) = 
+  xAsText ++ inlineListToIdentifier xs
+  where xAsText = case x of
+          Str s          -> filter (\c -> c == '-' || not (isPunctuation c)) $
+                            concat $ intersperse "-" $ words $ map toLower s
+          Emph lst       -> inlineListToIdentifier lst
+          Strikeout lst  -> inlineListToIdentifier lst
+          Superscript lst -> inlineListToIdentifier lst
+          Subscript lst  -> inlineListToIdentifier lst
+          Strong lst     -> inlineListToIdentifier lst
+          Quoted _ lst   -> inlineListToIdentifier lst
+          Code s         -> s
+          Space          -> "-"
+          EmDash         -> "-"
+          EnDash         -> "-"
+          Apostrophe     -> ""
+          Ellipses       -> ""
+          LineBreak      -> "-"
+          TeX _          -> ""
+          HtmlInline _   -> ""
+          Link lst _     -> inlineListToIdentifier lst
+          Image lst _    -> inlineListToIdentifier lst
+          Note _         -> ""
+
+-- | Return unique identifiers for list of inline lists.
+uniqueIdentifiers :: [[Inline]] -> [String]
+uniqueIdentifiers ls =
+  let addIdentifier (nonuniqueIds, uniqueIds) l =
+        let new = inlineListToIdentifier l
+            matches = length $ filter (== new) nonuniqueIds
+            new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
+        in  (new:nonuniqueIds, new':uniqueIds)
+  in  reverse $ snd $ foldl addIdentifier ([],[]) ls
+
 --
 -- Writer options
 --
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-11-22 17:14:21 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-11-22 17:14:21 +0000
commit	f7b705b44cfff881d12501eb8061ebdf20627632 (patch)
tree	b786427e010f0529c41a8055b6f4769e4023f9b7 /src/Text/Pandoc/Shared.hs
parent	8d334b84cc51b16a6f010cd775f22b8072de9e7a (diff)
download	pandoc-f7b705b44cfff881d12501eb8061ebdf20627632.tar.gz