aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Shared.hs
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-11-22 17:14:21 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-11-22 17:14:21 +0000
commitf7b705b44cfff881d12501eb8061ebdf20627632 (patch)
treeb786427e010f0529c41a8055b6f4769e4023f9b7 /src/Text/Pandoc/Shared.hs
parent8d334b84cc51b16a6f010cd775f22b8072de9e7a (diff)
downloadpandoc-f7b705b44cfff881d12501eb8061ebdf20627632.tar.gz
Implemented implicit reference-style links to section headers in markdown.
For example, if you have a header '# Supported architectures', you can link to it with '[Supported architectures]'. If there are multiple headers with this label, the link will point to the first of them. Implicit references are always overridden by explicitly specified references. Addresses Issue #20. + Moved isPunctuation, uniqueIdentifiers, and inlineListToIdentifier from Text.Pandoc.Writers.HTML to Text.Pandoc.Shared. + Added stHeaders to ParserState. This holds a list of header texts used in the document, and is used to construct implicit header references. + In Text.Pandoc.Readers.Markdown, added call to headerReference parser in initial parsing pass, constructing a list of section header labels. This is then passed to uniqueIdentifiers to produce identifiers, and a list of implicit references is constructed. This is added to the end of the explicitly specified references, so it will be overridden by explicitly specified references. All of this processing is skipped if --strict was specified. + Modified documentation in README. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1086 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r--src/Text/Pandoc/Shared.hs56
1 files changed, 53 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index ce07ccd24..e0e93c189 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -34,6 +34,7 @@ module Text.Pandoc.Shared (
substitute,
joinWithSep,
-- * Text processing
+ isPunctuation,
backslashEscapes,
escapeStringUsing,
stripTrailingNewlines,
@@ -91,6 +92,7 @@ module Text.Pandoc.Shared (
Element (..),
hierarchicalize,
isHeaderBlock,
+ uniqueIdentifiers,
-- * Writer options
WriterOptions (..),
defaultWriterOptions
@@ -102,7 +104,7 @@ import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty )
import qualified Text.PrettyPrint.HughesPJ as PP
import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
-import Data.List ( find, isPrefixOf )
+import Data.List ( find, isPrefixOf, intersperse )
import Control.Monad ( join )
--
@@ -144,6 +146,15 @@ joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
-- Text processing
--
+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+ let c' = ord c
+ in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
+ c' >= 0xE000 && c' <= 0xE0FF
+ then True
+ else False
+
-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char] -- ^ list of special characters to escape
@@ -566,7 +577,8 @@ data ParserState = ParserState
stateStrict :: Bool, -- ^ Use strict markdown syntax?
stateSmart :: Bool, -- ^ Use smart typography?
stateColumns :: Int, -- ^ Number of columns in terminal
- stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
+ stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
+ stateHeaders :: [[Inline]] -- ^ List of header texts used
}
deriving Show
@@ -585,7 +597,8 @@ defaultParserState =
stateStrict = False,
stateSmart = False,
stateColumns = 80,
- stateHeaderTable = [] }
+ stateHeaderTable = [],
+ stateHeaders = [] }
data HeaderType
= SingleHeader Char -- ^ Single line of characters underneath
@@ -787,6 +800,43 @@ isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _) = True
isHeaderBlock _ = False
+-- | Convert Pandoc inline list to plain text identifier.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier [] = ""
+inlineListToIdentifier (x:xs) =
+ xAsText ++ inlineListToIdentifier xs
+ where xAsText = case x of
+ Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
+ concat $ intersperse "-" $ words $ map toLower s
+ Emph lst -> inlineListToIdentifier lst
+ Strikeout lst -> inlineListToIdentifier lst
+ Superscript lst -> inlineListToIdentifier lst
+ Subscript lst -> inlineListToIdentifier lst
+ Strong lst -> inlineListToIdentifier lst
+ Quoted _ lst -> inlineListToIdentifier lst
+ Code s -> s
+ Space -> "-"
+ EmDash -> "-"
+ EnDash -> "-"
+ Apostrophe -> ""
+ Ellipses -> ""
+ LineBreak -> "-"
+ TeX _ -> ""
+ HtmlInline _ -> ""
+ Link lst _ -> inlineListToIdentifier lst
+ Image lst _ -> inlineListToIdentifier lst
+ Note _ -> ""
+
+-- | Return unique identifiers for list of inline lists.
+uniqueIdentifiers :: [[Inline]] -> [String]
+uniqueIdentifiers ls =
+ let addIdentifier (nonuniqueIds, uniqueIds) l =
+ let new = inlineListToIdentifier l
+ matches = length $ filter (== new) nonuniqueIds
+ new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
+ in (new:nonuniqueIds, new':uniqueIds)
+ in reverse $ snd $ foldl addIdentifier ([],[]) ls
+
--
-- Writer options
--