diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-11-22 17:14:21 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-11-22 17:14:21 +0000 |
commit | f7b705b44cfff881d12501eb8061ebdf20627632 (patch) | |
tree | b786427e010f0529c41a8055b6f4769e4023f9b7 /src/Text/Pandoc/Shared.hs | |
parent | 8d334b84cc51b16a6f010cd775f22b8072de9e7a (diff) | |
download | pandoc-f7b705b44cfff881d12501eb8061ebdf20627632.tar.gz |
Implemented implicit reference-style links to section headers in markdown.
For example, if you have a header '# Supported architectures', you can
link to it with '[Supported architectures]'. If there are multiple
headers with this label, the link will point to the first of them.
Implicit references are always overridden by explicitly specified references.
Addresses Issue #20.
+ Moved isPunctuation, uniqueIdentifiers, and inlineListToIdentifier from
Text.Pandoc.Writers.HTML to Text.Pandoc.Shared.
+ Added stateHeaders to ParserState. This holds a list of header texts
used in the document, and is used to construct implicit header references.
+ In Text.Pandoc.Readers.Markdown, added call to headerReference
parser in initial parsing pass, constructing a list of section header
labels. This is then passed to uniqueIdentifiers to produce
identifiers, and a list of implicit references is constructed. This is
added to the end of the explicitly specified references, so it will be
overridden by explicitly specified references. All of this processing
is skipped if --strict was specified.
+ Modified documentation in README.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1086 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 56 |
1 files changed, 53 insertions, 3 deletions
```diff
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index ce07ccd24..e0e93c189 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -34,6 +34,7 @@ module Text.Pandoc.Shared (
                      substitute,
                      joinWithSep,
                      -- * Text processing
+                     isPunctuation,
                      backslashEscapes,
                      escapeStringUsing,
                      stripTrailingNewlines,
@@ -91,6 +92,7 @@ module Text.Pandoc.Shared (
                      Element (..),
                      hierarchicalize,
                      isHeaderBlock,
+                     uniqueIdentifiers,
                      -- * Writer options
                      WriterOptions (..),
                      defaultWriterOptions
@@ -102,7 +104,7 @@ import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty )
 import qualified Text.PrettyPrint.HughesPJ as PP
 import Text.Pandoc.CharacterReferences ( characterReference )
 import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
-import Data.List ( find, isPrefixOf )
+import Data.List ( find, isPrefixOf, intersperse )
 import Control.Monad ( join )

 --
@@ -144,6 +146,15 @@ joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
 -- Text processing
 --

+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+  let c' = ord c
+  in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
+        c' >= 0xE000 && c' <= 0xE0FF
+        then True
+        else False
+
 -- | Returns an association list of backslash escapes for the
 -- designated characters.
 backslashEscapes :: [Char] -- ^ list of special characters to escape
@@ -566,7 +577,8 @@ data ParserState = ParserState
       stateStrict :: Bool, -- ^ Use strict markdown syntax?
       stateSmart :: Bool, -- ^ Use smart typography?
       stateColumns :: Int, -- ^ Number of columns in terminal
-      stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
+      stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
+      stateHeaders :: [[Inline]] -- ^ List of header texts used
     }
     deriving Show

@@ -585,7 +597,8 @@ defaultParserState =
                   stateStrict = False,
                   stateSmart = False,
                   stateColumns = 80,
-                  stateHeaderTable = [] }
+                  stateHeaderTable = [],
+                  stateHeaders = [] }

 data HeaderType
     = SingleHeader Char -- ^ Single line of characters underneath
@@ -787,6 +800,43 @@ isHeaderBlock :: Block -> Bool
 isHeaderBlock (Header _ _) = True
 isHeaderBlock _ = False

+-- | Convert Pandoc inline list to plain text identifier.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier [] = ""
+inlineListToIdentifier (x:xs) =
+  xAsText ++ inlineListToIdentifier xs
+  where xAsText = case x of
+          Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
+                   concat $ intersperse "-" $ words $ map toLower s
+          Emph lst -> inlineListToIdentifier lst
+          Strikeout lst -> inlineListToIdentifier lst
+          Superscript lst -> inlineListToIdentifier lst
+          Subscript lst -> inlineListToIdentifier lst
+          Strong lst -> inlineListToIdentifier lst
+          Quoted _ lst -> inlineListToIdentifier lst
+          Code s -> s
+          Space -> "-"
+          EmDash -> "-"
+          EnDash -> "-"
+          Apostrophe -> ""
+          Ellipses -> ""
+          LineBreak -> "-"
+          TeX _ -> ""
+          HtmlInline _ -> ""
+          Link lst _ -> inlineListToIdentifier lst
+          Image lst _ -> inlineListToIdentifier lst
+          Note _ -> ""
+
+-- | Return unique identifiers for list of inline lists.
+uniqueIdentifiers :: [[Inline]] -> [String]
+uniqueIdentifiers ls =
+  let addIdentifier (nonuniqueIds, uniqueIds) l =
+        let new = inlineListToIdentifier l
+            matches = length $ filter (== new) nonuniqueIds
+            new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
+        in (new:nonuniqueIds, new':uniqueIds)
+  in reverse $ snd $ foldl addIdentifier ([],[]) ls
+
 --
 -- Writer options
 --
```