aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Extensions.hs13
-rw-r--r--src/Text/Pandoc/Parsing.hs2
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs35
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs10
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs9
-rw-r--r--src/Text/Pandoc/Shared.hs59
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs2
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs8
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs2
-rw-r--r--src/Text/Pandoc/Writers/Muse.hs2
-rw-r--r--src/Text/Pandoc/Writers/Powerpoint/Presentation.hs3
-rw-r--r--src/Text/Pandoc/Writers/RST.hs3
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs3
13 files changed, 81 insertions, 70 deletions
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 1df21e24f..f2599ed6d 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -56,7 +56,7 @@ import Data.Bits (clearBit, setBit, testBit, (.|.))
import Data.Data (Data)
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
-import Text.Pandoc.Shared (safeRead)
+import Safe (readMay)
import Text.Parsec
#ifdef DERIVE_JSON_VIA_TH
@@ -96,7 +96,8 @@ data Extension =
| Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable
| Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup
| Ext_angle_brackets_escapable -- ^ Make < and > escapable
- | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers
+ | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers;
+ -- presupposes Ext_auto_identifiers
| Ext_auto_identifiers -- ^ Automatic identifiers for headers
| Ext_autolink_bare_uris -- ^ Make all absolute URIs into links
| Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks
@@ -123,8 +124,9 @@ data Extension =
| Ext_fenced_divs -- ^ Allow fenced div syntax :::
| Ext_footnotes -- ^ Pandoc\/PHP\/MMD style footnotes
| Ext_four_space_rule -- ^ Require 4-space indent for list contents
- | Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, using
- -- GitHub's method for generating identifiers
+ | Ext_gfm_auto_identifiers -- ^ Use GitHub's method for generating
+ -- header identifiers; presupposes
+ -- Ext_auto_identifiers
| Ext_grid_tables -- ^ Grid tables (pandoc, reST)
| Ext_hard_line_breaks -- ^ All newlines become hard line breaks
| Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v}
@@ -265,6 +267,7 @@ githubMarkdownExtensions = extensionsFromList
, Ext_pipe_tables
, Ext_raw_html
, Ext_fenced_code_blocks
+ , Ext_auto_identifiers
, Ext_gfm_auto_identifiers
, Ext_backtick_code_blocks
, Ext_autolink_bare_uris
@@ -384,7 +387,7 @@ parseFormatSpec = parse formatSpec ""
extMod = do
polarity <- oneOf "-+"
name <- many $ noneOf "-+"
- ext <- case safeRead ("Ext_" ++ name) of
+ ext <- case readMay ("Ext_" ++ name) of
Just n -> return n
Nothing
| name == "lhs" -> return Ext_literate_haskell
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index a40a891f6..a5588e4f1 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1291,7 +1291,7 @@ registerHeader (ident,classes,kvs) header' = do
let insert' = M.insertWith (\_new old -> old)
if null ident && Ext_auto_identifiers `extensionEnabled` exts
then do
- let id' = uniqueIdent (B.toList header') ids
+ let id' = uniqueIdent exts (B.toList header') ids
let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
then mapMaybe toAsciiChar id'
else id'
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 5a2e5784a..3cc75e2a1 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -36,23 +36,20 @@ where
import Prelude
import CMarkGFM
import Control.Monad.State
-import Data.Char (isAlphaNum, isSpace, toLower)
import Data.List (groupBy)
-import qualified Data.Map as Map
-import Data.Maybe (mapMaybe)
+import qualified Data.Set as Set
import Data.Text (Text, unpack)
-import Text.Pandoc.Asciify (toAsciiChar)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Options
-import Text.Pandoc.Shared (stringify)
+import Text.Pandoc.Shared (uniqueIdent)
import Text.Pandoc.Walk (walkM)
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMark opts s = return $
- (if isEnabled Ext_gfm_auto_identifiers opts
+ (if isEnabled Ext_auto_identifiers opts
then addHeaderIdentifiers opts
else id) $
nodeToPandoc opts $ commonmarkToNode opts' exts s
@@ -78,30 +75,14 @@ convertEmojis s =
addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc
addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty
-addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block
+addHeaderId :: ReaderOptions -> Block -> State (Set.Set String) Block
addHeaderId opts (Header lev (_,classes,kvs) ils) = do
- idmap <- get
- let ident = toIdent opts ils
- ident' <- case Map.lookup ident idmap of
- Nothing -> do
- put (Map.insert ident 1 idmap)
- return ident
- Just i -> do
- put (Map.adjust (+ 1) ident idmap)
- return (ident ++ "-" ++ show i)
- return $ Header lev (ident',classes,kvs) ils
+ ids <- get
+ let ident = uniqueIdent (readerExtensions opts) ils ids
+ modify (Set.insert ident)
+ return $ Header lev (ident,classes,kvs) ils
addHeaderId _ x = return x
-toIdent :: ReaderOptions -> [Inline] -> String
-toIdent opts =
- filterAscii . filterPunct . spaceToDash . map toLower. stringify
- where
- filterAscii = if isEnabled Ext_ascii_identifiers opts
- then mapMaybe toAsciiChar
- else id
- filterPunct = filter (\c -> isAlphaNum c || c == '_' || c == '-')
- spaceToDash = map (\c -> if isSpace c then '-' else c)
-
nodeToPandoc :: ReaderOptions -> Node -> Pandoc
nodeToPandoc opts (Node _ DOCUMENT nodes) =
Pandoc nullMeta $ foldr (addBlock opts) [] nodes
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index ca9f8c8dd..621239bc7 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -442,9 +442,11 @@ parPartToInlines' (BookMark _ anchor) =
(modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap})
return mempty
Nothing -> do
+ exts <- readerExtensions <$> asks docxOptions
let newAnchor =
if not inHdrBool && anchor `elem` M.elems anchorMap
- then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
+ then uniqueIdent exts [Str anchor]
+ (Set.fromList $ M.elems anchorMap)
else anchor
unless inHdrBool
(modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
@@ -487,8 +489,9 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
| (c:_) <- filter isAnchorSpan ils
, (Span (anchIdent, ["anchor"], _) cIls) <- c = do
hdrIDMap <- gets docxAnchorMap
+ exts <- readerExtensions <$> asks docxOptions
let newIdent = if null ident
- then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
+ then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
newIls = concatMap f ils where f il | il == c = cIls
| otherwise = [il]
@@ -499,8 +502,9 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
do
hdrIDMap <- gets docxAnchorMap
+ exts <- readerExtensions <$> asks docxOptions
let newIdent = if null ident
- then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
+ then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
return $ Header n (newIdent, classes, kvs) ils
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 78881914d..097516eb4 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -54,6 +54,7 @@ import qualified Text.XML.Light as XML
import Text.Pandoc.Builder
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
import Text.Pandoc.Shared
+import Text.Pandoc.Extensions (extensionsFromList, Extension(..))
import Text.Pandoc.Readers.Odt.Base
import Text.Pandoc.Readers.Odt.Namespaces
@@ -253,7 +254,9 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do
getHeaderAnchor :: OdtReaderSafe Inlines Anchor
getHeaderAnchor = proc title -> do
state <- getExtraState -< ()
- let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state)
+ let exts = extensionsFromList [Ext_auto_identifiers]
+ let anchor = uniqueIdent exts (toList title)
+ (Set.fromList $ usedAnchors state)
modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor
@@ -768,6 +771,7 @@ read_maybe_nested_img_frame = matchingElement NsDraw "frame"
read_frame :: OdtReaderSafe Inlines Inlines
read_frame =
proc blocks -> do
+ let exts = extensionsFromList [Ext_auto_identifiers]
w <- ( findAttr' NsSVG "width" ) -< ()
h <- ( findAttr' NsSVG "height" ) -< ()
titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks
@@ -776,7 +780,8 @@ read_frame =
_ <- updateMediaWithResource -< resource
alt <- (matchChildContent [] read_plain_text) -< blocks
arr (uncurry4 imageWith ) -<
- (image_attributes w h, src, inlineListToIdentifier (toList titleNodes), alt)
+ (image_attributes w h, src,
+ inlineListToIdentifier exts (toList titleNodes), alt)
image_attributes :: Maybe String -> Maybe String -> Attr
image_attributes x y =
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 90789447f..0b29347a3 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -116,8 +116,7 @@ import Control.Monad (MonadPlus (..), msum, unless)
import qualified Control.Monad.State.Strict as S
import qualified Data.ByteString.Lazy as BL
import qualified Data.Bifunctor as Bifunctor
-import Data.Char (isAlpha, isDigit, isLetter, isLower, isSpace, isUpper,
- toLower)
+import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum)
import Data.Data (Data, Typeable)
import Data.List (find, intercalate, intersperse, stripPrefix)
import qualified Data.Map as M
@@ -137,7 +136,9 @@ import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions,
import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..))
import qualified Text.Pandoc.Builder as B
import Data.Time
+import Text.Pandoc.Asciify (toAsciiChar)
import Text.Pandoc.Definition
+import Text.Pandoc.Extensions (Extensions, Extension(..), extensionEnabled)
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Walk
@@ -483,18 +484,29 @@ instance Walkable Block Element where
query f (Blk x) = query f x
query f (Sec _ _ _ ils elts) = query f ils `mappend` query f elts
-
-- | Convert Pandoc inline list to plain text identifier. HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
-inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier =
- dropWhile (not . isAlpha) . intercalate "-" . words .
- map (nbspToSp . toLower) .
- filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
- stringify
- where nbspToSp '\160' = ' '
- nbspToSp x = x
+inlineListToIdentifier :: Extensions -> [Inline] -> String
+inlineListToIdentifier exts =
+ dropNonLetter . filterAscii . toIdent . stringify
+ where
+ dropNonLetter
+ | extensionEnabled Ext_gfm_auto_identifiers exts = id
+ | otherwise = dropWhile (not . isAlpha)
+ filterAscii
+ | extensionEnabled Ext_ascii_identifiers exts
+ = mapMaybe toAsciiChar
+ | otherwise = id
+ toIdent
+ | extensionEnabled Ext_gfm_auto_identifiers exts =
+ filterPunct . spaceToDash . map toLower
+ | otherwise = intercalate "-" . words . filterPunct . map toLower
+ filterPunct = filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c)
+ isAllowedPunct c
+ | extensionEnabled Ext_gfm_auto_identifiers exts = c == '_' || c == '-'
+ | otherwise = c == '_' || c == '-' || c == '.'
+ spaceToDash = map (\c -> if isSpace c then '-' else c)
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
@@ -530,17 +542,20 @@ headerLtEq _ _ = False
-- | Generate a unique identifier from a list of inlines.
-- Second argument is a list of already used identifiers.
-uniqueIdent :: [Inline] -> Set.Set String -> String
-uniqueIdent title' usedIdents
- = let baseIdent = case inlineListToIdentifier title' of
- "" -> "section"
- x -> x
- numIdent n = baseIdent ++ "-" ++ show n
- in if baseIdent `Set.member` usedIdents
- then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
- Just x -> numIdent x
- Nothing -> baseIdent -- if we have more than 60,000, allow repeats
- else baseIdent
+uniqueIdent :: Extensions -> [Inline] -> Set.Set String -> String
+uniqueIdent exts title' usedIdents =
+ if baseIdent `Set.member` usedIdents
+ then case find (\x -> not $ numIdent x `Set.member` usedIdents)
+ ([1..60000] :: [Int]) of
+ Just x -> numIdent x
+ Nothing -> baseIdent
+ -- if we have more than 60,000, allow repeats
+ else baseIdent
+ where
+ baseIdent = case inlineListToIdentifier exts title' of
+ "" -> "section"
+ x -> x
+ numIdent n = baseIdent ++ "-" ++ show n
-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 3287779c7..2f92e93ad 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -170,7 +170,7 @@ blockToAsciiDoc _ HorizontalRule =
blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do
contents <- inlineListToAsciiDoc opts inlines
ids <- gets autoIds
- let autoId = uniqueIdent inlines ids
+ let autoId = uniqueIdent (writerExtensions opts) inlines ids
modify $ \st -> st{ autoIds = Set.insert autoId ids }
let identifier = if null ident ||
(isEnabled Ext_auto_identifiers opts && ident == autoId)
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 96b8c88ed..ab0cf940c 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -495,7 +495,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
-- body pages
-- add level 1 header to beginning if none there
- let blocks' = addIdentifiers
+ let blocks' = addIdentifiers opts
$ case blocks of
(Header 1 _ _ : _) -> blocks
_ -> Header 1 ("",["unnumbered"],[])
@@ -1056,12 +1056,12 @@ showChapter :: Int -> String
showChapter = printf "ch%03d.xhtml"
-- Add identifiers to any headers without them.
-addIdentifiers :: [Block] -> [Block]
-addIdentifiers bs = evalState (mapM go bs) Set.empty
+addIdentifiers :: WriterOptions -> [Block] -> [Block]
+addIdentifiers opts bs = evalState (mapM go bs) Set.empty
where go (Header n (ident,classes,kvs) ils) = do
ids <- get
let ident' = if null ident
- then uniqueIdent ils ids
+ then uniqueIdent (writerExtensions opts) ils ids
else ident
modify $ Set.insert ident'
return $ Header n (ident',classes,kvs) ils
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 03689e95d..cba57a93b 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -508,7 +508,7 @@ blockToMarkdown' opts (Header level attr inlines) = do
-- we calculate the id that would be used by auto_identifiers
-- so we know whether to print an explicit identifier
ids <- gets stIds
- let autoId = uniqueIdent inlines ids
+ let autoId = uniqueIdent (writerExtensions opts) inlines ids
modify $ \st -> st{ stIds = Set.insert autoId ids }
let attr' = case attr of
("",[],[]) -> empty
diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs
index 72a674cdc..2001c56fd 100644
--- a/src/Text/Pandoc/Writers/Muse.hs
+++ b/src/Text/Pandoc/Writers/Muse.hs
@@ -270,7 +270,7 @@ blockToMuse (Header level (ident,_,_) inlines) = do
topLevel <- asks envTopLevel
contents <- local (\env -> env { envOneLine = True }) $ inlineListToMuse' inlines
ids <- gets stIds
- let autoId = uniqueIdent inlines ids
+ let autoId = uniqueIdent (writerExtensions opts) inlines ids
modify $ \st -> st{ stIds = Set.insert autoId ids }
let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId)
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
index 22f2a46e7..7787e991b 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
@@ -735,6 +735,7 @@ makeEndNotesSlideBlocks :: Pres [Block]
makeEndNotesSlideBlocks = do
noteIds <- gets stNoteIds
slideLevel <- asks envSlideLevel
+ exts <- writerExtensions <$> asks envOpts
meta <- asks envMetadata
-- Get identifiers so we can give the notes section a unique ident.
anchorSet <- M.keysSet <$> gets stAnchorMap
@@ -743,7 +744,7 @@ makeEndNotesSlideBlocks = do
else let title = case lookupMetaInlines "notes-title" meta of
[] -> [Str "Notes"]
ls -> ls
- ident = Shared.uniqueIdent title anchorSet
+ ident = Shared.uniqueIdent exts title anchorSet
hdr = Header slideLevel (ident, [], []) title
blks = concatMap (\(n, bs) -> makeNoteEntry n bs) $
M.toList noteIds
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index d64529c21..ea7c04e9b 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -260,7 +260,8 @@ blockToRST (Header level (name,classes,_) inlines) = do
contents <- inlineListToRST inlines
-- we calculate the id that would be used by auto_identifiers
-- so we know whether to print an explicit identifier
- let autoId = uniqueIdent inlines mempty
+ opts <- gets stOptions
+ let autoId = uniqueIdent (writerExtensions opts) inlines mempty
isTopLevel <- gets stTopLevel
if isTopLevel
then do
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 4a4dde461..7bec145c5 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -231,7 +231,8 @@ blockToTexinfo (Header level _ lst)
node <- inlineListForNode lst
txt <- inlineListToTexinfo lst
idsUsed <- gets stIdentifiers
- let id' = uniqueIdent lst idsUsed
+ opts <- gets stOptions
+ let id' = uniqueIdent (writerExtensions opts) lst idsUsed
modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed }
sec <- seccmd level
return $ if (level > 0) && (level <= 4)