diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Extensions.hs | 13 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/CommonMark.hs | 35 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/ContentReader.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 59 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/AsciiDoc.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Muse.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Powerpoint/Presentation.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Texinfo.hs | 3 |
13 files changed, 81 insertions, 70 deletions
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 1df21e24f..f2599ed6d 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -56,7 +56,7 @@ import Data.Bits (clearBit, setBit, testBit, (.|.)) import Data.Data (Data) import Data.Typeable (Typeable) import GHC.Generics (Generic) -import Text.Pandoc.Shared (safeRead) +import Safe (readMay) import Text.Parsec #ifdef DERIVE_JSON_VIA_TH @@ -96,7 +96,8 @@ data Extension = | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup | Ext_angle_brackets_escapable -- ^ Make < and > escapable - | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers + | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers; + -- presupposes Ext_auto_identifiers | Ext_auto_identifiers -- ^ Automatic identifiers for headers | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks @@ -123,8 +124,9 @@ data Extension = | Ext_fenced_divs -- ^ Allow fenced div syntax ::: | Ext_footnotes -- ^ Pandoc\/PHP\/MMD style footnotes | Ext_four_space_rule -- ^ Require 4-space indent for list contents - | Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, using - -- GitHub's method for generating identifiers + | Ext_gfm_auto_identifiers -- ^ Use GitHub's method for generating + -- header identifiers; presupposes + -- Ext_auto_identifiers | Ext_grid_tables -- ^ Grid tables (pandoc, reST) | Ext_hard_line_breaks -- ^ All newlines become hard line breaks | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} @@ -265,6 +267,7 @@ githubMarkdownExtensions = extensionsFromList , Ext_pipe_tables , Ext_raw_html , Ext_fenced_code_blocks + , Ext_auto_identifiers , Ext_gfm_auto_identifiers , Ext_backtick_code_blocks , Ext_autolink_bare_uris @@ -384,7 +387,7 @@ parseFormatSpec = parse formatSpec "" extMod = do polarity <- oneOf "-+" name <- many $ noneOf "-+" - ext <- case safeRead ("Ext_" ++ name) of + ext <- case readMay ("Ext_" ++ name) of Just n -> return n Nothing | name == "lhs" -> return Ext_literate_haskell diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index a40a891f6..a5588e4f1 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1291,7 +1291,7 @@ registerHeader (ident,classes,kvs) header' = do let insert' = M.insertWith (\_new old -> old) if null ident && Ext_auto_identifiers `extensionEnabled` exts then do - let id' = uniqueIdent (B.toList header') ids + let id' = uniqueIdent exts (B.toList header') ids let id'' = if Ext_ascii_identifiers `extensionEnabled` exts then mapMaybe toAsciiChar id' else id' diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 5a2e5784a..3cc75e2a1 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -36,23 +36,20 @@ where import Prelude import CMarkGFM import Control.Monad.State -import Data.Char (isAlphaNum, isSpace, toLower) import Data.List (groupBy) -import qualified Data.Map as Map -import Data.Maybe (mapMaybe) +import qualified Data.Set as Set import Data.Text (Text, unpack) -import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Options -import Text.Pandoc.Shared (stringify) +import Text.Pandoc.Shared (uniqueIdent) import Text.Pandoc.Walk (walkM) -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ - (if isEnabled Ext_gfm_auto_identifiers opts + (if isEnabled Ext_auto_identifiers opts then addHeaderIdentifiers opts else id) $ nodeToPandoc opts $ commonmarkToNode opts' exts s @@ -78,30 +75,14 @@ convertEmojis s = addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty -addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block +addHeaderId :: ReaderOptions -> Block -> State (Set.Set String) Block addHeaderId opts (Header lev (_,classes,kvs) ils) = do - idmap <- get - let ident = toIdent opts ils - ident' <- case Map.lookup ident idmap of - Nothing -> do - put (Map.insert ident 1 idmap) - return ident - Just i -> do - put (Map.adjust (+ 1) ident idmap) - return (ident ++ "-" ++ show i) - return $ Header lev (ident',classes,kvs) ils + ids <- get + let ident = uniqueIdent (readerExtensions opts) ils ids + modify (Set.insert ident) + return $ Header lev (ident,classes,kvs) ils addHeaderId _ x = return x -toIdent :: ReaderOptions -> [Inline] -> String -toIdent opts = - filterAscii . filterPunct . spaceToDash . map toLower. stringify - where - filterAscii = if isEnabled Ext_ascii_identifiers opts - then mapMaybe toAsciiChar - else id - filterPunct = filter (\c -> isAlphaNum c || c == '_' || c == '-') - spaceToDash = map (\c -> if isSpace c then '-' else c) - nodeToPandoc :: ReaderOptions -> Node -> Pandoc nodeToPandoc opts (Node _ DOCUMENT nodes) = Pandoc nullMeta $ foldr (addBlock opts) [] nodes diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index ca9f8c8dd..621239bc7 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -442,9 +442,11 @@ parPartToInlines' (BookMark _ anchor) = (modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap}) return mempty Nothing -> do + exts <- readerExtensions <$> asks docxOptions let newAnchor = if not inHdrBool && anchor `elem` M.elems anchorMap - then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap) + then uniqueIdent exts [Str anchor] + (Set.fromList $ M.elems anchorMap) else anchor unless inHdrBool (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) @@ -487,8 +489,9 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) | (c:_) <- filter isAnchorSpan ils , (Span (anchIdent, ["anchor"], _) cIls) <- c = do hdrIDMap <- gets docxAnchorMap + exts <- readerExtensions <$> asks docxOptions let newIdent = if null ident - then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) + then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident newIls = concatMap f ils where f il | il == c = cIls | otherwise = [il] @@ -499,8 +502,9 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) makeHeaderAnchor' (Header n (ident, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap + exts <- readerExtensions <$> asks docxOptions let newIdent = if null ident - then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) + then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) ils diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 78881914d..097516eb4 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -54,6 +54,7 @@ import qualified Text.XML.Light as XML import Text.Pandoc.Builder import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Shared +import Text.Pandoc.Extensions (extensionsFromList, Extension(..)) import Text.Pandoc.Readers.Odt.Base import Text.Pandoc.Readers.Odt.Namespaces @@ -253,7 +254,9 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do getHeaderAnchor :: OdtReaderSafe Inlines Anchor getHeaderAnchor = proc title -> do state <- getExtraState -< () - let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state) + let exts = extensionsFromList [Ext_auto_identifiers] + let anchor = uniqueIdent exts (toList title) + (Set.fromList $ usedAnchors state) modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor @@ -768,6 +771,7 @@ read_maybe_nested_img_frame = matchingElement NsDraw "frame" read_frame :: OdtReaderSafe Inlines Inlines read_frame = proc blocks -> do + let exts = extensionsFromList [Ext_auto_identifiers] w <- ( findAttr' NsSVG "width" ) -< () h <- ( findAttr' NsSVG "height" ) -< () titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks @@ -776,7 +780,8 @@ read_frame = _ <- updateMediaWithResource -< resource alt <- (matchChildContent [] read_plain_text) -< blocks arr (uncurry4 imageWith ) -< - (image_attributes w h, src, inlineListToIdentifier (toList titleNodes), alt) + (image_attributes w h, src, + inlineListToIdentifier exts (toList titleNodes), alt) image_attributes :: Maybe String -> Maybe String -> Attr image_attributes x y = diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 90789447f..0b29347a3 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -116,8 +116,7 @@ import Control.Monad (MonadPlus (..), msum, unless) import qualified Control.Monad.State.Strict as S import qualified Data.ByteString.Lazy as BL import qualified Data.Bifunctor as Bifunctor -import Data.Char (isAlpha, isDigit, isLetter, isLower, isSpace, isUpper, - toLower) +import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum) import Data.Data (Data, Typeable) import Data.List (find, intercalate, intersperse, stripPrefix) import qualified Data.Map as M @@ -137,7 +136,9 @@ import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions, import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..)) import qualified Text.Pandoc.Builder as B import Data.Time +import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Definition +import Text.Pandoc.Extensions (Extensions, Extension(..), extensionEnabled) import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.Walk @@ -483,18 +484,29 @@ instance Walkable Block Element where query f (Blk x) = query f x query f (Sec _ _ _ ils elts) = query f ils `mappend` query f elts - -- | Convert Pandoc inline list to plain text identifier. HTML -- identifiers must start with a letter, and may contain only -- letters, digits, and the characters _-. -inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier = - dropWhile (not . isAlpha) . intercalate "-" . words . - map (nbspToSp . toLower) . - filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") . - stringify - where nbspToSp '\160' = ' ' - nbspToSp x = x +inlineListToIdentifier :: Extensions -> [Inline] -> String +inlineListToIdentifier exts = + dropNonLetter . filterAscii . toIdent . stringify + where + dropNonLetter + | extensionEnabled Ext_gfm_auto_identifiers exts = id + | otherwise = dropWhile (not . isAlpha) + filterAscii + | extensionEnabled Ext_ascii_identifiers exts + = mapMaybe toAsciiChar + | otherwise = id + toIdent + | extensionEnabled Ext_gfm_auto_identifiers exts = + filterPunct . spaceToDash . map toLower + | otherwise = intercalate "-" . words . filterPunct . map toLower + filterPunct = filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c) + isAllowedPunct c + | extensionEnabled Ext_gfm_auto_identifiers exts = c == '_' || c == '-' + | otherwise = c == '_' || c == '-' || c == '.' + spaceToDash = map (\c -> if isSpace c then '-' else c) -- | Convert list of Pandoc blocks into (hierarchical) list of Elements hierarchicalize :: [Block] -> [Element] @@ -530,17 +542,20 @@ headerLtEq _ _ = False -- | Generate a unique identifier from a list of inlines. -- Second argument is a list of already used identifiers. -uniqueIdent :: [Inline] -> Set.Set String -> String -uniqueIdent title' usedIdents - = let baseIdent = case inlineListToIdentifier title' of - "" -> "section" - x -> x - numIdent n = baseIdent ++ "-" ++ show n - in if baseIdent `Set.member` usedIdents - then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of - Just x -> numIdent x - Nothing -> baseIdent -- if we have more than 60,000, allow repeats - else baseIdent +uniqueIdent :: Extensions -> [Inline] -> Set.Set String -> String +uniqueIdent exts title' usedIdents = + if baseIdent `Set.member` usedIdents + then case find (\x -> not $ numIdent x `Set.member` usedIdents) + ([1..60000] :: [Int]) of + Just x -> numIdent x + Nothing -> baseIdent + -- if we have more than 60,000, allow repeats + else baseIdent + where + baseIdent = case inlineListToIdentifier exts title' of + "" -> "section" + x -> x + numIdent n = baseIdent ++ "-" ++ show n -- | True if block is a Header block. isHeaderBlock :: Block -> Bool diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 3287779c7..2f92e93ad 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -170,7 +170,7 @@ blockToAsciiDoc _ HorizontalRule = blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do contents <- inlineListToAsciiDoc opts inlines ids <- gets autoIds - let autoId = uniqueIdent inlines ids + let autoId = uniqueIdent (writerExtensions opts) inlines ids modify $ \st -> st{ autoIds = Set.insert autoId ids } let identifier = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 96b8c88ed..ab0cf940c 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -495,7 +495,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do -- body pages -- add level 1 header to beginning if none there - let blocks' = addIdentifiers + let blocks' = addIdentifiers opts $ case blocks of (Header 1 _ _ : _) -> blocks _ -> Header 1 ("",["unnumbered"],[]) @@ -1056,12 +1056,12 @@ showChapter :: Int -> String showChapter = printf "ch%03d.xhtml" -- Add identifiers to any headers without them. -addIdentifiers :: [Block] -> [Block] -addIdentifiers bs = evalState (mapM go bs) Set.empty +addIdentifiers :: WriterOptions -> [Block] -> [Block] +addIdentifiers opts bs = evalState (mapM go bs) Set.empty where go (Header n (ident,classes,kvs) ils) = do ids <- get let ident' = if null ident - then uniqueIdent ils ids + then uniqueIdent (writerExtensions opts) ils ids else ident modify $ Set.insert ident' return $ Header n (ident',classes,kvs) ils diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 03689e95d..cba57a93b 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -508,7 +508,7 @@ blockToMarkdown' opts (Header level attr inlines) = do -- we calculate the id that would be used by auto_identifiers -- so we know whether to print an explicit identifier ids <- gets stIds - let autoId = uniqueIdent inlines ids + let autoId = uniqueIdent (writerExtensions opts) inlines ids modify $ \st -> st{ stIds = Set.insert autoId ids } let attr' = case attr of ("",[],[]) -> empty diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index 72a674cdc..2001c56fd 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -270,7 +270,7 @@ blockToMuse (Header level (ident,_,_) inlines) = do topLevel <- asks envTopLevel contents <- local (\env -> env { envOneLine = True }) $ inlineListToMuse' inlines ids <- gets stIds - let autoId = uniqueIdent inlines ids + let autoId = uniqueIdent (writerExtensions opts) inlines ids modify $ \st -> st{ stIds = Set.insert autoId ids } let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index 22f2a46e7..7787e991b 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -735,6 +735,7 @@ makeEndNotesSlideBlocks :: Pres [Block] makeEndNotesSlideBlocks = do noteIds <- gets stNoteIds slideLevel <- asks envSlideLevel + exts <- writerExtensions <$> asks envOpts meta <- asks envMetadata -- Get identifiers so we can give the notes section a unique ident. anchorSet <- M.keysSet <$> gets stAnchorMap @@ -743,7 +744,7 @@ makeEndNotesSlideBlocks = do else let title = case lookupMetaInlines "notes-title" meta of [] -> [Str "Notes"] ls -> ls - ident = Shared.uniqueIdent title anchorSet + ident = Shared.uniqueIdent exts title anchorSet hdr = Header slideLevel (ident, [], []) title blks = concatMap (\(n, bs) -> makeNoteEntry n bs) $ M.toList noteIds diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index d64529c21..ea7c04e9b 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -260,7 +260,8 @@ blockToRST (Header level (name,classes,_) inlines) = do contents <- inlineListToRST inlines -- we calculate the id that would be used by auto_identifiers -- so we know whether to print an explicit identifier - let autoId = uniqueIdent inlines mempty + opts <- gets stOptions + let autoId = uniqueIdent (writerExtensions opts) inlines mempty isTopLevel <- gets stTopLevel if isTopLevel then do diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 4a4dde461..7bec145c5 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -231,7 +231,8 @@ blockToTexinfo (Header level _ lst) node <- inlineListForNode lst txt <- inlineListToTexinfo lst idsUsed <- gets stIdentifiers - let id' = uniqueIdent lst idsUsed + opts <- gets stOptions + let id' = uniqueIdent (writerExtensions opts) lst idsUsed modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } sec <- seccmd level return $ if (level > 0) && (level <= 4) |