diff options
author | John MacFarlane <jgm@berkeley.edu> | 2018-11-11 13:27:25 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-11-11 13:46:23 -0800 |
commit | a36d202e862f3fe0652e4f46cf7713120f50be28 (patch) | |
tree | 57b26749c5aff5e02e379fd5ae9824e513677fd4 /src/Text/Pandoc/Readers | |
parent | ca17ae52465e0194b97d38b9a065a8186cb23a6d (diff) | |
download | pandoc-a36d202e862f3fe0652e4f46cf7713120f50be28.tar.gz |
Text.Pandoc.Shared: add parameter to uniqueIdent, inlineListToIdentifier.
The parameter is Extensions. This allows these functions to
be sensitive to the settings of `Ext_gfm_auto_identifiers` and
`Ext_ascii_identifiers`.
This allows us to use `uniqueIdent` in the CommonMark reader,
replacing some custom code.
It also means that `gfm_auto_identifiers` can now be used
in all formats.
Semantically, `gfm_auto_identifiers` is now a modifier of
`auto_identifiers`; for identifiers to be set, `auto_identifiers`
must be turned on, and then the type of identifier produced
depends on `gfm_auto_identifiers` and `ascii_identifiers` are set.
Closes #5057.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/CommonMark.hs | 35 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/ContentReader.hs | 9 |
3 files changed, 22 insertions, 32 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 5a2e5784a..3cc75e2a1 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -36,23 +36,20 @@ where import Prelude import CMarkGFM import Control.Monad.State -import Data.Char (isAlphaNum, isSpace, toLower) import Data.List (groupBy) -import qualified Data.Map as Map -import Data.Maybe (mapMaybe) +import qualified Data.Set as Set import Data.Text (Text, unpack) -import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Options -import Text.Pandoc.Shared (stringify) +import Text.Pandoc.Shared (uniqueIdent) import Text.Pandoc.Walk (walkM) -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = return $ - (if isEnabled Ext_gfm_auto_identifiers opts + (if isEnabled Ext_auto_identifiers opts then addHeaderIdentifiers opts else id) $ nodeToPandoc opts $ commonmarkToNode opts' exts s @@ -78,30 +75,14 @@ convertEmojis s = addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty -addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block +addHeaderId :: ReaderOptions -> Block -> State (Set.Set String) Block addHeaderId opts (Header lev (_,classes,kvs) ils) = do - idmap <- get - let ident = toIdent opts ils - ident' <- case Map.lookup ident idmap of - Nothing -> do - put (Map.insert ident 1 idmap) - return ident - Just i -> do - put (Map.adjust (+ 1) ident idmap) - return (ident ++ "-" ++ show i) - return $ Header lev (ident',classes,kvs) ils + ids <- get + let ident = uniqueIdent (readerExtensions opts) ils ids + modify (Set.insert ident) + return $ Header lev (ident,classes,kvs) ils addHeaderId _ x = return x -toIdent :: ReaderOptions -> [Inline] -> String -toIdent opts = - filterAscii . filterPunct . spaceToDash . map toLower. stringify - where - filterAscii = if isEnabled Ext_ascii_identifiers opts - then mapMaybe toAsciiChar - else id - filterPunct = filter (\c -> isAlphaNum c || c == '_' || c == '-') - spaceToDash = map (\c -> if isSpace c then '-' else c) - nodeToPandoc :: ReaderOptions -> Node -> Pandoc nodeToPandoc opts (Node _ DOCUMENT nodes) = Pandoc nullMeta $ foldr (addBlock opts) [] nodes diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index ca9f8c8dd..621239bc7 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -442,9 +442,11 @@ parPartToInlines' (BookMark _ anchor) = (modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap}) return mempty Nothing -> do + exts <- readerExtensions <$> asks docxOptions let newAnchor = if not inHdrBool && anchor `elem` M.elems anchorMap - then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap) + then uniqueIdent exts [Str anchor] + (Set.fromList $ M.elems anchorMap) else anchor unless inHdrBool (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) @@ -487,8 +489,9 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) | (c:_) <- filter isAnchorSpan ils , (Span (anchIdent, ["anchor"], _) cIls) <- c = do hdrIDMap <- gets docxAnchorMap + exts <- readerExtensions <$> asks docxOptions let newIdent = if null ident - then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) + then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident newIls = concatMap f ils where f il | il == c = cIls | otherwise = [il] @@ -499,8 +502,9 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) makeHeaderAnchor' (Header n (ident, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap + exts <- readerExtensions <$> asks docxOptions let newIdent = if null ident - then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) + then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) ils diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 78881914d..097516eb4 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -54,6 +54,7 @@ import qualified Text.XML.Light as XML import Text.Pandoc.Builder import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Shared +import Text.Pandoc.Extensions (extensionsFromList, Extension(..)) import Text.Pandoc.Readers.Odt.Base import Text.Pandoc.Readers.Odt.Namespaces @@ -253,7 +254,9 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do getHeaderAnchor :: OdtReaderSafe Inlines Anchor getHeaderAnchor = proc title -> do state <- getExtraState -< () - let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state) + let exts = extensionsFromList [Ext_auto_identifiers] + let anchor = uniqueIdent exts (toList title) + (Set.fromList $ usedAnchors state) modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor @@ -768,6 +771,7 @@ read_maybe_nested_img_frame = matchingElement NsDraw "frame" read_frame :: OdtReaderSafe Inlines Inlines read_frame = proc blocks -> do + let exts = extensionsFromList [Ext_auto_identifiers] w <- ( findAttr' NsSVG "width" ) -< () h <- ( findAttr' NsSVG "height" ) -< () titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks @@ -776,7 +780,8 @@ read_frame = _ <- updateMediaWithResource -< resource alt <- (matchChildContent [] read_plain_text) -< blocks arr (uncurry4 imageWith ) -< - (image_attributes w h, src, inlineListToIdentifier (toList titleNodes), alt) + (image_attributes w h, src, + inlineListToIdentifier exts (toList titleNodes), alt) image_attributes :: Maybe String -> Maybe String -> Attr image_attributes x y = |