aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2016-01-22 10:16:47 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2016-01-22 10:16:47 -0800
commit20170c328f12fc0214c2c50d14c8f7d03ee05e70 (patch)
treeeba75d165fe6aec92b5bcd5f63ec89b328478b1d /src/Text/Pandoc/Readers
parent2a2e3d99d226636166859e63d5259258ba759d5c (diff)
downloadpandoc-20170c328f12fc0214c2c50d14c8f7d03ee05e70.tar.gz
Changed type of Shared.uniqueIdent argument from [String] to Set String.
This avoids performance problems in documents with many identically named headers. Closes #2671.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs7
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs6
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs5
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs3
-rw-r--r--src/Text/Pandoc/Readers/Org.hs5
5 files changed, 15 insertions, 11 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 44f67ce75..1b3269136 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -89,6 +89,7 @@ import Text.TeXMath (writeTeX)
import Data.Default (Default)
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
+import qualified Data.Set as Set
import Control.Monad.Reader
import Control.Monad.State
import Data.Sequence (ViewL(..), viewl)
@@ -350,7 +351,7 @@ parPartToInlines (BookMark _ anchor) =
-- avoid an extra pass.
let newAnchor =
if not inHdrBool && anchor `elem` (M.elems anchorMap)
- then uniqueIdent [Str anchor] (M.elems anchorMap)
+ then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
else anchor
unless inHdrBool
(modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
@@ -393,7 +394,7 @@ makeHeaderAnchor' (Header n (_, classes, kvs) ils)
| (c:cs) <- filter isAnchorSpan ils
, (Span (ident, ["anchor"], _) _) <- c = do
hdrIDMap <- gets docxAnchorMap
- let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+ let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
modify $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap}
return $ Header n (newIdent, classes, kvs) (ils \\ (c:cs))
-- Otherwise we just give it a name, and register that name (associate
@@ -401,7 +402,7 @@ makeHeaderAnchor' (Header n (_, classes, kvs) ils)
makeHeaderAnchor' (Header n (_, classes, kvs) ils) =
do
hdrIDMap <- gets docxAnchorMap
- let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+ let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
return $ Header n (newIdent, classes, kvs) ils
makeHeaderAnchor' blk = return blk
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index a34e2fb5c..c3be1f544 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -68,7 +68,7 @@ import Text.Pandoc.Error
import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Pandoc.Compat.Monoid ((<>))
import Text.Parsec.Error
-
+import qualified Data.Set as Set
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ReaderOptions -- ^ Reader options
@@ -77,7 +77,7 @@ readHtml :: ReaderOptions -- ^ Reader options
readHtml opts inp =
mapLeft (ParseFailure . getError) . flip runReader def $
runParserT parseDoc
- (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty)
+ (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
"source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
@@ -104,7 +104,7 @@ data HTMLState =
{ parserState :: ParserState,
noteTable :: [(String, Blocks)],
baseHref :: Maybe String,
- identifiers :: [String],
+ identifiers :: Set.Set String,
headerMap :: M.Map Inlines String
}
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index d29ec50e7..950497992 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -52,6 +52,7 @@ import Text.HTML.TagSoup
import Data.Sequence (viewl, ViewL(..), (<|))
import qualified Data.Foldable as F
import qualified Data.Map as M
+import qualified Data.Set as Set
import Data.Char (isDigit, isSpace)
import Data.Maybe (fromMaybe)
import Text.Printf (printf)
@@ -69,7 +70,7 @@ readMediaWiki opts s =
, mwNextLinkNumber = 1
, mwCategoryLinks = []
, mwHeaderMap = M.empty
- , mwIdentifierList = []
+ , mwIdentifierList = Set.empty
}
(s ++ "\n")
@@ -78,7 +79,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwNextLinkNumber :: Int
, mwCategoryLinks :: [Inlines]
, mwHeaderMap :: M.Map Inlines String
- , mwIdentifierList :: [String]
+ , mwIdentifierList :: Set.Set String
}
type MWParser = Parser [Char] MWState
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 1f1c57646..8c475eefc 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -61,6 +61,7 @@ import Text.Pandoc.Readers.Odt.Generic.XMLConverter
import Text.Pandoc.Readers.Odt.Generic.Fallible
import Text.Pandoc.Readers.Odt.Generic.Utils
+import qualified Data.Set as Set
--------------------------------------------------------------------------------
-- State
@@ -221,7 +222,7 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do
getHeaderAnchor :: OdtReaderSafe Inlines Anchor
getHeaderAnchor = proc title -> do
state <- getExtraState -< ()
- let anchor = uniqueIdent (toList title) (usedAnchors state)
+ let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state)
modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c7906618c..d82541638 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -53,6 +53,7 @@ import Data.Char (isAlphaNum, toLower)
import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
+import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isJust)
import Network.HTTP (urlEncode)
@@ -144,7 +145,7 @@ data OrgParserState = OrgParserState
, orgStateMeta' :: F Meta
, orgStateNotes' :: OrgNoteTable
, orgStateParserContext :: ParserContext
- , orgStateIdentifiers :: [String]
+ , orgStateIdentifiers :: Set.Set String
, orgStateHeaderMap :: M.Map Inlines String
}
@@ -186,7 +187,7 @@ defaultOrgParserState = OrgParserState
, orgStateMeta' = return nullMeta
, orgStateNotes' = []
, orgStateParserContext = NullState
- , orgStateIdentifiers = []
+ , orgStateIdentifiers = Set.empty
, orgStateHeaderMap = M.empty
}