From 3876b91448ad8d279f5d5a9d217e00cf4583e14b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 14 Jan 2017 13:06:27 +0100
Subject: Make Extensions a custom type instead of a Set Extension.

The type is implemented in terms of an underlying bitset
which should be more efficient.

API change: from Text.Pandoc.Extensions export Extensions,
emptyExtensions, extensionsFromList, enableExtension, disableExtension,
extensionEnabled.
---
 src/Text/Pandoc.hs                  | 36 +++++++++++++------------
 src/Text/Pandoc/Extensions.hs       | 54 +++++++++++++++++++++++++------------
 src/Text/Pandoc/Options.hs          |  8 +++---
 src/Text/Pandoc/Parsing.hs          |  8 +++---
 src/Text/Pandoc/Readers/Markdown.hs | 18 ++++++-------
 5 files changed, 72 insertions(+), 52 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index b94d05718..86f70b293 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -184,15 +184,13 @@ import Text.Pandoc.Class
 import Data.Aeson
 import qualified Data.ByteString.Lazy as BL
 import Data.List (intercalate)
-import Data.Set (Set)
-import qualified Data.Set as Set
 import Text.Parsec
 import Text.Parsec.Error
 import qualified Text.Pandoc.UTF8 as UTF8
 import Control.Monad.Except (throwError)
 
 parseFormatSpec :: String
-                -> Either ParseError (String, Set Extension -> Set Extension)
+                -> Either ParseError (String, Extensions -> Extensions)
 parseFormatSpec = parse formatSpec ""
   where formatSpec = do
           name <- formatName
@@ -208,8 +206,8 @@ parseFormatSpec = parse formatSpec ""
                          | name == "lhs" -> return Ext_literate_haskell
                          | otherwise -> fail $ "Unknown extension: " ++ name
           return $ case polarity of
-                        '-'  -> Set.delete ext
-                        _    -> Set.insert ext
+                        '-'  -> disableExtension ext
+                        _    -> enableExtension ext
 
 -- TODO: when we get the PandocMonad stuff all sorted out,
 -- we can simply these types considerably.  Errors/MediaBag can be
@@ -330,25 +328,29 @@ writers = [
   ,("tei"          , StringWriter writeTEI)
   ]
 
-getDefaultExtensions :: String -> Set Extension
+getDefaultExtensions :: String -> Extensions
 getDefaultExtensions "markdown_strict" = strictExtensions
 getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions
 getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
 getDefaultExtensions "markdown_github" = githubMarkdownExtensions
 getDefaultExtensions "markdown"        = pandocExtensions
 getDefaultExtensions "plain"           = plainExtensions
-getDefaultExtensions "org"             = Set.fromList [Ext_citations,
-                                                       Ext_auto_identifiers]
-getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers]
-getDefaultExtensions "html"            = Set.fromList [Ext_auto_identifiers,
-                                                       Ext_native_divs,
-                                                       Ext_native_spans]
+getDefaultExtensions "org"             = extensionsFromList
+                                          [Ext_citations, Ext_auto_identifiers]
+getDefaultExtensions "textile"         = extensionsFromList
+                                          [Ext_auto_identifiers]
+getDefaultExtensions "html"            = extensionsFromList
+                                          [Ext_auto_identifiers,
+                                           Ext_native_divs,
+                                           Ext_native_spans]
 getDefaultExtensions "html5"           = getDefaultExtensions "html"
-getDefaultExtensions "epub"            = Set.fromList [Ext_raw_html,
-                                                       Ext_native_divs,
-                                                       Ext_native_spans,
-                                                       Ext_epub_html_exts]
-getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]
+getDefaultExtensions "epub"            = extensionsFromList
+                                          [Ext_raw_html,
+                                           Ext_native_divs,
+                                           Ext_native_spans,
+                                           Ext_epub_html_exts]
+getDefaultExtensions _                 = extensionsFromList
+                                          [Ext_auto_identifiers]
 
 -- | Retrieve reader based on formatSpec (format+extensions).
 getReader :: PandocMonad m => String -> Either String (Reader m)
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 91cd045de..68d76792c 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -29,6 +29,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Data structures and functions for representing markup extensions.
 -}
 module Text.Pandoc.Extensions ( Extension(..)
+                              , Extensions
+                              , emptyExtensions
+                              , extensionsFromList
+                              , extensionEnabled
+                              , enableExtension
+                              , disableExtension
                               , pandocExtensions
                               , plainExtensions
                               , strictExtensions
@@ -36,15 +42,29 @@ module Text.Pandoc.Extensions ( Extension(..)
                               , githubMarkdownExtensions
                               , multimarkdownExtensions )
 where
-import Data.LargeWord (Word256)
-import Data.Bits ()
-import Data.Set (Set)
-import qualified Data.Set as Set
+import Data.Word (Word64)
+import Data.Bits (testBit, setBit, clearBit)
 import Data.Data (Data)
 import Data.Typeable (Typeable)
 import GHC.Generics (Generic)
 
-newtype Extensions = Extensions { unExtensions :: Word256 }
+newtype Extensions = Extensions Word64
+  deriving (Show, Read, Eq, Ord, Bounded, Data, Typeable, Generic)
+
+extensionsFromList :: [Extension] -> Extensions
+extensionsFromList = foldr enableExtension emptyExtensions
+
+emptyExtensions :: Extensions
+emptyExtensions = Extensions 0
+
+extensionEnabled :: Extension -> Extensions -> Bool
+extensionEnabled x (Extensions exts) = testBit exts (fromEnum x)
+
+enableExtension :: Extension -> Extensions -> Extensions
+enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x))
+
+disableExtension :: Extension -> Extensions -> Extensions
+disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x))
 
 -- | Individually selectable syntax extensions.
 data Extension =
@@ -112,8 +132,8 @@ data Extension =
     | Ext_shortcut_reference_links -- ^ Shortcut reference links
     deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
 
-pandocExtensions :: Set Extension
-pandocExtensions = Set.fromList
+pandocExtensions :: Extensions
+pandocExtensions = extensionsFromList
   [ Ext_footnotes
   , Ext_inline_notes
   , Ext_pandoc_title_block
@@ -157,8 +177,8 @@ pandocExtensions = Set.fromList
   , Ext_shortcut_reference_links
   ]
 
-plainExtensions :: Set Extension
-plainExtensions = Set.fromList
+plainExtensions :: Extensions
+plainExtensions = extensionsFromList
   [ Ext_table_captions
   , Ext_implicit_figures
   , Ext_simple_tables
@@ -175,8 +195,8 @@ plainExtensions = Set.fromList
   , Ext_strikeout
   ]
 
-phpMarkdownExtraExtensions :: Set Extension
-phpMarkdownExtraExtensions = Set.fromList
+phpMarkdownExtraExtensions :: Extensions
+phpMarkdownExtraExtensions = extensionsFromList
   [ Ext_footnotes
   , Ext_pipe_tables
   , Ext_raw_html
@@ -190,8 +210,8 @@ phpMarkdownExtraExtensions = Set.fromList
   , Ext_shortcut_reference_links
   ]
 
-githubMarkdownExtensions :: Set Extension
-githubMarkdownExtensions = Set.fromList
+githubMarkdownExtensions :: Extensions
+githubMarkdownExtensions = extensionsFromList
   [ Ext_angle_brackets_escapable
   , Ext_pipe_tables
   , Ext_raw_html
@@ -208,8 +228,8 @@ githubMarkdownExtensions = Set.fromList
   , Ext_shortcut_reference_links
   ]
 
-multimarkdownExtensions :: Set Extension
-multimarkdownExtensions = Set.fromList
+multimarkdownExtensions :: Extensions
+multimarkdownExtensions = extensionsFromList
   [ Ext_pipe_tables
   , Ext_raw_html
   , Ext_markdown_attribute
@@ -237,8 +257,8 @@ multimarkdownExtensions = Set.fromList
   , Ext_subscript
   ]
 
-strictExtensions :: Set Extension
-strictExtensions = Set.fromList
+strictExtensions :: Extensions
+strictExtensions = extensionsFromList
   [ Ext_raw_html
   , Ext_shortcut_reference_links
   ]
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 56681f4b2..e18ee7d5f 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -45,17 +45,15 @@ module Text.Pandoc.Options ( module Text.Pandoc.Extensions
                            , isEnabled
                            ) where
 import Text.Pandoc.Extensions
-import qualified Data.Set as Set
 import Data.Default
 import Text.Pandoc.Highlighting (Style, pygments)
 import Text.Pandoc.MediaBag (MediaBag)
 import Data.Data (Data)
-import Data.Set (Set)
 import Data.Typeable (Typeable)
 import GHC.Generics (Generic)
 
 data ReaderOptions = ReaderOptions{
-         readerExtensions      :: Set Extension  -- ^ Syntax extensions
+         readerExtensions      :: Extensions  -- ^ Syntax extensions
        , readerSmart           :: Bool -- ^ Smart punctuation
        , readerStandalone      :: Bool -- ^ Standalone document with header
        , readerParseRaw        :: Bool -- ^ Parse raw HTML, LaTeX
@@ -165,7 +163,7 @@ data WriterOptions = WriterOptions
   , writerNumberSections   :: Bool   -- ^ Number sections in LaTeX
   , writerNumberOffset     :: [Int]  -- ^ Starting number for section, subsection, ...
   , writerSectionDivs      :: Bool   -- ^ Put sections in div tags in HTML
-  , writerExtensions       :: Set Extension -- ^ Markdown extensions that can be used
+  , writerExtensions       :: Extensions -- ^ Markdown extensions that can be used
   , writerReferenceLinks   :: Bool   -- ^ Use reference links in writing markdown, rst
   , writerDpi              :: Int    -- ^ Dpi for pixel to/from inch/cm conversions
   , writerWrapText         :: WrapOption  -- ^ Option for wrapping text
@@ -248,4 +246,4 @@ instance Default WriterOptions where
 
 -- | Returns True if the given extension is enabled.
 isEnabled :: Extension -> WriterOptions -> Bool
-isEnabled ext opts = ext `Set.member` (writerExtensions opts)
+isEnabled ext opts = ext `extensionEnabled` (writerExtensions opts)
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index f53db1cbc..cd85fe58e 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1031,11 +1031,11 @@ defaultParserState =
 
 -- | Succeed only if the extension is enabled.
 guardEnabled :: (Stream s m a,  HasReaderOptions st) => Extension -> ParserT s st m ()
-guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
+guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext
 
 -- | Succeed only if the extension is disabled.
 guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
-guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
+guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext
 
 -- | Update the position on which the last string ended.
 updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m ()
@@ -1090,10 +1090,10 @@ registerHeader (ident,classes,kvs) header' = do
   ids <- extractIdentifierList <$> getState
   exts <- getOption readerExtensions
   let insert' = M.insertWith (\_new old -> old)
-  if null ident && Ext_auto_identifiers `Set.member` exts
+  if null ident && Ext_auto_identifiers `extensionEnabled` exts
      then do
        let id' = uniqueIdent (B.toList header') ids
-       let id'' = if Ext_ascii_identifiers `Set.member` exts
+       let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
                      then catMaybes $ map toAsciiChar id'
                      else id'
        updateState $ updateIdentifierList $ Set.insert id'
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a6156e497..e0694f38a 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -61,7 +61,6 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockT
 import Control.Monad
 import System.FilePath (takeExtension, addExtension)
 import Text.HTML.TagSoup
-import qualified Data.Set as Set
 import Text.Printf (printf)
 import Debug.Trace (trace)
 import Data.Monoid ((<>))
@@ -310,11 +309,11 @@ toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x)
          | otherwise         -> MetaInlines xs
         Pandoc _ bs           -> MetaBlocks bs
     endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
-    opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts}
-    meta_exts = Set.fromList [ Ext_pandoc_title_block
-                             , Ext_mmd_title_block
-                             , Ext_yaml_metadata_block
-                             ]
+    opts' = opts{readerExtensions =
+                  disableExtension Ext_pandoc_title_block $
+                  disableExtension Ext_mmd_title_block $
+                  disableExtension Ext_yaml_metadata_block $
+                  readerExtensions opts }
 
 yamlToMeta :: PandocMonad m => ReaderOptions -> Yaml.Value -> m MetaValue
 yamlToMeta opts (Yaml.String t) = toMetaValue opts t
@@ -534,8 +533,9 @@ header = setextHeader <|> atxHeader <?> "header"
 atxChar :: PandocMonad m => MarkdownParser m Char
 atxChar = do
   exts <- getOption readerExtensions
-  return $ if Set.member Ext_literate_haskell exts
-    then '=' else '#'
+  return $ if extensionEnabled Ext_literate_haskell exts
+              then '='
+              else '#'
 
 atxHeader :: PandocMonad m => MarkdownParser m (F Blocks)
 atxHeader = try $ do
@@ -1013,7 +1013,7 @@ para = try $ do
               result' <- result
               case B.toList result' of
                    [Image attr alt (src,tit)]
-                     | Ext_implicit_figures `Set.member` exts ->
+                     | Ext_implicit_figures `extensionEnabled` exts ->
                         -- the fig: at beginning of title indicates a figure
                         return $ B.para $ B.singleton
                                $ Image attr alt (src,'f':'i':'g':':':tit)
-- 
cgit v1.2.3