diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-01-14 13:06:27 +0100 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-01-25 17:07:42 +0100 |
commit | 3876b91448ad8d279f5d5a9d217e00cf4583e14b (patch) | |
tree | ab1050da4d77dd666f9698d4517a0e0b786ba227 | |
parent | 14272521600f9a616c07333261fa258b3dc5c487 (diff) | |
download | pandoc-3876b91448ad8d279f5d5a9d217e00cf4583e14b.tar.gz |
Make Extensions a custom type instead of a Set Extension.
The type is implemented in terms of an underlying bitset
which should be more efficient.
API change: from Text.Pandoc.Extensions export Extensions,
emptyExtensions, extensionsFromList, enableExtension, disableExtension,
extensionEnabled.
-rw-r--r-- | pandoc.cabal | 1 | ||||
-rw-r--r-- | pandoc.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc.hs | 36 | ||||
-rw-r--r-- | src/Text/Pandoc/Extensions.hs | 54 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 18 | ||||
-rw-r--r-- | tests/Tests/Readers/Markdown.hs | 7 |
8 files changed, 76 insertions, 59 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index 80a35378f..3e87fec97 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -257,7 +257,6 @@ Library containers >= 0.1 && < 0.6, unordered-containers >= 0.2 && < 0.3, array >= 0.3 && < 0.6, - largeword >= 1.2 && < 1.3, parsec >= 3.1 && < 3.2, mtl >= 2.2 && < 2.3, filepath >= 1.1 && < 1.5, @@ -47,7 +47,6 @@ import System.Environment ( getArgs, getProgName, getEnvironment ) import System.Exit ( ExitCode (..), exitSuccess ) import System.FilePath import System.Console.GetOpt -import qualified Data.Set as Set import Data.Char ( toLower, toUpper ) import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort ) import System.Directory ( getAppUserDataDirectory, findExecutable, @@ -1295,7 +1294,7 @@ options = (NoArg (\_ -> do let showExt x = drop 4 (show x) ++ - if x `Set.member` pandocExtensions + if extensionEnabled x pandocExtensions then " +" else " -" mapM_ (UTF8.hPutStrLn stdout . showExt) diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index b94d05718..86f70b293 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -184,15 +184,13 @@ import Text.Pandoc.Class import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) -import Data.Set (Set) -import qualified Data.Set as Set import Text.Parsec import Text.Parsec.Error import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Except (throwError) parseFormatSpec :: String - -> Either ParseError (String, Set Extension -> Set Extension) + -> Either ParseError (String, Extensions -> Extensions) parseFormatSpec = parse formatSpec "" where formatSpec = do name <- formatName @@ -208,8 +206,8 @@ parseFormatSpec = parse formatSpec "" | name == "lhs" -> return Ext_literate_haskell | otherwise -> fail $ "Unknown extension: " ++ name return $ case polarity of - '-' -> Set.delete ext - _ -> Set.insert ext + '-' -> disableExtension ext + _ -> enableExtension ext -- TODO: when we get the PandocMonad stuff all sorted out, -- we can simply these types considerably. Errors/MediaBag can be @@ -330,25 +328,29 @@ writers = [ ,("tei" , StringWriter writeTEI) ] -getDefaultExtensions :: String -> Set Extension +getDefaultExtensions :: String -> Extensions getDefaultExtensions "markdown_strict" = strictExtensions getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = plainExtensions -getDefaultExtensions "org" = Set.fromList [Ext_citations, - Ext_auto_identifiers] -getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers] -getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers, - Ext_native_divs, - Ext_native_spans] +getDefaultExtensions "org" = extensionsFromList + [Ext_citations, Ext_auto_identifiers] +getDefaultExtensions "textile" = extensionsFromList + [Ext_auto_identifiers] +getDefaultExtensions "html" = extensionsFromList + [Ext_auto_identifiers, + Ext_native_divs, + Ext_native_spans] getDefaultExtensions "html5" = getDefaultExtensions "html" -getDefaultExtensions "epub" = Set.fromList [Ext_raw_html, - Ext_native_divs, - Ext_native_spans, - Ext_epub_html_exts] -getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] +getDefaultExtensions "epub" = extensionsFromList + [Ext_raw_html, + Ext_native_divs, + Ext_native_spans, + Ext_epub_html_exts] +getDefaultExtensions _ = extensionsFromList + [Ext_auto_identifiers] -- | Retrieve reader based on formatSpec (format+extensions). getReader :: PandocMonad m => String -> Either String (Reader m) diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 91cd045de..68d76792c 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -29,6 +29,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Data structures and functions for representing markup extensions. -} module Text.Pandoc.Extensions ( Extension(..) + , Extensions + , emptyExtensions + , extensionsFromList + , extensionEnabled + , enableExtension + , disableExtension , pandocExtensions , plainExtensions , strictExtensions @@ -36,15 +42,29 @@ module Text.Pandoc.Extensions ( Extension(..) , githubMarkdownExtensions , multimarkdownExtensions ) where -import Data.LargeWord (Word256) -import Data.Bits () -import Data.Set (Set) -import qualified Data.Set as Set +import Data.Word (Word64) +import Data.Bits (testBit, setBit, clearBit) import Data.Data (Data) import Data.Typeable (Typeable) import GHC.Generics (Generic) -newtype Extensions = Extensions { unExtensions :: Word256 } +newtype Extensions = Extensions Word64 + deriving (Show, Read, Eq, Ord, Bounded, Data, Typeable, Generic) + +extensionsFromList :: [Extension] -> Extensions +extensionsFromList = foldr enableExtension emptyExtensions + +emptyExtensions :: Extensions +emptyExtensions = Extensions 0 + +extensionEnabled :: Extension -> Extensions -> Bool +extensionEnabled x (Extensions exts) = testBit exts (fromEnum x) + +enableExtension :: Extension -> Extensions -> Extensions +enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x)) + +disableExtension :: Extension -> Extensions -> Extensions +disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x)) -- | Individually selectable syntax extensions. data Extension = @@ -112,8 +132,8 @@ data Extension = | Ext_shortcut_reference_links -- ^ Shortcut reference links deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) -pandocExtensions :: Set Extension -pandocExtensions = Set.fromList +pandocExtensions :: Extensions +pandocExtensions = extensionsFromList [ Ext_footnotes , Ext_inline_notes , Ext_pandoc_title_block @@ -157,8 +177,8 @@ pandocExtensions = Set.fromList , Ext_shortcut_reference_links ] -plainExtensions :: Set Extension -plainExtensions = Set.fromList +plainExtensions :: Extensions +plainExtensions = extensionsFromList [ Ext_table_captions , Ext_implicit_figures , Ext_simple_tables @@ -175,8 +195,8 @@ plainExtensions = Set.fromList , Ext_strikeout ] -phpMarkdownExtraExtensions :: Set Extension -phpMarkdownExtraExtensions = Set.fromList +phpMarkdownExtraExtensions :: Extensions +phpMarkdownExtraExtensions = extensionsFromList [ Ext_footnotes , Ext_pipe_tables , Ext_raw_html @@ -190,8 +210,8 @@ phpMarkdownExtraExtensions = Set.fromList , Ext_shortcut_reference_links ] -githubMarkdownExtensions :: Set Extension -githubMarkdownExtensions = Set.fromList +githubMarkdownExtensions :: Extensions +githubMarkdownExtensions = extensionsFromList [ Ext_angle_brackets_escapable , Ext_pipe_tables , Ext_raw_html @@ -208,8 +228,8 @@ githubMarkdownExtensions = Set.fromList , Ext_shortcut_reference_links ] -multimarkdownExtensions :: Set Extension -multimarkdownExtensions = Set.fromList +multimarkdownExtensions :: Extensions +multimarkdownExtensions = extensionsFromList [ Ext_pipe_tables , Ext_raw_html , Ext_markdown_attribute @@ -237,8 +257,8 @@ multimarkdownExtensions = Set.fromList , Ext_subscript ] -strictExtensions :: Set Extension -strictExtensions = Set.fromList +strictExtensions :: Extensions +strictExtensions = extensionsFromList [ Ext_raw_html , Ext_shortcut_reference_links ] diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 56681f4b2..e18ee7d5f 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -45,17 +45,15 @@ module Text.Pandoc.Options ( module Text.Pandoc.Extensions , isEnabled ) where import Text.Pandoc.Extensions -import qualified Data.Set as Set import Data.Default import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.MediaBag (MediaBag) import Data.Data (Data) -import Data.Set (Set) import Data.Typeable (Typeable) import GHC.Generics (Generic) data ReaderOptions = ReaderOptions{ - readerExtensions :: Set Extension -- ^ Syntax extensions + readerExtensions :: Extensions -- ^ Syntax extensions , readerSmart :: Bool -- ^ Smart punctuation , readerStandalone :: Bool -- ^ Standalone document with header , readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX @@ -165,7 +163,7 @@ data WriterOptions = WriterOptions , writerNumberSections :: Bool -- ^ Number sections in LaTeX , writerNumberOffset :: [Int] -- ^ Starting number for section, subsection, ... , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , writerExtensions :: Set Extension -- ^ Markdown extensions that can be used + , writerExtensions :: Extensions -- ^ Markdown extensions that can be used , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions , writerWrapText :: WrapOption -- ^ Option for wrapping text @@ -248,4 +246,4 @@ instance Default WriterOptions where -- | Returns True if the given extension is enabled. isEnabled :: Extension -> WriterOptions -> Bool -isEnabled ext opts = ext `Set.member` (writerExtensions opts) +isEnabled ext opts = ext `extensionEnabled` (writerExtensions opts) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index f53db1cbc..cd85fe58e 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1031,11 +1031,11 @@ defaultParserState = -- | Succeed only if the extension is enabled. guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () -guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext +guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext -- | Succeed only if the extension is disabled. guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () -guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext +guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext -- | Update the position on which the last string ended. updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m () @@ -1090,10 +1090,10 @@ registerHeader (ident,classes,kvs) header' = do ids <- extractIdentifierList <$> getState exts <- getOption readerExtensions let insert' = M.insertWith (\_new old -> old) - if null ident && Ext_auto_identifiers `Set.member` exts + if null ident && Ext_auto_identifiers `extensionEnabled` exts then do let id' = uniqueIdent (B.toList header') ids - let id'' = if Ext_ascii_identifiers `Set.member` exts + let id'' = if Ext_ascii_identifiers `extensionEnabled` exts then catMaybes $ map toAsciiChar id' else id' updateState $ updateIdentifierList $ Set.insert id' diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a6156e497..e0694f38a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -61,7 +61,6 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockT import Control.Monad import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup -import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) import Data.Monoid ((<>)) @@ -310,11 +309,11 @@ toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) | otherwise -> MetaInlines xs Pandoc _ bs -> MetaBlocks bs endsWithNewline t = T.pack "\n" `T.isSuffixOf` t - opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} - meta_exts = Set.fromList [ Ext_pandoc_title_block - , Ext_mmd_title_block - , Ext_yaml_metadata_block - ] + opts' = opts{readerExtensions = + disableExtension Ext_pandoc_title_block $ + disableExtension Ext_mmd_title_block $ + disableExtension Ext_yaml_metadata_block $ + readerExtensions opts } yamlToMeta :: PandocMonad m => ReaderOptions -> Yaml.Value -> m MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t @@ -534,8 +533,9 @@ header = setextHeader <|> atxHeader <?> "header" atxChar :: PandocMonad m => MarkdownParser m Char atxChar = do exts <- getOption readerExtensions - return $ if Set.member Ext_literate_haskell exts - then '=' else '#' + return $ if extensionEnabled Ext_literate_haskell exts + then '=' + else '#' atxHeader :: PandocMonad m => MarkdownParser m (F Blocks) atxHeader = try $ do @@ -1013,7 +1013,7 @@ para = try $ do result' <- result case B.toList result' of [Image attr alt (src,tit)] - | Ext_implicit_figures `Set.member` exts -> + | Ext_implicit_figures `extensionEnabled` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton $ Image attr alt (src,'f':'i':'g':':':tit) diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index b43a0ec49..3bc8f34ae 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -6,7 +6,6 @@ import Test.Framework import Tests.Helpers import Text.Pandoc.Arbitrary() import Text.Pandoc.Builder -import qualified Data.Set as Set import Text.Pandoc markdown :: String -> Pandoc @@ -16,7 +15,7 @@ markdownSmart :: String -> Pandoc markdownSmart = purely $ readMarkdown def { readerSmart = True } markdownCDL :: String -> Pandoc -markdownCDL = purely $ readMarkdown def { readerExtensions = Set.insert +markdownCDL = purely $ readMarkdown def { readerExtensions = enableExtension Ext_compact_definition_lists $ readerExtensions def } markdownGH :: String -> Pandoc @@ -30,7 +29,7 @@ infix 4 =: testBareLink :: (String, Inlines) -> Test testBareLink (inp, ils) = test (purely $ readMarkdown def{ readerExtensions = - Set.fromList [Ext_autolink_bare_uris, Ext_raw_html] }) + extensionsFromList [Ext_autolink_bare_uris, Ext_raw_html] }) inp (inp, doc $ para ils) autolink :: String -> Inlines @@ -303,7 +302,7 @@ tests = [ testGroup "inline code" =?> para (note (para "See [^1]")) ] , testGroup "lhs" - [ test (purely $ readMarkdown def{ readerExtensions = Set.insert + [ test (purely $ readMarkdown def{ readerExtensions = enableExtension Ext_literate_haskell $ readerExtensions def }) "inverse bird tracks and html" $ "> a\n\n< b\n\n<div>\n" |