From a26b3a2d6af8614e13299bbf477e28c5932ef680 Mon Sep 17 00:00:00 2001
From: Brian Leung <bkleung89@gmail.com>
Date: Fri, 5 Oct 2018 14:28:17 -0700
Subject: Org reader: Add partial support for `#+EXCLUDE_TAGS` option. (#4950)

Closes #4284.

Headers with the corresponding tags should not appear in the output.

If one or more of the specified tags contains a non-tag character
like `+`, Org-mode will not treat that as a valid tag, but will
nonetheless continue scanning for valid tags. That behavior is not
replicated in this patch; entering `cat+dog` as one of the entries in
`#+EXCLUDE_TAGS` and running the file through Pandoc will cause the
parser to fail and result in the only excluded tag being the default, `noexport`.
---
 src/Text/Pandoc/Readers/Org/DocumentTree.hs | 14 +++++++-------
 src/Text/Pandoc/Readers/Org/Meta.hs         | 11 +++++++++++
 src/Text/Pandoc/Readers/Org/ParserState.hs  |  6 ++++++
 src/Text/Pandoc/Readers/Org/Parsing.hs      |  8 ++++++++
 4 files changed, 32 insertions(+), 7 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org')

diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index a9df3b437..7d55892fe 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -43,6 +43,7 @@ import Text.Pandoc.Readers.Org.BlockStarts
 import Text.Pandoc.Readers.Org.ParserState
 import Text.Pandoc.Readers.Org.Parsing
 
+import qualified Data.Set as Set
 import qualified Text.Pandoc.Builder as B
 
 --
@@ -73,9 +74,6 @@ documentTree blocks inline = do
       , headlineChildren = headlines'
       }
 
-newtype Tag = Tag { fromTag :: String }
-  deriving (Show, Eq)
-
 -- | Create a tag containing the given string.
 toTag :: String -> Tag
 toTag = Tag
@@ -153,7 +151,7 @@ headline blocks inline lvl = try $ do
 
    headerTags :: Monad m => OrgParser m [Tag]
    headerTags = try $
-     let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
+     let tag = orgTagWord <* char ':'
      in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
 
 -- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
@@ -163,15 +161,17 @@ headlineToBlocks hdln = do
   let tags = headlineTags hdln
   let text = headlineText hdln
   let level = headlineLevel hdln
+  shouldNotExport <- hasDoNotExportTag tags
   case () of
-    _ | any isNoExportTag tags -> return mempty
+    _ | shouldNotExport -> return mempty
     _ | any isArchiveTag  tags -> archivedHeadlineToBlocks hdln
     _ | isCommentTitle text    -> return mempty
     _ | maxLevel <= level      -> headlineToHeaderWithList hdln
     _ | otherwise              -> headlineToHeaderWithContents hdln
 
-isNoExportTag :: Tag -> Bool
-isNoExportTag = (== toTag "noexport")
+hasDoNotExportTag :: Monad m => [Tag] -> OrgParser m Bool
+hasDoNotExportTag tags = containsExcludedTag . orgStateExcludedTags <$> getState
+  where containsExcludedTag s = any (`Set.member` s) tags
 
 isArchiveTag :: Tag -> Bool
 isArchiveTag = (== toTag "ARCHIVE")
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 965e33d94..921cd27e0 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -52,6 +52,7 @@ import Data.Char (toLower)
 import Data.List (intersperse)
 import Data.Maybe (fromMaybe)
 import qualified Data.Map as M
+import qualified Data.Set as Set
 import Network.HTTP (urlEncode)
 
 -- | Returns the current meta, respecting export options.
@@ -158,6 +159,7 @@ optionLine = try $ do
     "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
     "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
     "macro"    -> macroDefinition >>= updateState . registerMacro
+    "exclude_tags" -> excludedTagSet >>= updateState . setExcludedTags
     "pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar
     "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
     _          -> mzero
@@ -190,6 +192,15 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
    rest            = manyTill anyChar         (eof <|> () <$ oneOf "\n\r")
    tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
 
+excludedTagSet :: Monad m => OrgParser m (Set.Set Tag)
+excludedTagSet = do
+  skipSpaces
+  Set.fromList . map Tag <$>
+                      many (orgTagWord <* skipSpaces) <* newline
+
+setExcludedTags :: Set.Set Tag -> OrgParserState -> OrgParserState
+setExcludedTags tagSet st = st { orgStateExcludedTags = tagSet }
+
 setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState
 setEmphasisPreChar csMb st =
   let preChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index d33602575..381d4c5ee 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState
   , defaultOrgParserState
   , OrgParserLocal (..)
   , OrgNoteRecord
+  , Tag(..)
   , HasReaderOptions (..)
   , HasQuoteContext (..)
   , HasMacros (..)
@@ -88,6 +89,9 @@ type OrgNoteTable = [OrgNoteRecord]
 type OrgLinkFormatters = M.Map String (String -> String)
 -- | Macro expander function
 type MacroExpander = [String] -> String
+-- | Tag
+newtype Tag = Tag { fromTag :: String }
+  deriving (Show, Eq, Ord)
 
 -- | The states in which a todo item can be
 data TodoState = Todo | Done
@@ -113,6 +117,7 @@ data OrgParserState = OrgParserState
                                            -- specified here.
   , orgStateEmphasisPostChars    :: [Char] -- ^ Chars allowed at after emphasis
   , orgStateEmphasisNewlines     :: Maybe Int
+  , orgStateExcludedTags         :: Set.Set Tag
   , orgStateExportSettings       :: ExportSettings
   , orgStateHeaderMap            :: M.Map Inlines String
   , orgStateIdentifiers          :: Set.Set String
@@ -183,6 +188,7 @@ defaultOrgParserState = OrgParserState
   , orgStateEmphasisCharStack = []
   , orgStateEmphasisNewlines = Nothing
   , orgStateExportSettings = def
+  , orgStateExcludedTags = Set.singleton $ Tag "noexport"
   , orgStateHeaderMap = M.empty
   , orgStateIdentifiers = Set.empty
   , orgStateIncludeFiles = []
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index b37b36624..52a346e36 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -46,6 +46,8 @@ module Text.Pandoc.Readers.Org.Parsing
   , orgArgKey
   , orgArgWord
   , orgArgWordChar
+  , orgTagWord
+  , orgTagWordChar
   -- * Re-exports from Text.Pandoc.Parser
   , ParserContext (..)
   , many1Till
@@ -220,3 +222,9 @@ orgArgWord = many1 orgArgWordChar
 -- | Chars treated as part of a word in plists.
 orgArgWordChar :: Monad m => OrgParser m Char
 orgArgWordChar = alphaNum <|> oneOf "-_"
+
+orgTagWord :: Monad m => OrgParser m String
+orgTagWord = many1 orgTagWordChar
+
+orgTagWordChar :: Monad m => OrgParser m Char
+orgTagWordChar = alphaNum <|> oneOf "@%#_"
-- 
cgit v1.2.3