From 1ebaf6de117d74145a58d63a41a4c69b87aaa771 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 1 Jul 2016 21:17:55 +0200 Subject: Org reader: refactor comment tree handling Comment trees were handled after parsing, as pattern matching on lists is easier than matching on sequences. The new method of reading documents as trees allows for more elegant subtree removal. --- src/Text/Pandoc/Readers/Org.hs | 39 +---------------------------------- src/Text/Pandoc/Readers/Org/Blocks.hs | 21 ++++++++++++++++++- 2 files changed, 21 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index d593f856d..4e1c926da 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -52,41 +52,4 @@ parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- blockList meta' <- meta - return . Pandoc meta' $ removeUnwantedBlocks blocks' - where - removeUnwantedBlocks :: [Block] -> [Block] - removeUnwantedBlocks = dropCommentTrees . filter (/= Null) - --- | Drop COMMENT headers and the document tree below those headers. -dropCommentTrees :: [Block] -> [Block] -dropCommentTrees [] = [] -dropCommentTrees (b:bs) = - maybe (b:dropCommentTrees bs) - (dropCommentTrees . flip dropUntilHeaderAboveLevel bs) - (commentHeaderLevel b) - --- | Return the level of a header starting a comment or :noexport: tree and --- Nothing otherwise. -commentHeaderLevel :: Block -> Maybe Int -commentHeaderLevel blk = - case blk of - (Header level _ ((Str "COMMENT"):_)) -> Just level - (Header level _ title) | hasNoExportTag title -> Just level - _ -> Nothing - where - hasNoExportTag :: [Inline] -> Bool - hasNoExportTag = any isNoExportTag - - isNoExportTag :: Inline -> Bool - isNoExportTag (Span ("", ["tag"], [("data-tag-name", "noexport")]) []) = True - isNoExportTag _ = False - --- | Drop blocks until a header on or above the given level is seen -dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block] -dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n) - -isHeaderLevelLowerEq :: Int -> Block -> Bool -isHeaderLevelLowerEq n blk = - case blk of - (Header level _ _) -> n >= level - _ -> False + return $ Pandoc meta' blocks' diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 9ebb22d13..c9e9d2ced 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,5 +1,6 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE RecordWildCards #-} +{-# OPTIONS_GHC -fno-warn-overlapping-patterns #-} {- Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -52,6 +53,7 @@ import Data.Char ( isSpace, toLower, toUpper) import Data.List ( foldl', intersperse, isPrefixOf ) import qualified Data.Map as M import Data.Maybe ( fromMaybe, isNothing ) +import qualified Data.Sequence as S import Network.HTTP ( urlEncode ) -- @@ -132,7 +134,24 @@ headline lvl = try $ do -- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks headlineToBlocks :: Headline -> OrgParser Blocks -headlineToBlocks (Headline {..}) = do +headlineToBlocks hdln@(Headline {..}) = + case () of + _ | any isNoExportTag headlineTags -> return mempty + _ | isCommentTitle headlineText -> return mempty + _ -> headlineToHeader hdln + +isNoExportTag :: Tag -> Bool +isNoExportTag = (== toTag "noexport") + +-- | Check if the title starts with COMMENT. +-- FIXME: This accesses builder internals not intended for use in situations +-- as these. Replace once keyword parsing is supported. +isCommentTitle :: Inlines -> Bool +isCommentTitle xs = (B.Many . S.take 1 . B.unMany) xs == B.str "COMMENT" +isCommentTitle _ = False + +headlineToHeader :: Headline -> OrgParser Blocks +headlineToHeader (Headline {..}) = do let text = tagTitle headlineText headlineTags let propAttr = propertiesToAttr headlineProperties attr <- registerHeader propAttr headlineText -- cgit v1.2.3