From 5ef315cc6db868a11bd0c3e887b8c55eb2216662 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 29 Jun 2020 21:19:34 +0200
Subject: Org reader: keep unknown keyword lines as raw org

Lines with unknown keywords, like `#+SOMEWORD: value`, are no longer
read as metadata but are kept as raw `org` blocks. This ensures that
more information is retained when round-tripping org-mode files;
additionally, this change makes it possible to support non-standard org
extensions via filters.
---
 src/Text/Pandoc/Readers/Org/Blocks.hs | 11 +++++++++++
 src/Text/Pandoc/Readers/Org/Meta.hs   |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers/Org')

diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 2fbb26d31..0e2f49a83 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -76,6 +76,7 @@ block = choice [ mempty <$ blanklines
                , list
                , latexFragment
                , noteBlock
+               , rawOrgLine
                , paraOrPlain
                ] <?> "block"
 
@@ -559,6 +560,8 @@ include = try $ do
        | otherwise        -> Para content
       _ -> blk
 
+-- | Parses a meta line which defines a raw block. Currently recognized:
+-- @#+LATEX:@, @#+HTML:@, @#+TEXINFO:@, and @#+BEAMER:@.
 rawExportLine :: PandocMonad m => OrgParser m Blocks
 rawExportLine = try $ do
   metaLineStart
@@ -567,6 +570,14 @@ rawExportLine = try $ do
     then B.rawBlock key <$> anyLine
     else mzero
 
+-- | Last-resort keyword parser: turns any meta line (a line starting
+-- with @#+@) into a raw @org@ block. @#+@ is prepended to the text
+-- that follows 'metaLineStart'; leading spaces are discarded.
+-- Should be tried only after all other keyword parsers.
+rawOrgLine :: PandocMonad m => OrgParser m (F Blocks)
+rawOrgLine =
+  returnF . B.rawBlock "org" . ("#+" <>) =<< (metaLineStart *> anyLine)
+
 commentLine :: Monad m => OrgParser m Blocks
 commentLine = commentLineStart *> anyLine *> pure mempty
 
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 7d46841b3..43de04ffa 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -57,13 +57,13 @@ removeMeta key meta' =
 -- The order, in which blocks are tried, makes sure that we're not looking at
 -- the beginning of a block, so we don't need to check for it
 metaLine :: PandocMonad m => OrgParser m Blocks
-metaLine = mempty <$ metaLineStart <* keywordLine
+metaLine = try $ mempty <$ metaLineStart <* keywordLine
 
 keywordLine :: PandocMonad m => OrgParser m ()
 keywordLine = try $ do
   key   <- T.toLower <$> metaKey
   case Map.lookup key keywordHandlers of
-    Nothing -> () <$ anyLine -- discard unknown lines
+    Nothing -> fail $ "Unknown keyword: " ++ T.unpack key
     Just hd -> hd
 
 metaKey :: Monad m => OrgParser m Text
-- 
cgit v1.2.3