4 files changed, 203 insertions, 153 deletions
diff --git a/doc/org.md b/doc/org.md
index b65782107..eb929bb7c 100644
--- a/doc/org.md
+++ b/doc/org.md
@@ -14,13 +14,13 @@ The following export keywords are supported:
 
 - AUTHOR: comma-separated list of author(s); fully supported.
 
-- CREATOR: output generator; passed as metadata entry, but
-  ignored by most output formats.
+- CREATOR: output generator; passed as plain-text metadata entry
+  `creator`, but not used by any default templates.
 
 - DATE: creation or publication date; well supported by pandoc.
 
-- EMAIL: author email address; passed as metadata entry, but not
-  included in most output formats.
+- EMAIL: author email address; passed as plain-text metadata
+  field `email`, but not used by any default templates.
 
 - LANGUAGE: currently unsupported; use `#+LANG:` instead.
 
@@ -51,11 +51,12 @@ occur.
 
   Pandoc follows the LaTeX exporter in that it allows markup in
   the description. In contrast, the Org-mode HTML exporter treats
-  reads the description as plain text.
+  the description as plain text.
 
-- LATEX_HEADER: arbitrary lines to add to the document's preamble.
-  Contrary to Org-mode, these lines are not inserted before the
-  hyperref settings, but close to the end of the preamble.
+- LATEX\_HEADER and LATEX_HEADER_EXTRA: arbitrary lines to add to
+  the document's preamble. Contrary to Org-mode, these lines are
+  not inserted before the hyperref settings, but close to the end
+  of the preamble.
 
   The contents of this option are stored as a list of raw LaTeX
   lines in the `header-includes` metadata field.
@@ -74,10 +75,13 @@ occur.
 
 - SUBTITLE: the document's subtitle; fully supported.
 
-- HTML_HEAD: arbitrary lines to add to the HTML document's head;
-  fully supported.
+  The content of this option is stored as inlines in the
+  `subtitle` metadata field.
+
+- HTML\_HEAD and HTML\_HEAD\_EXTRA: arbitrary lines to add to the
+  HTML document's head; fully supported.
 
-  The contents of this option are stored as a list of raw HTML
+  The contents of these options are stored as a list of raw HTML
   lines in the `header-includes` metadata field.
 
 Pandoc-specific options
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 2fbb26d31..0e2f49a83 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -76,6 +76,7 @@ block = choice [ mempty <$ blanklines
                , list
                , latexFragment
                , noteBlock
+               , rawOrgLine
                , paraOrPlain
                ] <?> "block"
 
@@ -559,6 +560,8 @@ include = try $ do
        | otherwise        -> Para content
       _ -> blk
 
+-- | Parses a meta line which defines a raw block. Currently recognized:
+-- @#+LATEX:@, @#+HTML:@, @#+TEXINFO:@, and @#+BEAMER@.
 rawExportLine :: PandocMonad m => OrgParser m Blocks
 rawExportLine = try $ do
   metaLineStart
@@ -567,6 +570,14 @@ rawExportLine = try $ do
     then B.rawBlock key <$> anyLine
     else mzero
 
+-- | Parses any meta line, i.e., a line starting with @#+@, into a raw
+-- org block. This should be the last resort when trying to parse
+-- keywords. Leading spaces are discarded.
+rawOrgLine :: PandocMonad m => OrgParser m (F Blocks)
+rawOrgLine = do
+  line <- metaLineStart *> anyLine
+  returnF $ B.rawBlock "org" $ ("#+" <> line)
+
 commentLine :: Monad m => OrgParser m Blocks
 commentLine = commentLineStart *> anyLine *> pure mempty
 
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 3e77b3f42..43de04ffa 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -22,7 +22,7 @@ import Text.Pandoc.Readers.Org.Inlines
 import Text.Pandoc.Readers.Org.ParserState
 import Text.Pandoc.Readers.Org.Parsing
 
-import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue)
+import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
@@ -57,13 +57,13 @@ removeMeta key meta' =
 -- The order, in which blocks are tried, makes sure that we're not looking at
 -- the beginning of a block, so we don't need to check for it
 metaLine :: PandocMonad m => OrgParser m Blocks
-metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
+metaLine = try $ mempty <$ metaLineStart <* keywordLine
 
-declarationLine :: PandocMonad m => OrgParser m ()
-declarationLine = try $ do
+keywordLine :: PandocMonad m => OrgParser m ()
+keywordLine = try $ do
   key   <- T.toLower <$> metaKey
-  case Map.lookup key exportSettingHandlers of
-    Nothing -> () <$ anyLine
+  case Map.lookup key keywordHandlers of
+    Nothing -> fail $ "Unknown keyword: " ++ T.unpack key
     Just hd -> hd
 
 metaKey :: Monad m => OrgParser m Text
@@ -71,28 +71,55 @@ metaKey = T.toLower <$> many1Char (noneOf ": \n\r")
                     <*  char ':'
                     <*  skipSpaces
 
-exportSettingHandlers :: PandocMonad m => Map Text (OrgParser m ())
-exportSettingHandlers = Map.fromList
-  [ ("result"     , fmap pure anyLine `parseThen` discard)
-    -- Common settings
-  , ("author"     , lineOfInlines `parseThen` collectLines "author")
-  , ("date"       , lineOfInlines `parseThen` setField "date")
-  , ("description", lineOfInlines `parseThen` collectLines "description")
-  , ("keywords"   , lineOfInlines `parseThen` collectLines "keywords")
-  , ("title"      , lineOfInlines `parseThen` collectLines "title")
-    -- LaTeX
-  , ("latex_class", fmap pure anyLine `parseThen` setField "documentclass")
-  , ("latex_class_options", (pure . T.filter (`notElem` ("[]" :: String)) <$> anyLine)
-                            `parseThen` setField "classoption")
-  , ("latex_header", metaExportSnippet "latex" `parseThen`
-                     collectAsList "header-includes")
-    -- HTML
-  , ("html_head"  , metaExportSnippet "html" `parseThen`
-                    collectAsList "header-includes")
-    -- pandoc-specific
-  , ("nocite"         , lineOfInlines `parseThen` collectLines "nocite")
-  , ("header-includes", lineOfInlines `parseThen` collectLines "header-includes")
-  , ("institute"      , lineOfInlines `parseThen` collectLines "institute")
+infix 0 ~~>
+(~~>) :: a -> b -> (a, b)
+a ~~> b = (a, b)
+
+keywordHandlers :: PandocMonad m => Map Text (OrgParser m ())
+keywordHandlers = Map.fromList
+  [ "author" ~~> lineOfInlines `parseThen` collectLines "author"
+  , "creator" ~~> fmap pure anyLine `parseThen` B.setMeta "creator"
+  , "date" ~~> lineOfInlines `parseThen` B.setMeta "date"
+  , "description" ~~> lineOfInlines `parseThen` collectLines "description"
+  , "email" ~~> fmap pure anyLine `parseThen` B.setMeta "email"
+  , "exclude_tags" ~~> tagList >>= updateState . setExcludedTags
+  , "header-includes" ~~>
+    lineOfInlines `parseThen` collectLines "header-includes"
+  -- HTML-specifix export settings
+  , "html_head" ~~>
+    metaExportSnippet "html" `parseThen` collectAsList "header-includes"
+  , "html_head_extra" ~~>
+    metaExportSnippet "html" `parseThen` collectAsList "header-includes"
+  , "institute" ~~> lineOfInlines `parseThen` collectLines "institute"
+  -- topic keywords
+  , "keywords" ~~> lineOfInlines `parseThen` collectLines "keywords"
+  -- LaTeX-specific export settings
+  , "latex_class" ~~> fmap pure anyLine `parseThen` B.setMeta "documentclass"
+  , "latex_class_options" ~~>
+      (pure . T.filter (`notElem` ("[]" :: String)) <$> anyLine)
+      `parseThen` B.setMeta "classoption"
+  , "latex_header" ~~>
+      metaExportSnippet "latex" `parseThen` collectAsList "header-includes"
+  , "latex_header_extra" ~~>
+      metaExportSnippet "latex" `parseThen` collectAsList "header-includes"
+  -- link and macro
+  , "link" ~~> addLinkFormatter
+  , "macro" ~~> macroDefinition >>= updateState . registerMacro
+  -- pandoc-specific way to include references in the bibliography
+  , "nocite" ~~> lineOfInlines `parseThen` collectLines "nocite"
+  -- compact way to set export settings
+  , "options"  ~~> exportSettings
+  -- pandoc-specific way to configure emphasis recognition
+  , "pandoc-emphasis-post" ~~> emphChars >>= updateState . setEmphasisPostChar
+  , "pandoc-emphasis-pre" ~~> emphChars >>= updateState . setEmphasisPreChar
+  -- result markers (ignored)
+  , "result" ~~> void anyLine
+  , "select_tags" ~~> tagList >>= updateState . setSelectedTags
+  , "seq_todo" ~~> todoSequence >>= updateState . registerTodoSequence
+  , "subtitle" ~~> lineOfInlines `parseThen` collectLines "subtitle"
+  , "title" ~~> lineOfInlines `parseThen` collectLines "title"
+  , "todo" ~~> todoSequence >>= updateState . registerTodoSequence
+  , "typ_todo" ~~> todoSequence >>= updateState . registerTodoSequence
   ]
 
 parseThen :: PandocMonad m
@@ -104,9 +131,6 @@ parseThen p modMeta = do
   meta  <- orgStateMeta <$> getState
   updateState (\st -> st { orgStateMeta = modMeta <$> value <*> meta })
 
-discard :: a -> Meta -> Meta
-discard = const id
-
 collectLines :: Text -> Inlines -> Meta -> Meta
 collectLines key value meta =
   let value' = appendValue meta (B.toList value)
@@ -141,51 +165,25 @@ collectAsList key value meta =
                 Just x             -> [x]
                 _                  -> []
 
-setField :: ToMetaValue a => Text -> a -> Meta -> Meta
-setField field value meta = B.setMeta field (B.toMetaValue value) meta
-
 -- | Read an format specific meta definition
 metaExportSnippet :: Monad m => Text -> OrgParser m (F Inlines)
 metaExportSnippet format = pure . B.rawInline format <$> anyLine
 
---
--- export options
---
-optionLine :: PandocMonad m => OrgParser m ()
-optionLine = try $ do
-  key <- metaKey
-  case key of
-    "link"     -> parseLinkFormat >>= uncurry addLinkFormat
-    "options"  -> exportSettings
-    "todo"     -> todoSequence >>= updateState . registerTodoSequence
-    "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
-    "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
-    "macro"    -> macroDefinition >>= updateState . registerMacro
-    "exclude_tags" -> tagList >>= updateState . setExcludedTags
-    "select_tags" -> tagList >>= updateState . setSelectedTags
-    "pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar
-    "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
-    _          -> mzero
-
-addLinkFormat :: Monad m => Text
-              -> (Text -> Text)
-              -> OrgParser m ()
-addLinkFormat key formatter = updateState $ \s ->
-  let fs = orgStateLinkFormatters s
-  in s{ orgStateLinkFormatters = Map.insert key formatter fs }
-
-parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text)
-parseLinkFormat = try $ do
+-- | Parse a link type definition (like @wp https://en.wikipedia.org/wiki/@).
+addLinkFormatter :: Monad m => OrgParser m ()
+addLinkFormatter = try $ do
   linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces
-  linkSubst <- parseFormat
-  return (linkType, linkSubst)
+  formatter <- parseFormat
+  updateState $ \s ->
+    let fs = orgStateLinkFormatters s
+    in s{ orgStateLinkFormatters = Map.insert linkType formatter fs }
 
 -- | An ad-hoc, single-argument-only implementation of a printf-style format
 -- parser.
 parseFormat :: Monad m => OrgParser m (Text -> Text)
 parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
  where
-   -- inefficient, but who cares
+   -- inefficient
    replacePlain = try $ (\x -> T.concat . flip intersperse x)
                      <$> sequence [tillSpecifier 's', rest]
    replaceUrl   = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack)
@@ -224,6 +222,7 @@ setEmphasisPostChar csMb st =
   let postChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb
   in st { orgStateEmphasisPostChars = postChars }
 
+-- | Parses emphasis border character like @".,?!"@
 emphChars :: Monad m => OrgParser m (Maybe [Char])
 emphChars = do
   skipSpaces
@@ -234,16 +233,14 @@ lineOfInlines = do
   updateLastPreCharPos
   trimInlinesF . mconcat <$> manyTill inline newline
 
---
--- ToDo Sequences and Keywords
---
+-- | Parses ToDo sequences / keywords like @TODO DOING | DONE@.
 todoSequence :: Monad m => OrgParser m TodoSequence
 todoSequence = try $ do
   todoKws <- todoKeywords
   doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
   newline
-  -- There must be at least one DONE keyword. The last TODO keyword is taken if
-  -- necessary.
+  -- There must be at least one DONE keyword. The last TODO keyword is
+  -- taken if necessary.
   case doneKws of
     Just done  -> return $ keywordsToSequence todoKws done
     Nothing    -> case reverse todoKws of
diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs
index c811f2363..0bd63b15d 100644
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@@ -93,6 +93,13 @@ tests =
         in Pandoc meta mempty
       ]
 
+    , "Subtitle" =:
+      T.unlines [ "#+SUBTITLE: Your Life in"
+                , "#+SUBTITLE: /Plain/ Text"
+                ] =?>
+      let subtitle = "Your Life in" <> softbreak <> emph "Plain" <> " Text"
+      in Pandoc (setMeta "subtitle" (toMetaValue subtitle) nullMeta) mempty
+
     , "Keywords" =:
       T.unlines [ "#+KEYWORDS: pandoc, testing,"
                 , "#+KEYWORDS: Org"
@@ -101,42 +108,117 @@ tests =
           meta = setMeta "keywords" (MetaInlines keywords) nullMeta
       in Pandoc meta mempty
 
-    , "LaTeX_headers options are translated to header-includes" =:
-      "#+LaTeX_header: \\usepackage{tikz}" =?>
-      let latexInlines = rawInline "latex" "\\usepackage{tikz}"
-          inclList = MetaList [MetaInlines (toList latexInlines)]
-          meta = setMeta "header-includes" inclList nullMeta
-      in Pandoc meta mempty
+    , "Institute" =:
+      "#+INSTITUTE: ACME Inc." =?>
+      Pandoc (setMeta "institute" ("ACME Inc." :: Inlines) nullMeta) mempty
 
-    , testGroup "LaTeX_CLASS"
-      [ "LaTeX_class option is translated to documentclass" =:
-        "#+LATEX_CLASS: article" =?>
-        let meta = setMeta "documentclass" (MetaString "article") nullMeta
+    , testGroup "LaTeX"
+      [ "LATEX_HEADER" =:
+        "#+LaTeX_header: \\usepackage{tikz}" =?>
+        let latexInlines = rawInline "latex" "\\usepackage{tikz}"
+            inclList = MetaList [MetaInlines (toList latexInlines)]
+            meta = setMeta "header-includes" inclList nullMeta
         in Pandoc meta mempty
 
-      , "last definition takes precedence" =:
-        T.unlines [ "#+LATEX_CLASS: this will not be used"
-                  , "#+LATEX_CLASS: report"
-                  ] =?>
-        let meta = setMeta "documentclass" (MetaString "report") nullMeta
+      , "LATEX_HEADER_EXTRA" =:
+        "#+LATEX_HEADER_EXTRA: \\usepackage{calc}" =?>
+        let latexInlines = rawInline "latex" "\\usepackage{calc}"
+            inclList = toMetaValue [latexInlines]
+        in Pandoc (setMeta "header-includes" inclList nullMeta) mempty
+
+      , testGroup "LaTeX_CLASS"
+        [ "stored as documentclass" =:
+          "#+LATEX_CLASS: article" =?>
+          let meta = setMeta "documentclass" (MetaString "article") nullMeta
+          in Pandoc meta mempty
+
+        , "last definition takes precedence" =:
+          T.unlines [ "#+LATEX_CLASS: this will not be used"
+                    , "#+LATEX_CLASS: report"
+                    ] =?>
+          let meta = setMeta "documentclass" (MetaString "report") nullMeta
+          in Pandoc meta mempty
+        ]
+
+      , "LATEX_CLASS_OPTIONS as classoption" =:
+        "#+LATEX_CLASS_OPTIONS: [a4paper]" =?>
+        let meta = setMeta "classoption" (MetaString "a4paper") nullMeta
         in Pandoc meta mempty
       ]
 
-    , "LaTeX_class_options is translated to classoption" =:
-      "#+LATEX_CLASS_OPTIONS: [a4paper]" =?>
-      let meta = setMeta "classoption" (MetaString "a4paper") nullMeta
-      in Pandoc meta mempty
+    , testGroup "HTML"
+      [ "HTML_HEAD values are added to header-includes" =:
+        "#+html_head: <meta/>" =?>
+        let html = rawInline "html" "<meta/>"
+            inclList = MetaList [MetaInlines (toList html)]
+            meta = setMeta "header-includes" inclList nullMeta
+        in Pandoc meta mempty
 
-    , "LaTeX_class_options is translated to classoption" =:
-      "#+html_head: <meta/>" =?>
-      let html = rawInline "html" "<meta/>"
-          inclList = MetaList [MetaInlines (toList html)]
-          meta = setMeta "header-includes" inclList nullMeta
-      in Pandoc meta mempty
+      , "HTML_HEAD_EXTRA behaves like HTML_HEAD" =:
+        T.unlines [ "#+HTML_HEAD: <meta name=\"generator\" content=\"pandoc\">"
+                  , "#+HTML_HEAD_EXTRA: <meta charset=\"utf-8\">"
+                  ] =?>
+        let generator = rawInline "html"
+                                  "<meta name=\"generator\" content=\"pandoc\">"
+            charset = rawInline "html" "<meta charset=\"utf-8\">"
+            inclList = toMetaValue [generator, charset]
+        in Pandoc (setMeta "header-includes" inclList nullMeta) mempty
+      ]
+    ]
 
-    , "Institute" =:
-      "#+INSTITUTE: ACME Inc." =?>
-      Pandoc (setMeta "institute" ("ACME Inc." :: Inlines) nullMeta) mempty
+  , testGroup "Non-export keywords"
+    [ testGroup "#+LINK"
+      [ "Link abbreviation" =:
+        T.unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s"
+                  , "[[wp:Org_mode][Wikipedia on Org-mode]]"
+                  ] =?>
+        para (link "https://en.wikipedia.org/wiki/Org_mode" ""
+               ("Wikipedia" <> space <> "on" <> space <> "Org-mode"))
+
+      , "Link abbreviation, defined after first use" =:
+        T.unlines [ "[[zl:non-sense][Non-sense articles]]"
+                  , "#+LINK: zl http://zeitlens.com/tags/%s.html"
+                  ] =?>
+        para (link "http://zeitlens.com/tags/non-sense.html" ""
+               ("Non-sense" <> space <> "articles"))
+
+      , "Link abbreviation, URL encoded arguments" =:
+        T.unlines [ "#+link: expl http://example.com/%h/foo"
+                  , "[[expl:Hello, World!][Moin!]]"
+                  ] =?>
+        para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!")
+
+      , "Link abbreviation, append arguments" =:
+        T.unlines [ "#+link: expl http://example.com/"
+                  , "[[expl:foo][bar]]"
+                  ] =?>
+        para (link "http://example.com/foo" "" "bar")
+      ]
+
+    , testGroup "emphasis config"
+      [ "Changing pre and post chars for emphasis" =:
+        T.unlines [ "#+pandoc-emphasis-pre: \"[)\""
+                  , "#+pandoc-emphasis-post: \"]\\n\""
+                  , "([/emph/])*foo*"
+                  ] =?>
+        para ("([" <> emph "emph" <> "])" <> strong "foo")
+
+      , "setting an invalid value restores the default" =:
+        T.unlines [ "#+pandoc-emphasis-pre: \"[\""
+                  , "#+pandoc-emphasis-post: \"]\""
+                  , "#+pandoc-emphasis-pre:"
+                  , "#+pandoc-emphasis-post:"
+                  , "[/noemph/]"
+                  ] =?>
+        para "[/noemph/]"
+      ]
+
+    , "Unknown keyword" =:
+      T.unlines [ "#+UNKNOWN_KEYWORD: Chumbawamba"
+                , "#+ANOTHER_UNKNOWN: Blur"
+                ] =?>
+      rawBlock "org" "#+UNKNOWN_KEYWORD: Chumbawamba" <>
+      rawBlock "org" "#+ANOTHER_UNKNOWN: Blur"
     ]
 
   , "Properties drawer" =:
@@ -193,48 +275,4 @@ tests =
                 ] =?>
       (para (spanWith ("link-here", [], []) mempty <> "Target.") <>
        para (emph ("See" <> space <> "here!")))
-
-  , "Link abbreviation" =:
-      T.unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s"
-                , "[[wp:Org_mode][Wikipedia on Org-mode]]"
-                ] =?>
-      para (link "https://en.wikipedia.org/wiki/Org_mode" ""
-                  ("Wikipedia" <> space <> "on" <> space <> "Org-mode"))
-
-  , "Link abbreviation, defined after first use" =:
-      T.unlines [ "[[zl:non-sense][Non-sense articles]]"
-                , "#+LINK: zl http://zeitlens.com/tags/%s.html"
-                ] =?>
-      para (link "http://zeitlens.com/tags/non-sense.html" ""
-                  ("Non-sense" <> space <> "articles"))
-
-  , "Link abbreviation, URL encoded arguments" =:
-      T.unlines [ "#+link: expl http://example.com/%h/foo"
-                , "[[expl:Hello, World!][Moin!]]"
-                ] =?>
-      para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!")
-
-  , "Link abbreviation, append arguments" =:
-      T.unlines [ "#+link: expl http://example.com/"
-                , "[[expl:foo][bar]]"
-                ] =?>
-      para (link "http://example.com/foo" "" "bar")
-
-  , testGroup "emphasis config"
-    [ "Changing pre and post chars for emphasis" =:
-        T.unlines [ "#+pandoc-emphasis-pre: \"[)\""
-                  , "#+pandoc-emphasis-post: \"]\\n\""
-                  , "([/emph/])*foo*"
-                  ] =?>
-        para ("([" <> emph "emph" <> "])" <> strong "foo")
-
-    , "setting an invalid value restores the default" =:
-        T.unlines [ "#+pandoc-emphasis-pre: \"[\""
-                  , "#+pandoc-emphasis-post: \"]\""
-                  , "#+pandoc-emphasis-pre:"
-                  , "#+pandoc-emphasis-post:"
-                  , "[/noemph/]"
-                  ] =?>
-        para "[/noemph/]"
-    ]
   ]