From a340c7249f8e19d36ee4a68663b4c97e0893292b Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 22 May 2016 16:33:31 +0200
Subject: Org reader: extract parsing function to module

The Org-mode reader uses many functions defined in the
`Text.Pandoc.Parsing` utility module.  Some of the functions are
overwritten with versions adapted to Org-mode idiosyncrasies.  These
special functions, as well as the normal Pandoc versions, are combined
in a single module to increase the ease of use.

This leads to decoupling of Org-mode and Pandoc and hence to slightly
cleaner code.  The downside is code-bloat due to repeated import/export
statements.
---
 src/Text/Pandoc/Readers/Org.hs             |  86 ++------------
 src/Text/Pandoc/Readers/Org/ParserState.hs |   7 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs     | 182 +++++++++++++++++++++++++++++
 3 files changed, 197 insertions(+), 78 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/Org/Parsing.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 621e7107f..fd811c078 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -29,27 +29,23 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
+import           Text.Pandoc.Readers.Org.ParserState
+import           Text.Pandoc.Readers.Org.Parsing
+
 import qualified Text.Pandoc.Builder as B
 import           Text.Pandoc.Builder ( Inlines, Blocks )
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Compat.Monoid ((<>))
 import           Text.Pandoc.Error
 import           Text.Pandoc.Options
-import qualified Text.Pandoc.Parsing as P
-import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
-                                            , anyLine, blanklines, newline
-                                            , orderedListMarker
-                                            , parseFromString
-                                            )
 import           Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
-import           Text.Pandoc.Readers.Org.ParserState
 import           Text.Pandoc.Shared (compactify', compactify'DL)
 import           Text.TeXMath (readTeX, writePandoc, DisplayType(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 
 import           Control.Arrow (first)
 import           Control.Monad (foldM, guard, mplus, mzero, when)
-import           Control.Monad.Reader ( Reader, runReader )
+import           Control.Monad.Reader ( runReader )
 import           Data.Char (isAlphaNum, isSpace, toLower, toUpper)
 import           Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
 import qualified Data.Map as M
@@ -63,9 +59,6 @@ readOrg :: ReaderOptions -- ^ Reader options
         -> Either PandocError Pandoc
 readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
 
--- | The parser used to read org files.
-type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
-
 --
 -- Functions acting on the parser state
 --
@@ -73,14 +66,6 @@ recordAnchorId :: String -> OrgParser ()
 recordAnchorId i = updateState $ \s ->
   s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
 
-updateLastForbiddenCharPos :: OrgParser ()
-updateLastForbiddenCharPos = getPosition >>= \p ->
-  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
-
-updateLastPreCharPos :: OrgParser ()
-updateLastPreCharPos = getPosition >>= \p ->
-  updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
-
 pushToInlineCharStack :: Char -> OrgParser ()
 pushToInlineCharStack c = updateState $ \s ->
   s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
@@ -258,57 +243,6 @@ isHeaderLevelLowerEq n blk =
     _                  -> False
 
 
---
--- Adaptions and specializations of parsing utilities
---
-
--- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
--- of the state saved and restored.
-parseFromString :: OrgParser a -> String -> OrgParser a
-parseFromString parser str' = do
-  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
-  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
-  result <- P.parseFromString parser str'
-  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
-  return result
-
--- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
-newline :: OrgParser Char
-newline =
-  P.newline
-       <* updateLastPreCharPos
-       <* updateLastForbiddenCharPos
-
--- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: OrgParser [Char]
-blanklines =
-  P.blanklines
-       <* updateLastPreCharPos
-       <* updateLastForbiddenCharPos
-
-anyLine :: OrgParser String
-anyLine =
-  P.anyLine
-    <* updateLastPreCharPos
-    <* updateLastForbiddenCharPos
-
--- | Succeeds when we're in list context.
-inList :: OrgParser ()
-inList = do
-  ctx <- orgStateParserContext <$> getState
-  guard (ctx == ListItemState)
-
--- | Parse in different context
-withContext :: ParserContext -- ^ New parser context
-            -> OrgParser a   -- ^ Parser to run in that context
-            -> OrgParser a
-withContext context parser = do
-  oldContext <- orgStateParserContext <$> getState
-  updateState $ \s -> s{ orgStateParserContext = context }
-  result <- parser
-  updateState $ \s -> s{ orgStateParserContext = oldContext }
-  return result
-
 --
 -- parsing blocks
 --
@@ -398,7 +332,7 @@ keyValues = try $
    endOfValue :: OrgParser ()
    endOfValue =
      lookAhead $ (() <$ try (many1 spaceChar <* key))
-              <|> () <$ P.newline
+              <|> () <$ newline
 
 
 --
@@ -675,7 +609,7 @@ propertiesDrawer = try $ do
    key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
 
    value :: OrgParser String
-   value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> P.newline)
+   value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
 
 keyValuesToAttr :: [(String, String)] -> Attr
 keyValuesToAttr kvs =
@@ -696,7 +630,7 @@ keyValuesToAttr kvs =
 figure :: OrgParser (F Blocks)
 figure = try $ do
   figAttrs <- blockAttributes
-  src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
+  src <- skipSpaces *> selfTarget <* skipSpaces <* newline
   guard . not . isNothing . blockAttrCaption $ figAttrs
   guard (isImageFilename src)
   let figName    = fromMaybe mempty $ blockAttrName figAttrs
@@ -785,7 +719,7 @@ parseFormat = try $ do
 header :: OrgParser (F Blocks)
 header = try $ do
   level    <- headerStart
-  title    <- manyTill inline (lookAhead $ optional headerTags <* P.newline)
+  title    <- manyTill inline (lookAhead $ optional headerTags <* newline)
   tags     <- option [] headerTags
   newline
   propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
@@ -1083,7 +1017,7 @@ definitionListItem parseMarkerGetLength = try $ do
   return $ (,) <$> term' <*> fmap (:[]) contents'
  where
    definitionMarker =
-     spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
+     spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
 
 
 -- parse raw text for one list item, excluding start marker and continuations
@@ -1559,7 +1493,7 @@ many1TillNOrLessNewlines n p end = try $
    nMoreLines k        cs = try $ (final k cs <|> rest k cs)
                                   >>= uncurry nMoreLines
    final _ cs = (\x -> (Nothing,      cs ++ x)) <$> try finalLine
-   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
+   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
    finalLine = try $ manyTill p end
    minus1 k = k - 1
    oneOrMore cs = guard (not $ null cs) *> return cs
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 6a902cd46..e648a883e 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -29,9 +29,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Define the Org-mode parser state.
 -}
 module Text.Pandoc.Readers.Org.ParserState
-  ( OrgParserState(..)
-  , OrgParserLocal(..)
+  ( OrgParserState (..)
+  , OrgParserLocal (..)
   , OrgNoteRecord
+  , HasReaderOptions (..)
+  , HasQuoteContext (..)
   , F(..)
   , askF
   , asksF
@@ -184,6 +186,7 @@ modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParse
 modifyExportSettings setter val state =
   state { orgStateExportSettings = setter val . orgStateExportSettings $ state }
 
+
 --
 -- Parser state reader
 --
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
new file mode 100644
index 000000000..efe2ae25f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -0,0 +1,182 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Org.Options
+   Copyright   : Copyright (C) 2014-2016 Albert Krewinkel
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Org-mode parsing utilities.
+
+Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
+functions are adapted to Org-mode specific functionality.
+-}
+module Text.Pandoc.Readers.Org.Parsing
+  ( OrgParser
+  , anyLine
+  , blanklines
+  , newline
+  , parseFromString
+  , inList
+  , withContext
+  , updateLastForbiddenCharPos
+  , updateLastPreCharPos
+  -- * Re-exports from Text.Pandoc.Parser
+  , ParserContext (..)
+  , many1Till
+  , notFollowedBy'
+  , spaceChar
+  , nonspaceChar
+  , skipSpaces
+  , blankline
+  , enclosed
+  , stringAnyCase
+  , charsInBalanced
+  , uri
+  , withRaw
+  , readWithM
+  , guardEnabled
+  , updateLastStrPos
+  , notAfterString
+  , ParserState (..)
+  , registerHeader
+  , QuoteContext (..)
+  , singleQuoteStart
+  , singleQuoteEnd
+  , doubleQuoteStart
+  , doubleQuoteEnd
+  , dash
+  , ellipses
+  , citeKey
+  -- * Re-exports from Text.Pandoc.Parsec
+  , runParser
+  , getInput
+  , char
+  , letter
+  , digit
+  , alphaNum
+  , skipMany1
+  , spaces
+  , anyChar
+  , string
+  , count
+  , eof
+  , noneOf
+  , oneOf
+  , lookAhead
+  , notFollowedBy
+  , many
+  , many1
+  , manyTill
+  , (<|>)
+  , (<?>)
+  , choice
+  , try
+  , sepBy
+  , sepBy1
+  , option
+  , optional
+  , optionMaybe
+  , getState
+  , updateState
+  , SourcePos
+  , getPosition
+  ) where
+
+import           Text.Pandoc.Readers.Org.ParserState
+
+import qualified Text.Pandoc.Parsing as P
+import           Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
+                                            , parseFromString )
+
+import           Control.Monad ( guard )
+import           Control.Monad.Reader ( Reader )
+
+-- | The parser used to read org files.
+type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+
+--
+-- Adaptions and specializations of parsing utilities
+--
+
+-- | Parse any line of text
+anyLine :: OrgParser String
+anyLine =
+  P.anyLine
+    <* updateLastPreCharPos
+    <* updateLastForbiddenCharPos
+
+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString parser str' = do
+  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+  result <- P.parseFromString parser str'
+  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+  return result
+
+-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
+newline :: OrgParser Char
+newline =
+  P.newline
+       <* updateLastPreCharPos
+       <* updateLastForbiddenCharPos
+
+-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
+blanklines :: OrgParser [Char]
+blanklines =
+  P.blanklines
+       <* updateLastPreCharPos
+       <* updateLastForbiddenCharPos
+
+-- | Succeeds when we're in list context.
+inList :: OrgParser ()
+inList = do
+  ctx <- orgStateParserContext <$> getState
+  guard (ctx == ListItemState)
+
+-- | Parse in different context
+withContext :: ParserContext -- ^ New parser context
+            -> OrgParser a   -- ^ Parser to run in that context
+            -> OrgParser a
+withContext context parser = do
+  oldContext <- orgStateParserContext <$> getState
+  updateState $ \s -> s{ orgStateParserContext = context }
+  result <- parser
+  updateState $ \s -> s{ orgStateParserContext = oldContext }
+  return result
+
+--
+-- Parser state update functions
+--
+
+-- | Set the current position as the last position at which a forbidden char
+-- was found (i.e. a character which is not allowed at the inner border of
+-- markup).
+updateLastForbiddenCharPos :: OrgParser ()
+updateLastForbiddenCharPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
+
+-- | Set the current parser position as the position at which a character was
+-- seen which allows inline markup to follow.
+updateLastPreCharPos :: OrgParser ()
+updateLastPreCharPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
-- 
cgit v1.2.3


From 39e8b4276e6d88d5cbb943d04c866dde9bf6473c Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 22 May 2016 16:52:06 +0200
Subject: Org reader: extract inline parser to module

Inline parsing code is moved to a separate module.  Parsers for block
starts are extracted as well, as those are used in the `endline` parser.

This is part of the Org-mode reader cleanup effort.
---
 pandoc.cabal                               |   2 +
 src/Text/Pandoc/Readers/Org.hs             | 797 ++---------------------------
 src/Text/Pandoc/Readers/Org/BlockStarts.hs | 112 ++++
 src/Text/Pandoc/Readers/Org/Inlines.hs     | 715 ++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/Org/Parsing.hs     |  19 +
 5 files changed, 889 insertions(+), 756 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/Org/BlockStarts.hs
 create mode 100644 src/Text/Pandoc/Readers/Org/Inlines.hs

(limited to 'src/Text')

diff --git a/pandoc.cabal b/pandoc.cabal
index 61b5043ba..7286bd890 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -393,6 +393,8 @@ Library
                    Text.Pandoc.Readers.Odt.Generic.XMLConverter,
                    Text.Pandoc.Readers.Odt.Arrows.State,
                    Text.Pandoc.Readers.Odt.Arrows.Utils,
+                   Text.Pandoc.Readers.Org.BlockStarts,
+                   Text.Pandoc.Readers.Org.Inlines,
                    Text.Pandoc.Readers.Org.ParserState,
                    Text.Pandoc.Readers.Org.Parsing,
                    Text.Pandoc.Writers.Shared,
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index fd811c078..605d2220e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,5 +1,4 @@
 {-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE OverloadedStrings #-}
 {-
 Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 
@@ -29,6 +28,8 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
+import           Text.Pandoc.Readers.Org.BlockStarts
+import           Text.Pandoc.Readers.Org.Inlines
 import           Text.Pandoc.Readers.Org.ParserState
 import           Text.Pandoc.Readers.Org.Parsing
 
@@ -38,19 +39,16 @@ import           Text.Pandoc.Definition
 import           Text.Pandoc.Compat.Monoid ((<>))
 import           Text.Pandoc.Error
 import           Text.Pandoc.Options
-import           Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
-import           Text.Pandoc.Shared (compactify', compactify'DL)
-import           Text.TeXMath (readTeX, writePandoc, DisplayType(..))
-import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
+import           Text.Pandoc.Shared ( compactify', compactify'DL )
 
-import           Control.Arrow (first)
-import           Control.Monad (foldM, guard, mplus, mzero, when)
+import           Control.Arrow ( first )
+import           Control.Monad ( foldM, guard, mzero )
 import           Control.Monad.Reader ( runReader )
-import           Data.Char (isAlphaNum, isSpace, toLower, toUpper)
-import           Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
+import           Data.Char ( toLower, toUpper)
+import           Data.List ( foldl', intersperse, isPrefixOf )
 import qualified Data.Map as M
 import           Data.Maybe ( fromMaybe, isNothing )
-import           Network.HTTP (urlEncode)
+import           Network.HTTP ( urlEncode )
 
 
 -- | Parse org-mode string and return a Pandoc document.
@@ -59,54 +57,6 @@ readOrg :: ReaderOptions -- ^ Reader options
         -> Either PandocError Pandoc
 readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
 
---
--- Functions acting on the parser state
---
-recordAnchorId :: String -> OrgParser ()
-recordAnchorId i = updateState $ \s ->
-  s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
-
-pushToInlineCharStack :: Char -> OrgParser ()
-pushToInlineCharStack c = updateState $ \s ->
-  s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
-
-popInlineCharStack :: OrgParser ()
-popInlineCharStack = updateState $ \s ->
-  s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
-
-surroundingEmphasisChar :: OrgParser [Char]
-surroundingEmphasisChar =
-  take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
-
-startEmphasisNewlinesCounting :: Int -> OrgParser ()
-startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
-  s{ orgStateEmphasisNewlines = Just maxNewlines }
-
-decEmphasisNewlinesCount :: OrgParser ()
-decEmphasisNewlinesCount = updateState $ \s ->
-  s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
-
-newlinesCountWithinLimits :: OrgParser Bool
-newlinesCountWithinLimits = do
-  st <- getState
-  return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
-
-resetEmphasisNewlines :: OrgParser ()
-resetEmphasisNewlines = updateState $ \s ->
-  s{ orgStateEmphasisNewlines = Nothing }
-
-addLinkFormat :: String
-              -> (String -> String)
-              -> OrgParser ()
-addLinkFormat key formatter = updateState $ \s ->
-  let fs = orgStateLinkFormatters s
-  in s{ orgStateLinkFormatters = M.insert key formatter fs }
-
-addToNotesTable :: OrgNoteRecord -> OrgParser ()
-addToNotesTable note = do
-  oldnotes <- orgStateNotes' <$> getState
-  updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
-
 --
 -- Export Settings
 --
@@ -259,7 +209,7 @@ block = choice [ mempty <$ blanklines
                , genericDrawer
                , specialLine
                , header
-               , return <$> hline
+               , horizontalRule
                , list
                , latexFragment
                , noteBlock
@@ -457,9 +407,6 @@ indentWith num = do
 
 type SwitchOption = (Char, Maybe String)
 
-orgArgWord :: OrgParser String
-orgArgWord = many1 orgArgWordChar
-
 -- | Parse code block arguments
 -- TODO: We currently don't handle switches.
 codeHeaderArgs :: OrgParser ([String], [(String, String)])
@@ -474,7 +421,10 @@ codeHeaderArgs = try $ do
          , map toRundocAttrib (("language", language) : parameters)
          )
     else ([ pandocLang ], parameters)
- where hasRundocParameters = not . null
+ where
+   hasRundocParameters = not . null
+   toRundocAttrib = first ("rundoc-" ++)
+
 
 switch :: OrgParser SwitchOption
 switch = try $ simpleSwitch <|> lineNumbersSwitch
@@ -508,17 +458,6 @@ blockOption = try $ do
   paramValue <- option "yes" orgParamValue
   return (argKey, paramValue)
 
-inlineBlockOption :: OrgParser (String, String)
-inlineBlockOption = try $ do
-  argKey <- orgArgKey
-  paramValue <- option "yes" orgInlineParamValue
-  return (argKey, paramValue)
-
-orgArgKey :: OrgParser String
-orgArgKey = try $
-  skipSpaces *> char ':'
-             *> many1 orgArgWordChar
-
 orgParamValue :: OrgParser String
 orgParamValue = try $
   skipSpaces
@@ -526,19 +465,6 @@ orgParamValue = try $
     *> many1 (noneOf "\t\n\r ")
     <* skipSpaces
 
-orgInlineParamValue :: OrgParser String
-orgInlineParamValue = try $
-  skipSpaces
-    *> notFollowedBy (char ':')
-    *> many1 (noneOf "\t\n\r ]")
-    <* skipSpaces
-
-orgArgWordChar :: OrgParser Char
-orgArgWordChar = alphaNum <|> oneOf "-_"
-
-toRundocAttrib :: (String, String) -> (String, String)
-toRundocAttrib = first ("rundoc-" ++)
-
 commaEscaped :: String -> String
 commaEscaped (',':cs@('*':_))     = cs
 commaEscaped (',':cs@('#':'+':_)) = cs
@@ -552,7 +478,10 @@ exampleCode :: String -> Blocks
 exampleCode = B.codeBlockWith ("", ["example"], [])
 
 exampleLine :: OrgParser String
-exampleLine = try $ skipSpaces *> string ": " *> anyLine
+exampleLine = try $ exampleLineStart *> anyLine
+
+horizontalRule :: OrgParser (F Blocks)
+horizontalRule = return B.horizontalRule <$ try hline
 
 
 --
@@ -582,11 +511,6 @@ genericDrawer = try $ do
   drawerDiv :: String -> F Blocks -> F Blocks
   drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
 
-drawerStart :: OrgParser String
-drawerStart = try $
-  skipSpaces *> drawerName <* skipSpaces <* newline
- where drawerName = char ':' *> manyTill nonspaceChar (char ':')
-
 drawerLine :: OrgParser String
 drawerLine = anyLine
 
@@ -639,31 +563,38 @@ figure = try $ do
   let attr       = (mempty, mempty, figKeyVals)
   return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
  where
+   withFigPrefix :: String -> String
    withFigPrefix cs =
      if "fig:" `isPrefixOf` cs
      then cs
      else "fig:" ++ cs
 
+   selfTarget :: OrgParser String
+   selfTarget = try $ char '[' *> linkTarget <* char ']'
+
+
 --
 -- Comments, Options and Metadata
 --
+
+addLinkFormat :: String
+              -> (String -> String)
+              -> OrgParser ()
+addLinkFormat key formatter = updateState $ \s ->
+  let fs = orgStateLinkFormatters s
+  in s{ orgStateLinkFormatters = M.insert key formatter fs }
+
 specialLine :: OrgParser (F Blocks)
 specialLine = fmap return . try $ metaLine <|> commentLine
 
-metaLine :: OrgParser Blocks
-metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
-
 -- The order, in which blocks are tried, makes sure that we're not looking at
 -- the beginning of a block, so we don't need to check for it
-metaLineStart :: OrgParser ()
-metaLineStart = try $ skipSpaces <* string "#+"
+metaLine :: OrgParser Blocks
+metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
 
 commentLine :: OrgParser Blocks
 commentLine = commentLineStart *> anyLine *> pure mempty
 
-commentLineStart :: OrgParser ()
-commentLineStart = try $ skipSpaces <* string "# "
-
 declarationLine :: OrgParser ()
 declarationLine = try $ do
   key <- metaKey
@@ -741,23 +672,6 @@ header = try $ do
           *> many1 tag
           <* skipSpaces
 
-headerStart :: OrgParser Int
-headerStart = try $
-  (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
-
-
--- Don't use (or need) the reader wrapper here, we want hline to be
--- @show@able.  Otherwise we can't use it with @notFollowedBy'@.
-
--- | Horizontal Line (five -- dashes or more)
-hline :: OrgParser Blocks
-hline = try $ do
-  skipSpaces
-  string "-----"
-  many (char '-')
-  skipSpaces
-  newline
-  return B.horizontalRule
 
 --
 -- Tables
@@ -793,9 +707,6 @@ orgToPandocTable :: OrgTable
 orgToPandocTable (OrgTable aligns heads lns) caption =
   B.table caption (zip aligns $ repeat 0) heads lns
 
-tableStart :: OrgParser Char
-tableStart = try $ skipSpaces *> char '|'
-
 tableRows :: OrgParser [OrgTableRow]
 tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
 
@@ -899,25 +810,12 @@ latexFragment = try $ do
                               , "\\end{", e, "}\n"
                               ]
 
-latexEnvStart :: OrgParser String
-latexEnvStart = try $ do
-  skipSpaces *> string "\\begin{"
-             *> latexEnvName
-             <* string "}"
-             <* blankline
-
 latexEnd :: String -> OrgParser ()
 latexEnd envName = try $
   () <$ skipSpaces
      <* string ("\\end{" ++ envName ++ "}")
      <* blankline
 
--- | Parses a LaTeX environment name.
-latexEnvName :: OrgParser String
-latexEnvName = try $ do
-  mappend <$> many1 alphaNum
-          <*> option "" (string "*")
-
 
 --
 -- Footnote defintions
@@ -942,7 +840,7 @@ paraOrPlain = try $ do
   -- is directly followed by a list item, in which case the block is read as
   -- plain text.
   try (guard nl
-       *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart))
+       *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
        *> return (B.para <$> ils))
     <|>  (return (B.plain <$> ils))
 
@@ -971,38 +869,21 @@ orderedList :: OrgParser (F Blocks)
 orderedList = fmap B.orderedList . fmap compactify' . sequence
               <$> many1 (listItem orderedListStart)
 
-genericListStart :: OrgParser String
-                 -> OrgParser Int
-genericListStart listMarker = try $
-  (+) <$> (length <$> many spaceChar)
-      <*> (length <$> listMarker <* many1 spaceChar)
-
--- parses bullet list marker. maybe we know the indent level
-bulletListStart :: OrgParser Int
-bulletListStart = bulletListStart' Nothing
-
 bulletListStart' :: Maybe Int -> OrgParser Int
 -- returns length of bulletList prefix, inclusive of marker
 bulletListStart' Nothing  = do ind <- length <$> many spaceChar
-                               when (ind == 0) $ notFollowedBy (char '*')
-                               oneOf bullets
-                               many1 spaceChar
+                               oneOf (bullets $ ind == 0)
+                               skipSpaces1
                                return (ind + 1)
- -- Unindented lists are legal, but they can't use '*' bullets
- -- We return n to maintain compatibility with the generic listItem
 bulletListStart' (Just n) = do count (n-1) spaceChar
-                               when (n == 1) $ notFollowedBy (char '*')
-                               oneOf bullets
+                               oneOf (bullets $ n == 1)
                                many1 spaceChar
                                return n
 
-bullets :: String
-bullets = "*+-"
-
-orderedListStart :: OrgParser Int
-orderedListStart = genericListStart orderedListMarker
-  -- Ordered list markers allowed in org-mode
-  where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+-- Unindented lists are legal, but they can't use '*' bullets.
+-- We return n to maintain compatibility with the generic listItem.
+bullets :: Bool -> String
+bullets unindented = if unindented then "+-" else "*+-"
 
 definitionListItem :: OrgParser Int
                    -> OrgParser (F (Inlines, [Blocks]))
@@ -1040,602 +921,6 @@ listContinuation markerLength = try $
               <*> many blankline)
  where listLine = try $ indentWith markerLength *> anyLineNewline
 
+-- | Parse any line, include the final newline in the output.
 anyLineNewline :: OrgParser String
 anyLineNewline = (++ "\n") <$> anyLine
-
-
---
--- inline
---
-
-inline :: OrgParser (F Inlines)
-inline =
-  choice [ whitespace
-         , linebreak
-         , cite
-         , footnote
-         , linkOrImage
-         , anchor
-         , inlineCodeBlock
-         , str
-         , endline
-         , emph
-         , strong
-         , strikeout
-         , underline
-         , code
-         , math
-         , displayMath
-         , verbatim
-         , subscript
-         , superscript
-         , inlineLaTeX
-         , smart
-         , symbol
-         ] <* (guard =<< newlinesCountWithinLimits)
-  <?> "inline"
-
-parseInlines :: OrgParser (F Inlines)
-parseInlines = trimInlinesF . mconcat <$> many1 inline
-
--- treat these as potentially non-text when parsing inline:
-specialChars :: [Char]
-specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
-
-
-whitespace :: OrgParser (F Inlines)
-whitespace = pure B.space <$ skipMany1 spaceChar
-                          <* updateLastPreCharPos
-                          <* updateLastForbiddenCharPos
-             <?> "whitespace"
-
-linebreak :: OrgParser (F Inlines)
-linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
-
-str :: OrgParser (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
-      <* updateLastStrPos
-
--- | An endline character that can be treated as a space, not a structural
--- break.  This should reflect the values of the Emacs variable
--- @org-element-pagaraph-separate@.
-endline :: OrgParser (F Inlines)
-endline = try $ do
-  newline
-  notFollowedBy blankline
-  notFollowedBy' exampleLine
-  notFollowedBy' hline
-  notFollowedBy' noteMarker
-  notFollowedBy' tableStart
-  notFollowedBy' drawerStart
-  notFollowedBy' headerStart
-  notFollowedBy' metaLineStart
-  notFollowedBy' latexEnvStart
-  notFollowedBy' commentLineStart
-  notFollowedBy' bulletListStart
-  notFollowedBy' orderedListStart
-  decEmphasisNewlinesCount
-  guard =<< newlinesCountWithinLimits
-  updateLastPreCharPos
-  return . return $ B.softbreak
-
-cite :: OrgParser (F Inlines)
-cite = try $ do
-  guardEnabled Ext_citations
-  (cs, raw) <- withRaw normalCite
-  return $ (flip B.cite (B.text raw)) <$> cs
-
-normalCite :: OrgParser (F [Citation])
-normalCite = try $  char '['
-                 *> skipSpaces
-                 *> citeList
-                 <* skipSpaces
-                 <* char ']'
-
-citeList :: OrgParser (F [Citation])
-citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
-
-citation :: OrgParser (F Citation)
-citation = try $ do
-  pref <- prefix
-  (suppress_author, key) <- citeKey
-  suff <- suffix
-  return $ do
-    x <- pref
-    y <- suff
-    return $ Citation{ citationId      = key
-                     , citationPrefix  = B.toList x
-                     , citationSuffix  = B.toList y
-                     , citationMode    = if suppress_author
-                                            then SuppressAuthor
-                                            else NormalCitation
-                     , citationNoteNum = 0
-                     , citationHash    = 0
-                     }
- where
-   prefix = trimInlinesF . mconcat <$>
-            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
-   suffix = try $ do
-     hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
-     skipSpaces
-     rest <- trimInlinesF . mconcat <$>
-             many (notFollowedBy (oneOf ";]") *> inline)
-     return $ if hasSpace
-              then (B.space <>) <$> rest
-              else rest
-
-footnote :: OrgParser (F Inlines)
-footnote = try $ inlineNote <|> referencedNote
-
-inlineNote :: OrgParser (F Inlines)
-inlineNote = try $ do
-  string "[fn:"
-  ref <- many alphaNum
-  char ':'
-  note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
-  when (not $ null ref) $
-       addToNotesTable ("fn:" ++ ref, note)
-  return $ B.note <$> note
-
-referencedNote :: OrgParser (F Inlines)
-referencedNote = try $ do
-  ref <- noteMarker
-  return $ do
-    notes <- asksF orgStateNotes'
-    case lookup ref notes of
-      Nothing   -> return $ B.str $ "[" ++ ref ++ "]"
-      Just contents  -> do
-        st <- askF
-        let contents' = runF contents st{ orgStateNotes' = [] }
-        return $ B.note contents'
-
-noteMarker :: OrgParser String
-noteMarker = try $ do
-  char '['
-  choice [ many1Till digit (char ']')
-         , (++) <$> string "fn:"
-                <*> many1Till (noneOf "\n\r\t ") (char ']')
-         ]
-
-linkOrImage :: OrgParser (F Inlines)
-linkOrImage = explicitOrImageLink
-              <|> selflinkOrImage
-              <|> angleLink
-              <|> plainLink
-              <?> "link or image"
-
-explicitOrImageLink :: OrgParser (F Inlines)
-explicitOrImageLink = try $ do
-  char '['
-  srcF   <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
-  title  <- enclosedRaw (char '[') (char ']')
-  title' <- parseFromString (mconcat <$> many inline) title
-  char ']'
-  return $ do
-    src <- srcF
-    if isImageFilename title
-      then pure $ B.link src "" $ B.image title mempty mempty
-      else linkToInlinesF src =<< title'
-
-selflinkOrImage :: OrgParser (F Inlines)
-selflinkOrImage = try $ do
-  src <- char '[' *> linkTarget <* char ']'
-  return $ linkToInlinesF src (B.str src)
-
-plainLink :: OrgParser (F Inlines)
-plainLink = try $ do
-  (orig, src) <- uri
-  returnF $ B.link src "" (B.str orig)
-
-angleLink :: OrgParser (F Inlines)
-angleLink = try $ do
-  char '<'
-  link <- plainLink
-  char '>'
-  return link
-
-selfTarget :: OrgParser String
-selfTarget = try $ char '[' *> linkTarget <* char ']'
-
-linkTarget :: OrgParser String
-linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
-
-possiblyEmptyLinkTarget :: OrgParser String
-possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-
-applyCustomLinkFormat :: String -> OrgParser (F String)
-applyCustomLinkFormat link = do
-  let (linkType, rest) = break (== ':') link
-  return $ do
-    formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
-    return $ maybe link ($ drop 1 rest) formatter
-
--- | Take a link and return a function which produces new inlines when given
--- description inlines.
-linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF linkStr =
-  case linkStr of
-    ""      -> pure . B.link mempty ""       -- wiki link (empty by convention)
-    ('#':_) -> pure . B.link linkStr ""      -- document-local fraction
-    _       -> case cleanLinkString linkStr of
-                 (Just cleanedLink) -> if isImageFilename cleanedLink
-                                       then const . pure $ B.image cleanedLink "" ""
-                                       else pure . B.link cleanedLink ""
-                 Nothing -> internalLink linkStr  -- other internal link
-
--- | Cleanup and canonicalize a string describing a link.  Return @Nothing@ if
--- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
-  case s of
-    '/':_                  -> Just $ "file://" ++ s  -- absolute path
-    '.':'/':_              -> Just s                 -- relative path
-    '.':'.':'/':_          -> Just s                 -- relative path
-    -- Relative path or URL (file schema)
-    'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
-    _ | isUrl s            -> Just s                 -- URL
-    _                      -> Nothing
- where
-   isUrl :: String -> Bool
-   isUrl cs =
-     let (scheme, path) = break (== ':') cs
-     in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
-          && not (null path)
-
-isImageFilename :: String -> Bool
-isImageFilename filename =
-  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
-  (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
-   ':' `notElem` filename)
- where
-   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
-   protocols = [ "file", "http", "https" ]
-
-internalLink :: String -> Inlines -> F Inlines
-internalLink link title = do
-  anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
-  if anchorB
-    then return $ B.link ('#':link) "" title
-    else return $ B.emph title
-
--- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
--- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
--- @org-target-regexp@, which is fairly liberal.  Since no link is created if
--- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
--- an anchor.
-
-anchor :: OrgParser (F Inlines)
-anchor =  try $ do
-  anchorId <- parseAnchor
-  recordAnchorId anchorId
-  returnF $ B.spanWith (solidify anchorId, [], []) mempty
- where
-       parseAnchor = string "<<"
-                     *> many1 (noneOf "\t\n\r<>\"' ")
-                     <* string ">>"
-                     <* skipSpaces
-
--- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'.  This mirrors
--- the org function @org-export-solidify-link-text@.
-
-solidify :: String -> String
-solidify = map replaceSpecialChar
- where replaceSpecialChar c
-           | isAlphaNum c    = c
-           | c `elem` ("_.-:" :: String) = c
-           | otherwise       = '-'
-
--- | Parses an inline code block and marks it as an babel block.
-inlineCodeBlock :: OrgParser (F Inlines)
-inlineCodeBlock = try $ do
-  string "src_"
-  lang <- many1 orgArgWordChar
-  opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
-  inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
-  let attrClasses = [translateLang lang, rundocBlockClass]
-  let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
-  returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
-
-enclosedByPair :: Char          -- ^ opening char
-               -> Char          -- ^ closing char
-               -> OrgParser a   -- ^ parser
-               -> OrgParser [a]
-enclosedByPair s e p = char s *> many1Till p (char e)
-
-emph      :: OrgParser (F Inlines)
-emph      = fmap B.emph         <$> emphasisBetween '/'
-
-strong    :: OrgParser (F Inlines)
-strong    = fmap B.strong       <$> emphasisBetween '*'
-
-strikeout :: OrgParser (F Inlines)
-strikeout = fmap B.strikeout    <$> emphasisBetween '+'
-
--- There is no underline, so we use strong instead.
-underline :: OrgParser (F Inlines)
-underline = fmap B.strong       <$> emphasisBetween '_'
-
-verbatim  :: OrgParser (F Inlines)
-verbatim  = return . B.code     <$> verbatimBetween '='
-
-code      :: OrgParser (F Inlines)
-code      = return . B.code     <$> verbatimBetween '~'
-
-subscript   :: OrgParser (F Inlines)
-subscript   = fmap B.subscript   <$> try (char '_' *> subOrSuperExpr)
-
-superscript :: OrgParser (F Inlines)
-superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
-
-math      :: OrgParser (F Inlines)
-math      = return . B.math      <$> choice [ math1CharBetween '$'
-                                            , mathStringBetween '$'
-                                            , rawMathBetween "\\(" "\\)"
-                                            ]
-
-displayMath :: OrgParser (F Inlines)
-displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
-                                                , rawMathBetween "$$"  "$$"
-                                                ]
-
-updatePositions :: Char
-                -> OrgParser (Char)
-updatePositions c = do
-  when (c `elem` emphasisPreChars) updateLastPreCharPos
-  when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
-  return c
-
-symbol :: OrgParser (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
-
-emphasisBetween :: Char
-                -> OrgParser (F Inlines)
-emphasisBetween c = try $ do
-  startEmphasisNewlinesCounting emphasisAllowedNewlines
-  res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
-  isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
-  when isTopLevelEmphasis
-       resetEmphasisNewlines
-  return res
-
-verbatimBetween :: Char
-                -> OrgParser String
-verbatimBetween c = try $
-  emphasisStart c *>
-  many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
-
--- | Parses a raw string delimited by @c@ using Org's math rules
-mathStringBetween :: Char
-                  -> OrgParser String
-mathStringBetween c = try $ do
-  mathStart c
-  body <- many1TillNOrLessNewlines mathAllowedNewlines
-                                   (noneOf (c:"\n\r"))
-                                   (lookAhead $ mathEnd c)
-  final <- mathEnd c
-  return $ body ++ [final]
-
--- | Parse a single character between @c@ using math rules
-math1CharBetween :: Char
-                -> OrgParser String
-math1CharBetween c = try $ do
-  char c
-  res <- noneOf $ c:mathForbiddenBorderChars
-  char c
-  eof <|> () <$ lookAhead (oneOf mathPostChars)
-  return [res]
-
-rawMathBetween :: String
-               -> String
-               -> OrgParser String
-rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
-
--- | Parses the start (opening character) of emphasis
-emphasisStart :: Char -> OrgParser Char
-emphasisStart c = try $ do
-  guard =<< afterEmphasisPreChar
-  guard =<< notAfterString
-  char c
-  lookAhead (noneOf emphasisForbiddenBorderChars)
-  pushToInlineCharStack c
-  return c
-
--- | Parses the closing character of emphasis
-emphasisEnd :: Char -> OrgParser Char
-emphasisEnd c = try $ do
-  guard =<< notAfterForbiddenBorderChar
-  char c
-  eof <|> () <$ lookAhead acceptablePostChars
-  updateLastStrPos
-  popInlineCharStack
-  return c
- where acceptablePostChars =
-           surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
-
-mathStart :: Char -> OrgParser Char
-mathStart c = try $
-  char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
-
-mathEnd :: Char -> OrgParser Char
-mathEnd c = try $ do
-  res <- noneOf (c:mathForbiddenBorderChars)
-  char c
-  eof <|> () <$ lookAhead (oneOf mathPostChars)
-  return res
-
-
-enclosedInlines :: OrgParser a
-                -> OrgParser b
-                -> OrgParser (F Inlines)
-enclosedInlines start end = try $
-  trimInlinesF . mconcat <$> enclosed start end inline
-
-enclosedRaw :: OrgParser a
-            -> OrgParser b
-            -> OrgParser String
-enclosedRaw start end = try $
-  start *> (onSingleLine <|> spanningTwoLines)
- where onSingleLine = try $ many1Till (noneOf "\n\r") end
-       spanningTwoLines = try $
-         anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
-
--- | Like many1Till, but parses at most @n+1@ lines.  @p@ must not consume
---   newlines.
-many1TillNOrLessNewlines :: Int
-                         -> OrgParser Char
-                         -> OrgParser a
-                         -> OrgParser String
-many1TillNOrLessNewlines n p end = try $
-  nMoreLines (Just n) mempty >>= oneOrMore
- where
-   nMoreLines Nothing  cs = return cs
-   nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
-   nMoreLines k        cs = try $ (final k cs <|> rest k cs)
-                                  >>= uncurry nMoreLines
-   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> try finalLine
-   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
-   finalLine = try $ manyTill p end
-   minus1 k = k - 1
-   oneOrMore cs = guard (not $ null cs) *> return cs
-
--- Org allows customization of the way it reads emphasis.  We use the defaults
--- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
--- for details).
-
--- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
-emphasisPreChars :: [Char]
-emphasisPreChars = "\t \"'({"
-
--- | Chars allowed at after emphasis
-emphasisPostChars :: [Char]
-emphasisPostChars = "\t\n !\"'),-.:;?\\}"
-
--- | Chars not allowed at the (inner) border of emphasis
-emphasisForbiddenBorderChars :: [Char]
-emphasisForbiddenBorderChars = "\t\n\r \"',"
-
--- | The maximum number of newlines within
-emphasisAllowedNewlines :: Int
-emphasisAllowedNewlines = 1
-
--- LaTeX-style math: see `org-latex-regexps` for details
-
--- | Chars allowed after an inline ($...$) math statement
-mathPostChars :: [Char]
-mathPostChars = "\t\n \"'),-.:;?"
-
--- | Chars not allowed at the (inner) border of math
-mathForbiddenBorderChars :: [Char]
-mathForbiddenBorderChars = "\t\n\r ,;.$"
-
--- | Maximum number of newlines in an inline math statement
-mathAllowedNewlines :: Int
-mathAllowedNewlines = 2
-
--- | Whether we are right behind a char allowed before emphasis
-afterEmphasisPreChar :: OrgParser Bool
-afterEmphasisPreChar = do
-  pos <- getPosition
-  lastPrePos <- orgStateLastPreCharPos <$> getState
-  return . fromMaybe True $ (== pos) <$> lastPrePos
-
--- | Whether the parser is right after a forbidden border char
-notAfterForbiddenBorderChar :: OrgParser Bool
-notAfterForbiddenBorderChar = do
-  pos <- getPosition
-  lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
-  return $ lastFBCPos /= Just pos
-
--- | Read a sub- or superscript expression
-subOrSuperExpr :: OrgParser (F Inlines)
-subOrSuperExpr = try $
-  choice [ id                   <$> charsInBalanced '{' '}' (noneOf "\n\r")
-         , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
-         , simpleSubOrSuperString
-         ] >>= parseFromString (mconcat <$> many inline)
- where enclosing (left, right) s = left : s ++ [right]
-
-simpleSubOrSuperString :: OrgParser String
-simpleSubOrSuperString = try $ do
-  state <- getState
-  guard . exportSubSuperscripts . orgStateExportSettings $ state
-  choice [ string "*"
-         , mappend <$> option [] ((:[]) <$> oneOf "+-")
-                   <*> many1 alphaNum
-         ]
-
-inlineLaTeX :: OrgParser (F Inlines)
-inlineLaTeX = try $ do
-  cmd <- inlineLaTeXCommand
-  maybe mzero returnF $
-     parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
- where
-   parseAsMath :: String -> Maybe Inlines
-   parseAsMath cs = B.fromList <$> texMathToPandoc cs
-
-   parseAsInlineLaTeX :: String -> Maybe Inlines
-   parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
-
-   parseAsMathMLSym :: String -> Maybe Inlines
-   parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
-    -- drop initial backslash and any trailing "{}"
-    where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
-
-   state :: ParserState
-   state = def{ stateOptions = def{ readerParseRaw = True }}
-
-   texMathToPandoc :: String -> Maybe [Inline]
-   texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
-
-maybeRight :: Either a b -> Maybe b
-maybeRight = either (const Nothing) Just
-
-inlineLaTeXCommand :: OrgParser String
-inlineLaTeXCommand = try $ do
-  rest <- getInput
-  case runParser rawLaTeXInline def "source" rest of
-    Right (RawInline _ cs) -> do
-      -- drop any trailing whitespace, those are not be part of the command as
-      -- far as org mode is concerned.
-      let cmdNoSpc = dropWhileEnd isSpace cs
-      let len = length cmdNoSpc
-      count len anyChar
-      return cmdNoSpc
-    _ -> mzero
-
--- Taken from Data.OldList.
-dropWhileEnd :: (a -> Bool) -> [a] -> [a]
-dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
-
-smart :: OrgParser (F Inlines)
-smart = do
-  getOption readerSmart >>= guard
-  doubleQuoted <|> singleQuoted <|>
-    choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
-  where
-    orgDash = dash <* updatePositions '-'
-    orgEllipses = ellipses <* updatePositions '.'
-    orgApostrophe =
-          (char '\'' <|> char '\8217') <* updateLastPreCharPos
-                                       <* updateLastForbiddenCharPos
-                                       *> return (B.str "\x2019")
-
-singleQuoted :: OrgParser (F Inlines)
-singleQuoted = try $ do
-  singleQuoteStart
-  updatePositions '\''
-  withQuoteContext InSingleQuote $
-    fmap B.singleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline (singleQuoteEnd <* updatePositions '\'')
-
--- doubleQuoted will handle regular double-quoted sections, as well
--- as dialogues with an open double-quote without a close double-quote
--- in the same paragraph.
-doubleQuoted :: OrgParser (F Inlines)
-doubleQuoted = try $ do
-  doubleQuoteStart
-  updatePositions '"'
-  contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
-  (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
-       (fmap B.doubleQuoted . trimInlinesF $ contents))
-   <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
new file mode 100644
index 000000000..e4dc31342
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -0,0 +1,112 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Org.Options
+   Copyright   : Copyright (C) 2014-2016 Albert Krewinkel
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode inline elements.
+-}
+module Text.Pandoc.Readers.Org.BlockStarts
+  ( exampleLineStart
+  , hline
+  , noteMarker
+  , tableStart
+  , drawerStart
+  , headerStart
+  , metaLineStart
+  , latexEnvStart
+  , commentLineStart
+  , bulletListStart
+  , orderedListStart
+  ) where
+
+import           Text.Pandoc.Readers.Org.Parsing
+
+-- | Horizontal Line (five -- dashes or more)
+hline :: OrgParser ()
+hline = try $ do
+  skipSpaces
+  string "-----"
+  many (char '-')
+  skipSpaces
+  newline
+  return ()
+
+-- | Read the start of a header line, return the header level
+headerStart :: OrgParser Int
+headerStart = try $
+  (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
+
+tableStart :: OrgParser Char
+tableStart = try $ skipSpaces *> char '|'
+
+latexEnvStart :: OrgParser String
+latexEnvStart = try $ do
+  skipSpaces *> string "\\begin{"
+             *> latexEnvName
+             <* string "}"
+             <* blankline
+ where
+   latexEnvName :: OrgParser String
+   latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
+
+
+-- | Parses bullet list marker.
+bulletListStart :: OrgParser ()
+bulletListStart = try $
+  choice
+  [ () <$ skipSpaces  <* oneOf "+-" <* skipSpaces1
+  , () <$ skipSpaces1 <* char '*'   <* skipSpaces1
+  ]
+
+genericListStart :: OrgParser String
+                 -> OrgParser Int
+genericListStart listMarker = try $
+  (+) <$> (length <$> many spaceChar)
+      <*> (length <$> listMarker <* many1 spaceChar)
+
+orderedListStart :: OrgParser Int
+orderedListStart = genericListStart orderedListMarker
+  -- Ordered list markers allowed in org-mode
+  where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+
+drawerStart :: OrgParser String
+drawerStart = try $
+  skipSpaces *> drawerName <* skipSpaces <* newline
+ where drawerName = char ':' *> manyTill nonspaceChar (char ':')
+
+metaLineStart :: OrgParser ()
+metaLineStart = try $ skipSpaces <* string "#+"
+
+commentLineStart :: OrgParser ()
+commentLineStart = try $ skipSpaces <* string "# "
+
+exampleLineStart :: OrgParser ()
+exampleLineStart = () <$ try (skipSpaces *> string ": ")
+
+noteMarker :: OrgParser String
+noteMarker = try $ do
+  char '['
+  choice [ many1Till digit (char ']')
+         , (++) <$> string "fn:"
+                <*> many1Till (noneOf "\n\r\t ") (char ']')
+         ]
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
new file mode 100644
index 000000000..0c3840979
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -0,0 +1,715 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Org.Options
+   Copyright   : Copyright (C) 2014-2016 Albert Krewinkel
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode inline elements.
+-}
+module Text.Pandoc.Readers.Org.Inlines
+  ( inline
+  , addToNotesTable
+  , parseInlines
+  , isImageFilename
+  , linkTarget
+  ) where
+
+import           Text.Pandoc.Readers.Org.BlockStarts
+import           Text.Pandoc.Readers.Org.ParserState
+import           Text.Pandoc.Readers.Org.Parsing
+
+import qualified Text.Pandoc.Builder as B
+import           Text.Pandoc.Builder ( Inlines )
+import           Text.Pandoc.Definition
+import           Text.Pandoc.Compat.Monoid ( (<>) )
+import           Text.Pandoc.Options
+import           Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline )
+import           Text.TeXMath ( readTeX, writePandoc, DisplayType(..) )
+import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
+
+import           Control.Arrow ( first )
+import           Control.Monad ( guard, mplus, mzero, when )
+import           Data.Char ( isAlphaNum, isSpace )
+import           Data.List ( isPrefixOf, isSuffixOf )
+import           Data.Maybe ( fromMaybe )
+import qualified Data.Map as M
+
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = rundocPrefix ++ "block"
+
+toRundocAttrib :: (String, String) -> (String, String)
+toRundocAttrib = first ("rundoc-" ++)
+
+translateLang :: String -> String
+translateLang "C"          = "c"
+translateLang "C++"        = "cpp"
+translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
+translateLang "js"         = "javascript"
+translateLang "lisp"       = "commonlisp"
+translateLang "R"          = "r"
+translateLang "sh"         = "bash"
+translateLang "sqlite"     = "sql"
+translateLang cs = cs
+
+--
+-- Functions acting on the parser state
+--
+recordAnchorId :: String -> OrgParser ()
+recordAnchorId i = updateState $ \s ->
+  s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
+
+pushToInlineCharStack :: Char -> OrgParser ()
+pushToInlineCharStack c = updateState $ \s ->
+  s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
+
+popInlineCharStack :: OrgParser ()
+popInlineCharStack = updateState $ \s ->
+  s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
+
+surroundingEmphasisChar :: OrgParser [Char]
+surroundingEmphasisChar =
+  take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
+
+startEmphasisNewlinesCounting :: Int -> OrgParser ()
+startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
+  s{ orgStateEmphasisNewlines = Just maxNewlines }
+
+decEmphasisNewlinesCount :: OrgParser ()
+decEmphasisNewlinesCount = updateState $ \s ->
+  s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
+
+newlinesCountWithinLimits :: OrgParser Bool
+newlinesCountWithinLimits = do
+  st <- getState
+  return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
+
+resetEmphasisNewlines :: OrgParser ()
+resetEmphasisNewlines = updateState $ \s ->
+  s{ orgStateEmphasisNewlines = Nothing }
+
+addToNotesTable :: OrgNoteRecord -> OrgParser ()
+addToNotesTable note = do
+  oldnotes <- orgStateNotes' <$> getState
+  updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
+
+-- | Parse a single Org-mode inline element
+inline :: OrgParser (F Inlines)
+inline =
+  choice [ whitespace
+         , linebreak
+         , cite
+         , footnote
+         , linkOrImage
+         , anchor
+         , inlineCodeBlock
+         , str
+         , endline
+         , emph
+         , strong
+         , strikeout
+         , underline
+         , code
+         , math
+         , displayMath
+         , verbatim
+         , subscript
+         , superscript
+         , inlineLaTeX
+         , smart
+         , symbol
+         ] <* (guard =<< newlinesCountWithinLimits)
+  <?> "inline"
+
+parseInlines :: OrgParser (F Inlines)
+parseInlines = trimInlinesF . mconcat <$> many1 inline
+
+-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
+specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
+
+
+whitespace :: OrgParser (F Inlines)
+whitespace = pure B.space <$ skipMany1 spaceChar
+                          <* updateLastPreCharPos
+                          <* updateLastForbiddenCharPos
+             <?> "whitespace"
+
+linebreak :: OrgParser (F Inlines)
+linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
+
+str :: OrgParser (F Inlines)
+str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+      <* updateLastStrPos
+
+-- | An endline character that can be treated as a space, not a structural
+-- break.  This should reflect the values of the Emacs variable
+-- @org-element-pagaraph-separate@.
+endline :: OrgParser (F Inlines)
+endline = try $ do
+  newline
+  notFollowedBy blankline
+  notFollowedBy' exampleLineStart
+  notFollowedBy' hline
+  notFollowedBy' noteMarker
+  notFollowedBy' tableStart
+  notFollowedBy' drawerStart
+  notFollowedBy' headerStart
+  notFollowedBy' metaLineStart
+  notFollowedBy' latexEnvStart
+  notFollowedBy' commentLineStart
+  notFollowedBy' bulletListStart
+  notFollowedBy' orderedListStart
+  decEmphasisNewlinesCount
+  guard =<< newlinesCountWithinLimits
+  updateLastPreCharPos
+  return . return $ B.softbreak
+
+cite :: OrgParser (F Inlines)
+cite = try $ do
+  guardEnabled Ext_citations
+  (cs, raw) <- withRaw normalCite
+  return $ (flip B.cite (B.text raw)) <$> cs
+
+normalCite :: OrgParser (F [Citation])
+normalCite = try $  char '['
+                 *> skipSpaces
+                 *> citeList
+                 <* skipSpaces
+                 <* char ']'
+
+citeList :: OrgParser (F [Citation])
+citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
+
+citation :: OrgParser (F Citation)
+citation = try $ do
+  pref <- prefix
+  (suppress_author, key) <- citeKey
+  suff <- suffix
+  return $ do
+    x <- pref
+    y <- suff
+    return $ Citation{ citationId      = key
+                     , citationPrefix  = B.toList x
+                     , citationSuffix  = B.toList y
+                     , citationMode    = if suppress_author
+                                            then SuppressAuthor
+                                            else NormalCitation
+                     , citationNoteNum = 0
+                     , citationHash    = 0
+                     }
+ where
+   prefix = trimInlinesF . mconcat <$>
+            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+   suffix = try $ do
+     hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
+     skipSpaces
+     rest <- trimInlinesF . mconcat <$>
+             many (notFollowedBy (oneOf ";]") *> inline)
+     return $ if hasSpace
+              then (B.space <>) <$> rest
+              else rest
+
+footnote :: OrgParser (F Inlines)
+footnote = try $ inlineNote <|> referencedNote
+
+inlineNote :: OrgParser (F Inlines)
+inlineNote = try $ do
+  string "[fn:"
+  ref <- many alphaNum
+  char ':'
+  note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
+  when (not $ null ref) $
+       addToNotesTable ("fn:" ++ ref, note)
+  return $ B.note <$> note
+
+referencedNote :: OrgParser (F Inlines)
+referencedNote = try $ do
+  ref <- noteMarker
+  return $ do
+    notes <- asksF orgStateNotes'
+    case lookup ref notes of
+      Nothing   -> return $ B.str $ "[" ++ ref ++ "]"
+      Just contents  -> do
+        st <- askF
+        let contents' = runF contents st{ orgStateNotes' = [] }
+        return $ B.note contents'
+
+linkOrImage :: OrgParser (F Inlines)
+linkOrImage = explicitOrImageLink
+              <|> selflinkOrImage
+              <|> angleLink
+              <|> plainLink
+              <?> "link or image"
+
+explicitOrImageLink :: OrgParser (F Inlines)
+explicitOrImageLink = try $ do
+  char '['
+  srcF   <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
+  title  <- enclosedRaw (char '[') (char ']')
+  title' <- parseFromString (mconcat <$> many inline) title
+  char ']'
+  return $ do
+    src <- srcF
+    if isImageFilename title
+      then pure $ B.link src "" $ B.image title mempty mempty
+      else linkToInlinesF src =<< title'
+
+selflinkOrImage :: OrgParser (F Inlines)
+selflinkOrImage = try $ do
+  src <- char '[' *> linkTarget <* char ']'
+  return $ linkToInlinesF src (B.str src)
+
+plainLink :: OrgParser (F Inlines)
+plainLink = try $ do
+  (orig, src) <- uri
+  returnF $ B.link src "" (B.str orig)
+
+angleLink :: OrgParser (F Inlines)
+angleLink = try $ do
+  char '<'
+  link <- plainLink
+  char '>'
+  return link
+
+linkTarget :: OrgParser String
+linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
+
+possiblyEmptyLinkTarget :: OrgParser String
+possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
+
+applyCustomLinkFormat :: String -> OrgParser (F String)
+applyCustomLinkFormat link = do
+  let (linkType, rest) = break (== ':') link
+  return $ do
+    formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
+    return $ maybe link ($ drop 1 rest) formatter
+
+-- | Take a link and return a function which produces new inlines when given
+-- description inlines.
+linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF linkStr =
+  case linkStr of
+    ""      -> pure . B.link mempty ""       -- wiki link (empty by convention)
+    ('#':_) -> pure . B.link linkStr ""      -- document-local fraction
+    _       -> case cleanLinkString linkStr of
+                 (Just cleanedLink) -> if isImageFilename cleanedLink
+                                       then const . pure $ B.image cleanedLink "" ""
+                                       else pure . B.link cleanedLink ""
+                 Nothing -> internalLink linkStr  -- other internal link
+
+-- | Cleanup and canonicalize a string describing a link.  Return @Nothing@ if
+-- the string does not appear to be a link.
+cleanLinkString :: String -> Maybe String
+cleanLinkString s =
+  case s of
+    '/':_                  -> Just $ "file://" ++ s  -- absolute path
+    '.':'/':_              -> Just s                 -- relative path
+    '.':'.':'/':_          -> Just s                 -- relative path
+    -- Relative path or URL (file schema)
+    'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
+    _ | isUrl s            -> Just s                 -- URL
+    _                      -> Nothing
+ where
+   isUrl :: String -> Bool
+   isUrl cs =
+     let (scheme, path) = break (== ':') cs
+     in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+          && not (null path)
+
+isImageFilename :: String -> Bool
+isImageFilename filename =
+  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
+  (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+   ':' `notElem` filename)
+ where
+   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
+   protocols = [ "file", "http", "https" ]
+
+internalLink :: String -> Inlines -> F Inlines
+internalLink link title = do
+  anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
+  if anchorB
+    then return $ B.link ('#':link) "" title
+    else return $ B.emph title
+
+-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
+-- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
+-- @org-target-regexp@, which is fairly liberal.  Since no link is created if
+-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
+-- an anchor.
+
+anchor :: OrgParser (F Inlines)
+anchor =  try $ do
+  anchorId <- parseAnchor
+  recordAnchorId anchorId
+  returnF $ B.spanWith (solidify anchorId, [], []) mempty
+ where
+       parseAnchor = string "<<"
+                     *> many1 (noneOf "\t\n\r<>\"' ")
+                     <* string ">>"
+                     <* skipSpaces
+
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'.  This mirrors
+-- the org function @org-export-solidify-link-text@.
+
+solidify :: String -> String
+solidify = map replaceSpecialChar
+ where replaceSpecialChar c
+           | isAlphaNum c    = c
+           | c `elem` ("_.-:" :: String) = c
+           | otherwise       = '-'
+
+-- | Parses an inline code block and marks it as an babel block.
+inlineCodeBlock :: OrgParser (F Inlines)
+inlineCodeBlock = try $ do
+  string "src_"
+  lang <- many1 orgArgWordChar
+  opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
+  inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
+  let attrClasses = [translateLang lang, rundocBlockClass]
+  let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
+  returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ where
+   inlineBlockOption :: OrgParser (String, String)
+   inlineBlockOption = try $ do
+     argKey <- orgArgKey
+     paramValue <- option "yes" orgInlineParamValue
+     return (argKey, paramValue)
+
+   orgInlineParamValue :: OrgParser String
+   orgInlineParamValue = try $
+     skipSpaces
+       *> notFollowedBy (char ':')
+       *> many1 (noneOf "\t\n\r ]")
+       <* skipSpaces
+
+
+
+enclosedByPair :: Char          -- ^ opening char
+               -> Char          -- ^ closing char
+               -> OrgParser a   -- ^ parser
+               -> OrgParser [a]
+enclosedByPair s e p = char s *> many1Till p (char e)
+
+emph      :: OrgParser (F Inlines)
+emph      = fmap B.emph         <$> emphasisBetween '/'
+
+strong    :: OrgParser (F Inlines)
+strong    = fmap B.strong       <$> emphasisBetween '*'
+
+strikeout :: OrgParser (F Inlines)
+strikeout = fmap B.strikeout    <$> emphasisBetween '+'
+
+-- There is no underline, so we use strong instead.
+underline :: OrgParser (F Inlines)
+underline = fmap B.strong       <$> emphasisBetween '_'
+
+verbatim  :: OrgParser (F Inlines)
+verbatim  = return . B.code     <$> verbatimBetween '='
+
+code      :: OrgParser (F Inlines)
+code      = return . B.code     <$> verbatimBetween '~'
+
+subscript   :: OrgParser (F Inlines)
+subscript   = fmap B.subscript   <$> try (char '_' *> subOrSuperExpr)
+
+superscript :: OrgParser (F Inlines)
+superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
+
+math      :: OrgParser (F Inlines)
+math      = return . B.math      <$> choice [ math1CharBetween '$'
+                                            , mathStringBetween '$'
+                                            , rawMathBetween "\\(" "\\)"
+                                            ]
+
+displayMath :: OrgParser (F Inlines)
+displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
+                                                , rawMathBetween "$$"  "$$"
+                                                ]
+
+updatePositions :: Char
+                -> OrgParser (Char)
+updatePositions c = do
+  when (c `elem` emphasisPreChars) updateLastPreCharPos
+  when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
+  return c
+
+symbol :: OrgParser (F Inlines)
+symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+
+emphasisBetween :: Char
+                -> OrgParser (F Inlines)
+emphasisBetween c = try $ do
+  startEmphasisNewlinesCounting emphasisAllowedNewlines
+  res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
+  isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
+  when isTopLevelEmphasis
+       resetEmphasisNewlines
+  return res
+
+verbatimBetween :: Char
+                -> OrgParser String
+verbatimBetween c = try $
+  emphasisStart c *>
+  many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
+
+-- | Parses a raw string delimited by @c@ using Org's math rules
+mathStringBetween :: Char
+                  -> OrgParser String
+mathStringBetween c = try $ do
+  mathStart c
+  body <- many1TillNOrLessNewlines mathAllowedNewlines
+                                   (noneOf (c:"\n\r"))
+                                   (lookAhead $ mathEnd c)
+  final <- mathEnd c
+  return $ body ++ [final]
+
+-- | Parse a single character between @c@ using math rules
+math1CharBetween :: Char
+                -> OrgParser String
+math1CharBetween c = try $ do
+  char c
+  res <- noneOf $ c:mathForbiddenBorderChars
+  char c
+  eof <|> () <$ lookAhead (oneOf mathPostChars)
+  return [res]
+
+rawMathBetween :: String
+               -> String
+               -> OrgParser String
+rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
+
+-- | Parses the start (opening character) of emphasis
+emphasisStart :: Char -> OrgParser Char
+emphasisStart c = try $ do
+  guard =<< afterEmphasisPreChar
+  guard =<< notAfterString
+  char c
+  lookAhead (noneOf emphasisForbiddenBorderChars)
+  pushToInlineCharStack c
+  return c
+
+-- | Parses the closing character of emphasis
+emphasisEnd :: Char -> OrgParser Char
+emphasisEnd c = try $ do
+  guard =<< notAfterForbiddenBorderChar
+  char c
+  eof <|> () <$ lookAhead acceptablePostChars
+  updateLastStrPos
+  popInlineCharStack
+  return c
+ where acceptablePostChars =
+           surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
+
+mathStart :: Char -> OrgParser Char
+mathStart c = try $
+  char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
+
+mathEnd :: Char -> OrgParser Char
+mathEnd c = try $ do
+  res <- noneOf (c:mathForbiddenBorderChars)
+  char c
+  eof <|> () <$ lookAhead (oneOf mathPostChars)
+  return res
+
+
+enclosedInlines :: OrgParser a
+                -> OrgParser b
+                -> OrgParser (F Inlines)
+enclosedInlines start end = try $
+  trimInlinesF . mconcat <$> enclosed start end inline
+
+enclosedRaw :: OrgParser a
+            -> OrgParser b
+            -> OrgParser String
+enclosedRaw start end = try $
+  start *> (onSingleLine <|> spanningTwoLines)
+ where onSingleLine = try $ many1Till (noneOf "\n\r") end
+       spanningTwoLines = try $
+         anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
+
+-- | Like many1Till, but parses at most @n+1@ lines.  @p@ must not consume
+--   newlines.
+many1TillNOrLessNewlines :: Int
+                         -> OrgParser Char
+                         -> OrgParser a
+                         -> OrgParser String
+many1TillNOrLessNewlines n p end = try $
+  nMoreLines (Just n) mempty >>= oneOrMore
+ where
+   nMoreLines Nothing  cs = return cs
+   nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
+   nMoreLines k        cs = try $ (final k cs <|> rest k cs)
+                                  >>= uncurry nMoreLines
+   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> try finalLine
+   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
+   finalLine = try $ manyTill p end
+   minus1 k = k - 1
+   oneOrMore cs = guard (not $ null cs) *> return cs
+
+-- Org allows customization of the way it reads emphasis.  We use the defaults
+-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
+-- for details).
+
+-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
+emphasisPreChars :: [Char]
+emphasisPreChars = "\t \"'({"
+
+-- | Chars allowed at after emphasis
+emphasisPostChars :: [Char]
+emphasisPostChars = "\t\n !\"'),-.:;?\\}"
+
+-- | Chars not allowed at the (inner) border of emphasis
+emphasisForbiddenBorderChars :: [Char]
+emphasisForbiddenBorderChars = "\t\n\r \"',"
+
+-- | The maximum number of newlines within
+emphasisAllowedNewlines :: Int
+emphasisAllowedNewlines = 1
+
+-- LaTeX-style math: see `org-latex-regexps` for details
+
+-- | Chars allowed after an inline ($...$) math statement
+mathPostChars :: [Char]
+mathPostChars = "\t\n \"'),-.:;?"
+
+-- | Chars not allowed at the (inner) border of math
+mathForbiddenBorderChars :: [Char]
+mathForbiddenBorderChars = "\t\n\r ,;.$"
+
+-- | Maximum number of newlines in an inline math statement
+mathAllowedNewlines :: Int
+mathAllowedNewlines = 2
+
+-- | Whether we are right behind a char allowed before emphasis
+afterEmphasisPreChar :: OrgParser Bool
+afterEmphasisPreChar = do
+  pos <- getPosition
+  lastPrePos <- orgStateLastPreCharPos <$> getState
+  return . fromMaybe True $ (== pos) <$> lastPrePos
+
+-- | Whether the parser is right after a forbidden border char
+notAfterForbiddenBorderChar :: OrgParser Bool
+notAfterForbiddenBorderChar = do
+  pos <- getPosition
+  lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
+  return $ lastFBCPos /= Just pos
+
+-- | Read a sub- or superscript expression
+subOrSuperExpr :: OrgParser (F Inlines)
+subOrSuperExpr = try $
+  choice [ id                   <$> charsInBalanced '{' '}' (noneOf "\n\r")
+         , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
+         , simpleSubOrSuperString
+         ] >>= parseFromString (mconcat <$> many inline)
+ where enclosing (left, right) s = left : s ++ [right]
+
+simpleSubOrSuperString :: OrgParser String
+simpleSubOrSuperString = try $ do
+  state <- getState
+  guard . exportSubSuperscripts . orgStateExportSettings $ state
+  choice [ string "*"
+         , mappend <$> option [] ((:[]) <$> oneOf "+-")
+                   <*> many1 alphaNum
+         ]
+
+inlineLaTeX :: OrgParser (F Inlines)
+inlineLaTeX = try $ do
+  cmd <- inlineLaTeXCommand
+  maybe mzero returnF $
+     parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
+ where
+   parseAsMath :: String -> Maybe Inlines
+   parseAsMath cs = B.fromList <$> texMathToPandoc cs
+
+   parseAsInlineLaTeX :: String -> Maybe Inlines
+   parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
+
+   parseAsMathMLSym :: String -> Maybe Inlines
+   parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
+    -- drop initial backslash and any trailing "{}"
+    where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
+
+   state :: ParserState
+   state = def{ stateOptions = def{ readerParseRaw = True }}
+
+   texMathToPandoc :: String -> Maybe [Inline]
+   texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
+
+maybeRight :: Either a b -> Maybe b
+maybeRight = either (const Nothing) Just
+
+inlineLaTeXCommand :: OrgParser String
+inlineLaTeXCommand = try $ do
+  rest <- getInput
+  case runParser rawLaTeXInline def "source" rest of
+    Right (RawInline _ cs) -> do
+      -- drop any trailing whitespace, those are not be part of the command as
+      -- far as org mode is concerned.
+      let cmdNoSpc = dropWhileEnd isSpace cs
+      let len = length cmdNoSpc
+      count len anyChar
+      return cmdNoSpc
+    _ -> mzero
+
+-- Taken from Data.OldList.
+dropWhileEnd :: (a -> Bool) -> [a] -> [a]
+dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
+
+smart :: OrgParser (F Inlines)
+smart = do
+  getOption readerSmart >>= guard
+  doubleQuoted <|> singleQuoted <|>
+    choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
+  where
+    orgDash = dash <* updatePositions '-'
+    orgEllipses = ellipses <* updatePositions '.'
+    orgApostrophe =
+          (char '\'' <|> char '\8217') <* updateLastPreCharPos
+                                       <* updateLastForbiddenCharPos
+                                       *> return (B.str "\x2019")
+
+singleQuoted :: OrgParser (F Inlines)
+singleQuoted = try $ do
+  singleQuoteStart
+  updatePositions '\''
+  withQuoteContext InSingleQuote $
+    fmap B.singleQuoted . trimInlinesF . mconcat <$>
+      many1Till inline (singleQuoteEnd <* updatePositions '\'')
+
+-- doubleQuoted will handle regular double-quoted sections, as well
+-- as dialogues with an open double-quote without a close double-quote
+-- in the same paragraph.
+doubleQuoted :: OrgParser (F Inlines)
+doubleQuoted = try $ do
+  doubleQuoteStart
+  updatePositions '"'
+  contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+  (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
+       (fmap B.doubleQuoted . trimInlinesF $ contents))
+   <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index efe2ae25f..9a1420645 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -34,10 +34,14 @@ module Text.Pandoc.Readers.Org.Parsing
   , blanklines
   , newline
   , parseFromString
+  , skipSpaces1
   , inList
   , withContext
   , updateLastForbiddenCharPos
   , updateLastPreCharPos
+  , orgArgKey
+  , orgArgWord
+  , orgArgWordChar
   -- * Re-exports from Text.Pandoc.Parser
   , ParserContext (..)
   , many1Till
@@ -133,6 +137,10 @@ parseFromString parser str' = do
   updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
   return result
 
+-- | Skip one or more tab or space characters.
+skipSpaces1 :: OrgParser ()
+skipSpaces1 = skipMany1 spaceChar
+
 -- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
 newline :: OrgParser Char
 newline =
@@ -180,3 +188,14 @@ updateLastForbiddenCharPos = getPosition >>= \p ->
 updateLastPreCharPos :: OrgParser ()
 updateLastPreCharPos = getPosition >>= \p ->
   updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
+
+orgArgKey :: OrgParser String
+orgArgKey = try $
+  skipSpaces *> char ':'
+             *> many1 orgArgWordChar
+
+orgArgWord :: OrgParser String
+orgArgWord = many1 orgArgWordChar
+
+orgArgWordChar :: OrgParser Char
+orgArgWordChar = alphaNum <|> oneOf "-_"
-- 
cgit v1.2.3


From eea6d6568f99eda689b93210a22692c7f79b4bbf Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Wed, 25 May 2016 22:50:51 +0200
Subject: Org reader: extract blocks parser to module

Block parsing code is moved to a separate module.

This is part of the Org-mode reader cleanup effort.
---
 pandoc.cabal                          |   1 +
 src/Text/Pandoc/Readers/Org.hs        | 853 +-------------------------------
 src/Text/Pandoc/Readers/Org/Blocks.hs | 891 ++++++++++++++++++++++++++++++++++
 3 files changed, 901 insertions(+), 844 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/Org/Blocks.hs

(limited to 'src/Text')

diff --git a/pandoc.cabal b/pandoc.cabal
index 7286bd890..7d7250a21 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -394,6 +394,7 @@ Library
                    Text.Pandoc.Readers.Odt.Arrows.State,
                    Text.Pandoc.Readers.Odt.Arrows.Utils,
                    Text.Pandoc.Readers.Org.BlockStarts,
+                   Text.Pandoc.Readers.Org.Blocks,
                    Text.Pandoc.Readers.Org.Inlines,
                    Text.Pandoc.Readers.Org.ParserState,
                    Text.Pandoc.Readers.Org.Parsing,
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 605d2220e..1042b5a21 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE FlexibleContexts #-}
 {-
 Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 
@@ -28,27 +27,15 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
-import           Text.Pandoc.Readers.Org.BlockStarts
-import           Text.Pandoc.Readers.Org.Inlines
-import           Text.Pandoc.Readers.Org.ParserState
-import           Text.Pandoc.Readers.Org.Parsing
+import           Text.Pandoc.Readers.Org.Blocks ( blockList, meta )
+import           Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM )
+import           Text.Pandoc.Readers.Org.ParserState ( OrgParserState (..) )
 
-import qualified Text.Pandoc.Builder as B
-import           Text.Pandoc.Builder ( Inlines, Blocks )
 import           Text.Pandoc.Definition
-import           Text.Pandoc.Compat.Monoid ((<>))
 import           Text.Pandoc.Error
 import           Text.Pandoc.Options
-import           Text.Pandoc.Shared ( compactify', compactify'DL )
 
-import           Control.Arrow ( first )
-import           Control.Monad ( foldM, guard, mzero )
 import           Control.Monad.Reader ( runReader )
-import           Data.Char ( toLower, toUpper)
-import           Data.List ( foldl', intersperse, isPrefixOf )
-import qualified Data.Map as M
-import           Data.Maybe ( fromMaybe, isNothing )
-import           Network.HTTP ( urlEncode )
 
 
 -- | Parse org-mode string and return a Pandoc document.
@@ -57,106 +44,17 @@ readOrg :: ReaderOptions -- ^ Reader options
         -> Either PandocError Pandoc
 readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
 
---
--- Export Settings
---
-exportSetting :: OrgParser ()
-exportSetting = choice
-  [ booleanSetting "^" setExportSubSuperscripts
-  , ignoredSetting "'"
-  , ignoredSetting "*"
-  , ignoredSetting "-"
-  , ignoredSetting ":"
-  , ignoredSetting "<"
-  , ignoredSetting "\\n"
-  , ignoredSetting "arch"
-  , ignoredSetting "author"
-  , ignoredSetting "c"
-  , ignoredSetting "creator"
-  , complementableListSetting "d" setExportDrawers
-  , ignoredSetting "date"
-  , ignoredSetting "e"
-  , ignoredSetting "email"
-  , ignoredSetting "f"
-  , ignoredSetting "H"
-  , ignoredSetting "inline"
-  , ignoredSetting "num"
-  , ignoredSetting "p"
-  , ignoredSetting "pri"
-  , ignoredSetting "prop"
-  , ignoredSetting "stat"
-  , ignoredSetting "tags"
-  , ignoredSetting "tasks"
-  , ignoredSetting "tex"
-  , ignoredSetting "timestamp"
-  , ignoredSetting "title"
-  , ignoredSetting "toc"
-  , ignoredSetting "todo"
-  , ignoredSetting "|"
-  ] <?> "export setting"
-
-booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser ()
-booleanSetting settingIdentifier setter = try $ do
-  string settingIdentifier
-  char ':'
-  value <- elispBoolean
-  updateState $ modifyExportSettings setter value
-
--- | Read an elisp boolean.  Only NIL is treated as false, non-NIL values are
--- interpreted as true.
-elispBoolean :: OrgParser Bool
-elispBoolean = try $ do
-  value <- many1 nonspaceChar
-  return $ case map toLower value of
-             "nil" -> False
-             "{}"  -> False
-             "()"  -> False
-             _     -> True
-
--- | A list or a complement list (i.e. a list starting with `not`).
-complementableListSetting :: String
-                          -> ExportSettingSetter (Either [String] [String])
-                          -> OrgParser ()
-complementableListSetting settingIdentifier setter = try $ do
-  _     <- string settingIdentifier <* char ':'
-  value <- choice [ Left <$> complementStringList
-                  , Right <$> stringList
-                  , (\b -> if b then Left [] else Right []) <$> elispBoolean
-                  ]
-  updateState $ modifyExportSettings setter value
- where
-   -- Read a plain list of strings.
-   stringList :: OrgParser [String]
-   stringList = try $
-     char '('
-       *> sepBy elispString spaces
-       <* char ')'
-
-   -- Read an emacs lisp list specifying a complement set.
-   complementStringList :: OrgParser [String]
-   complementStringList = try $
-     string "(not "
-       *> sepBy elispString spaces
-       <* char ')'
-
-   elispString :: OrgParser String
-   elispString = try $
-     char '"'
-       *> manyTill alphaNum (char '"')
-
-ignoredSetting :: String -> OrgParser ()
-ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
-
 --
 -- Parser
 --
 parseOrg :: OrgParser Pandoc
 parseOrg = do
-  blocks' <- parseBlocks
-  st <- getState
-  let meta = runF (orgStateMeta' st) st
-  let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
-  return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
+  blocks' <- blockList
+  meta'   <- meta
+  return . Pandoc meta' $ removeUnwantedBlocks blocks'
+ where
+   removeUnwantedBlocks :: [Block] -> [Block]
+   removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
 
 -- | Drop COMMENT headers and the document tree below those headers.
 dropCommentTrees :: [Block] -> [Block]
@@ -191,736 +89,3 @@ isHeaderLevelLowerEq n blk =
   case blk of
     (Header level _ _) -> n >= level
     _                  -> False
-
-
---
--- parsing blocks
---
-
-parseBlocks :: OrgParser (F Blocks)
-parseBlocks = mconcat <$> manyTill block eof
-
-block :: OrgParser (F Blocks)
-block = choice [ mempty <$ blanklines
-               , table
-               , orgBlock
-               , figure
-               , example
-               , genericDrawer
-               , specialLine
-               , header
-               , horizontalRule
-               , list
-               , latexFragment
-               , noteBlock
-               , paraOrPlain
-               ] <?> "block"
-
-
---
--- Block Attributes
---
-
--- | Attributes that may be added to figures (like a name or caption).
-data BlockAttributes = BlockAttributes
-  { blockAttrName      :: Maybe String
-  , blockAttrCaption   :: Maybe (F Inlines)
-  , blockAttrKeyValues :: [(String, String)]
-  }
-
-stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String)
-stringyMetaAttribute attrCheck = try $ do
-  metaLineStart
-  attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
-  guard $ attrCheck attrName
-  skipSpaces
-  attrValue <- anyLine
-  return (attrName, attrValue)
-
-blockAttributes :: OrgParser BlockAttributes
-blockAttributes = try $ do
-  kv <- many (stringyMetaAttribute attrCheck)
-  let caption = foldl' (appendValues "CAPTION") Nothing kv
-  let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
-  let name    = lookup "NAME" kv
-  caption' <- maybe (return Nothing)
-                    (fmap Just . parseFromString parseInlines)
-                    caption
-  kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
-  return $ BlockAttributes
-           { blockAttrName = name
-           , blockAttrCaption = caption'
-           , blockAttrKeyValues = kvAttrs'
-           }
- where
-   attrCheck :: String -> Bool
-   attrCheck attr =
-     case attr of
-       "NAME"      -> True
-       "CAPTION"   -> True
-       "ATTR_HTML" -> True
-       _           -> False
-
-   appendValues :: String -> Maybe String -> (String, String) -> Maybe String
-   appendValues attrName accValue (key, value) =
-     if key /= attrName
-     then accValue
-     else case accValue of
-            Just acc -> Just $ acc ++ ' ':value
-            Nothing  -> Just value
-
-keyValues :: OrgParser [(String, String)]
-keyValues = try $
-  manyTill ((,) <$> key <*> value) newline
- where
-   key :: OrgParser String
-   key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
-
-   value :: OrgParser String
-   value = skipSpaces *> manyTill anyChar endOfValue
-
-   endOfValue :: OrgParser ()
-   endOfValue =
-     lookAhead $ (() <$ try (many1 spaceChar <* key))
-              <|> () <$ newline
-
-
---
--- Org Blocks (#+BEGIN_... / #+END_...)
---
-
-type BlockProperties = (Int, String)  -- (Indentation, Block-Type)
-
-updateIndent :: BlockProperties -> Int -> BlockProperties
-updateIndent (_, blkType) indent = (indent, blkType)
-
-orgBlock :: OrgParser (F Blocks)
-orgBlock = try $ do
-  blockAttrs <- blockAttributes
-  blockProp@(_, blkType) <- blockHeaderStart
-  ($ blockProp) $
-    case blkType of
-      "comment" -> withRaw'   (const mempty)
-      "html"    -> withRaw'   (return . (B.rawBlock blkType))
-      "latex"   -> withRaw'   (return . (B.rawBlock blkType))
-      "ascii"   -> withRaw'   (return . (B.rawBlock blkType))
-      "example" -> withRaw'   (return . exampleCode)
-      "quote"   -> withParsed (fmap B.blockQuote)
-      "verse"   -> verseBlock
-      "src"     -> codeBlock blockAttrs
-      _         -> withParsed (fmap $ divWithClass blkType)
-
-blockHeaderStart :: OrgParser (Int, String)
-blockHeaderStart = try $ (,) <$> indentation <*> blockType
- where
-  blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
-
-indentation :: OrgParser Int
-indentation = try $ do
-  tabStop  <- getOption readerTabStop
-  s        <- many spaceChar
-  return $ spaceLength tabStop s
-
-spaceLength :: Int -> String -> Int
-spaceLength tabStop s = (sum . map charLen) s
- where
-  charLen ' '  = 1
-  charLen '\t' = tabStop
-  charLen _    = 0
-
-withRaw'   :: (String   -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
-withRaw'   f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
-
-withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
-withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
-
-ignHeaders :: OrgParser ()
-ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-
-divWithClass :: String -> Blocks -> Blocks
-divWithClass cls = B.divWith ("", [cls], [])
-
-verseBlock :: BlockProperties -> OrgParser (F Blocks)
-verseBlock blkProp = try $ do
-  ignHeaders
-  content <- rawBlockContent blkProp
-  fmap B.para . mconcat . intersperse (pure B.linebreak)
-    <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content)
-
-exportsCode :: [(String, String)] -> Bool
-exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
-                         || ("rundoc-exports", "results") `elem` attrs)
-
-exportsResults :: [(String, String)] -> Bool
-exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
-                       || ("rundoc-exports", "both") `elem` attrs
-
-followingResultsBlock :: OrgParser (Maybe (F Blocks))
-followingResultsBlock =
-       optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:"
-                                     *> blankline
-                                     *> block)
-
-codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks)
-codeBlock blockAttrs blkProp = do
-  skipSpaces
-  (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders)
-  leadingIndent     <- lookAhead indentation
-  content           <- rawBlockContent (updateIndent blkProp leadingIndent)
-  resultsContent    <- followingResultsBlock
-  let id'            = fromMaybe mempty $ blockAttrName blockAttrs
-  let includeCode    = exportsCode kv
-  let includeResults = exportsResults kv
-  let codeBlck       = B.codeBlockWith ( id', classes, kv ) content
-  let labelledBlck   = maybe (pure codeBlck)
-                             (labelDiv codeBlck)
-                             (blockAttrCaption blockAttrs)
-  let resultBlck     = fromMaybe mempty resultsContent
-  return $ (if includeCode then labelledBlck else mempty)
-           <> (if includeResults then resultBlck else mempty)
- where
-   labelDiv blk value =
-       B.divWith nullAttr <$> (mappend <$> labelledBlock value
-                                       <*> pure blk)
-   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
-
-rawBlockContent :: BlockProperties -> OrgParser String
-rawBlockContent (indent, blockType) = try $
-  unlines . map commaEscaped <$> manyTill indentedLine blockEnder
- where
-   indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
-   blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType)
-
-parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
-parsedBlockContent blkProps = try $ do
-  raw <- rawBlockContent blkProps
-  parseFromString parseBlocks (raw ++ "\n")
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Int -> OrgParser String
-indentWith num = do
-  tabStop <- getOption readerTabStop
-  if num < tabStop
-     then count num (char ' ')
-     else choice [ try (count num (char ' '))
-                 , try (char '\t' >> count (num - tabStop) (char ' ')) ]
-
-type SwitchOption = (Char, Maybe String)
-
--- | Parse code block arguments
--- TODO: We currently don't handle switches.
-codeHeaderArgs :: OrgParser ([String], [(String, String)])
-codeHeaderArgs = try $ do
-  language   <- skipSpaces *> orgArgWord
-  _          <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
-  parameters <- manyTill blockOption newline
-  let pandocLang = translateLang language
-  return $
-    if hasRundocParameters parameters
-    then ( [ pandocLang, rundocBlockClass ]
-         , map toRundocAttrib (("language", language) : parameters)
-         )
-    else ([ pandocLang ], parameters)
- where
-   hasRundocParameters = not . null
-   toRundocAttrib = first ("rundoc-" ++)
-
-
-switch :: OrgParser SwitchOption
-switch = try $ simpleSwitch <|> lineNumbersSwitch
- where
-   simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
-   lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
-                       (string "-l \"" *> many1Till nonspaceChar (char '"'))
-
-translateLang :: String -> String
-translateLang "C"          = "c"
-translateLang "C++"        = "cpp"
-translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
-translateLang "js"         = "javascript"
-translateLang "lisp"       = "commonlisp"
-translateLang "R"          = "r"
-translateLang "sh"         = "bash"
-translateLang "sqlite"     = "sql"
-translateLang cs = cs
-
--- | Prefix used for Rundoc classes and arguments.
-rundocPrefix :: String
-rundocPrefix = "rundoc-"
-
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = rundocPrefix ++ "block"
-
-blockOption :: OrgParser (String, String)
-blockOption = try $ do
-  argKey <- orgArgKey
-  paramValue <- option "yes" orgParamValue
-  return (argKey, paramValue)
-
-orgParamValue :: OrgParser String
-orgParamValue = try $
-  skipSpaces
-    *> notFollowedBy (char ':' )
-    *> many1 (noneOf "\t\n\r ")
-    <* skipSpaces
-
-commaEscaped :: String -> String
-commaEscaped (',':cs@('*':_))     = cs
-commaEscaped (',':cs@('#':'+':_)) = cs
-commaEscaped cs                   = cs
-
-example :: OrgParser (F Blocks)
-example = try $ do
-  return . return . exampleCode =<< unlines <$> many1 exampleLine
-
-exampleCode :: String -> Blocks
-exampleCode = B.codeBlockWith ("", ["example"], [])
-
-exampleLine :: OrgParser String
-exampleLine = try $ exampleLineStart *> anyLine
-
-horizontalRule :: OrgParser (F Blocks)
-horizontalRule = return B.horizontalRule <$ try hline
-
-
---
--- Drawers
---
-
--- | A generic drawer which has no special meaning for org-mode.
--- Whether or not this drawer is included in the output depends on the drawers
--- export setting.
-genericDrawer :: OrgParser (F Blocks)
-genericDrawer = try $ do
-  name    <- map toUpper <$> drawerStart
-  content <- manyTill drawerLine (try drawerEnd)
-  state   <- getState
-  -- Include drawer if it is explicitly included in or not explicitly excluded
-  -- from the list of drawers that should be exported.  PROPERTIES drawers are
-  -- never exported.
-  case (exportDrawers . orgStateExportSettings $ state) of
-    _           | name == "PROPERTIES" -> return mempty
-    Left  names | name `elem`    names -> return mempty
-    Right names | name `notElem` names -> return mempty
-    _                                  -> drawerDiv name <$> parseLines content
- where
-  parseLines :: [String] -> OrgParser (F Blocks)
-  parseLines = parseFromString parseBlocks . (++ "\n") . unlines
-
-  drawerDiv :: String -> F Blocks -> F Blocks
-  drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
-
-drawerLine :: OrgParser String
-drawerLine = anyLine
-
-drawerEnd :: OrgParser String
-drawerEnd = try $
-  skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
-
--- | Read a :PROPERTIES: drawer and return the key/value pairs contained
--- within.
-propertiesDrawer :: OrgParser [(String, String)]
-propertiesDrawer = try $ do
-  drawerType <- drawerStart
-  guard $ map toUpper drawerType == "PROPERTIES"
-  manyTill property (try drawerEnd)
- where
-   property :: OrgParser (String, String)
-   property = try $ (,) <$> key <*> value
-
-   key :: OrgParser String
-   key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
-
-   value :: OrgParser String
-   value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
-
-keyValuesToAttr :: [(String, String)] -> Attr
-keyValuesToAttr kvs =
-  let
-    lowerKvs = map (\(k, v) -> (map toLower k, v)) kvs
-    id'  = fromMaybe mempty . lookup "custom_id" $ lowerKvs
-    cls  = fromMaybe mempty . lookup "class"     $ lowerKvs
-    kvs' = filter (flip notElem ["custom_id", "class"] . fst) lowerKvs
-  in
-    (id', words cls, kvs')
-
-
---
--- Figures
---
-
--- | Figures (Image on a line by itself, preceded by name and/or caption)
-figure :: OrgParser (F Blocks)
-figure = try $ do
-  figAttrs <- blockAttributes
-  src <- skipSpaces *> selfTarget <* skipSpaces <* newline
-  guard . not . isNothing . blockAttrCaption $ figAttrs
-  guard (isImageFilename src)
-  let figName    = fromMaybe mempty $ blockAttrName figAttrs
-  let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
-  let figKeyVals = blockAttrKeyValues figAttrs
-  let attr       = (mempty, mempty, figKeyVals)
-  return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
- where
-   withFigPrefix :: String -> String
-   withFigPrefix cs =
-     if "fig:" `isPrefixOf` cs
-     then cs
-     else "fig:" ++ cs
-
-   selfTarget :: OrgParser String
-   selfTarget = try $ char '[' *> linkTarget <* char ']'
-
-
---
--- Comments, Options and Metadata
---
-
-addLinkFormat :: String
-              -> (String -> String)
-              -> OrgParser ()
-addLinkFormat key formatter = updateState $ \s ->
-  let fs = orgStateLinkFormatters s
-  in s{ orgStateLinkFormatters = M.insert key formatter fs }
-
-specialLine :: OrgParser (F Blocks)
-specialLine = fmap return . try $ metaLine <|> commentLine
-
--- The order, in which blocks are tried, makes sure that we're not looking at
--- the beginning of a block, so we don't need to check for it
-metaLine :: OrgParser Blocks
-metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
-
-commentLine :: OrgParser Blocks
-commentLine = commentLineStart *> anyLine *> pure mempty
-
-declarationLine :: OrgParser ()
-declarationLine = try $ do
-  key <- metaKey
-  inlinesF <- metaInlines
-  updateState $ \st ->
-    let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
-    in st { orgStateMeta' = orgStateMeta' st <> meta' }
-  return ()
-
-metaInlines :: OrgParser (F MetaValue)
-metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
-
-metaKey :: OrgParser String
-metaKey = map toLower <$> many1 (noneOf ": \n\r")
-                      <*  char ':'
-                      <*  skipSpaces
-
-optionLine :: OrgParser ()
-optionLine = try $ do
-  key <- metaKey
-  case key of
-    "link"    -> parseLinkFormat >>= uncurry addLinkFormat
-    "options" -> () <$ sepBy spaces exportSetting
-    _         -> mzero
-
-parseLinkFormat :: OrgParser ((String, String -> String))
-parseLinkFormat = try $ do
-  linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
-  linkSubst <- parseFormat
-  return (linkType, linkSubst)
-
--- | An ad-hoc, single-argument-only implementation of a printf-style format
--- parser.
-parseFormat :: OrgParser (String -> String)
-parseFormat = try $ do
-  replacePlain <|> replaceUrl <|> justAppend
- where
-   -- inefficient, but who cares
-   replacePlain = try $ (\x -> concat . flip intersperse x)
-                     <$> sequence [tillSpecifier 's', rest]
-   replaceUrl   = try $ (\x -> concat . flip intersperse x . urlEncode)
-                     <$> sequence [tillSpecifier 'h', rest]
-   justAppend   = try $ (++) <$> rest
-
-   rest            = manyTill anyChar         (eof <|> () <$ oneOf "\n\r")
-   tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
-
---
--- Headers
---
-
--- | Headers
-header :: OrgParser (F Blocks)
-header = try $ do
-  level    <- headerStart
-  title    <- manyTill inline (lookAhead $ optional headerTags <* newline)
-  tags     <- option [] headerTags
-  newline
-  propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
-  inlines  <- runF (tagTitle title tags) <$> getState
-  attr     <- registerHeader propAttr inlines
-  return $ pure (B.headerWith attr level inlines)
- where
-   tagTitle :: [F Inlines] -> [String] -> F Inlines
-   tagTitle title tags = trimInlinesF . mconcat $ title <> map tagToInlineF tags
-
-   tagToInlineF :: String -> F Inlines
-   tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
-
-   headerTags :: OrgParser [String]
-   headerTags = try $
-     let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
-     in skipSpaces
-          *> char ':'
-          *> many1 tag
-          <* skipSpaces
-
-
---
--- Tables
---
-
-data OrgTableRow = OrgContentRow (F [Blocks])
-                 | OrgAlignRow [Alignment]
-                 | OrgHlineRow
-
--- OrgTable is strongly related to the pandoc table ADT.  Using the same
--- (i.e. pandoc-global) ADT would mean that the reader would break if the
--- global structure was to be changed, which would be bad.  The final table
--- should be generated using a builder function.  Column widths aren't
--- implemented yet, so they are not tracked here.
-data OrgTable = OrgTable
-  { orgTableAlignments :: [Alignment]
-  , orgTableHeader     :: [Blocks]
-  , orgTableRows       :: [[Blocks]]
-  }
-
-table :: OrgParser (F Blocks)
-table = try $ do
-  blockAttrs <- blockAttributes
-  lookAhead tableStart
-  do
-    rows <- tableRows
-    let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs
-    return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows
-
-orgToPandocTable :: OrgTable
-                 -> Inlines
-                 -> Blocks
-orgToPandocTable (OrgTable aligns heads lns) caption =
-  B.table caption (zip aligns $ repeat 0) heads lns
-
-tableRows :: OrgParser [OrgTableRow]
-tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
-
-tableContentRow :: OrgParser OrgTableRow
-tableContentRow = try $
-  OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
-
-tableContentCell :: OrgParser (F Blocks)
-tableContentCell = try $
-  fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
-
-tableAlignRow :: OrgParser OrgTableRow
-tableAlignRow = try $ do
-  tableStart
-  cells <- many1Till tableAlignCell newline
-  -- Empty rows are regular (i.e. content) rows, not alignment rows.
-  guard $ any (/= AlignDefault) cells
-  return $ OrgAlignRow cells
-
-tableAlignCell :: OrgParser Alignment
-tableAlignCell =
-  choice [ try $ emptyCell *> return AlignDefault
-         , try $ skipSpaces
-                   *> char '<'
-                   *> tableAlignFromChar
-                   <* many digit
-                   <* char '>'
-                   <* emptyCell
-         ] <?> "alignment info"
-    where emptyCell = try $ skipSpaces *> endOfCell
-
-tableAlignFromChar :: OrgParser Alignment
-tableAlignFromChar = try $
-  choice [ char 'l' *> return AlignLeft
-         , char 'c' *> return AlignCenter
-         , char 'r' *> return AlignRight
-         ]
-
-tableHline :: OrgParser OrgTableRow
-tableHline = try $
-  OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
-
-endOfCell :: OrgParser Char
-endOfCell = try $ char '|' <|> lookAhead newline
-
-rowsToTable :: [OrgTableRow]
-            -> F OrgTable
-rowsToTable = foldM rowToContent emptyTable
- where emptyTable = OrgTable mempty mempty mempty
-
-normalizeTable :: OrgTable -> OrgTable
-normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows
- where
-   refRow = if heads /= mempty
-            then heads
-            else if rows == mempty then mempty else head rows
-   cols = length refRow
-   fillColumns base padding = take cols $ base ++ repeat padding
-   aligns' = fillColumns aligns AlignDefault
-
--- One or more horizontal rules after the first content line mark the previous
--- line as a header.  All other horizontal lines are discarded.
-rowToContent :: OrgTable
-             -> OrgTableRow
-             -> F OrgTable
-rowToContent orgTable row =
-  case row of
-    OrgHlineRow       -> return singleRowPromotedToHeader
-    OrgAlignRow as    -> return . setAligns $ as
-    OrgContentRow cs  -> appendToBody cs
- where
-   singleRowPromotedToHeader :: OrgTable
-   singleRowPromotedToHeader = case orgTable of
-     OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
-            orgTable{ orgTableHeader = b , orgTableRows = [] }
-     _   -> orgTable
-
-   setAligns :: [Alignment] -> OrgTable
-   setAligns aligns = orgTable{ orgTableAlignments = aligns }
-
-   appendToBody :: F [Blocks] -> F OrgTable
-   appendToBody frow = do
-     newRow <- frow
-     let oldRows = orgTableRows orgTable
-     -- NOTE: This is an inefficient O(n) operation.  This should be changed
-     -- if performance ever becomes a problem.
-     return orgTable{ orgTableRows = oldRows ++ [newRow] }
-
-
---
--- LaTeX fragments
---
-latexFragment :: OrgParser (F Blocks)
-latexFragment = try $ do
-  envName <- latexEnvStart
-  content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
-  return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
- where
-   c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
-                              , c
-                              , "\\end{", e, "}\n"
-                              ]
-
-latexEnd :: String -> OrgParser ()
-latexEnd envName = try $
-  () <$ skipSpaces
-     <* string ("\\end{" ++ envName ++ "}")
-     <* blankline
-
-
---
--- Footnote defintions
---
-noteBlock :: OrgParser (F Blocks)
-noteBlock = try $ do
-  ref <- noteMarker <* skipSpaces
-  content <- mconcat <$> blocksTillHeaderOrNote
-  addToNotesTable (ref, content)
-  return mempty
- where
-   blocksTillHeaderOrNote =
-     many1Till block (eof <|> () <$ lookAhead noteMarker
-                          <|> () <$ lookAhead headerStart)
-
--- Paragraphs or Plain text
-paraOrPlain :: OrgParser (F Blocks)
-paraOrPlain = try $ do
-  ils <- parseInlines
-  nl <- option False (newline *> return True)
-  -- Read block as paragraph, except if we are in a list context and the block
-  -- is directly followed by a list item, in which case the block is read as
-  -- plain text.
-  try (guard nl
-       *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
-       *> return (B.para <$> ils))
-    <|>  (return (B.plain <$> ils))
-
-inlinesTillNewline :: OrgParser (F Inlines)
-inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
-
-
---
--- list blocks
---
-
-list :: OrgParser (F Blocks)
-list = choice [ definitionList, bulletList, orderedList ] <?> "list"
-
-definitionList :: OrgParser (F Blocks)
-definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
-                          fmap B.definitionList . fmap compactify'DL . sequence
-                            <$> many1 (definitionListItem $ bulletListStart' (Just n))
-
-bulletList :: OrgParser (F Blocks)
-bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
-                      fmap B.bulletList . fmap compactify' . sequence
-                        <$> many1 (listItem (bulletListStart' $ Just n))
-
-orderedList :: OrgParser (F Blocks)
-orderedList = fmap B.orderedList . fmap compactify' . sequence
-              <$> many1 (listItem orderedListStart)
-
-bulletListStart' :: Maybe Int -> OrgParser Int
--- returns length of bulletList prefix, inclusive of marker
-bulletListStart' Nothing  = do ind <- length <$> many spaceChar
-                               oneOf (bullets $ ind == 0)
-                               skipSpaces1
-                               return (ind + 1)
-bulletListStart' (Just n) = do count (n-1) spaceChar
-                               oneOf (bullets $ n == 1)
-                               many1 spaceChar
-                               return n
-
--- Unindented lists are legal, but they can't use '*' bullets.
--- We return n to maintain compatibility with the generic listItem.
-bullets :: Bool -> String
-bullets unindented = if unindented then "+-" else "*+-"
-
-definitionListItem :: OrgParser Int
-                   -> OrgParser (F (Inlines, [Blocks]))
-definitionListItem parseMarkerGetLength = try $ do
-  markerLength <- parseMarkerGetLength
-  term <- manyTill (noneOf "\n\r") (try definitionMarker)
-  line1 <- anyLineNewline
-  blank <- option "" ("\n" <$ blankline)
-  cont <- concat <$> many (listContinuation markerLength)
-  term' <- parseFromString parseInlines term
-  contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
-  return $ (,) <$> term' <*> fmap (:[]) contents'
- where
-   definitionMarker =
-     spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
-
-
--- parse raw text for one list item, excluding start marker and continuations
-listItem :: OrgParser Int
-         -> OrgParser (F Blocks)
-listItem start = try . withContext ListItemState $ do
-  markerLength <- try start
-  firstLine <- anyLineNewline
-  blank <- option "" ("\n" <$ blankline)
-  rest <- concat <$> many (listContinuation markerLength)
-  parseFromString parseBlocks $ firstLine ++ blank ++ rest
-
--- continuation of a list item - indented and separated by blankline or endline.
--- Note: nested lists are parsed as continuations.
-listContinuation :: Int
-                 -> OrgParser String
-listContinuation markerLength = try $
-  notFollowedBy' blankline
-  *> (mappend <$> (concat <$> many1 listLine)
-              <*> many blankline)
- where listLine = try $ indentWith markerLength *> anyLineNewline
-
--- | Parse any line, include the final newline in the output.
-anyLineNewline :: OrgParser String
-anyLineNewline = (++ "\n") <$> anyLine
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
new file mode 100644
index 000000000..b374acfe2
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -0,0 +1,891 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Org.Options
+   Copyright   : Copyright (C) 2014-2016 Albert Krewinkel
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode block elements.
+-}
+module Text.Pandoc.Readers.Org.Blocks
+  ( blockList
+  , meta
+  ) where
+
+import           Text.Pandoc.Readers.Org.BlockStarts
+import           Text.Pandoc.Readers.Org.Inlines
+import           Text.Pandoc.Readers.Org.ParserState
+import           Text.Pandoc.Readers.Org.Parsing
+
+import qualified Text.Pandoc.Builder as B
+import           Text.Pandoc.Builder ( Inlines, Blocks )
+import           Text.Pandoc.Definition
+import           Text.Pandoc.Compat.Monoid ((<>))
+import           Text.Pandoc.Options
+import           Text.Pandoc.Shared ( compactify', compactify'DL )
+
+import           Control.Arrow ( first )
+import           Control.Monad ( foldM, guard, mzero )
+import           Data.Char ( toLower, toUpper)
+import           Data.List ( foldl', intersperse, isPrefixOf )
+import qualified Data.Map as M
+import           Data.Maybe ( fromMaybe, isNothing )
+import           Network.HTTP ( urlEncode )
+
+
+--
+-- parsing blocks
+--
+
+-- | Get a list of blocks.
+blockList :: OrgParser [Block]
+blockList = do
+  blocks' <- blocks
+  st      <- getState
+  return . B.toList $ runF blocks' st
+
+-- | Get the meta information safed in the state.
+meta :: OrgParser Meta
+meta = do
+  st <- getState
+  return $ runF (orgStateMeta' st) st
+
+blocks :: OrgParser (F Blocks)
+blocks = mconcat <$> manyTill block eof
+
+block :: OrgParser (F Blocks)
+block = choice [ mempty <$ blanklines
+               , table
+               , orgBlock
+               , figure
+               , example
+               , genericDrawer
+               , specialLine
+               , header
+               , horizontalRule
+               , list
+               , latexFragment
+               , noteBlock
+               , paraOrPlain
+               ] <?> "block"
+
+
+--
+-- Block Attributes
+--
+
+-- | Attributes that may be added to figures (like a name or caption).
+data BlockAttributes = BlockAttributes
+  { blockAttrName      :: Maybe String
+  , blockAttrCaption   :: Maybe (F Inlines)
+  , blockAttrKeyValues :: [(String, String)]
+  }
+
+stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String)
+stringyMetaAttribute attrCheck = try $ do
+  metaLineStart
+  attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
+  guard $ attrCheck attrName
+  skipSpaces
+  attrValue <- anyLine
+  return (attrName, attrValue)
+
+blockAttributes :: OrgParser BlockAttributes
+blockAttributes = try $ do
+  kv <- many (stringyMetaAttribute attrCheck)
+  let caption = foldl' (appendValues "CAPTION") Nothing kv
+  let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
+  let name    = lookup "NAME" kv
+  caption' <- maybe (return Nothing)
+                    (fmap Just . parseFromString parseInlines)
+                    caption
+  kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
+  return $ BlockAttributes
+           { blockAttrName = name
+           , blockAttrCaption = caption'
+           , blockAttrKeyValues = kvAttrs'
+           }
+ where
+   attrCheck :: String -> Bool
+   attrCheck attr =
+     case attr of
+       "NAME"      -> True
+       "CAPTION"   -> True
+       "ATTR_HTML" -> True
+       _           -> False
+
+   appendValues :: String -> Maybe String -> (String, String) -> Maybe String
+   appendValues attrName accValue (key, value) =
+     if key /= attrName
+     then accValue
+     else case accValue of
+            Just acc -> Just $ acc ++ ' ':value
+            Nothing  -> Just value
+
+keyValues :: OrgParser [(String, String)]
+keyValues = try $
+  manyTill ((,) <$> key <*> value) newline
+ where
+   key :: OrgParser String
+   key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
+
+   value :: OrgParser String
+   value = skipSpaces *> manyTill anyChar endOfValue
+
+   endOfValue :: OrgParser ()
+   endOfValue =
+     lookAhead $ (() <$ try (many1 spaceChar <* key))
+              <|> () <$ newline
+
+
+--
+-- Org Blocks (#+BEGIN_... / #+END_...)
+--
+
+type BlockProperties = (Int, String)  -- (Indentation, Block-Type)
+
+updateIndent :: BlockProperties -> Int -> BlockProperties
+updateIndent (_, blkType) indent = (indent, blkType)
+
+orgBlock :: OrgParser (F Blocks)
+orgBlock = try $ do
+  blockAttrs <- blockAttributes
+  blockProp@(_, blkType) <- blockHeaderStart
+  ($ blockProp) $
+    case blkType of
+      "comment" -> withRaw'   (const mempty)
+      "html"    -> withRaw'   (return . (B.rawBlock blkType))
+      "latex"   -> withRaw'   (return . (B.rawBlock blkType))
+      "ascii"   -> withRaw'   (return . (B.rawBlock blkType))
+      "example" -> withRaw'   (return . exampleCode)
+      "quote"   -> withParsed (fmap B.blockQuote)
+      "verse"   -> verseBlock
+      "src"     -> codeBlock blockAttrs
+      _         -> withParsed (fmap $ divWithClass blkType)
+
+blockHeaderStart :: OrgParser (Int, String)
+blockHeaderStart = try $ (,) <$> indentation <*> blockType
+ where
+  blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
+
+indentation :: OrgParser Int
+indentation = try $ do
+  tabStop  <- getOption readerTabStop
+  s        <- many spaceChar
+  return $ spaceLength tabStop s
+
+spaceLength :: Int -> String -> Int
+spaceLength tabStop s = (sum . map charLen) s
+ where
+  charLen ' '  = 1
+  charLen '\t' = tabStop
+  charLen _    = 0
+
+withRaw'   :: (String   -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withRaw'   f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
+
+withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
+
+ignHeaders :: OrgParser ()
+ignHeaders = (() <$ newline) <|> (() <$ anyLine)
+
+divWithClass :: String -> Blocks -> Blocks
+divWithClass cls = B.divWith ("", [cls], [])
+
+verseBlock :: BlockProperties -> OrgParser (F Blocks)
+verseBlock blkProp = try $ do
+  ignHeaders
+  content <- rawBlockContent blkProp
+  fmap B.para . mconcat . intersperse (pure B.linebreak)
+    <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content)
+
+exportsCode :: [(String, String)] -> Bool
+exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
+                         || ("rundoc-exports", "results") `elem` attrs)
+
+exportsResults :: [(String, String)] -> Bool
+exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
+                       || ("rundoc-exports", "both") `elem` attrs
+
+followingResultsBlock :: OrgParser (Maybe (F Blocks))
+followingResultsBlock =
+       optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:"
+                                     *> blankline
+                                     *> block)
+
+codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks)
+codeBlock blockAttrs blkProp = do
+  skipSpaces
+  (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+  leadingIndent     <- lookAhead indentation
+  content           <- rawBlockContent (updateIndent blkProp leadingIndent)
+  resultsContent    <- followingResultsBlock
+  let id'            = fromMaybe mempty $ blockAttrName blockAttrs
+  let includeCode    = exportsCode kv
+  let includeResults = exportsResults kv
+  let codeBlck       = B.codeBlockWith ( id', classes, kv ) content
+  let labelledBlck   = maybe (pure codeBlck)
+                             (labelDiv codeBlck)
+                             (blockAttrCaption blockAttrs)
+  let resultBlck     = fromMaybe mempty resultsContent
+  return $ (if includeCode then labelledBlck else mempty)
+           <> (if includeResults then resultBlck else mempty)
+ where
+   labelDiv blk value =
+       B.divWith nullAttr <$> (mappend <$> labelledBlock value
+                                       <*> pure blk)
+   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+
+rawBlockContent :: BlockProperties -> OrgParser String
+rawBlockContent (indent, blockType) = try $
+  unlines . map commaEscaped <$> manyTill indentedLine blockEnder
+ where
+   indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
+   blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType)
+
+parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
+parsedBlockContent blkProps = try $ do
+  raw <- rawBlockContent blkProps
+  parseFromString blocks (raw ++ "\n")
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith :: Int -> OrgParser String
+indentWith num = do
+  tabStop <- getOption readerTabStop
+  if num < tabStop
+     then count num (char ' ')
+     else choice [ try (count num (char ' '))
+                 , try (char '\t' >> count (num - tabStop) (char ' ')) ]
+
+type SwitchOption = (Char, Maybe String)
+
+-- | Parse code block arguments
+-- TODO: We currently don't handle switches.
+codeHeaderArgs :: OrgParser ([String], [(String, String)])
+codeHeaderArgs = try $ do
+  language   <- skipSpaces *> orgArgWord
+  _          <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
+  parameters <- manyTill blockOption newline
+  let pandocLang = translateLang language
+  return $
+    if hasRundocParameters parameters
+    then ( [ pandocLang, rundocBlockClass ]
+         , map toRundocAttrib (("language", language) : parameters)
+         )
+    else ([ pandocLang ], parameters)
+ where
+   hasRundocParameters = not . null
+   toRundocAttrib = first ("rundoc-" ++)
+
+
+switch :: OrgParser SwitchOption
+switch = try $ simpleSwitch <|> lineNumbersSwitch
+ where
+   simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
+   lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
+                       (string "-l \"" *> many1Till nonspaceChar (char '"'))
+
+translateLang :: String -> String
+translateLang "C"          = "c"
+translateLang "C++"        = "cpp"
+translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
+translateLang "js"         = "javascript"
+translateLang "lisp"       = "commonlisp"
+translateLang "R"          = "r"
+translateLang "sh"         = "bash"
+translateLang "sqlite"     = "sql"
+translateLang cs = cs
+
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = rundocPrefix ++ "block"
+
+blockOption :: OrgParser (String, String)
+blockOption = try $ do
+  argKey <- orgArgKey
+  paramValue <- option "yes" orgParamValue
+  return (argKey, paramValue)
+
+orgParamValue :: OrgParser String
+orgParamValue = try $
+  skipSpaces
+    *> notFollowedBy (char ':' )
+    *> many1 (noneOf "\t\n\r ")
+    <* skipSpaces
+
+commaEscaped :: String -> String
+commaEscaped (',':cs@('*':_))     = cs
+commaEscaped (',':cs@('#':'+':_)) = cs
+commaEscaped cs                   = cs
+
+example :: OrgParser (F Blocks)
+example = try $ do
+  return . return . exampleCode =<< unlines <$> many1 exampleLine
+
+exampleCode :: String -> Blocks
+exampleCode = B.codeBlockWith ("", ["example"], [])
+
+exampleLine :: OrgParser String
+exampleLine = try $ exampleLineStart *> anyLine
+
+horizontalRule :: OrgParser (F Blocks)
+horizontalRule = return B.horizontalRule <$ try hline
+
+
+--
+-- Drawers
+--
+
+-- | A generic drawer which has no special meaning for org-mode.
+-- Whether or not this drawer is included in the output depends on the drawers
+-- export setting.
+genericDrawer :: OrgParser (F Blocks)
+genericDrawer = try $ do
+  name    <- map toUpper <$> drawerStart
+  content <- manyTill drawerLine (try drawerEnd)
+  state   <- getState
+  -- Include drawer if it is explicitly included in or not explicitly excluded
+  -- from the list of drawers that should be exported.  PROPERTIES drawers are
+  -- never exported.
+  case (exportDrawers . orgStateExportSettings $ state) of
+    _           | name == "PROPERTIES" -> return mempty
+    Left  names | name `elem`    names -> return mempty
+    Right names | name `notElem` names -> return mempty
+    _                                  -> drawerDiv name <$> parseLines content
+ where
+  parseLines :: [String] -> OrgParser (F Blocks)
+  parseLines = parseFromString blocks . (++ "\n") . unlines
+
+  drawerDiv :: String -> F Blocks -> F Blocks
+  drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
+
+drawerLine :: OrgParser String
+drawerLine = anyLine
+
+drawerEnd :: OrgParser String
+drawerEnd = try $
+  skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
+
+-- | Read a :PROPERTIES: drawer and return the key/value pairs contained
+-- within.
+propertiesDrawer :: OrgParser [(String, String)]
+propertiesDrawer = try $ do
+  drawerType <- drawerStart
+  guard $ map toUpper drawerType == "PROPERTIES"
+  manyTill property (try drawerEnd)
+ where
+   property :: OrgParser (String, String)
+   property = try $ (,) <$> key <*> value
+
+   key :: OrgParser String
+   key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
+
+   value :: OrgParser String
+   value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
+
+keyValuesToAttr :: [(String, String)] -> Attr
+keyValuesToAttr kvs =
+  let
+    lowerKvs = map (\(k, v) -> (map toLower k, v)) kvs
+    id'  = fromMaybe mempty . lookup "custom_id" $ lowerKvs
+    cls  = fromMaybe mempty . lookup "class"     $ lowerKvs
+    kvs' = filter (flip notElem ["custom_id", "class"] . fst) lowerKvs
+  in
+    (id', words cls, kvs')
+
+
+--
+-- Figures
+--
+
+-- | Figures (Image on a line by itself, preceded by name and/or caption)
+figure :: OrgParser (F Blocks)
+figure = try $ do
+  figAttrs <- blockAttributes
+  src <- skipSpaces *> selfTarget <* skipSpaces <* newline
+  guard . not . isNothing . blockAttrCaption $ figAttrs
+  guard (isImageFilename src)
+  let figName    = fromMaybe mempty $ blockAttrName figAttrs
+  let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
+  let figKeyVals = blockAttrKeyValues figAttrs
+  let attr       = (mempty, mempty, figKeyVals)
+  return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
+ where
+   withFigPrefix :: String -> String
+   withFigPrefix cs =
+     if "fig:" `isPrefixOf` cs
+     then cs
+     else "fig:" ++ cs
+
+   selfTarget :: OrgParser String
+   selfTarget = try $ char '[' *> linkTarget <* char ']'
+
+
+--
+-- Comments, Options and Metadata
+--
+
+addLinkFormat :: String
+              -> (String -> String)
+              -> OrgParser ()
+addLinkFormat key formatter = updateState $ \s ->
+  let fs = orgStateLinkFormatters s
+  in s{ orgStateLinkFormatters = M.insert key formatter fs }
+
+specialLine :: OrgParser (F Blocks)
+specialLine = fmap return . try $ metaLine <|> commentLine
+
+-- The order, in which blocks are tried, makes sure that we're not looking at
+-- the beginning of a block, so we don't need to check for it
+metaLine :: OrgParser Blocks
+metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
+
+commentLine :: OrgParser Blocks
+commentLine = commentLineStart *> anyLine *> pure mempty
+
+declarationLine :: OrgParser ()
+declarationLine = try $ do
+  key <- metaKey
+  inlinesF <- metaInlines
+  updateState $ \st ->
+    let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
+    in st { orgStateMeta' = orgStateMeta' st <> meta' }
+  return ()
+
+metaInlines :: OrgParser (F MetaValue)
+metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
+
+metaKey :: OrgParser String
+metaKey = map toLower <$> many1 (noneOf ": \n\r")
+                      <*  char ':'
+                      <*  skipSpaces
+
+optionLine :: OrgParser ()
+optionLine = try $ do
+  key <- metaKey
+  case key of
+    "link"    -> parseLinkFormat >>= uncurry addLinkFormat
+    "options" -> () <$ sepBy spaces exportSetting
+    _         -> mzero
+
+--
+-- Export Settings
+--
+
+-- | Read and process org-mode specific export options.
+exportSetting :: OrgParser ()
+exportSetting = choice
+  [ booleanSetting "^" setExportSubSuperscripts
+  , ignoredSetting "'"
+  , ignoredSetting "*"
+  , ignoredSetting "-"
+  , ignoredSetting ":"
+  , ignoredSetting "<"
+  , ignoredSetting "\\n"
+  , ignoredSetting "arch"
+  , ignoredSetting "author"
+  , ignoredSetting "c"
+  , ignoredSetting "creator"
+  , complementableListSetting "d" setExportDrawers
+  , ignoredSetting "date"
+  , ignoredSetting "e"
+  , ignoredSetting "email"
+  , ignoredSetting "f"
+  , ignoredSetting "H"
+  , ignoredSetting "inline"
+  , ignoredSetting "num"
+  , ignoredSetting "p"
+  , ignoredSetting "pri"
+  , ignoredSetting "prop"
+  , ignoredSetting "stat"
+  , ignoredSetting "tags"
+  , ignoredSetting "tasks"
+  , ignoredSetting "tex"
+  , ignoredSetting "timestamp"
+  , ignoredSetting "title"
+  , ignoredSetting "toc"
+  , ignoredSetting "todo"
+  , ignoredSetting "|"
+  ] <?> "export setting"
+
+booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser ()
+booleanSetting settingIdentifier setter = try $ do
+  string settingIdentifier
+  char ':'
+  value <- elispBoolean
+  updateState $ modifyExportSettings setter value
+
+-- | Read an elisp boolean.  Only NIL is treated as false, non-NIL values are
+-- interpreted as true.
+elispBoolean :: OrgParser Bool
+elispBoolean = try $ do
+  value <- many1 nonspaceChar
+  return $ case map toLower value of
+             "nil" -> False
+             "{}"  -> False
+             "()"  -> False
+             _     -> True
+
+-- | A list or a complement list (i.e. a list starting with `not`).
+complementableListSetting :: String
+                          -> ExportSettingSetter (Either [String] [String])
+                          -> OrgParser ()
+complementableListSetting settingIdentifier setter = try $ do
+  _     <- string settingIdentifier <* char ':'
+  value <- choice [ Left <$> complementStringList
+                  , Right <$> stringList
+                  , (\b -> if b then Left [] else Right []) <$> elispBoolean
+                  ]
+  updateState $ modifyExportSettings setter value
+ where
+   -- Read a plain list of strings.
+   stringList :: OrgParser [String]
+   stringList = try $
+     char '('
+       *> sepBy elispString spaces
+       <* char ')'
+
+   -- Read an emacs lisp list specifying a complement set.
+   complementStringList :: OrgParser [String]
+   complementStringList = try $
+     string "(not "
+       *> sepBy elispString spaces
+       <* char ')'
+
+   elispString :: OrgParser String
+   elispString = try $
+     char '"'
+       *> manyTill alphaNum (char '"')
+
+ignoredSetting :: String -> OrgParser ()
+ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
+
+
+parseLinkFormat :: OrgParser ((String, String -> String))
+parseLinkFormat = try $ do
+  linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
+  linkSubst <- parseFormat
+  return (linkType, linkSubst)
+
+-- | An ad-hoc, single-argument-only implementation of a printf-style format
+-- parser.
+parseFormat :: OrgParser (String -> String)
+parseFormat = try $ do
+  replacePlain <|> replaceUrl <|> justAppend
+ where
+   -- inefficient, but who cares
+   replacePlain = try $ (\x -> concat . flip intersperse x)
+                     <$> sequence [tillSpecifier 's', rest]
+   replaceUrl   = try $ (\x -> concat . flip intersperse x . urlEncode)
+                     <$> sequence [tillSpecifier 'h', rest]
+   justAppend   = try $ (++) <$> rest
+
+   rest            = manyTill anyChar         (eof <|> () <$ oneOf "\n\r")
+   tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+
+--
+-- Headers
+--
+
+-- | Headers
+header :: OrgParser (F Blocks)
+header = try $ do
+  level    <- headerStart
+  title    <- manyTill inline (lookAhead $ optional headerTags <* newline)
+  tags     <- option [] headerTags
+  newline
+  propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
+  inlines  <- runF (tagTitle title tags) <$> getState
+  attr     <- registerHeader propAttr inlines
+  return $ pure (B.headerWith attr level inlines)
+ where
+   tagTitle :: [F Inlines] -> [String] -> F Inlines
+   tagTitle title tags = trimInlinesF . mconcat $ title <> map tagToInlineF tags
+
+   tagToInlineF :: String -> F Inlines
+   tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
+
+   headerTags :: OrgParser [String]
+   headerTags = try $
+     let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
+     in skipSpaces
+          *> char ':'
+          *> many1 tag
+          <* skipSpaces
+
+
+--
+-- Tables
+--
+
+data OrgTableRow = OrgContentRow (F [Blocks])
+                 | OrgAlignRow [Alignment]
+                 | OrgHlineRow
+
+-- OrgTable is strongly related to the pandoc table ADT.  Using the same
+-- (i.e. pandoc-global) ADT would mean that the reader would break if the
+-- global structure was to be changed, which would be bad.  The final table
+-- should be generated using a builder function.  Column widths aren't
+-- implemented yet, so they are not tracked here.
+data OrgTable = OrgTable
+  { orgTableAlignments :: [Alignment]
+  , orgTableHeader     :: [Blocks]
+  , orgTableRows       :: [[Blocks]]
+  }
+
+table :: OrgParser (F Blocks)
+table = try $ do
+  blockAttrs <- blockAttributes
+  lookAhead tableStart
+  do
+    rows <- tableRows
+    let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs
+    return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows
+
+orgToPandocTable :: OrgTable
+                 -> Inlines
+                 -> Blocks
+orgToPandocTable (OrgTable aligns heads lns) caption =
+  B.table caption (zip aligns $ repeat 0) heads lns
+
+tableRows :: OrgParser [OrgTableRow]
+tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
+
+tableContentRow :: OrgParser OrgTableRow
+tableContentRow = try $
+  OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
+
+tableContentCell :: OrgParser (F Blocks)
+tableContentCell = try $
+  fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
+
+tableAlignRow :: OrgParser OrgTableRow
+tableAlignRow = try $ do
+  tableStart
+  cells <- many1Till tableAlignCell newline
+  -- Empty rows are regular (i.e. content) rows, not alignment rows.
+  guard $ any (/= AlignDefault) cells
+  return $ OrgAlignRow cells
+
+tableAlignCell :: OrgParser Alignment
+tableAlignCell =
+  choice [ try $ emptyCell *> return AlignDefault
+         , try $ skipSpaces
+                   *> char '<'
+                   *> tableAlignFromChar
+                   <* many digit
+                   <* char '>'
+                   <* emptyCell
+         ] <?> "alignment info"
+    where emptyCell = try $ skipSpaces *> endOfCell
+
+tableAlignFromChar :: OrgParser Alignment
+tableAlignFromChar = try $
+  choice [ char 'l' *> return AlignLeft
+         , char 'c' *> return AlignCenter
+         , char 'r' *> return AlignRight
+         ]
+
+tableHline :: OrgParser OrgTableRow
+tableHline = try $
+  OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
+
+endOfCell :: OrgParser Char
+endOfCell = try $ char '|' <|> lookAhead newline
+
+rowsToTable :: [OrgTableRow]
+            -> F OrgTable
+rowsToTable = foldM rowToContent emptyTable
+ where emptyTable = OrgTable mempty mempty mempty
+
+normalizeTable :: OrgTable -> OrgTable
+normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows
+ where
+   refRow = if heads /= mempty
+            then heads
+            else if rows == mempty then mempty else head rows
+   cols = length refRow
+   fillColumns base padding = take cols $ base ++ repeat padding
+   aligns' = fillColumns aligns AlignDefault
+
+-- One or more horizontal rules after the first content line mark the previous
+-- line as a header.  All other horizontal lines are discarded.
+rowToContent :: OrgTable
+             -> OrgTableRow
+             -> F OrgTable
+rowToContent orgTable row =
+  case row of
+    OrgHlineRow       -> return singleRowPromotedToHeader
+    OrgAlignRow as    -> return . setAligns $ as
+    OrgContentRow cs  -> appendToBody cs
+ where
+   singleRowPromotedToHeader :: OrgTable
+   singleRowPromotedToHeader = case orgTable of
+     OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
+            orgTable{ orgTableHeader = b , orgTableRows = [] }
+     _   -> orgTable
+
+   setAligns :: [Alignment] -> OrgTable
+   setAligns aligns = orgTable{ orgTableAlignments = aligns }
+
+   appendToBody :: F [Blocks] -> F OrgTable
+   appendToBody frow = do
+     newRow <- frow
+     let oldRows = orgTableRows orgTable
+     -- NOTE: This is an inefficient O(n) operation.  This should be changed
+     -- if performance ever becomes a problem.
+     return orgTable{ orgTableRows = oldRows ++ [newRow] }
+
+
+--
+-- LaTeX fragments
+--
+latexFragment :: OrgParser (F Blocks)
+latexFragment = try $ do
+  envName <- latexEnvStart
+  content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
+  return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
+ where
+   c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
+                              , c
+                              , "\\end{", e, "}\n"
+                              ]
+
+latexEnd :: String -> OrgParser ()
+latexEnd envName = try $
+  () <$ skipSpaces
+     <* string ("\\end{" ++ envName ++ "}")
+     <* blankline
+
+
+--
+-- Footnote defintions
+--
+noteBlock :: OrgParser (F Blocks)
+noteBlock = try $ do
+  ref <- noteMarker <* skipSpaces
+  content <- mconcat <$> blocksTillHeaderOrNote
+  addToNotesTable (ref, content)
+  return mempty
+ where
+   blocksTillHeaderOrNote =
+     many1Till block (eof <|> () <$ lookAhead noteMarker
+                          <|> () <$ lookAhead headerStart)
+
+-- Paragraphs or Plain text
+paraOrPlain :: OrgParser (F Blocks)
+paraOrPlain = try $ do
+  ils <- parseInlines
+  nl <- option False (newline *> return True)
+  -- Read block as paragraph, except if we are in a list context and the block
+  -- is directly followed by a list item, in which case the block is read as
+  -- plain text.
+  try (guard nl
+       *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
+       *> return (B.para <$> ils))
+    <|>  (return (B.plain <$> ils))
+
+inlinesTillNewline :: OrgParser (F Inlines)
+inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
+
+
+--
+-- list blocks
+--
+
+list :: OrgParser (F Blocks)
+list = choice [ definitionList, bulletList, orderedList ] <?> "list"
+
+definitionList :: OrgParser (F Blocks)
+definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
+                          fmap B.definitionList . fmap compactify'DL . sequence
+                            <$> many1 (definitionListItem $ bulletListStart' (Just n))
+
+bulletList :: OrgParser (F Blocks)
+bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
+                      fmap B.bulletList . fmap compactify' . sequence
+                        <$> many1 (listItem (bulletListStart' $ Just n))
+
+orderedList :: OrgParser (F Blocks)
+orderedList = fmap B.orderedList . fmap compactify' . sequence
+              <$> many1 (listItem orderedListStart)
+
+bulletListStart' :: Maybe Int -> OrgParser Int
+-- returns length of bulletList prefix, inclusive of marker
+bulletListStart' Nothing  = do ind <- length <$> many spaceChar
+                               oneOf (bullets $ ind == 0)
+                               skipSpaces1
+                               return (ind + 1)
+bulletListStart' (Just n) = do count (n-1) spaceChar
+                               oneOf (bullets $ n == 1)
+                               many1 spaceChar
+                               return n
+
+-- Unindented lists are legal, but they can't use '*' bullets.
+-- We return n to maintain compatibility with the generic listItem.
+bullets :: Bool -> String
+bullets unindented = if unindented then "+-" else "*+-"
+
+definitionListItem :: OrgParser Int
+                   -> OrgParser (F (Inlines, [Blocks]))
+definitionListItem parseMarkerGetLength = try $ do
+  markerLength <- parseMarkerGetLength
+  term <- manyTill (noneOf "\n\r") (try definitionMarker)
+  line1 <- anyLineNewline
+  blank <- option "" ("\n" <$ blankline)
+  cont <- concat <$> many (listContinuation markerLength)
+  term' <- parseFromString parseInlines term
+  contents' <- parseFromString blocks $ line1 ++ blank ++ cont
+  return $ (,) <$> term' <*> fmap (:[]) contents'
+ where
+   definitionMarker =
+     spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
+
+
+-- parse raw text for one list item, excluding start marker and continuations
+listItem :: OrgParser Int
+         -> OrgParser (F Blocks)
+listItem start = try . withContext ListItemState $ do
+  markerLength <- try start
+  firstLine <- anyLineNewline
+  blank <- option "" ("\n" <$ blankline)
+  rest <- concat <$> many (listContinuation markerLength)
+  parseFromString blocks $ firstLine ++ blank ++ rest
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int
+                 -> OrgParser String
+listContinuation markerLength = try $
+  notFollowedBy' blankline
+  *> (mappend <$> (concat <$> many1 listLine)
+              <*> many blankline)
+ where listLine = try $ indentWith markerLength *> anyLineNewline
+
+-- | Parse any line, include the final newline in the output.
+anyLineNewline :: OrgParser String
+anyLineNewline = (++ "\n") <$> anyLine
-- 
cgit v1.2.3