Merge branch 'master' into groff_reader

author: Yan Pas <yanp.bugz@gmail.com> 2018-10-07 18:10:01 +0300
committer: Yan Pas <yanp.bugz@gmail.com> 2018-10-07 18:10:01 +0300
commit: 27467189ab184c5d098e244e01f7d1bfdb0d4d45 (patch)
tree: d1fb96ebbc49ee0c4e73ef354feddd521690d545 /src/Text/Pandoc/Readers
parent: 4f3dd3b1af7217214287ab886147c5e33a54774d (diff)
parent: bd8a66394bc25b52dca9ffd963a560a4ca492f9c (diff)
download: pandoc-27467189ab184c5d098e244e01f7d1bfdb0d4d45.tar.gz
34 files changed, 2090 insertions, 1595 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 79a4abbc2..9c4f7a8ac 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -39,10 +39,12 @@ import Control.Monad.State
 import Data.Char (isAlphaNum, isLetter, isSpace, toLower)
 import Data.List (groupBy)
 import qualified Data.Map as Map
+import Data.Maybe (mapMaybe)
 import Data.Text (Text, unpack)
+import Text.Pandoc.Asciify (toAsciiChar)
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Emoji (emojiToInline)
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (stringify)
 import Text.Pandoc.Walk (walkM)
@@ -51,7 +53,7 @@ import Text.Pandoc.Walk (walkM)
 readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readCommonMark opts s = return $
   (if isEnabled Ext_gfm_auto_identifiers opts
-      then addHeaderIdentifiers
+      then addHeaderIdentifiers opts
       else id) $
   nodeToPandoc opts $ commonmarkToNode opts' exts s
   where opts' = [ optSmart | isEnabled Ext_smart opts ]
@@ -59,24 +61,27 @@ readCommonMark opts s = return $
                [ extTable | isEnabled Ext_pipe_tables opts ] ++
                [ extAutolink | isEnabled Ext_autolink_bare_uris opts ]
 
-convertEmojis :: String -> String
-convertEmojis (':':xs) =
+convertEmojis :: String -> [Inline]
+convertEmojis s@(':':xs) =
    case break (==':') xs of
         (ys,':':zs) ->
-           case Map.lookup ys emojis of
-                Just s  -> s ++ convertEmojis zs
-                Nothing -> ':' : ys ++ convertEmojis (':':zs)
-        _ -> ':':xs
-convertEmojis (x:xs) = x : convertEmojis xs
-convertEmojis [] = []
-
-addHeaderIdentifiers :: Pandoc -> Pandoc
-addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty
-
-addHeaderId :: Block -> State (Map.Map String Int) Block
-addHeaderId (Header lev (_,classes,kvs) ils) = do
+           case emojiToInline ys of
+                Just em -> em : convertEmojis zs
+                Nothing -> Str (':' : ys) : convertEmojis (':':zs)
+        _ -> [Str s]
+convertEmojis s =
+  case break (==':') s of
+    ("","") -> []
+    (_,"") -> [Str s]
+    (xs,ys) -> Str xs:convertEmojis ys
+
+addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc
+addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty
+
+addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block
+addHeaderId opts (Header lev (_,classes,kvs) ils) = do
   idmap <- get
-  let ident = toIdent ils
+  let ident = toIdent opts ils
   ident' <- case Map.lookup ident idmap of
                  Nothing -> do
                    put (Map.insert ident 1 idmap)
@@ -85,13 +90,16 @@ addHeaderId (Header lev (_,classes,kvs) ils) = do
                    put (Map.adjust (+ 1) ident idmap)
                    return (ident ++ "-" ++ show i)
   return $ Header lev (ident',classes,kvs) ils
-addHeaderId x = return x
+addHeaderId _ x = return x
 
-toIdent :: [Inline] -> String
-toIdent =   map (\c -> if isSpace c then '-' else c)
-          . filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
-                           c == '_' || c == '-')
-          . map toLower . stringify
+toIdent :: ReaderOptions -> [Inline] -> String
+toIdent opts = map (\c -> if isSpace c then '-' else c)
+               . filterer
+               . map toLower . stringify
+  where filterer = if isEnabled Ext_ascii_identifiers opts
+                   then mapMaybe toAsciiChar
+                   else filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
+                                      c == '_' || c == '-')
 
 nodeToPandoc :: ReaderOptions -> Node -> Pandoc
 nodeToPandoc opts (Node _ DOCUMENT nodes) =
@@ -200,17 +208,17 @@ addInlines :: ReaderOptions -> [Node] -> [Inline]
 addInlines opts = foldr (addInline opts) []
 
 addInline :: ReaderOptions -> Node -> [Inline] -> [Inline]
-addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++)
+addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++)
   where raw = unpack t
         clumps = groupBy samekind raw
         samekind ' ' ' ' = True
         samekind ' ' _   = False
         samekind _   ' ' = False
         samekind _  _    = True
-        toinl (' ':_) = Space
-        toinl xs      = Str $ if isEnabled Ext_emoji opts
-                                 then convertEmojis xs
-                                 else xs
+        toinl (' ':_) = [Space]
+        toinl xs      = if isEnabled Ext_emoji opts
+                        then convertEmojis xs
+                        else [Str xs]
 addInline _ (Node _ LINEBREAK _) = (LineBreak :)
 addInline opts (Node _ SOFTBREAK _)
   | isEnabled Ext_hard_line_breaks opts = (LineBreak :)
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 4fd38c0fd..a337bf937 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -2,7 +2,7 @@
 {-
   Copyright (C) 2017 Sascha Wilde <wilde@sha-bang.de>
 
-  partly based on all the other readers, especialy the work by
+  partly based on all the other readers, especially the work by
   John MacFarlane <jgm@berkeley.edu> and
   Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
   all bugs are solely created by me.
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3d48c7ee8..b7bd71754 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -537,7 +537,6 @@ type DB m = StateT DBState m
 data DBState = DBState{ dbSectionLevel :: Int
                       , dbQuoteType    :: QuoteType
                       , dbMeta         :: Meta
-                      , dbAcceptsMeta  :: Bool
                       , dbBook         :: Bool
                       , dbFigureTitle  :: Inlines
                       , dbContent      :: [Content]
@@ -547,7 +546,6 @@ instance Default DBState where
   def = DBState{ dbSectionLevel = 0
                , dbQuoteType = DoubleQuote
                , dbMeta = mempty
-               , dbAcceptsMeta = False
                , dbBook = False
                , dbFigureTitle = mempty
                , dbContent = [] }
@@ -609,18 +607,26 @@ named s e = qName (elName e) == s
 
 --
 
-acceptingMetadata :: PandocMonad m => DB m a -> DB m a
-acceptingMetadata p = do
-  modify (\s -> s { dbAcceptsMeta = True } )
-  res <- p
-  modify (\s -> s { dbAcceptsMeta = False })
-  return res
-
-checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a
-checkInMeta p = do
-  accepts <- dbAcceptsMeta <$> get
-  when accepts p
-  return mempty
+addMetadataFromElement :: PandocMonad m => Element -> DB m Blocks
+addMetadataFromElement e = do
+    case filterChild (named "title") e of
+         Nothing -> return ()
+         Just z  -> do
+           getInlines z >>= addMeta "title"
+           addMetaField "subtitle" z
+    case filterChild (named "authorgroup") e of
+         Nothing -> return ()
+         Just z  -> addMetaField "author" z
+    addMetaField "subtitle" e
+    addMetaField "author" e
+    addMetaField "date" e
+    addMetaField "release" e
+    return mempty
+  where addMetaField fieldname elt =
+            case filterChildren (named fieldname) elt of
+                   []  -> return ()
+                   [z] -> getInlines z >>= addMeta fieldname
+                   zs  -> mapM getInlines zs >>= addMeta fieldname
 
 addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
 addMeta field val = modify (setMeta field val)
@@ -718,11 +724,6 @@ parseBlock (Elem e) =
         "attribution" -> return mempty
         "titleabbrev" -> return mempty
         "authorinitials" -> return mempty
-        "title" ->  checkInMeta getTitle
-        "author" -> checkInMeta getAuthor
-        "authorgroup" -> checkInMeta getAuthorGroup
-        "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release")
-        "date" -> checkInMeta getDate
         "bibliography" -> sect 0
         "bibliodiv" -> sect 1
         "biblioentry" -> parseMixed para (elContent e)
@@ -788,8 +789,8 @@ parseBlock (Elem e) =
         "figure" -> getFigure e
         "mediaobject" -> para <$> getMediaobject e
         "caption" -> return mempty
-        "info" -> metaBlock
-        "articleinfo" -> metaBlock
+        "info" -> addMetadataFromElement e
+        "articleinfo" -> addMetadataFromElement e
         "sectioninfo" -> return mempty  -- keywords & other metadata
         "refsectioninfo" -> return mempty  -- keywords & other metadata
         "refsect1info" -> return mempty  -- keywords & other metadata
@@ -803,10 +804,11 @@ parseBlock (Elem e) =
         "chapterinfo" -> return mempty  -- keywords & other metadata
         "glossaryinfo" -> return mempty  -- keywords & other metadata
         "appendixinfo" -> return mempty  -- keywords & other metadata
-        "bookinfo" -> metaBlock
+        "bookinfo" -> addMetadataFromElement e
         "article" -> modify (\st -> st{ dbBook = False }) >>
-                           getBlocks e
-        "book" -> modify (\st -> st{ dbBook = True }) >>  getBlocks e
+                           addMetadataFromElement e >> getBlocks e
+        "book" -> modify (\st -> st{ dbBook = True }) >>
+                    addMetadataFromElement e >> getBlocks e
         "table" -> parseTable
         "informaltable" -> parseTable
         "informalexample" -> divWith ("", ["informalexample"], []) <$>
@@ -816,6 +818,8 @@ parseBlock (Elem e) =
         "screen" -> codeBlockWithLang
         "programlisting" -> codeBlockWithLang
         "?xml"  -> return mempty
+        "title" -> return mempty     -- handled in parent element
+        "subtitle" -> return mempty  -- handled in parent element
         _       -> getBlocks e
    where parseMixed container conts = do
            let (ils,rest) = break isBlockElement conts
@@ -857,19 +861,6 @@ parseBlock (Elem e) =
                      terms' <- mapM getInlines terms
                      items' <- mapM getBlocks items
                      return (mconcat $ intersperse (str "; ") terms', items')
-         getTitle =  do
-                     tit <- getInlines e
-                     subtit <-  case filterChild (named "subtitle") e of
-                                  Just s  -> (text ": " <>) <$>
-                                              getInlines s
-                                  Nothing -> return mempty
-                     addMeta "title" (tit <> subtit)
-
-         getAuthor = (:[]) <$> getInlines e >>= addMeta "author"
-         getAuthorGroup = do
-          let terms = filterChildren (named "author") e
-          mapM getInlines terms >>= addMeta "author"
-         getDate = getInlines e >>= addMeta "date"
          parseTable = do
                       let isCaption x = named "title" x || named "caption" x
                       caption <- case filterChild isCaption e of
@@ -935,7 +926,6 @@ parseBlock (Elem e) =
                      modify $ \st -> st{ dbSectionLevel = n - 1 }
                      return $ headerWith (ident,[],[]) n' headerText <> b
          lineItems = mapM getInlines $ filterChildren (named "line") e
-         metaBlock = acceptingMetadata (getBlocks e) >> return mempty
 
 getInlines :: PandocMonad m => Element -> DB m Inlines
 getInlines e' = (trimInlines . mconcat) <$>
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index 49ea71601..0be363f3d 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -183,14 +183,13 @@ blocksToDefinitions' defAcc acc
         pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
     in
      blocksToDefinitions' (pair : defAcc) acc blks
-blocksToDefinitions' defAcc acc
+blocksToDefinitions' ((defTerm, defItems):defs) acc
   (Div (ident2, classes2, kvs2) blks2 : blks)
-  | (not . null) defAcc && "Definition"  `elem` classes2 =
+  | "Definition"  `elem` classes2 =
     let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
         defItems2 = case remainingAttr2 == ("", [], []) of
           True  -> blks2
           False -> [Div remainingAttr2 blks2]
-        ((defTerm, defItems):defs) = defAcc
         defAcc' = case null defItems of
           True -> (defTerm, [defItems2]) : defs
           False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 4c4c06073..b4e52de14 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -785,7 +785,7 @@ So we do this in a number of steps. If we encounter the fldchar begin
 tag, we start open a fldchar state variable (see state above). We add
 the instrtext to it as FieldInfo. Then we close that and start adding
 the runs when we get to separate. Then when we get to end, we produce
-the Field type with approriate FieldInfo and Runs.
+the Field type with appropriate FieldInfo and Runs.
 -}
 elemToParPart ns element
   | isElem ns "w" "r" element
@@ -1056,8 +1056,10 @@ elemToRunStyle ns element parentStyle
   | Just rPr <- findChildByName ns "w" "rPr" element =
     RunStyle
       {
-        isBold = checkOnOff ns rPr (elemName ns "w" "b")
-      , isItalic = checkOnOff ns rPr (elemName ns "w" "i")
+        isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus`
+                 checkOnOff ns rPr (elemName ns "w" "bCs")
+      , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus`
+                   checkOnOff ns rPr (elemName ns "w" "iCs")
       , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
       , isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
       , rVertAlign =
@@ -1153,8 +1155,9 @@ getSymChar :: NameSpaces -> Element -> RunElem
 getSymChar ns element
   | Just s <- lowerFromPrivate <$> getCodepoint
   , Just font <- getFont =
-  let [(char, _)] = readLitChar ("\\x" ++ s) in
-    TextRun . maybe "" (:[]) $ getUnicode font char
+    case readLitChar ("\\x" ++ s) of
+         [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char
+         _           -> TextRun ""
   where
     getCodepoint = findAttrByName ns "w" "char" element
     getFont = stringToFont =<< findAttrByName ns "w" "font" element
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index c26447641..bfc3fc3ee 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -73,7 +73,7 @@ readEPUB opts bytes = case toArchiveOrFail bytes of
 -- runEPUB :: Except PandocError a -> Either PandocError a
 -- runEPUB = runExcept
 
--- Note that internal reference are aggresively normalised so that all ids
+-- Note that internal reference are aggressively normalised so that all ids
 -- are of the form "filename#id"
 --
 archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 32a1ba5a6..b06e07a80 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -68,11 +68,13 @@ import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad (..))
 import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
 import Text.Pandoc.Definition
+import Text.Pandoc.Readers.LaTeX (rawLaTeXInline)
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options (
     Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
-               Ext_native_spans, Ext_raw_html, Ext_line_blocks),
+               Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex),
     ReaderOptions (readerExtensions, readerStripComments),
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
@@ -102,7 +104,8 @@ readHtml opts inp = do
                                          (m:_) -> messageString m
   result <- flip runReaderT def $
        runParserT parseDoc
-       (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty [])
+       (HTMLState def{ stateOptions = opts }
+         [] Nothing Set.empty M.empty [] M.empty)
        "source" tags
   case result of
     Right doc -> return doc
@@ -124,7 +127,8 @@ data HTMLState =
      baseHref    :: Maybe URI,
      identifiers :: Set.Set String,
      headerMap   :: M.Map Inlines String,
-     logMessages :: [LogMessage]
+     logMessages :: [LogMessage],
+     macros      :: M.Map Text Macro
   }
 
 data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -659,6 +663,7 @@ inline = choice
            , pCode
            , pSpan
            , pMath False
+           , pScriptMath
            , pRawHtmlInline
            ]
 
@@ -745,18 +750,18 @@ pLink = try $ do
   let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
                maybeFromAttrib "id" tag
   let cls = words $ T.unpack $ fromAttrib "class" tag
-  lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
+  lab <- mconcat <$> manyTill inline (pCloses "a")
   -- check for href; if href, then a link, otherwise a span
   case maybeFromAttrib "href" tag of
        Nothing   ->
-         return $ B.spanWith (uid, cls, []) lab
+         return $ extractSpaces (B.spanWith (uid, cls, [])) lab
        Just url' -> do
          mbBaseHref <- baseHref <$> getState
          let url = case (parseURIReference url', mbBaseHref) of
                         (Just rel, Just bs) ->
                           show (rel `nonStrictRelativeTo` bs)
                         _                   -> url'
-         return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
+         return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab
 
 pImage :: PandocMonad m => TagParser m Inlines
 pImage = do
@@ -818,6 +823,17 @@ toStringAttr :: [(Text, Text)] -> [(String, String)]
 toStringAttr = map go
   where go (x,y) = (T.unpack x, T.unpack y)
 
+pScriptMath :: PandocMonad m => TagParser m Inlines
+pScriptMath = try $ do
+  TagOpen _ attr' <- pSatisfy $ tagOpen (=="script") (const True)
+  isdisplay <- case lookup "type" attr' of
+                    Just x | "math/tex" `T.isPrefixOf` x
+                      -> return $ "display" `T.isSuffixOf` x
+                    _ -> mzero
+  contents <- T.unpack . innerText <$>
+                manyTill pAnyTag (pSatisfy (matchTagClose "script"))
+  return $ (if isdisplay then B.displayMath else B.math) contents
+
 pMath :: PandocMonad m => Bool -> TagParser m Inlines
 pMath inCase = try $ do
   open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True)
@@ -852,7 +868,7 @@ pInTags' tagtype tagtest parser = try $ do
   pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t)
   mconcat <$> manyTill parser (pCloses tagtype <|> eof)
 
--- parses p, preceeded by an optional opening tag
+-- parses p, preceded by an optional opening tag
 -- and followed by an optional closing tags
 pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a
 pOptInTag tagtype p = try $ do
@@ -907,9 +923,25 @@ pTagContents =
   <|> pStr
   <|> pSpace
   <|> smartPunctuation pTagContents
+  <|> pRawTeX
   <|> pSymbol
   <|> pBad
 
+pRawTeX :: PandocMonad m => InlinesParser m Inlines
+pRawTeX = do
+  lookAhead $ try $ do
+    char '\\'
+    choice $ map (try . string) ["begin", "eqref", "ref"]
+  guardEnabled Ext_raw_tex
+  inp <- getInput
+  st <- getState
+  res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp)
+  case res of
+       Left _                -> mzero
+       Right (contents, raw) -> do
+         _ <- count (length raw) anyChar
+         return $ B.rawInline "tex" contents
+
 pStr :: PandocMonad m => InlinesParser m Inlines
 pStr = do
   result <- many1 $ satisfy $ \c ->
@@ -923,6 +955,7 @@ isSpecial '\''    = True
 isSpecial '.'     = True
 isSpecial '-'     = True
 isSpecial '$'     = True
+isSpecial '\\'    = True
 isSpecial '\8216' = True
 isSpecial '\8217' = True
 isSpecial '\8220' = True
@@ -1249,6 +1282,10 @@ isSpace _    = False
 
 -- Instances
 
+instance HasMacros HTMLState where
+  extractMacros        = macros
+  updateMacros f st    = st{ macros = f $ macros st }
+
 instance HasIdentifierList HTMLState where
   extractIdentifierList = identifiers
   updateIdentifierList f s = s{ identifiers = f (identifiers s) }
@@ -1281,7 +1318,7 @@ instance HasLastStrPosition HTMLState where
   setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
   getLastStrPos = getLastStrPos . parserState
 
--- For now we need a special verison here; the one in Shared has String type
+-- For now we need a special version here; the one in Shared has String type
 renderTags' :: [Tag Text] -> Text
 renderTags' = renderTagsOptions
                renderOptions{ optMinimize = matchTags ["hr", "br", "img",
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 967037e4e..072bab350 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -44,11 +44,7 @@ readHaddockEither :: ReaderOptions -- ^ Reader options
                   -> String        -- ^ String to parse
                   -> Either PandocError Pandoc
 readHaddockEither _opts =
-#if MIN_VERSION_haddock_library(1,2,0)
-  Right . B.doc . docHToBlocks . _doc . parseParas
-#else
-  Right .  B.doc . docHToBlocks . parseParas
-#endif
+  Right . B.doc . docHToBlocks . _doc . parseParas Nothing
 
 docHToBlocks :: DocH String Identifier -> Blocks
 docHToBlocks d' =
@@ -68,10 +64,8 @@ docHToBlocks d' =
     DocEmphasis _ -> inlineFallback
     DocMonospaced _ -> inlineFallback
     DocBold _ -> inlineFallback
-#if MIN_VERSION_haddock_library(1,4,0)
     DocMathInline _ -> inlineFallback
     DocMathDisplay _ -> inlineFallback
-#endif
     DocHeader h -> B.header (headerLevel h)
                            (docHToInlines False $ headerTitle h)
     DocUnorderedList items -> B.bulletList (map docHToBlocks items)
@@ -87,7 +81,6 @@ docHToBlocks d' =
     DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s)
     DocExamples es -> mconcat $ map (\e ->
        makeExample ">>>" (exampleExpression e) (exampleResult e)) es
-#if MIN_VERSION_haddock_library(1,5,0)
     DocTable H.Table{ tableHeaderRows = headerRows
                     , tableBodyRows = bodyRows
                     }
@@ -100,7 +93,6 @@ docHToBlocks d' =
              colspecs = replicate (maximum (map length body))
                              (AlignDefault, 0.0)
          in  B.table mempty colspecs header body
-#endif
 
   where inlineFallback = B.plain $ docHToInlines False d'
         consolidatePlains = B.fromList . consolidatePlains' . B.toList
@@ -133,10 +125,8 @@ docHToInlines isCode d' =
     DocMonospaced (DocString s) -> B.code s
     DocMonospaced d -> docHToInlines True d
     DocBold d -> B.strong (docHToInlines isCode d)
-#if MIN_VERSION_haddock_library(1,4,0)
     DocMathInline s -> B.math s
     DocMathDisplay s -> B.displayMath s
-#endif
     DocHeader _ -> mempty
     DocUnorderedList _ -> mempty
     DocOrderedList _ -> mempty
@@ -149,9 +139,7 @@ docHToInlines isCode d' =
     DocAName s -> B.spanWith (s,["anchor"],[]) mempty
     DocProperty _ -> mempty
     DocExamples _ -> mempty
-#if MIN_VERSION_haddock_library(1,5,0)
     DocTable _ -> mempty
-#endif
 
 -- | Create an 'Example', stripping superfluous characters as appropriate
 makeExample :: String -> String -> [String] -> Blocks
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 59af76d23..695c86b5d 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -191,7 +191,7 @@ parseBlock (Elem e) =
                     listType -> do
                       let start = fromMaybe 1 $
                                   (strContent <$> (filterElement (named "list-item") e
-                                               >>= filterElement (named "lable")))
+                                               >>= filterElement (named "label")))
                                    >>= safeRead
                       orderedListWith (start, parseListStyleType listType, DefaultDelim)
                         <$> listitems
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 39dffde76..7c5619165 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -47,8 +47,7 @@ import Prelude
 import Control.Applicative (many, optional, (<|>))
 import Control.Monad
 import Control.Monad.Except (throwError)
-import Control.Monad.Trans (lift)
-import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower, toUpper)
+import Data.Char (isDigit, isLetter, toLower, toUpper)
 import Data.Default
 import Data.List (intercalate, isPrefixOf)
 import qualified Data.Map as M
@@ -63,7 +62,7 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv,
                           readFileFromDirs, report, setResourcePath,
                           setTranslations, translateTerm, trace)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop, PandocParseError, PandocParsecError))
+import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError))
 import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
@@ -71,12 +70,15 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
-                                        Tok (..), TokType (..))
+                                        ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
+                                       babelLangToBCP47)
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
-import Text.Parsec.Pos
 import qualified Text.Pandoc.Builder as B
+import qualified Data.Text.Normalize as Normalize
 
 -- for debugging:
 -- import Text.Pandoc.Extensions (getDefaultExtensions)
@@ -137,482 +139,49 @@ resolveRefs _ x = x
 --        Left e  -> error (show e)
 --        Right r -> return r
 
-newtype HeaderNum = HeaderNum [Int]
-  deriving (Show)
-
-renderHeaderNum :: HeaderNum -> String
-renderHeaderNum (HeaderNum xs) =
-  intercalate "." (map show xs)
-
-incrementHeaderNum :: Int -> HeaderNum -> HeaderNum
-incrementHeaderNum level (HeaderNum ns) = HeaderNum $
-  case reverse (take level (ns ++ repeat 0)) of
-       (x:xs) -> reverse (x+1 : xs)
-       []     -> []  -- shouldn't happen
-
-data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
-                            , sMeta          :: Meta
-                            , sQuoteContext  :: QuoteContext
-                            , sMacros        :: M.Map Text Macro
-                            , sContainers    :: [String]
-                            , sHeaders       :: M.Map Inlines String
-                            , sLogMessages   :: [LogMessage]
-                            , sIdentifiers   :: Set.Set String
-                            , sVerbatimMode  :: Bool
-                            , sCaption       :: Maybe Inlines
-                            , sInListItem    :: Bool
-                            , sInTableCell   :: Bool
-                            , sLastHeaderNum :: HeaderNum
-                            , sLabels        :: M.Map String [Inline]
-                            , sHasChapters   :: Bool
-                            , sToggles       :: M.Map String Bool
-                            }
-     deriving Show
-
-defaultLaTeXState :: LaTeXState
-defaultLaTeXState = LaTeXState{ sOptions       = def
-                              , sMeta          = nullMeta
-                              , sQuoteContext  = NoQuote
-                              , sMacros        = M.empty
-                              , sContainers    = []
-                              , sHeaders       = M.empty
-                              , sLogMessages   = []
-                              , sIdentifiers   = Set.empty
-                              , sVerbatimMode  = False
-                              , sCaption       = Nothing
-                              , sInListItem    = False
-                              , sInTableCell   = False
-                              , sLastHeaderNum = HeaderNum []
-                              , sLabels        = M.empty
-                              , sHasChapters   = False
-                              , sToggles       = M.empty
-                              }
-
-instance PandocMonad m => HasQuoteContext LaTeXState m where
-  getQuoteContext = sQuoteContext <$> getState
-  withQuoteContext context parser = do
-    oldState <- getState
-    let oldQuoteContext = sQuoteContext oldState
-    setState oldState { sQuoteContext = context }
-    result <- parser
-    newState <- getState
-    setState newState { sQuoteContext = oldQuoteContext }
-    return result
-
-instance HasLogMessages LaTeXState where
-  addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
-  getLogMessages st = reverse $ sLogMessages st
-
-instance HasIdentifierList LaTeXState where
-  extractIdentifierList     = sIdentifiers
-  updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
-
-instance HasIncludeFiles LaTeXState where
-  getIncludeFiles = sContainers
-  addIncludeFile f s = s{ sContainers = f : sContainers s }
-  dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
-
-instance HasHeaderMap LaTeXState where
-  extractHeaderMap     = sHeaders
-  updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
-
-instance HasMacros LaTeXState where
-  extractMacros  st  = sMacros st
-  updateMacros f st  = st{ sMacros = f (sMacros st) }
-
-instance HasReaderOptions LaTeXState where
-  extractReaderOptions = sOptions
-
-instance HasMeta LaTeXState where
-  setMeta field val st =
-    st{ sMeta = setMeta field val $ sMeta st }
-  deleteMeta field st =
-    st{ sMeta = deleteMeta field $ sMeta st }
-
-instance Default LaTeXState where
-  def = defaultLaTeXState
-
-type LP m = ParserT [Tok] LaTeXState m
-
-withVerbatimMode :: PandocMonad m => LP m a -> LP m a
-withVerbatimMode parser = do
-  updateState $ \st -> st{ sVerbatimMode = True }
-  result <- parser
-  updateState $ \st -> st{ sVerbatimMode = False }
-  return result
-
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-               => LP m a -> LP m a -> ParserT String s m (a, String)
-rawLaTeXParser parser valParser = do
-  inp <- getInput
-  let toks = tokenize "source" $ T.pack inp
-  pstate <- getState
-  let lstate = def{ sOptions = extractReaderOptions pstate }
-  let lstate' = lstate { sMacros = extractMacros pstate }
-  let rawparser = (,) <$> withRaw valParser <*> getState
-  res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
-  case res' of
-       Left _    -> mzero
-       Right toks' -> do
-         res <- lift $ runParserT (do doMacros 0
-                                      -- retokenize, applying macros
-                                      ts <- many (satisfyTok (const True))
-                                      setInput ts
-                                      rawparser)
-                        lstate' "chunk" toks'
-         case res of
-              Left _    -> mzero
-              Right ((val, raw), st) -> do
-                updateState (updateMacros (sMacros st <>))
-                _ <- takeP (T.length (untokenize toks'))
-                return (val, T.unpack (untokenize raw))
-
-applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-            => String -> ParserT String s m String
-applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
-   do let retokenize = doMacros 0 *>
-             (toksToString <$> many (satisfyTok (const True)))
-      pstate <- getState
-      let lstate = def{ sOptions = extractReaderOptions pstate
-                      , sMacros  = extractMacros pstate }
-      res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
-      case res of
-           Left e   -> fail (show e)
-           Right s' -> return s'
 
 rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
               => ParserT String s m String
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
-  -- we don't want to apply newly defined latex macros to their own
-  -- definitions:
-  snd <$> rawLaTeXParser (environment <|> macroDef <|> blockCommand) blocks
+  snd <$> (rawLaTeXParser False macroDef blocks
+      <|> (rawLaTeXParser True
+             (do choice (map controlSeq
+                   ["include", "input", "subfile", "usepackage"])
+                 skipMany opt
+                 braced
+                 return mempty) blocks)
+      <|> rawLaTeXParser True
+           (environment <|> blockCommand)
+           (mconcat <$> (many (block <|> beginOrEndCommand))))
+
+-- See #4667 for motivation; sometimes people write macros
+-- that just evaluate to a begin or end command, which blockCommand
+-- won't accept.
+beginOrEndCommand :: PandocMonad m => LP m Blocks
+beginOrEndCommand = try $ do
+  Tok _ (CtrlSeq name) txt <- anyControlSeq
+  guard $ name == "begin" || name == "end"
+  (envname, rawargs) <- withRaw braced
+  if M.member (untokenize envname)
+      (inlineEnvironments :: M.Map Text (LP PandocPure Inlines))
+     then mzero
+     else return $ rawBlock "latex"
+                    (T.unpack (txt <> untokenize rawargs))
 
 rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
                => ParserT String s m String
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
-  snd <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines
+  snd <$> (  rawLaTeXParser True
+              (mempty <$ (controlSeq "input" >> skipMany opt >> braced))
+              inlines
+        <|> rawLaTeXParser True (inlineEnvironment <|> inlineCommand') inlines)
 
 inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
-  fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines
-
-tokenize :: SourceName -> Text -> [Tok]
-tokenize sourcename = totoks (initialPos sourcename)
-
-totoks :: SourcePos -> Text -> [Tok]
-totoks pos t =
-  case T.uncons t of
-       Nothing        -> []
-       Just (c, rest)
-         | c == '\n' ->
-           Tok pos Newline "\n"
-           : totoks (setSourceColumn (incSourceLine pos 1) 1) rest
-         | isSpaceOrTab c ->
-           let (sps, rest') = T.span isSpaceOrTab t
-           in  Tok pos Spaces sps
-               : totoks (incSourceColumn pos (T.length sps))
-                 rest'
-         | isAlphaNum c ->
-           let (ws, rest') = T.span isAlphaNum t
-           in  Tok pos Word ws
-               : totoks (incSourceColumn pos (T.length ws)) rest'
-         | c == '%' ->
-           let (cs, rest') = T.break (== '\n') rest
-           in  Tok pos Comment ("%" <> cs)
-               : totoks (incSourceColumn pos (1 + T.length cs)) rest'
-         | c == '\\' ->
-           case T.uncons rest of
-                Nothing -> [Tok pos (CtrlSeq " ") "\\"]
-                Just (d, rest')
-                  | isLetterOrAt d ->
-                      -- \makeatletter is common in macro defs;
-                      -- ideally we should make tokenization sensitive
-                      -- to \makeatletter and \makeatother, but this is
-                      -- probably best for now
-                      let (ws, rest'') = T.span isLetterOrAt rest
-                          (ss, rest''') = T.span isSpaceOrTab rest''
-                      in  Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
-                          : totoks (incSourceColumn pos
-                               (1 + T.length ws + T.length ss)) rest'''
-                  | isSpaceOrTab d || d == '\n' ->
-                      let (w1, r1) = T.span isSpaceOrTab rest
-                          (w2, (w3, r3)) = case T.uncons r1 of
-                                          Just ('\n', r2)
-                                                  -> (T.pack "\n",
-                                                        T.span isSpaceOrTab r2)
-                                          _ -> (mempty, (mempty, r1))
-                          ws = "\\" <> w1 <> w2 <> w3
-                      in  case T.uncons r3 of
-                               Just ('\n', _) ->
-                                 Tok pos (CtrlSeq " ") ("\\" <> w1)
-                                 : totoks (incSourceColumn pos (T.length ws))
-                                   r1
-                               _ ->
-                                 Tok pos (CtrlSeq " ") ws
-                                 : totoks (incSourceColumn pos (T.length ws))
-                                   r3
-                  | otherwise  ->
-                      Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
-                      : totoks (incSourceColumn pos 2) rest'
-         | c == '#' ->
-           let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
-           in  case safeRead (T.unpack t1) of
-                    Just i ->
-                       Tok pos (Arg i) ("#" <> t1)
-                       : totoks (incSourceColumn pos (1 + T.length t1)) t2
-                    Nothing ->
-                       Tok pos Symbol "#"
-                       : totoks (incSourceColumn pos 1) t2
-         | c == '^' ->
-           case T.uncons rest of
-                Just ('^', rest') ->
-                  case T.uncons rest' of
-                       Just (d, rest'')
-                         | isLowerHex d ->
-                           case T.uncons rest'' of
-                                Just (e, rest''') | isLowerHex e ->
-                                  Tok pos Esc2 (T.pack ['^','^',d,e])
-                                  : totoks (incSourceColumn pos 4) rest'''
-                                _ ->
-                                  Tok pos Esc1 (T.pack ['^','^',d])
-                                  : totoks (incSourceColumn pos 3) rest''
-                         | d < '\128' ->
-                                  Tok pos Esc1 (T.pack ['^','^',d])
-                                  : totoks (incSourceColumn pos 3) rest''
-                       _ -> Tok pos Symbol "^" :
-                            Tok (incSourceColumn pos 1) Symbol "^" :
-                            totoks (incSourceColumn pos 2) rest'
-                _ -> Tok pos Symbol "^"
-                     : totoks (incSourceColumn pos 1) rest
-         | otherwise ->
-           Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
-
-isSpaceOrTab :: Char -> Bool
-isSpaceOrTab ' '  = True
-isSpaceOrTab '\t' = True
-isSpaceOrTab _    = False
-
-isLetterOrAt :: Char -> Bool
-isLetterOrAt '@' = True
-isLetterOrAt c   = isLetter c
-
-isLowerHex :: Char -> Bool
-isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
-
-untokenize :: [Tok] -> Text
-untokenize = mconcat . map untoken
-
-untoken :: Tok -> Text
-untoken (Tok _ _ t) = t
-
-satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
-satisfyTok f =
-  try $ do
-    res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    doMacros 0 -- apply macros on remaining input stream
-    return res
-  where matcher t | f t       = Just t
-                  | otherwise = Nothing
-        updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
-        updatePos _spos _ (Tok pos _ _ : _) = pos
-        updatePos spos _ []                 = incSourceColumn spos 1
-
-doMacros :: PandocMonad m => Int -> LP m ()
-doMacros n = do
-  verbatimMode <- sVerbatimMode <$> getState
-  unless verbatimMode $ do
-    inp <- getInput
-    case inp of
-         Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
-          Tok _ Word name : Tok _ Symbol "}" : ts
-            -> handleMacros spos name ts
-         Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
-          Tok _ Word name : Tok _ Symbol "}" : ts
-            -> handleMacros spos ("end" <> name) ts
-         Tok _ (CtrlSeq "expandafter") _ : t : ts
-            -> do setInput ts
-                  doMacros n
-                  getInput >>= setInput . combineTok t
-         Tok spos (CtrlSeq name) _ : ts
-            -> handleMacros spos name ts
-         _ -> return ()
-  where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
-          | T.all isLetterOrAt w =
-            Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
-              where (x1, x2) = T.break isSpaceOrTab x
-        combineTok t ts = t:ts
-        handleMacros spos name ts = do
-                macros <- sMacros <$> getState
-                case M.lookup name macros of
-                     Nothing -> return ()
-                     Just (Macro expansionPoint numargs optarg newtoks) -> do
-                       setInput ts
-                       let getarg = try $ spaces >> bracedOrToken
-                       args <- case optarg of
-                                    Nothing -> count numargs getarg
-                                    Just o  ->
-                                       (:) <$> option o bracketedToks
-                                           <*> count (numargs - 1) getarg
-                       -- first boolean param is true if we're tokenizing
-                       -- an argument (in which case we don't want to
-                       -- expand #1 etc.)
-                       let addTok False (Tok _ (Arg i) _) acc | i > 0
-                                                              , i <= numargs =
-                                 foldr (addTok True) acc (args !! (i - 1))
-                           -- add space if needed after control sequence
-                           -- see #4007
-                           addTok _ (Tok _ (CtrlSeq x) txt)
-                                  acc@(Tok _ Word _ : _)
-                             | not (T.null txt) &&
-                               isLetter (T.last txt) =
-                               Tok spos (CtrlSeq x) (txt <> " ") : acc
-                           addTok _ t acc = setpos spos t : acc
-                       ts' <- getInput
-                       setInput $ foldr (addTok False) ts' newtoks
-                       case expansionPoint of
-                            ExpandWhenUsed ->
-                              if n > 20  -- detect macro expansion loops
-                                 then throwError $ PandocMacroLoop (T.unpack name)
-                                 else doMacros (n + 1)
-                            ExpandWhenDefined -> return ()
-
-
-setpos :: SourcePos -> Tok -> Tok
-setpos spos (Tok _ tt txt) = Tok spos tt txt
-
-anyControlSeq :: PandocMonad m => LP m Tok
-anyControlSeq = satisfyTok isCtrlSeq
-  where isCtrlSeq (Tok _ (CtrlSeq _) _) = True
-        isCtrlSeq _                     = False
-
-anySymbol :: PandocMonad m => LP m Tok
-anySymbol = satisfyTok isSym
-  where isSym (Tok _ Symbol _) = True
-        isSym _                = False
-
-spaces :: PandocMonad m => LP m ()
-spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
-
-spaces1 :: PandocMonad m => LP m ()
-spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
-
-tokTypeIn :: [TokType] -> Tok -> Bool
-tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
-
-controlSeq :: PandocMonad m => Text -> LP m Tok
-controlSeq name = satisfyTok isNamed
-  where isNamed (Tok _ (CtrlSeq n) _) = n == name
-        isNamed _                     = False
-
-symbol :: PandocMonad m => Char -> LP m Tok
-symbol c = satisfyTok isc
-  where isc (Tok _ Symbol d) = case T.uncons d of
-                                    Just (c',_) -> c == c'
-                                    _           -> False
-        isc _ = False
-
-symbolIn :: PandocMonad m => [Char] -> LP m Tok
-symbolIn cs = satisfyTok isInCs
-  where isInCs (Tok _ Symbol d) = case T.uncons d of
-                                       Just (c,_) -> c `elem` cs
-                                       _          -> False
-        isInCs _ = False
-
-sp :: PandocMonad m => LP m ()
-sp = whitespace <|> endline
-
-whitespace :: PandocMonad m => LP m ()
-whitespace = () <$ satisfyTok isSpaceTok
-  where isSpaceTok (Tok _ Spaces _) = True
-        isSpaceTok _                = False
-
-newlineTok :: PandocMonad m => LP m ()
-newlineTok = () <$ satisfyTok isNewlineTok
-
-isNewlineTok :: Tok -> Bool
-isNewlineTok (Tok _ Newline _) = True
-isNewlineTok _                 = False
-
-comment :: PandocMonad m => LP m ()
-comment = () <$ satisfyTok isCommentTok
-  where isCommentTok (Tok _ Comment _) = True
-        isCommentTok _                 = False
-
-anyTok :: PandocMonad m => LP m Tok
-anyTok = satisfyTok (const True)
-
-endline :: PandocMonad m => LP m ()
-endline = try $ do
-  newlineTok
-  lookAhead anyTok
-  notFollowedBy blankline
-
-blankline :: PandocMonad m => LP m ()
-blankline = try $ skipMany whitespace *> newlineTok
-
-primEscape :: PandocMonad m => LP m Char
-primEscape = do
-  Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
-  case toktype of
-       Esc1 -> case T.uncons (T.drop 2 t) of
-                    Just (c, _)
-                      | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
-                      | otherwise                 -> return (chr (ord c + 64))
-                    Nothing -> fail "Empty content of Esc1"
-       Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
-                    Just x  -> return (chr x)
-                    Nothing -> fail $ "Could not read: " ++ T.unpack t
-       _    -> fail "Expected an Esc1 or Esc2 token" -- should not happen
-
-bgroup :: PandocMonad m => LP m Tok
-bgroup = try $ do
-  skipMany sp
-  symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
-
-egroup :: PandocMonad m => LP m Tok
-egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
-
-grouped :: (PandocMonad m,  Monoid a) => LP m a -> LP m a
-grouped parser = try $ do
-  bgroup
-  -- first we check for an inner 'grouped', because
-  -- {{a,b}} should be parsed the same as {a,b}
-  try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
-
-braced :: PandocMonad m => LP m [Tok]
-braced = bgroup *> braced' 1
-  where braced' (n :: Int) =
-          handleEgroup n <|> handleBgroup n <|> handleOther n
-        handleEgroup n = do
-          t <- egroup
-          if n == 1
-             then return []
-             else (t:) <$> braced' (n - 1)
-        handleBgroup n = do
-          t <- bgroup
-          (t:) <$> braced' (n + 1)
-        handleOther n = do
-          t <- anyTok
-          (t:) <$> braced' n
-
-bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
-bracketed parser = try $ do
-  symbol '['
-  mconcat <$> manyTill parser (symbol ']')
-
-dimenarg :: PandocMonad m => LP m Text
-dimenarg = try $ do
-  ch  <- option False $ True <$ symbol '='
-  Tok _ _ s <- satisfyTok isWordTok
-  guard $ T.take 2 (T.reverse s) `elem`
-           ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
-  let num = T.take (T.length s - 2) s
-  guard $ T.length num > 0
-  guard $ T.all isDigit num
-  return $ T.pack ['=' | ch] <> s
+  fst <$> rawLaTeXParser True (inlineEnvironment <|> inlineCommand') inlines
 
 -- inline elements:
 
@@ -625,13 +194,6 @@ regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
         isRegularSymbol _                = False
         isSpecial c = c `Set.member` specialChars
 
-specialChars :: Set.Set Char
-specialChars = Set.fromList "#$%&~_^\\{}"
-
-isWordTok :: Tok -> Bool
-isWordTok (Tok _ Word _) = True
-isWordTok _              = False
-
 inlineGroup :: PandocMonad m => LP m Inlines
 inlineGroup = do
   ils <- grouped inline
@@ -678,7 +240,7 @@ dosiunitx = do
   skipopts
   value <- tok
   valueprefix <- option "" $ bracketed tok
-  unit <- inlineCommand' <|> tok
+  unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok
   let emptyOr160 "" = ""
       emptyOr160 _  = "\160"
   return . mconcat $ [valueprefix,
@@ -687,11 +249,187 @@ dosiunitx = do
                       emptyOr160 unit,
                       unit]
 
--- siunitx's \square command
-dosquare :: PandocMonad m => LP m Inlines
-dosquare = do
-  unit <- inlineCommand' <|> tok
-  return . mconcat $ [unit, "\178"]
+siUnit :: PandocMonad m => LP m Inlines
+siUnit = do
+  Tok _ (CtrlSeq name) _ <- anyControlSeq
+  if name == "square"
+     then do
+       unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok
+       return . mconcat $ [unit, "\178"]
+     else
+       case M.lookup name siUnitMap of
+            Just il -> return il
+            Nothing -> mzero
+
+siUnitMap :: M.Map Text Inlines
+siUnitMap = M.fromList
+  [ ("fg", str "fg")
+  , ("pg", str "pg")
+  , ("ng", str "ng")
+  , ("ug", str "μg")
+  , ("mg", str "mg")
+  , ("g", str "g")
+  , ("kg", str "kg")
+  , ("amu", str "u")
+  , ("pm", str "pm")
+  , ("nm", str "nm")
+  , ("um", str "μm")
+  , ("mm", str "mm")
+  , ("cm", str "cm")
+  , ("dm", str "dm")
+  , ("m", str "m")
+  , ("km", str "km")
+  , ("as", str "as")
+  , ("fs", str "fs")
+  , ("ps", str "ps")
+  , ("ns", str "ns")
+  , ("us", str "μs")
+  , ("ms", str "ms")
+  , ("s", str "s")
+  , ("fmol", str "fmol")
+  , ("pmol", str "pmol")
+  , ("nmol", str "nmol")
+  , ("umol", str "μmol")
+  , ("mmol", str "mmol")
+  , ("mol", str "mol")
+  , ("kmol", str "kmol")
+  , ("pA", str "pA")
+  , ("nA", str "nA")
+  , ("uA", str "μA")
+  , ("mA", str "mA")
+  , ("A", str "A")
+  , ("kA", str "kA")
+  , ("ul", str "μl")
+  , ("ml", str "ml")
+  , ("l", str "l")
+  , ("hl", str "hl")
+  , ("uL", str "μL")
+  , ("mL", str "mL")
+  , ("L", str "L")
+  , ("hL", str "hL")
+  , ("mHz", str "mHz")
+  , ("Hz", str "Hz")
+  , ("kHz", str "kHz")
+  , ("MHz", str "MHz")
+  , ("GHz", str "GHz")
+  , ("THz", str "THz")
+  , ("mN", str "mN")
+  , ("N", str "N")
+  , ("kN", str "kN")
+  , ("MN", str "MN")
+  , ("Pa", str "Pa")
+  , ("kPa", str "kPa")
+  , ("MPa", str "MPa")
+  , ("GPa", str "GPa")
+  , ("mohm", str "mΩ")
+  , ("kohm", str "kΩ")
+  , ("Mohm", str "MΩ")
+  , ("pV", str "pV")
+  , ("nV", str "nV")
+  , ("uV", str "μV")
+  , ("mV", str "mV")
+  , ("V", str "V")
+  , ("kV", str "kV")
+  , ("W", str "W")
+  , ("uW", str "μW")
+  , ("mW", str "mW")
+  , ("kW", str "kW")
+  , ("MW", str "MW")
+  , ("GW", str "GW")
+  , ("J", str "J")
+  , ("uJ", str "μJ")
+  , ("mJ", str "mJ")
+  , ("kJ", str "kJ")
+  , ("eV", str "eV")
+  , ("meV", str "meV")
+  , ("keV", str "keV")
+  , ("MeV", str "MeV")
+  , ("GeV", str "GeV")
+  , ("TeV", str "TeV")
+  , ("kWh", str "kWh")
+  , ("F", str "F")
+  , ("fF", str "fF")
+  , ("pF", str "pF")
+  , ("K", str "K")
+  , ("dB", str "dB")
+  , ("angstrom", str "Å")
+  , ("arcmin", str "′")
+  , ("arcminute", str "′")
+  , ("arcsecond", str "″")
+  , ("astronomicalunit", str "ua")
+  , ("atomicmassunit", str "u")
+  , ("atto", str "a")
+  , ("bar", str "bar")
+  , ("barn", str "b")
+  , ("becquerel", str "Bq")
+  , ("bel", str "B")
+  , ("candela", str "cd")
+  , ("celsius", str "°C")
+  , ("centi", str "c")
+  , ("coulomb", str "C")
+  , ("dalton", str "Da")
+  , ("day", str "d")
+  , ("deca", str "d")
+  , ("deci", str "d")
+  , ("decibel", str "db")
+  , ("degreeCelsius",str "°C")
+  , ("degree", str "°")
+  , ("deka", str "d")
+  , ("electronvolt", str "eV")
+  , ("exa", str "E")
+  , ("farad", str "F")
+  , ("femto", str "f")
+  , ("giga", str "G")
+  , ("gram", str "g")
+  , ("hectare", str "ha")
+  , ("hecto", str "h")
+  , ("henry", str "H")
+  , ("hertz", str "Hz")
+  , ("hour", str "h")
+  , ("joule", str "J")
+  , ("katal", str "kat")
+  , ("kelvin", str "K")
+  , ("kilo", str "k")
+  , ("kilogram", str "kg")
+  , ("knot", str "kn")
+  , ("liter", str "L")
+  , ("litre", str "l")
+  , ("lumen", str "lm")
+  , ("lux", str "lx")
+  , ("mega", str "M")
+  , ("meter", str "m")
+  , ("metre", str "m")
+  , ("milli", str "m")
+  , ("minute", str "min")
+  , ("mmHg", str "mmHg")
+  , ("mole", str "mol")
+  , ("nano", str "n")
+  , ("nauticalmile", str "M")
+  , ("neper", str "Np")
+  , ("newton", str "N")
+  , ("ohm", str "Ω")
+  , ("Pa", str "Pa")
+  , ("pascal", str "Pa")
+  , ("percent", str "%")
+  , ("per", str "/")
+  , ("peta", str "P")
+  , ("pico", str "p")
+  , ("radian", str "rad")
+  , ("second", str "s")
+  , ("siemens", str "S")
+  , ("sievert", str "Sv")
+  , ("steradian", str "sr")
+  , ("tera", str "T")
+  , ("tesla", str "T")
+  , ("tonne", str "t")
+  , ("volt", str "V")
+  , ("watt", str "W")
+  , ("weber", str "Wb")
+  , ("yocto", str "y")
+  , ("yotta", str "Y")
+  , ("zepto", str "z")
+  , ("zetta", str "Z")
+  ]
 
 lit :: String -> LP m Inlines
 lit = pure . str
@@ -742,13 +480,31 @@ quoted' f starter ender = do
                               cs   -> cs)
      else lit startchs
 
-enquote :: PandocMonad m => LP m Inlines
-enquote = do
+enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
+enquote starred mblang = do
   skipopts
+  let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+  let langspan = case lang of
+                      Nothing -> id
+                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
   quoteContext <- sQuoteContext <$> getState
-  if quoteContext == InDoubleQuote
-     then singleQuoted <$> withQuoteContext InSingleQuote tok
-     else doubleQuoted <$> withQuoteContext InDoubleQuote tok
+  if starred || quoteContext == InDoubleQuote
+     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
+blockquote citations mblang = do
+  citePar <- if citations
+                then do
+                  cs <- cites NormalCitation False
+                  return $ para (cite cs mempty)
+                else return mempty
+  let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+  let langdiv = case lang of
+                      Nothing -> id
+                      Just l  -> divWith ("",[],[("lang", renderLang l)])
+  bs <- grouped block
+  return $ blockQuote . langdiv $ (bs <> citePar)
 
 doAcronym :: PandocMonad m => String -> LP m Inlines
 doAcronym form = do
@@ -791,6 +547,16 @@ dolstinline :: PandocMonad m => LP m Inlines
 dolstinline = do
   options <- option [] keyvals
   let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage
+  doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+  skipopts
+  cls <- toksToString <$> braced
+  doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [String] -> LP m Inlines
+doinlinecode classes = do
   Tok _ Symbol t <- anySymbol
   marker <- case T.uncons t of
               Just (c, ts) | T.null ts -> return c
@@ -803,246 +569,41 @@ dolstinline = do
 keyval :: PandocMonad m => LP m (String, String)
 keyval = try $ do
   Tok _ Word key <- satisfyTok isWordTok
-  let isSpecSym (Tok _ Symbol t) = t /= "]" && t /= ","
-      isSpecSym _                = False
   optional sp
-  val <- option [] $ do
+  val <- option mempty $ do
            symbol '='
            optional sp
-           braced <|> many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym
-                               <|> anyControlSeq)
-  optional sp
+           (untokenize <$> braced) <|>
+             (mconcat <$> many1 (
+                 (untokenize . snd <$> withRaw braced)
+                 <|>
+                 (untokenize <$> (many1
+                      (satisfyTok
+                         (\t -> case t of
+                                Tok _ Symbol "]" -> False
+                                Tok _ Symbol "," -> False
+                                Tok _ Symbol "{" -> False
+                                Tok _ Symbol "}" -> False
+                                _                -> True))))))
   optional (symbol ',')
   optional sp
-  return (T.unpack key, T.unpack . untokenize $ val)
+  return (T.unpack key, T.unpack $ T.strip val)
 
 keyvals :: PandocMonad m => LP m [(String, String)]
 keyvals = try $ symbol '[' >> manyTill keyval (symbol ']')
 
-accent :: PandocMonad m => Char -> (Char -> String) -> LP m Inlines
-accent c f = try $ do
+accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
+accent combiningAccent fallBack = try $ do
   ils <- tok
   case toList ils of
-       (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
-       [Space]           -> return $ str [c]
-       []                -> return $ str [c]
+       (Str (x:xs) : ys) -> return $ fromList $
+         -- try to normalize to the combined character:
+         Str (T.unpack
+           (Normalize.normalize Normalize.NFC
+             (T.pack [x, combiningAccent])) ++ xs) : ys
+       [Space]           -> return $ str [fromMaybe combiningAccent fallBack]
+       []                -> return $ str [fromMaybe combiningAccent fallBack]
        _                 -> return ils
-
-grave :: Char -> String
-grave 'A' = "À"
-grave 'E' = "È"
-grave 'I' = "Ì"
-grave 'O' = "Ò"
-grave 'U' = "Ù"
-grave 'a' = "à"
-grave 'e' = "è"
-grave 'i' = "ì"
-grave 'o' = "ò"
-grave 'u' = "ù"
-grave c   = [c]
-
-acute :: Char -> String
-acute 'A' = "Á"
-acute 'E' = "É"
-acute 'I' = "Í"
-acute 'O' = "Ó"
-acute 'U' = "Ú"
-acute 'Y' = "Ý"
-acute 'a' = "á"
-acute 'e' = "é"
-acute 'i' = "í"
-acute 'o' = "ó"
-acute 'u' = "ú"
-acute 'y' = "ý"
-acute 'C' = "Ć"
-acute 'c' = "ć"
-acute 'L' = "Ĺ"
-acute 'l' = "ĺ"
-acute 'N' = "Ń"
-acute 'n' = "ń"
-acute 'R' = "Ŕ"
-acute 'r' = "ŕ"
-acute 'S' = "Ś"
-acute 's' = "ś"
-acute 'Z' = "Ź"
-acute 'z' = "ź"
-acute c   = [c]
-
-circ :: Char -> String
-circ 'A' = "Â"
-circ 'E' = "Ê"
-circ 'I' = "Î"
-circ 'O' = "Ô"
-circ 'U' = "Û"
-circ 'a' = "â"
-circ 'e' = "ê"
-circ 'i' = "î"
-circ 'o' = "ô"
-circ 'u' = "û"
-circ 'C' = "Ĉ"
-circ 'c' = "ĉ"
-circ 'G' = "Ĝ"
-circ 'g' = "ĝ"
-circ 'H' = "Ĥ"
-circ 'h' = "ĥ"
-circ 'J' = "Ĵ"
-circ 'j' = "ĵ"
-circ 'S' = "Ŝ"
-circ 's' = "ŝ"
-circ 'W' = "Ŵ"
-circ 'w' = "ŵ"
-circ 'Y' = "Ŷ"
-circ 'y' = "ŷ"
-circ c   = [c]
-
-tilde :: Char -> String
-tilde 'A' = "Ã"
-tilde 'a' = "ã"
-tilde 'O' = "Õ"
-tilde 'o' = "õ"
-tilde 'I' = "Ĩ"
-tilde 'i' = "ĩ"
-tilde 'U' = "Ũ"
-tilde 'u' = "ũ"
-tilde 'N' = "Ñ"
-tilde 'n' = "ñ"
-tilde c   = [c]
-
-umlaut :: Char -> String
-umlaut 'A' = "Ä"
-umlaut 'E' = "Ë"
-umlaut 'I' = "Ï"
-umlaut 'O' = "Ö"
-umlaut 'U' = "Ü"
-umlaut 'a' = "ä"
-umlaut 'e' = "ë"
-umlaut 'i' = "ï"
-umlaut 'o' = "ö"
-umlaut 'u' = "ü"
-umlaut c   = [c]
-
-hungarumlaut :: Char -> String
-hungarumlaut 'A' = "A̋"
-hungarumlaut 'E' = "E̋"
-hungarumlaut 'I' = "I̋"
-hungarumlaut 'O' = "Ő"
-hungarumlaut 'U' = "Ű"
-hungarumlaut 'Y' = "ӳ"
-hungarumlaut 'a' = "a̋"
-hungarumlaut 'e' = "e̋"
-hungarumlaut 'i' = "i̋"
-hungarumlaut 'o' = "ő"
-hungarumlaut 'u' = "ű"
-hungarumlaut 'y' = "ӳ"
-hungarumlaut c   = [c]
-
-dot :: Char -> String
-dot 'C' = "Ċ"
-dot 'c' = "ċ"
-dot 'E' = "Ė"
-dot 'e' = "ė"
-dot 'G' = "Ġ"
-dot 'g' = "ġ"
-dot 'I' = "İ"
-dot 'Z' = "Ż"
-dot 'z' = "ż"
-dot c   = [c]
-
-macron :: Char -> String
-macron 'A' = "Ā"
-macron 'E' = "Ē"
-macron 'I' = "Ī"
-macron 'O' = "Ō"
-macron 'U' = "Ū"
-macron 'a' = "ā"
-macron 'e' = "ē"
-macron 'i' = "ī"
-macron 'o' = "ō"
-macron 'u' = "ū"
-macron c   = [c]
-
-cedilla :: Char -> String
-cedilla 'c' = "ç"
-cedilla 'C' = "Ç"
-cedilla 's' = "ş"
-cedilla 'S' = "Ş"
-cedilla 't' = "ţ"
-cedilla 'T' = "Ţ"
-cedilla 'e' = "ȩ"
-cedilla 'E' = "Ȩ"
-cedilla 'h' = "ḩ"
-cedilla 'H' = "Ḩ"
-cedilla 'o' = "o̧"
-cedilla 'O' = "O̧"
-cedilla c   = [c]
-
-hacek :: Char -> String
-hacek 'A' = "Ǎ"
-hacek 'a' = "ǎ"
-hacek 'C' = "Č"
-hacek 'c' = "č"
-hacek 'D' = "Ď"
-hacek 'd' = "ď"
-hacek 'E' = "Ě"
-hacek 'e' = "ě"
-hacek 'G' = "Ǧ"
-hacek 'g' = "ǧ"
-hacek 'H' = "Ȟ"
-hacek 'h' = "ȟ"
-hacek 'I' = "Ǐ"
-hacek 'i' = "ǐ"
-hacek 'j' = "ǰ"
-hacek 'K' = "Ǩ"
-hacek 'k' = "ǩ"
-hacek 'L' = "Ľ"
-hacek 'l' = "ľ"
-hacek 'N' = "Ň"
-hacek 'n' = "ň"
-hacek 'O' = "Ǒ"
-hacek 'o' = "ǒ"
-hacek 'R' = "Ř"
-hacek 'r' = "ř"
-hacek 'S' = "Š"
-hacek 's' = "š"
-hacek 'T' = "Ť"
-hacek 't' = "ť"
-hacek 'U' = "Ǔ"
-hacek 'u' = "ǔ"
-hacek 'Z' = "Ž"
-hacek 'z' = "ž"
-hacek c   = [c]
-
-ogonek :: Char -> String
-ogonek 'a' = "ą"
-ogonek 'e' = "ę"
-ogonek 'o' = "ǫ"
-ogonek 'i' = "į"
-ogonek 'u' = "ų"
-ogonek 'A' = "Ą"
-ogonek 'E' = "Ę"
-ogonek 'I' = "Į"
-ogonek 'O' = "Ǫ"
-ogonek 'U' = "Ų"
-ogonek c   = [c]
-
-breve :: Char -> String
-breve 'A' = "Ă"
-breve 'a' = "ă"
-breve 'E' = "Ĕ"
-breve 'e' = "ĕ"
-breve 'G' = "Ğ"
-breve 'g' = "ğ"
-breve 'I' = "Ĭ"
-breve 'i' = "ĭ"
-breve 'O' = "Ŏ"
-breve 'o' = "ŏ"
-breve 'U' = "Ŭ"
-breve 'u' = "ŭ"
-breve c   = [c]
-
-toksToString :: [Tok] -> String
-toksToString = T.unpack . untokenize
-
 mathDisplay :: String -> Inlines
 mathDisplay = displayMath . trim
 
@@ -1119,7 +680,21 @@ citationLabel  = do
 cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
 cites mode multi = try $ do
   cits <- if multi
-             then many1 simpleCiteArgs
+             then do
+               multiprenote <- optionMaybe $ toList <$> paropt
+               multipostnote <- optionMaybe $ toList <$> paropt
+               let (pre, suf) = case (multiprenote, multipostnote) of
+                     (Just s , Nothing) -> (mempty, s)
+                     (Nothing , Just t) -> (mempty, t)
+                     (Just s , Just t ) -> (s, t)
+                     _                  -> (mempty, mempty)
+               tempCits <- many1 simpleCiteArgs
+               case tempCits of
+                 (k:ks) -> case ks of
+                             (_:_) -> return $ ((addMprenote pre k):init ks) ++
+                                                 [addMpostnote suf (last ks)]
+                             _ -> return [addMprenote pre (addMpostnote suf k)]
+                 _ -> return [[]]
              else count 1 simpleCiteArgs
   let cs = concat cits
   return $ case mode of
@@ -1127,6 +702,17 @@ cites mode multi = try $ do
                              (c:rest) -> c {citationMode = mode} : rest
                              []       -> []
         _            -> map (\a -> a {citationMode = mode}) cs
+  where mprenote (k:ks) = (k:ks) ++ [Space]
+        mprenote _ = mempty
+        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+        mpostnote _ = mempty
+        addMprenote mpn (k:ks) =
+          let mpnfinal = case citationPrefix k of
+                           (_:_) -> mprenote mpn
+                           _ -> mpn
+          in addPrefix mpnfinal (k:ks)
+        addMprenote _ _ = []
+        addMpostnote = addSuffix . mpostnote
 
 citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
 citation name mode multi = do
@@ -1181,22 +767,12 @@ tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
           Tok _ _ t <- singleChar
           return (str (T.unpack t))
 
-singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
-  Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
-  guard $ not $ toktype == Symbol &&
-                T.any (`Set.member` specialChars) t
-  if T.length t > 1
-     then do
-       let (t1, t2) = (T.take 1 t, T.drop 1 t)
-       inp <- getInput
-       setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
-       return $ Tok pos toktype t1
-     else return $ Tok pos toktype t
-
 opt :: PandocMonad m => LP m Inlines
 opt = bracketed inline <|> (str . T.unpack <$> rawopt)
 
+paropt :: PandocMonad m => LP m Inlines
+paropt = parenWrapped inline
+
 rawopt :: PandocMonad m => LP m Text
 rawopt = do
   inner <- untokenize <$> bracketedToks
@@ -1204,30 +780,28 @@ rawopt = do
   return $ "[" <> inner <> "]"
 
 skipopts :: PandocMonad m => LP m ()
-skipopts = skipMany rawopt
+skipopts = skipMany (overlaySpecification <|> void rawopt)
 
 -- opts in angle brackets are used in beamer
-rawangle :: PandocMonad m => LP m ()
-rawangle = try $ do
+overlaySpecification :: PandocMonad m => LP m ()
+overlaySpecification = try $ do
   symbol '<'
-  () <$ manyTill anyTok (symbol '>')
-
-skipangles :: PandocMonad m => LP m ()
-skipangles = skipMany rawangle
-
-ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
-ignore raw = do
-  pos <- getPosition
-  report $ SkippedContent raw pos
-  return mempty
-
-withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
-withRaw parser = do
-  inp <- getInput
-  result <- parser
-  nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
-  let raw = takeWhile (/= nxt) inp
-  return (result, raw)
+  ts <- manyTill overlayTok (symbol '>')
+  guard $ case ts of
+               -- see issue #3368
+               [Tok _ Word s] | T.all isLetter s -> s `elem`
+                                ["beamer","presentation", "trans",
+                                 "handout","article", "second"]
+               _ -> True
+
+overlayTok :: PandocMonad m => LP m Tok
+overlayTok =
+  satisfyTok (\t ->
+                  case t of
+                    Tok _ Word _       -> True
+                    Tok _ Spaces _     -> True
+                    Tok _ Symbol c     -> c `elem` ["-","+","@","|",":",","]
+                    _                  -> False)
 
 inBrackets :: Inlines -> Inlines
 inBrackets x = str "[" <> x <> str "]"
@@ -1275,6 +849,12 @@ inlineEnvironments = M.fromList [
   , ("align*", mathEnvWith id (Just "aligned") "align*")
   , ("alignat", mathEnvWith id (Just "aligned") "alignat")
   , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+  , ("dmath", mathEnvWith id Nothing "dmath")
+  , ("dmath*", mathEnvWith id Nothing "dmath*")
+  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+  , ("darray", mathEnvWith id (Just "aligned") "darray")
+  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
   ]
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
@@ -1289,7 +869,7 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
   , ("texttt", ttfamily)
   , ("sout", extractSpaces strikeout <$> tok)
-  , ("alert", skipangles >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+  , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
   , ("lq", return (str "‘"))
   , ("rq", return (str "’"))
   , ("textquoteleft", return (str "‘"))
@@ -1318,7 +898,7 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")"))
   , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]"))
   , ("ensuremath", mathInline . toksToString <$> braced)
-  , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
+  , ("texorpdfstring", (\x _ -> x) <$> tok <*> tok)
   , ("P", lit "¶")
   , ("S", lit "§")
   , ("$", lit "$")
@@ -1361,20 +941,32 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("copyright", lit "©")
   , ("textasciicircum", lit "^")
   , ("textasciitilde", lit "~")
-  , ("H", accent '\779' hungarumlaut)
-  , ("`", accent '`' grave)
-  , ("'", accent '\'' acute)
-  , ("^", accent '^' circ)
-  , ("~", accent '~' tilde)
-  , ("\"", accent '\776' umlaut)
-  , (".", accent '\775' dot)
-  , ("=", accent '\772' macron)
-  , ("c", accent '\807' cedilla)
-  , ("v", accent 'ˇ' hacek)
-  , ("u", accent '\774' breve)
-  , ("k", accent '\808' ogonek)
-  , ("textogonekcentered", accent '\808' ogonek)
-  , ("i", lit "i")
+  , ("H", accent '\779' Nothing) -- hungarumlaut
+  , ("`", accent '\768' (Just '`')) -- grave
+  , ("'", accent '\769' (Just '\'')) -- acute
+  , ("^", accent '\770' (Just '^')) -- circ
+  , ("~", accent '\771' (Just '~')) -- tilde
+  , ("\"", accent '\776' Nothing) -- umlaut
+  , (".", accent '\775' Nothing) -- dot
+  , ("=", accent '\772' Nothing) -- macron
+  , ("|", accent '\781' Nothing) -- vertical line above
+  , ("b", accent '\817' Nothing) -- macron below
+  , ("c", accent '\807' Nothing) -- cedilla
+  , ("G", accent '\783' Nothing) -- doublegrave
+  , ("h", accent '\777' Nothing) -- hookabove
+  , ("d", accent '\803' Nothing) -- dotbelow
+  , ("f", accent '\785' Nothing)  -- inverted breve
+  , ("r", accent '\778' Nothing)  -- ringabove
+  , ("t", accent '\865' Nothing)  -- double inverted breve
+  , ("U", accent '\782' Nothing)  -- double vertical line above
+  , ("v", accent '\780' Nothing) -- hacek
+  , ("u", accent '\774' Nothing) -- breve
+  , ("k", accent '\808' Nothing) -- ogonek
+  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+  , ("i", lit "ı")  -- dotless i
+  , ("j", lit "ȷ")  -- dotless j
+  , ("newtie", accent '\785' Nothing) -- inverted breve
+  , ("textcircled", accent '\8413' Nothing) -- combining circle
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
                             optional opt
@@ -1392,17 +984,25 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("footnote", skipopts >> note <$> grouped block)
   , ("verb", doverb)
   , ("lstinline", dolstinline)
+  , ("mintinline", domintinline)
   , ("Verb", doverb)
-  , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url ->
+  , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url ->
                   pure (link url "" (str url)))
   , ("href", (unescapeURL . toksToString <$>
-                 braced <* optional sp) >>= \url ->
+                 bracedUrl <* optional sp) >>= \url ->
                    tok >>= \lab -> pure (link url "" lab))
   , ("includegraphics", do options <- option [] keyvals
                            src <- unescapeURL . T.unpack .
                                     removeDoubleQuotes . untokenize <$> braced
                            mkImage options src)
-  , ("enquote", enquote)
+  , ("enquote*", enquote True Nothing)
+  , ("enquote", enquote False Nothing)
+  -- foreignquote is supposed to use native quote marks
+  , ("foreignquote*", braced >>= enquote True . Just . untokenize)
+  , ("foreignquote", braced >>= enquote False . Just . untokenize)
+  -- hypehnquote uses regular quotes
+  , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
+  , ("hyphenquote", braced >>= enquote False . Just . untokenize)
   , ("figurename", doTerm Translations.Figure)
   , ("prefacename", doTerm Translations.Preface)
   , ("refname", doTerm Translations.References)
@@ -1507,13 +1107,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("acsp", doAcronymPlural "abbrv")
   -- siuntix
   , ("SI", dosiunitx)
-  -- units of siuntix
-  , ("celsius", lit "°C")
-  , ("degreeCelsius", lit "°C")
-  , ("gram", lit "g")
-  , ("meter", lit "m")
-  , ("milli", lit "m")
-  , ("square", dosquare)
   -- hyphenat
   , ("bshyp", lit "\\\173")
   , ("fshyp", lit "/\173")
@@ -1542,8 +1135,18 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("Rn", romanNumeralLower)
   -- babel
   , ("foreignlanguage", foreignlanguage)
+  -- include
+  , ("input", include "input")
+  -- plain tex stuff that should just be passed through as raw tex
+  , ("ifdim", ifdim)
   ]
 
+ifdim :: PandocMonad m => LP m Inlines
+ifdim = do
+  contents <- manyTill anyTok (controlSeq "fi")
+  return $ rawInline "latex" $ T.unpack $
+           "\\ifdim" <> untokenize contents <> "\\fi"
+
 makeUppercase :: Inlines -> Inlines
 makeUppercase = fromList . walk (alterStr (map toUpper)) . toList
 
@@ -1693,7 +1296,6 @@ getRawCommand name txt = do
            "def" ->
              void $ manyTill anyTok braced
            _ -> do
-             skipangles
              skipopts
              option "" (try (optional sp *> dimenarg))
              void $ many braced
@@ -1818,7 +1420,6 @@ end_ t = try (do
 preamble :: PandocMonad m => LP m Blocks
 preamble = mempty <$ many preambleBlock
   where preambleBlock =  spaces1
-                     <|> void include
                      <|> void macroDef
                      <|> void blockCommand
                      <|> void braced
@@ -1831,11 +1432,8 @@ paragraph = do
      then return mempty
      else return $ para x
 
-include :: PandocMonad m => LP m Blocks
-include = do
-  (Tok _ (CtrlSeq name) _) <-
-                    controlSeq "include" <|> controlSeq "input" <|>
-                    controlSeq "subfile" <|> controlSeq "usepackage"
+include :: (PandocMonad m, Monoid a) => Text -> LP m a
+include name = do
   skipMany opt
   fs <- (map (T.unpack . removeDoubleQuotes . T.strip) . T.splitOn "," .
          untokenize) <$> braced
@@ -1912,31 +1510,28 @@ letmacro = do
   optional $ symbol '='
   spaces
   contents <- bracedOrToken
-  return (name, Macro ExpandWhenDefined 0 Nothing contents)
+  return (name, Macro ExpandWhenDefined [] Nothing contents)
 
 defmacro :: PandocMonad m => LP m (Text, Macro)
 defmacro = try $ do
   controlSeq "def"
   Tok _ (CtrlSeq name) _ <- anyControlSeq
-  numargs <- option 0 $ argSeq 1
+  argspecs <- many (argspecArg <|> argspecPattern)
   -- we use withVerbatimMode, because macros are to be expanded
   -- at point of use, not point of definition
   contents <- withVerbatimMode bracedOrToken
-  return (name, Macro ExpandWhenUsed numargs Nothing contents)
+  return (name, Macro ExpandWhenUsed argspecs Nothing contents)
 
--- Note: we don't yet support fancy things like #1.#2
-argSeq :: PandocMonad m => Int -> LP m Int
-argSeq n = do
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do
   Tok _ (Arg i) _ <- satisfyTok isArgTok
-  guard $ i == n
-  argSeq (n+1) <|> return n
+  return $ ArgNum i
 
-isArgTok :: Tok -> Bool
-isArgTok (Tok _ (Arg _) _) = True
-isArgTok _                 = False
-
-bracedOrToken :: PandocMonad m => LP m [Tok]
-bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar))
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =
+  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+                                    (toktype' == Symbol || toktype' == Word) &&
+                                    (txt /= "{" && txt /= "\\" && txt /= "}")))
 
 newcommand :: PandocMonad m => LP m (Text, Macro)
 newcommand = do
@@ -1950,6 +1545,7 @@ newcommand = do
     (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
   spaces
   numargs <- option 0 $ try bracketedNum
+  let argspecs = map (\i -> ArgNum i) [1..numargs]
   spaces
   optarg <- option Nothing $ Just <$> try bracketedToks
   spaces
@@ -1959,7 +1555,7 @@ newcommand = do
     case M.lookup name macros of
          Just _  -> report $ MacroAlreadyDefined (T.unpack txt) pos
          Nothing -> return ()
-  return (name, Macro ExpandWhenUsed numargs optarg contents)
+  return (name, Macro ExpandWhenUsed argspecs optarg contents)
 
 newenvironment :: PandocMonad m => LP m (Text, Macro, Macro)
 newenvironment = do
@@ -1972,6 +1568,7 @@ newenvironment = do
   name <- untokenize <$> braced
   spaces
   numargs <- option 0 $ try bracketedNum
+  let argspecs = map (\i -> ArgNum i) [1..numargs]
   spaces
   optarg <- option Nothing $ Just <$> try bracketedToks
   spaces
@@ -1983,13 +1580,8 @@ newenvironment = do
     case M.lookup name macros of
          Just _  -> report $ MacroAlreadyDefined (T.unpack name) pos
          Nothing -> return ()
-  return (name, Macro ExpandWhenUsed numargs optarg startcontents,
-             Macro ExpandWhenUsed 0 Nothing endcontents)
-
-bracketedToks :: PandocMonad m => LP m [Tok]
-bracketedToks = do
-  symbol '['
-  mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']')
+  return (name, Macro ExpandWhenUsed argspecs optarg startcontents,
+             Macro ExpandWhenUsed [] Nothing endcontents)
 
 bracketedNum :: PandocMonad m => LP m Int
 bracketedNum = do
@@ -2003,11 +1595,13 @@ setCaption = do
   ils <- tok
   mblabel <- option Nothing $
                try $ spaces >> controlSeq "label" >> (Just <$> tok)
-  let ils' = case mblabel of
-                  Just lab -> ils <> spanWith
-                                ("",[],[("label", stringify lab)]) mempty
-                  Nothing  -> ils
-  updateState $ \st -> st{ sCaption = Just ils' }
+  let capt = case mblabel of
+                  Just lab -> let slab = stringify lab
+                                  ils' = ils <> spanWith
+                                    ("",[],[("label", slab)]) mempty
+                              in  (Just ils', Just slab)
+                  Nothing  -> (Just ils, Nothing)
+  updateState $ \st -> st{ sCaption = capt }
   return mempty
 
 looseItem :: PandocMonad m => LP m Blocks
@@ -2018,28 +1612,27 @@ looseItem = do
   return mempty
 
 resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption = Nothing }
+resetCaption = updateState $ \st -> st{ sCaption = (Nothing, Nothing) }
 
-section :: PandocMonad m => Bool -> Attr -> Int -> LP m Blocks
-section starred (ident, classes, kvs) lvl = do
+section :: PandocMonad m => Attr -> Int -> LP m Blocks
+section (ident, classes, kvs) lvl = do
   skipopts
   contents <- grouped inline
   lab <- option ident $
           try (spaces >> controlSeq "label"
                >> spaces >> toksToString <$> braced)
-  let classes' = if starred then "unnumbered" : classes else classes
   when (lvl == 0) $
     updateState $ \st -> st{ sHasChapters = True }
-  unless starred $ do
+  unless ("unnumbered" `elem` classes) $ do
     hn <- sLastHeaderNum <$> getState
     hasChapters <- sHasChapters <$> getState
     let lvl' = lvl + if hasChapters then 1 else 0
-    let num = incrementHeaderNum lvl' hn
-    updateState $ \st -> st{ sLastHeaderNum = num }
-    updateState $ \st -> st{ sLabels = M.insert lab
-                            [Str (renderHeaderNum num)]
-                            (sLabels st) }
-  attr' <- registerHeader (lab, classes', kvs) contents
+    let num = incrementDottedNum lvl' hn
+    updateState $ \st -> st{ sLastHeaderNum = num
+                           , sLabels = M.insert lab
+                              [Str (renderDottedNum num)]
+                              (sLabels st) }
+  attr' <- registerHeader (lab, classes, kvs) contents
   return $ headerWith attr' lvl contents
 
 blockCommand :: PandocMonad m => LP m Blocks
@@ -2100,23 +1693,23 @@ blockCommands = M.fromList
    -- Koma-script metadata commands
    , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
    -- sectioning
-   , ("part", section False nullAttr (-1))
-   , ("part*", section True nullAttr (-1))
-   , ("chapter", section False nullAttr 0)
-   , ("chapter*", section True ("",["unnumbered"],[]) 0)
-   , ("section", section False nullAttr 1)
-   , ("section*", section True ("",["unnumbered"],[]) 1)
-   , ("subsection", section False nullAttr 2)
-   , ("subsection*", section True ("",["unnumbered"],[]) 2)
-   , ("subsubsection", section False nullAttr 3)
-   , ("subsubsection*", section True ("",["unnumbered"],[]) 3)
-   , ("paragraph", section False nullAttr 4)
-   , ("paragraph*", section True ("",["unnumbered"],[]) 4)
-   , ("subparagraph", section False nullAttr 5)
-   , ("subparagraph*", section True ("",["unnumbered"],[]) 5)
+   , ("part", section nullAttr (-1))
+   , ("part*", section nullAttr (-1))
+   , ("chapter", section nullAttr 0)
+   , ("chapter*", section ("",["unnumbered"],[]) 0)
+   , ("section", section nullAttr 1)
+   , ("section*", section ("",["unnumbered"],[]) 1)
+   , ("subsection", section nullAttr 2)
+   , ("subsection*", section ("",["unnumbered"],[]) 2)
+   , ("subsubsection", section nullAttr 3)
+   , ("subsubsection*", section ("",["unnumbered"],[]) 3)
+   , ("paragraph", section nullAttr 4)
+   , ("paragraph*", section ("",["unnumbered"],[]) 4)
+   , ("subparagraph", section nullAttr 5)
+   , ("subparagraph*", section ("",["unnumbered"],[]) 5)
    -- beamer slides
-   , ("frametitle", section False nullAttr 3)
-   , ("framesubtitle", section False nullAttr 4)
+   , ("frametitle", section nullAttr 3)
+   , ("framesubtitle", section nullAttr 4)
    -- letters
    , ("opening", (para . trimInlines) <$> (skipopts *> tok))
    , ("closing", skipopts *> closing)
@@ -2152,6 +1745,18 @@ blockCommands = M.fromList
    -- LaTeX colors
    , ("textcolor", coloredBlock "color")
    , ("colorbox", coloredBlock "background-color")
+   -- csquotes
+   , ("blockquote", blockquote False Nothing)
+   , ("blockcquote", blockquote True Nothing)
+   , ("foreignblockquote", braced >>= blockquote False . Just . untokenize)
+   , ("foreignblockcquote", braced >>= blockquote True . Just . untokenize)
+   , ("hyphenblockquote", braced >>= blockquote False . Just . untokenize)
+   , ("hyphenblockcquote", braced >>= blockquote True . Just . untokenize)
+   -- include
+   , ("include", include "include")
+   , ("input", include "input")
+   , ("subfile", include "subfile")
+   , ("usepackage", include "usepackage")
    ]
 
 
@@ -2192,6 +1797,7 @@ environments = M.fromList
    , ("minted", minted)
    , ("obeylines", obeylines)
    , ("tikzpicture", rawVerbEnv "tikzpicture")
+   , ("lilypond", rawVerbEnv "lilypond")
    -- etoolbox
    , ("ifstrequal", ifstrequal)
    , ("newtoggle", braced >>= newToggle)
@@ -2234,7 +1840,7 @@ rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
 rawVerbEnv name = do
   pos <- getPosition
   (_, raw) <- withRaw $ verbEnv name
-  let raw' = "\\begin{tikzpicture}" ++ toksToString raw
+  let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw
   exts <- getOption readerExtensions
   let parseRaw = extensionEnabled Ext_raw_tex exts
   if parseRaw
@@ -2248,7 +1854,20 @@ verbEnv name = withVerbatimMode $ do
   skipopts
   optional blankline
   res <- manyTill anyTok (end_ name)
-  return $ stripTrailingNewlines $ toksToString res
+  return $ T.unpack
+         $ stripTrailingNewline
+         $ untokenize
+         $ res
+
+-- Strip single final newline and any spaces following it.
+-- Input is unchanged if it doesn't end with newline +
+-- optional spaces.
+stripTrailingNewline :: Text -> Text
+stripTrailingNewline t =
+  let (b, e) = T.breakOnEnd "\n" t
+  in  if T.all (== ' ') e
+         then T.dropEnd 1 b
+         else t
 
 fancyverbEnv :: PandocMonad m => Text -> LP m Blocks
 fancyverbEnv name = do
@@ -2303,12 +1922,43 @@ figure = try $ do
 
 addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
 addImageCaption = walkM go
-  where go (Image attr alt (src,tit))
+  where go (Image attr@(_, cls, kvs) alt (src,tit))
             | not ("fig:" `isPrefixOf` tit) = do
-          mbcapt <- sCaption <$> getState
-          return $ case mbcapt of
-               Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
-               Nothing  -> Image attr alt (src,tit)
+          (mbcapt, mblab) <- sCaption <$> getState
+          let (alt', tit') = case mbcapt of
+                               Just ils -> (toList ils, "fig:" ++ tit)
+                               Nothing  -> (alt, tit)
+              attr' = case mblab of
+                        Just lab -> (lab, cls, kvs)
+                        Nothing  -> attr
+          case attr' of
+               ("", _, _)    -> return ()
+               (ident, _, _) -> do
+                  st <- getState
+                  let chapnum =
+                        case (sHasChapters st, sLastHeaderNum st) of
+                             (True, DottedNum (n:_)) -> Just n
+                             _                       -> Nothing
+                  let num = case sLastFigureNum st of
+                       DottedNum [m,n]  ->
+                         case chapnum of
+                              Just m' | m' == m   -> DottedNum [m, n+1]
+                                      | otherwise -> DottedNum [m', 1]
+                              Nothing             -> DottedNum [1]
+                                                      -- shouldn't happen
+                       DottedNum [n]   ->
+                         case chapnum of
+                              Just m  -> DottedNum [m, 1]
+                              Nothing -> DottedNum [n + 1]
+                       _               ->
+                         case chapnum of
+                               Just n  -> DottedNum [n, 1]
+                               Nothing -> DottedNum [1]
+                  setState $
+                    st{ sLastFigureNum = num
+                      , sLabels = M.insert ident
+                                 [Str (renderDottedNum num)] (sLabels st) }
+          return $ Image attr' alt' (src, tit')
         go x = return x
 
 coloredBlock :: PandocMonad m => String -> LP m Blocks
@@ -2321,7 +1971,8 @@ coloredBlock stylename = try $ do
 
 graphicsPath :: PandocMonad m => LP m Blocks
 graphicsPath = do
-  ps <- map toksToString <$> (bgroup *> manyTill braced egroup)
+  ps <- map toksToString <$>
+          (bgroup *> spaces *> manyTill (braced <* spaces) egroup)
   getResourcePath >>= setResourcePath . (++ ps)
   return mempty
 
@@ -2579,7 +2230,7 @@ simpTable envname hasWidthParameter = try $ do
 addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
 addTableCaption = walkM go
   where go (Table c als ws hs rs) = do
-          mbcapt <- sCaption <$> getState
+          (mbcapt, _) <- sCaption <$> getState
           return $ case mbcapt of
                Just ils -> Table (toList ils) als ws hs rs
                Nothing  -> Table c als ws hs rs
@@ -2590,7 +2241,6 @@ block :: PandocMonad m => LP m Blocks
 block = do
   res <- (mempty <$ spaces1)
     <|> environment
-    <|> include
     <|> macroDef
     <|> blockCommand
     <|> paragraph
@@ -2613,137 +2263,3 @@ setDefaultLanguage = do
          setTranslations l
          updateState $ setMeta "lang" $ str (renderLang l)
          return mempty
-
-polyglossiaLangToBCP47 :: M.Map String (String -> Lang)
-polyglossiaLangToBCP47 = M.fromList
-  [ ("arabic", \o -> case filter (/=' ') o of
-       "locale=algeria"    -> Lang "ar" "" "DZ" []
-       "locale=mashriq"    -> Lang "ar" "" "SY" []
-       "locale=libya"      -> Lang "ar" "" "LY" []
-       "locale=morocco"    -> Lang "ar" "" "MA" []
-       "locale=mauritania" -> Lang "ar" "" "MR" []
-       "locale=tunisia"    -> Lang "ar" "" "TN" []
-       _                   -> Lang "ar" "" "" [])
-  , ("german", \o -> case filter (/=' ') o of
-       "spelling=old" -> Lang "de" "" "DE" ["1901"]
-       "variant=austrian,spelling=old"
-                       -> Lang "de" "" "AT" ["1901"]
-       "variant=austrian" -> Lang "de" "" "AT" []
-       "variant=swiss,spelling=old"
-                       -> Lang "de" "" "CH" ["1901"]
-       "variant=swiss" -> Lang "de" "" "CH" []
-       _ -> Lang "de" "" "" [])
-  , ("lsorbian", \_ -> Lang "dsb" "" "" [])
-  , ("greek", \o -> case filter (/=' ') o of
-       "variant=poly"    -> Lang "el" "" "polyton" []
-       "variant=ancient" -> Lang "grc" "" "" []
-       _                 -> Lang "el" "" "" [])
-  , ("english", \o -> case filter (/=' ') o of
-       "variant=australian" -> Lang "en" "" "AU" []
-       "variant=canadian"   -> Lang "en" "" "CA" []
-       "variant=british"    -> Lang "en" "" "GB" []
-       "variant=newzealand" -> Lang "en" "" "NZ" []
-       "variant=american"   -> Lang "en" "" "US" []
-       _                    -> Lang "en" "" "" [])
-  , ("usorbian", \_ -> Lang "hsb" "" "" [])
-  , ("latin", \o -> case filter (/=' ') o of
-       "variant=classic" -> Lang "la" "" "" ["x-classic"]
-       _                 -> Lang "la" "" "" [])
-  , ("slovenian", \_ -> Lang "sl" "" "" [])
-  , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
-  , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
-  , ("afrikaans", \_ -> Lang "af" "" "" [])
-  , ("amharic", \_ -> Lang "am" "" "" [])
-  , ("assamese", \_ -> Lang "as" "" "" [])
-  , ("asturian", \_ -> Lang "ast" "" "" [])
-  , ("bulgarian", \_ -> Lang "bg" "" "" [])
-  , ("bengali", \_ -> Lang "bn" "" "" [])
-  , ("tibetan", \_ -> Lang "bo" "" "" [])
-  , ("breton", \_ -> Lang "br" "" "" [])
-  , ("catalan", \_ -> Lang "ca" "" "" [])
-  , ("welsh", \_ -> Lang "cy" "" "" [])
-  , ("czech", \_ -> Lang "cs" "" "" [])
-  , ("coptic", \_ -> Lang "cop" "" "" [])
-  , ("danish", \_ -> Lang "da" "" "" [])
-  , ("divehi", \_ -> Lang "dv" "" "" [])
-  , ("esperanto", \_ -> Lang "eo" "" "" [])
-  , ("spanish", \_ -> Lang "es" "" "" [])
-  , ("estonian", \_ -> Lang "et" "" "" [])
-  , ("basque", \_ -> Lang "eu" "" "" [])
-  , ("farsi", \_ -> Lang "fa" "" "" [])
-  , ("finnish", \_ -> Lang "fi" "" "" [])
-  , ("french", \_ -> Lang "fr" "" "" [])
-  , ("friulan", \_ -> Lang "fur" "" "" [])
-  , ("irish", \_ -> Lang "ga" "" "" [])
-  , ("scottish", \_ -> Lang "gd" "" "" [])
-  , ("ethiopic", \_ -> Lang "gez" "" "" [])
-  , ("galician", \_ -> Lang "gl" "" "" [])
-  , ("hebrew", \_ -> Lang "he" "" "" [])
-  , ("hindi", \_ -> Lang "hi" "" "" [])
-  , ("croatian", \_ -> Lang "hr" "" "" [])
-  , ("magyar", \_ -> Lang "hu" "" "" [])
-  , ("armenian", \_ -> Lang "hy" "" "" [])
-  , ("interlingua", \_ -> Lang "ia" "" "" [])
-  , ("indonesian", \_ -> Lang "id" "" "" [])
-  , ("icelandic", \_ -> Lang "is" "" "" [])
-  , ("italian", \_ -> Lang "it" "" "" [])
-  , ("japanese", \_ -> Lang "jp" "" "" [])
-  , ("khmer", \_ -> Lang "km" "" "" [])
-  , ("kurmanji", \_ -> Lang "kmr" "" "" [])
-  , ("kannada", \_ -> Lang "kn" "" "" [])
-  , ("korean", \_ -> Lang "ko" "" "" [])
-  , ("lao", \_ -> Lang "lo" "" "" [])
-  , ("lithuanian", \_ -> Lang "lt" "" "" [])
-  , ("latvian", \_ -> Lang "lv" "" "" [])
-  , ("malayalam", \_ -> Lang "ml" "" "" [])
-  , ("mongolian", \_ -> Lang "mn" "" "" [])
-  , ("marathi", \_ -> Lang "mr" "" "" [])
-  , ("dutch", \_ -> Lang "nl" "" "" [])
-  , ("nynorsk", \_ -> Lang "nn" "" "" [])
-  , ("norsk", \_ -> Lang "no" "" "" [])
-  , ("nko", \_ -> Lang "nqo" "" "" [])
-  , ("occitan", \_ -> Lang "oc" "" "" [])
-  , ("panjabi", \_ -> Lang "pa" "" "" [])
-  , ("polish", \_ -> Lang "pl" "" "" [])
-  , ("piedmontese", \_ -> Lang "pms" "" "" [])
-  , ("portuguese", \_ -> Lang "pt" "" "" [])
-  , ("romansh", \_ -> Lang "rm" "" "" [])
-  , ("romanian", \_ -> Lang "ro" "" "" [])
-  , ("russian", \_ -> Lang "ru" "" "" [])
-  , ("sanskrit", \_ -> Lang "sa" "" "" [])
-  , ("samin", \_ -> Lang "se" "" "" [])
-  , ("slovak", \_ -> Lang "sk" "" "" [])
-  , ("albanian", \_ -> Lang "sq" "" "" [])
-  , ("serbian", \_ -> Lang "sr" "" "" [])
-  , ("swedish", \_ -> Lang "sv" "" "" [])
-  , ("syriac", \_ -> Lang "syr" "" "" [])
-  , ("tamil", \_ -> Lang "ta" "" "" [])
-  , ("telugu", \_ -> Lang "te" "" "" [])
-  , ("thai", \_ -> Lang "th" "" "" [])
-  , ("turkmen", \_ -> Lang "tk" "" "" [])
-  , ("turkish", \_ -> Lang "tr" "" "" [])
-  , ("ukrainian", \_ -> Lang "uk" "" "" [])
-  , ("urdu", \_ -> Lang "ur" "" "" [])
-  , ("vietnamese", \_ -> Lang "vi" "" "" [])
-  ]
-
-babelLangToBCP47 :: String -> Maybe Lang
-babelLangToBCP47 s =
-  case s of
-       "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
-       "naustrian" -> Just $ Lang "de" "" "AT" []
-       "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
-       "nswissgerman" -> Just $ Lang "de" "" "CH" []
-       "german" -> Just $ Lang "de" "" "DE" ["1901"]
-       "ngerman" -> Just $ Lang "de" "" "DE" []
-       "lowersorbian" -> Just $ Lang "dsb" "" "" []
-       "uppersorbian" -> Just $ Lang "hsb" "" "" []
-       "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
-       "slovene" -> Just $ Lang "sl" "" "" []
-       "australian" -> Just $ Lang "en" "" "AU" []
-       "canadian" -> Just $ Lang "en" "" "CA" []
-       "british" -> Just $ Lang "en" "" "GB" []
-       "newzealand" -> Just $ Lang "en" "" "NZ" []
-       "american" -> Just $ Lang "en" "" "US" []
-       "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
-       _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
new file mode 100644
index 000000000..9b57c98fd
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -0,0 +1,173 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-
+Copyright (C) 2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Lang
+   Copyright   : Copyright (C) 2018 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Functions for parsing polyglossia and babel language specifiers to
+BCP47 'Lang'.
+-}
+module Text.Pandoc.Readers.LaTeX.Lang
+  ( polyglossiaLangToBCP47
+  , babelLangToBCP47
+  )
+where
+import Prelude
+import qualified Data.Map as M
+import Text.Pandoc.BCP47 (Lang(..))
+
+polyglossiaLangToBCP47 :: M.Map String (String -> Lang)
+polyglossiaLangToBCP47 = M.fromList
+  [ ("arabic", \o -> case filter (/=' ') o of
+       "locale=algeria"    -> Lang "ar" "" "DZ" []
+       "locale=mashriq"    -> Lang "ar" "" "SY" []
+       "locale=libya"      -> Lang "ar" "" "LY" []
+       "locale=morocco"    -> Lang "ar" "" "MA" []
+       "locale=mauritania" -> Lang "ar" "" "MR" []
+       "locale=tunisia"    -> Lang "ar" "" "TN" []
+       _                   -> Lang "ar" "" "" [])
+  , ("german", \o -> case filter (/=' ') o of
+       "spelling=old" -> Lang "de" "" "DE" ["1901"]
+       "variant=austrian,spelling=old"
+                       -> Lang "de" "" "AT" ["1901"]
+       "variant=austrian" -> Lang "de" "" "AT" []
+       "variant=swiss,spelling=old"
+                       -> Lang "de" "" "CH" ["1901"]
+       "variant=swiss" -> Lang "de" "" "CH" []
+       _ -> Lang "de" "" "" [])
+  , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+  , ("greek", \o -> case filter (/=' ') o of
+       "variant=poly"    -> Lang "el" "" "polyton" []
+       "variant=ancient" -> Lang "grc" "" "" []
+       _                 -> Lang "el" "" "" [])
+  , ("english", \o -> case filter (/=' ') o of
+       "variant=australian" -> Lang "en" "" "AU" []
+       "variant=canadian"   -> Lang "en" "" "CA" []
+       "variant=british"    -> Lang "en" "" "GB" []
+       "variant=newzealand" -> Lang "en" "" "NZ" []
+       "variant=american"   -> Lang "en" "" "US" []
+       _                    -> Lang "en" "" "" [])
+  , ("usorbian", \_ -> Lang "hsb" "" "" [])
+  , ("latin", \o -> case filter (/=' ') o of
+       "variant=classic" -> Lang "la" "" "" ["x-classic"]
+       _                 -> Lang "la" "" "" [])
+  , ("slovenian", \_ -> Lang "sl" "" "" [])
+  , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
+  , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
+  , ("afrikaans", \_ -> Lang "af" "" "" [])
+  , ("amharic", \_ -> Lang "am" "" "" [])
+  , ("assamese", \_ -> Lang "as" "" "" [])
+  , ("asturian", \_ -> Lang "ast" "" "" [])
+  , ("bulgarian", \_ -> Lang "bg" "" "" [])
+  , ("bengali", \_ -> Lang "bn" "" "" [])
+  , ("tibetan", \_ -> Lang "bo" "" "" [])
+  , ("breton", \_ -> Lang "br" "" "" [])
+  , ("catalan", \_ -> Lang "ca" "" "" [])
+  , ("welsh", \_ -> Lang "cy" "" "" [])
+  , ("czech", \_ -> Lang "cs" "" "" [])
+  , ("coptic", \_ -> Lang "cop" "" "" [])
+  , ("danish", \_ -> Lang "da" "" "" [])
+  , ("divehi", \_ -> Lang "dv" "" "" [])
+  , ("esperanto", \_ -> Lang "eo" "" "" [])
+  , ("spanish", \_ -> Lang "es" "" "" [])
+  , ("estonian", \_ -> Lang "et" "" "" [])
+  , ("basque", \_ -> Lang "eu" "" "" [])
+  , ("farsi", \_ -> Lang "fa" "" "" [])
+  , ("finnish", \_ -> Lang "fi" "" "" [])
+  , ("french", \_ -> Lang "fr" "" "" [])
+  , ("friulan", \_ -> Lang "fur" "" "" [])
+  , ("irish", \_ -> Lang "ga" "" "" [])
+  , ("scottish", \_ -> Lang "gd" "" "" [])
+  , ("ethiopic", \_ -> Lang "gez" "" "" [])
+  , ("galician", \_ -> Lang "gl" "" "" [])
+  , ("hebrew", \_ -> Lang "he" "" "" [])
+  , ("hindi", \_ -> Lang "hi" "" "" [])
+  , ("croatian", \_ -> Lang "hr" "" "" [])
+  , ("magyar", \_ -> Lang "hu" "" "" [])
+  , ("armenian", \_ -> Lang "hy" "" "" [])
+  , ("interlingua", \_ -> Lang "ia" "" "" [])
+  , ("indonesian", \_ -> Lang "id" "" "" [])
+  , ("icelandic", \_ -> Lang "is" "" "" [])
+  , ("italian", \_ -> Lang "it" "" "" [])
+  , ("japanese", \_ -> Lang "jp" "" "" [])
+  , ("khmer", \_ -> Lang "km" "" "" [])
+  , ("kurmanji", \_ -> Lang "kmr" "" "" [])
+  , ("kannada", \_ -> Lang "kn" "" "" [])
+  , ("korean", \_ -> Lang "ko" "" "" [])
+  , ("lao", \_ -> Lang "lo" "" "" [])
+  , ("lithuanian", \_ -> Lang "lt" "" "" [])
+  , ("latvian", \_ -> Lang "lv" "" "" [])
+  , ("malayalam", \_ -> Lang "ml" "" "" [])
+  , ("mongolian", \_ -> Lang "mn" "" "" [])
+  , ("marathi", \_ -> Lang "mr" "" "" [])
+  , ("dutch", \_ -> Lang "nl" "" "" [])
+  , ("nynorsk", \_ -> Lang "nn" "" "" [])
+  , ("norsk", \_ -> Lang "no" "" "" [])
+  , ("nko", \_ -> Lang "nqo" "" "" [])
+  , ("occitan", \_ -> Lang "oc" "" "" [])
+  , ("panjabi", \_ -> Lang "pa" "" "" [])
+  , ("polish", \_ -> Lang "pl" "" "" [])
+  , ("piedmontese", \_ -> Lang "pms" "" "" [])
+  , ("portuguese", \_ -> Lang "pt" "" "" [])
+  , ("romansh", \_ -> Lang "rm" "" "" [])
+  , ("romanian", \_ -> Lang "ro" "" "" [])
+  , ("russian", \_ -> Lang "ru" "" "" [])
+  , ("sanskrit", \_ -> Lang "sa" "" "" [])
+  , ("samin", \_ -> Lang "se" "" "" [])
+  , ("slovak", \_ -> Lang "sk" "" "" [])
+  , ("albanian", \_ -> Lang "sq" "" "" [])
+  , ("serbian", \_ -> Lang "sr" "" "" [])
+  , ("swedish", \_ -> Lang "sv" "" "" [])
+  , ("syriac", \_ -> Lang "syr" "" "" [])
+  , ("tamil", \_ -> Lang "ta" "" "" [])
+  , ("telugu", \_ -> Lang "te" "" "" [])
+  , ("thai", \_ -> Lang "th" "" "" [])
+  , ("turkmen", \_ -> Lang "tk" "" "" [])
+  , ("turkish", \_ -> Lang "tr" "" "" [])
+  , ("ukrainian", \_ -> Lang "uk" "" "" [])
+  , ("urdu", \_ -> Lang "ur" "" "" [])
+  , ("vietnamese", \_ -> Lang "vi" "" "" [])
+  ]
+
+babelLangToBCP47 :: String -> Maybe Lang
+babelLangToBCP47 s =
+  case s of
+       "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
+       "naustrian" -> Just $ Lang "de" "" "AT" []
+       "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
+       "nswissgerman" -> Just $ Lang "de" "" "CH" []
+       "german" -> Just $ Lang "de" "" "DE" ["1901"]
+       "ngerman" -> Just $ Lang "de" "" "DE" []
+       "lowersorbian" -> Just $ Lang "dsb" "" "" []
+       "uppersorbian" -> Just $ Lang "hsb" "" "" []
+       "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
+       "slovene" -> Just $ Lang "sl" "" "" []
+       "australian" -> Just $ Lang "en" "" "AU" []
+       "canadian" -> Just $ Lang "en" "" "CA" []
+       "british" -> Just $ Lang "en" "" "GB" []
+       "newzealand" -> Just $ Lang "en" "" "NZ" []
+       "american" -> Just $ Lang "en" "" "US" []
+       "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+       _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
new file mode 100644
index 000000000..9256217fe
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -0,0 +1,668 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE FlexibleInstances     #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings     #-}
+{-# LANGUAGE ScopedTypeVariables   #-}
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Parsing
+   Copyright   : Copyright (C) 2006-2018 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+General parsing types and functions for LaTeX.
+-}
+module Text.Pandoc.Readers.LaTeX.Parsing
+  ( DottedNum(..)
+  , renderDottedNum
+  , incrementDottedNum
+  , LaTeXState(..)
+  , defaultLaTeXState
+  , LP
+  , withVerbatimMode
+  , rawLaTeXParser
+  , applyMacros
+  , tokenize
+  , untokenize
+  , untoken
+  , totoks
+  , toksToString
+  , satisfyTok
+  , doMacros
+  , setpos
+  , anyControlSeq
+  , anySymbol
+  , isNewlineTok
+  , isWordTok
+  , isArgTok
+  , spaces
+  , spaces1
+  , tokTypeIn
+  , controlSeq
+  , symbol
+  , symbolIn
+  , sp
+  , whitespace
+  , newlineTok
+  , comment
+  , anyTok
+  , singleChar
+  , specialChars
+  , endline
+  , blankline
+  , primEscape
+  , bgroup
+  , egroup
+  , grouped
+  , braced
+  , braced'
+  , bracedUrl
+  , bracedOrToken
+  , bracketed
+  , bracketedToks
+  , parenWrapped
+  , dimenarg
+  , ignore
+  , withRaw
+  ) where
+
+import Prelude
+import Control.Applicative (many, (<|>))
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Control.Monad.Trans (lift)
+import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
+import Data.Default
+import Data.List (intercalate)
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
+                                        ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Shared
+import Text.Parsec.Pos
+
+newtype DottedNum = DottedNum [Int]
+  deriving (Show)
+
+renderDottedNum :: DottedNum -> String
+renderDottedNum (DottedNum xs) =
+  intercalate "." (map show xs)
+
+incrementDottedNum :: Int -> DottedNum -> DottedNum
+incrementDottedNum level (DottedNum ns) = DottedNum $
+  case reverse (take level (ns ++ repeat 0)) of
+       (x:xs) -> reverse (x+1 : xs)
+       []     -> []  -- shouldn't happen
+
+data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
+                            , sMeta          :: Meta
+                            , sQuoteContext  :: QuoteContext
+                            , sMacros        :: M.Map Text Macro
+                            , sContainers    :: [String]
+                            , sHeaders       :: M.Map Inlines String
+                            , sLogMessages   :: [LogMessage]
+                            , sIdentifiers   :: Set.Set String
+                            , sVerbatimMode  :: Bool
+                            , sCaption       :: (Maybe Inlines, Maybe String)
+                            , sInListItem    :: Bool
+                            , sInTableCell   :: Bool
+                            , sLastHeaderNum :: DottedNum
+                            , sLastFigureNum :: DottedNum
+                            , sLabels        :: M.Map String [Inline]
+                            , sHasChapters   :: Bool
+                            , sToggles       :: M.Map String Bool
+                            }
+     deriving Show
+
+defaultLaTeXState :: LaTeXState
+defaultLaTeXState = LaTeXState{ sOptions       = def
+                              , sMeta          = nullMeta
+                              , sQuoteContext  = NoQuote
+                              , sMacros        = M.empty
+                              , sContainers    = []
+                              , sHeaders       = M.empty
+                              , sLogMessages   = []
+                              , sIdentifiers   = Set.empty
+                              , sVerbatimMode  = False
+                              , sCaption       = (Nothing, Nothing)
+                              , sInListItem    = False
+                              , sInTableCell   = False
+                              , sLastHeaderNum = DottedNum []
+                              , sLastFigureNum = DottedNum []
+                              , sLabels        = M.empty
+                              , sHasChapters   = False
+                              , sToggles       = M.empty
+                              }
+
+instance PandocMonad m => HasQuoteContext LaTeXState m where
+  getQuoteContext = sQuoteContext <$> getState
+  withQuoteContext context parser = do
+    oldState <- getState
+    let oldQuoteContext = sQuoteContext oldState
+    setState oldState { sQuoteContext = context }
+    result <- parser
+    newState <- getState
+    setState newState { sQuoteContext = oldQuoteContext }
+    return result
+
+instance HasLogMessages LaTeXState where
+  addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
+  getLogMessages st = reverse $ sLogMessages st
+
+instance HasIdentifierList LaTeXState where
+  extractIdentifierList     = sIdentifiers
+  updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
+
+instance HasIncludeFiles LaTeXState where
+  getIncludeFiles = sContainers
+  addIncludeFile f s = s{ sContainers = f : sContainers s }
+  dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
+
+instance HasHeaderMap LaTeXState where
+  extractHeaderMap     = sHeaders
+  updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
+
+instance HasMacros LaTeXState where
+  extractMacros  st  = sMacros st
+  updateMacros f st  = st{ sMacros = f (sMacros st) }
+
+instance HasReaderOptions LaTeXState where
+  extractReaderOptions = sOptions
+
+instance HasMeta LaTeXState where
+  setMeta field val st =
+    st{ sMeta = setMeta field val $ sMeta st }
+  deleteMeta field st =
+    st{ sMeta = deleteMeta field $ sMeta st }
+
+instance Default LaTeXState where
+  def = defaultLaTeXState
+
+type LP m = ParserT [Tok] LaTeXState m
+
+withVerbatimMode :: PandocMonad m => LP m a -> LP m a
+withVerbatimMode parser = do
+  updateState $ \st -> st{ sVerbatimMode = True }
+  result <- parser
+  updateState $ \st -> st{ sVerbatimMode = False }
+  return result
+
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+               => Bool -> LP m a -> LP m a -> ParserT String s m (a, String)
+rawLaTeXParser retokenize parser valParser = do
+  inp <- getInput
+  let toks = tokenize "source" $ T.pack inp
+  pstate <- getState
+  let lstate = def{ sOptions = extractReaderOptions pstate }
+  let lstate' = lstate { sMacros = extractMacros pstate }
+  let rawparser = (,) <$> withRaw valParser <*> getState
+  res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+  case res' of
+       Left _    -> mzero
+       Right toks' -> do
+         res <- lift $ runParserT (do when retokenize $ do
+                                        -- retokenize, applying macros
+                                        doMacros 0
+                                        ts <- many (satisfyTok (const True))
+                                        setInput ts
+                                      rawparser)
+                        lstate' "chunk" toks'
+         case res of
+              Left _    -> mzero
+              Right ((val, raw), st) -> do
+                updateState (updateMacros (sMacros st <>))
+                _ <- takeP (T.length (untokenize toks'))
+                return (val, T.unpack (untokenize raw))
+
+applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+            => String -> ParserT String s m String
+applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
+   do let retokenize = doMacros 0 *>
+             (toksToString <$> many (satisfyTok (const True)))
+      pstate <- getState
+      let lstate = def{ sOptions = extractReaderOptions pstate
+                      , sMacros  = extractMacros pstate }
+      res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
+      case res of
+           Left e   -> fail (show e)
+           Right s' -> return s'
+tokenize :: SourceName -> Text -> [Tok]
+tokenize sourcename = totoks (initialPos sourcename)
+
+totoks :: SourcePos -> Text -> [Tok]
+totoks pos t =
+  case T.uncons t of
+       Nothing        -> []
+       Just (c, rest)
+         | c == '\n' ->
+           Tok pos Newline "\n"
+           : totoks (setSourceColumn (incSourceLine pos 1) 1) rest
+         | isSpaceOrTab c ->
+           let (sps, rest') = T.span isSpaceOrTab t
+           in  Tok pos Spaces sps
+               : totoks (incSourceColumn pos (T.length sps))
+                 rest'
+         | isAlphaNum c ->
+           let (ws, rest') = T.span isAlphaNum t
+           in  Tok pos Word ws
+               : totoks (incSourceColumn pos (T.length ws)) rest'
+         | c == '%' ->
+           let (cs, rest') = T.break (== '\n') rest
+           in  Tok pos Comment ("%" <> cs)
+               : totoks (incSourceColumn pos (1 + T.length cs)) rest'
+         | c == '\\' ->
+           case T.uncons rest of
+                Nothing -> [Tok pos (CtrlSeq " ") "\\"]
+                Just (d, rest')
+                  | isLetterOrAt d ->
+                      -- \makeatletter is common in macro defs;
+                      -- ideally we should make tokenization sensitive
+                      -- to \makeatletter and \makeatother, but this is
+                      -- probably best for now
+                      let (ws, rest'') = T.span isLetterOrAt rest
+                          (ss, rest''') = T.span isSpaceOrTab rest''
+                      in  Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
+                          : totoks (incSourceColumn pos
+                               (1 + T.length ws + T.length ss)) rest'''
+                  | isSpaceOrTab d || d == '\n' ->
+                      let (w1, r1) = T.span isSpaceOrTab rest
+                          (w2, (w3, r3)) = case T.uncons r1 of
+                                          Just ('\n', r2)
+                                                  -> (T.pack "\n",
+                                                        T.span isSpaceOrTab r2)
+                                          _ -> (mempty, (mempty, r1))
+                          ws = "\\" <> w1 <> w2 <> w3
+                      in  case T.uncons r3 of
+                               Just ('\n', _) ->
+                                 Tok pos (CtrlSeq " ") ("\\" <> w1)
+                                 : totoks (incSourceColumn pos (T.length ws))
+                                   r1
+                               _ ->
+                                 Tok pos (CtrlSeq " ") ws
+                                 : totoks (incSourceColumn pos (T.length ws))
+                                   r3
+                  | otherwise  ->
+                      Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
+                      : totoks (incSourceColumn pos 2) rest'
+         | c == '#' ->
+           let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
+           in  case safeRead (T.unpack t1) of
+                    Just i ->
+                       Tok pos (Arg i) ("#" <> t1)
+                       : totoks (incSourceColumn pos (1 + T.length t1)) t2
+                    Nothing ->
+                       Tok pos Symbol "#"
+                       : totoks (incSourceColumn pos 1) t2
+         | c == '^' ->
+           case T.uncons rest of
+                Just ('^', rest') ->
+                  case T.uncons rest' of
+                       Just (d, rest'')
+                         | isLowerHex d ->
+                           case T.uncons rest'' of
+                                Just (e, rest''') | isLowerHex e ->
+                                  Tok pos Esc2 (T.pack ['^','^',d,e])
+                                  : totoks (incSourceColumn pos 4) rest'''
+                                _ ->
+                                  Tok pos Esc1 (T.pack ['^','^',d])
+                                  : totoks (incSourceColumn pos 3) rest''
+                         | d < '\128' ->
+                                  Tok pos Esc1 (T.pack ['^','^',d])
+                                  : totoks (incSourceColumn pos 3) rest''
+                       _ -> Tok pos Symbol "^" :
+                            Tok (incSourceColumn pos 1) Symbol "^" :
+                            totoks (incSourceColumn pos 2) rest'
+                _ -> Tok pos Symbol "^"
+                     : totoks (incSourceColumn pos 1) rest
+         | otherwise ->
+           Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
+
+isSpaceOrTab :: Char -> Bool
+isSpaceOrTab ' '  = True
+isSpaceOrTab '\t' = True
+isSpaceOrTab _    = False
+
+isLetterOrAt :: Char -> Bool
+isLetterOrAt '@' = True
+isLetterOrAt c   = isLetter c
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+untokenize :: [Tok] -> Text
+untokenize = mconcat . map untoken
+
+untoken :: Tok -> Text
+untoken (Tok _ _ t) = t
+
+toksToString :: [Tok] -> String
+toksToString = T.unpack . untokenize
+
+satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
+satisfyTok f =
+  try $ do
+    res <- tokenPrim (T.unpack . untoken) updatePos matcher
+    doMacros 0 -- apply macros on remaining input stream
+    return res
+  where matcher t | f t       = Just t
+                  | otherwise = Nothing
+        updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
+        updatePos _spos _ (Tok pos _ _ : _) = pos
+        updatePos spos _ []                 = incSourceColumn spos 1
+
+doMacros :: PandocMonad m => Int -> LP m ()
+doMacros n = do
+  verbatimMode <- sVerbatimMode <$> getState
+  unless verbatimMode $ do
+    inp <- getInput
+    case inp of
+         Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+          Tok _ Word name : Tok _ Symbol "}" : ts
+            -> handleMacros spos name ts
+         Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+          Tok _ Word name : Tok _ Symbol "}" : ts
+            -> handleMacros spos ("end" <> name) ts
+         Tok _ (CtrlSeq "expandafter") _ : t : ts
+            -> do setInput ts
+                  doMacros n
+                  getInput >>= setInput . combineTok t
+         Tok spos (CtrlSeq name) _ : ts
+            -> handleMacros spos name ts
+         _ -> return ()
+  where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
+          | T.all isLetterOrAt w =
+            Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
+              where (x1, x2) = T.break isSpaceOrTab x
+        combineTok t ts = t:ts
+        handleMacros spos name ts = do
+                macros <- sMacros <$> getState
+                case M.lookup name macros of
+                     Nothing -> return ()
+                     Just (Macro expansionPoint argspecs optarg newtoks) -> do
+                       setInput ts
+                       let matchTok (Tok _ toktype txt) =
+                             satisfyTok (\(Tok _ toktype' txt') ->
+                                           toktype == toktype' &&
+                                           txt == txt')
+                       let matchPattern toks = try $ mapM_ matchTok toks
+                       let getargs argmap [] = return argmap
+                           getargs argmap (Pattern toks : rest) = try $ do
+                              matchPattern toks
+                              getargs argmap rest
+                           getargs argmap (ArgNum i : Pattern toks : rest) =
+                             try $ do
+                               x <- mconcat <$> manyTill
+                                     (braced <|> ((:[]) <$> anyTok))
+                                     (matchPattern toks)
+                               getargs (M.insert i x argmap) rest
+                           getargs argmap (ArgNum i : rest) = do
+                             x <- try $ spaces >> bracedOrToken
+                             getargs (M.insert i x argmap) rest
+                       args <- case optarg of
+                                    Nothing -> getargs M.empty argspecs
+                                    Just o  -> do
+                                       x <- option o bracketedToks
+                                       getargs (M.singleton 1 x) argspecs
+                       -- first boolean param is true if we're tokenizing
+                       -- an argument (in which case we don't want to
+                       -- expand #1 etc.)
+                       let addTok False (Tok _ (Arg i) _) acc =
+                              case M.lookup i args of
+                                   Nothing -> mzero
+                                   Just xs -> foldr (addTok True) acc xs
+                           -- see #4007
+                           addTok _ (Tok _ (CtrlSeq x) txt)
+                                  acc@(Tok _ Word _ : _)
+                             | not (T.null txt) &&
+                               isLetter (T.last txt) =
+                               Tok spos (CtrlSeq x) (txt <> " ") : acc
+                           addTok _ t acc = setpos spos t : acc
+                       ts' <- getInput
+                       setInput $ foldr (addTok False) ts' newtoks
+                       case expansionPoint of
+                            ExpandWhenUsed ->
+                              if n > 20  -- detect macro expansion loops
+                                 then throwError $ PandocMacroLoop (T.unpack name)
+                                 else doMacros (n + 1)
+                            ExpandWhenDefined -> return ()
+
+
+setpos :: SourcePos -> Tok -> Tok
+setpos spos (Tok _ tt txt) = Tok spos tt txt
+
+anyControlSeq :: PandocMonad m => LP m Tok
+anyControlSeq = satisfyTok isCtrlSeq
+
+isCtrlSeq :: Tok -> Bool
+isCtrlSeq (Tok _ (CtrlSeq _) _) = True
+isCtrlSeq _                     = False
+
+anySymbol :: PandocMonad m => LP m Tok
+anySymbol = satisfyTok isSymbolTok
+
+isSymbolTok :: Tok -> Bool
+isSymbolTok (Tok _ Symbol _) = True
+isSymbolTok _                = False
+
+isWordTok :: Tok -> Bool
+isWordTok (Tok _ Word _) = True
+isWordTok _              = False
+
+isArgTok :: Tok -> Bool
+isArgTok (Tok _ (Arg _) _) = True
+isArgTok _                 = False
+
+spaces :: PandocMonad m => LP m ()
+spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+spaces1 :: PandocMonad m => LP m ()
+spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+tokTypeIn :: [TokType] -> Tok -> Bool
+tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
+
+controlSeq :: PandocMonad m => Text -> LP m Tok
+controlSeq name = satisfyTok isNamed
+  where isNamed (Tok _ (CtrlSeq n) _) = n == name
+        isNamed _                     = False
+
+symbol :: PandocMonad m => Char -> LP m Tok
+symbol c = satisfyTok isc
+  where isc (Tok _ Symbol d) = case T.uncons d of
+                                    Just (c',_) -> c == c'
+                                    _           -> False
+        isc _ = False
+
+symbolIn :: PandocMonad m => [Char] -> LP m Tok
+symbolIn cs = satisfyTok isInCs
+  where isInCs (Tok _ Symbol d) = case T.uncons d of
+                                       Just (c,_) -> c `elem` cs
+                                       _          -> False
+        isInCs _ = False
+
+sp :: PandocMonad m => LP m ()
+sp = whitespace <|> endline
+
+whitespace :: PandocMonad m => LP m ()
+whitespace = () <$ satisfyTok isSpaceTok
+
+isSpaceTok :: Tok -> Bool
+isSpaceTok (Tok _ Spaces _) = True
+isSpaceTok _                = False
+
+newlineTok :: PandocMonad m => LP m ()
+newlineTok = () <$ satisfyTok isNewlineTok
+
+isNewlineTok :: Tok -> Bool
+isNewlineTok (Tok _ Newline _) = True
+isNewlineTok _                 = False
+
+comment :: PandocMonad m => LP m ()
+comment = () <$ satisfyTok isCommentTok
+
+isCommentTok :: Tok -> Bool
+isCommentTok (Tok _ Comment _) = True
+isCommentTok _                 = False
+
+anyTok :: PandocMonad m => LP m Tok
+anyTok = satisfyTok (const True)
+
+singleChar :: PandocMonad m => LP m Tok
+singleChar = try $ do
+  Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
+  guard $ not $ toktype == Symbol &&
+                T.any (`Set.member` specialChars) t
+  if T.length t > 1
+     then do
+       let (t1, t2) = (T.take 1 t, T.drop 1 t)
+       inp <- getInput
+       setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
+       return $ Tok pos toktype t1
+     else return $ Tok pos toktype t
+
+specialChars :: Set.Set Char
+specialChars = Set.fromList "#$%&~_^\\{}"
+
+endline :: PandocMonad m => LP m ()
+endline = try $ do
+  newlineTok
+  lookAhead anyTok
+  notFollowedBy blankline
+
+blankline :: PandocMonad m => LP m ()
+blankline = try $ skipMany whitespace *> newlineTok
+
+primEscape :: PandocMonad m => LP m Char
+primEscape = do
+  Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
+  case toktype of
+       Esc1 -> case T.uncons (T.drop 2 t) of
+                    Just (c, _)
+                      | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
+                      | otherwise                 -> return (chr (ord c + 64))
+                    Nothing -> fail "Empty content of Esc1"
+       Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
+                    Just x  -> return (chr x)
+                    Nothing -> fail $ "Could not read: " ++ T.unpack t
+       _    -> fail "Expected an Esc1 or Esc2 token" -- should not happen
+
+bgroup :: PandocMonad m => LP m Tok
+bgroup = try $ do
+  skipMany sp
+  symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+
+egroup :: PandocMonad m => LP m Tok
+egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+
+grouped :: (PandocMonad m,  Monoid a) => LP m a -> LP m a
+grouped parser = try $ do
+  bgroup
+  -- first we check for an inner 'grouped', because
+  -- {{a,b}} should be parsed the same as {a,b}
+  try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
+
+braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
+braced' getTok n =
+  handleEgroup <|> handleBgroup <|> handleOther
+  where handleEgroup = do
+          t <- egroup
+          if n == 1
+             then return []
+             else (t:) <$> braced' getTok (n - 1)
+        handleBgroup = do
+          t <- bgroup
+          (t:) <$> braced' getTok (n + 1)
+        handleOther = do
+          t <- getTok
+          (t:) <$> braced' getTok n
+
+braced :: PandocMonad m => LP m [Tok]
+braced = bgroup *> braced' anyTok 1
+
+-- URLs require special handling, because they can contain %
+-- characters.  So we retonenize comments as we go...
+bracedUrl :: PandocMonad m => LP m [Tok]
+bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+
+-- For handling URLs, which allow literal % characters...
+retokenizeComment :: PandocMonad m => LP m ()
+retokenizeComment = (do
+  Tok pos Comment txt <- satisfyTok isCommentTok
+  let updPos (Tok pos' toktype' txt') =
+        Tok (incSourceColumn (incSourceLine pos' (sourceLine pos - 1))
+             (sourceColumn pos)) toktype' txt'
+  let newtoks = map updPos $ tokenize (sourceName pos) $ T.tail txt
+  getInput >>= setInput . ((Tok pos Symbol "%" : newtoks) ++))
+    <|> return ()
+
+bracedOrToken :: PandocMonad m => LP m [Tok]
+bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar))
+
+bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
+bracketed parser = try $ do
+  symbol '['
+  mconcat <$> manyTill parser (symbol ']')
+
+bracketedToks :: PandocMonad m => LP m [Tok]
+bracketedToks = do
+  symbol '['
+  mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']')
+
+parenWrapped :: PandocMonad m => Monoid a => LP m a -> LP m a
+parenWrapped parser = try $ do
+  symbol '('
+  mconcat <$> manyTill parser (symbol ')')
+
+dimenarg :: PandocMonad m => LP m Text
+dimenarg = try $ do
+  ch  <- option False $ True <$ symbol '='
+  Tok _ _ s <- satisfyTok isWordTok
+  guard $ T.take 2 (T.reverse s) `elem`
+           ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
+  let num = T.take (T.length s - 2) s
+  guard $ T.length num > 0
+  guard $ T.all isDigit num
+  return $ T.pack ['=' | ch] <> s
+
+ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore raw = do
+  pos <- getPosition
+  report $ SkippedContent raw pos
+  return mempty
+
+withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
+withRaw parser = do
+  inp <- getInput
+  result <- parser
+  nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
+  let raw = takeWhile (/= nxt) inp
+  return (result, raw)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index fa832114b..e3a302d49 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -31,6 +31,7 @@ Types for LaTeX tokens and macros.
 module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
                                        , TokType(..)
                                        , Macro(..)
+                                       , ArgSpec(..)
                                        , ExpansionPoint(..)
                                        , SourcePos
                                        )
@@ -49,5 +50,8 @@ data Tok = Tok SourcePos TokType Text
 data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
      deriving (Eq, Ord, Show)
 
-data Macro = Macro ExpansionPoint Int (Maybe [Tok]) [Tok]
+data Macro = Macro ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok]
+     deriving Show
+
+data ArgSpec = ArgNum Int | Pattern [Tok]
      deriving Show
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 156b2b622..d1ea7a1a5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE RelaxedPolyRec      #-}
 {-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
 
 {-
 Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
@@ -31,31 +32,28 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Conversion of markdown-formatted plain text to 'Pandoc' document.
 -}
-module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
+module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where
 
 import Prelude
 import Control.Monad
 import Control.Monad.Except (throwError)
+import qualified Data.ByteString.Lazy as BS
 import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
-import qualified Data.HashMap.Strict as H
 import Data.List (intercalate, sortBy, transpose, elemIndex)
 import qualified Data.Map as M
 import Data.Maybe
 import Data.Ord (comparing)
-import Data.Scientific (base10Exponent, coefficient)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.Vector as V
-import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..))
-import qualified Data.Yaml as Yaml
+import qualified Data.YAML as YAML
 import System.FilePath (addExtension, takeExtension)
 import Text.HTML.TagSoup
 import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad (..), report)
 import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Emoji (emojiToInline)
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
@@ -234,11 +232,9 @@ pandocTitleBlock = try $ do
                    $ nullMeta
   updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
 
-
 yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
 yamlMetaBlock = try $ do
   guardEnabled Ext_yaml_metadata_block
-  pos <- getPosition
   string "---"
   blankline
   notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
@@ -246,52 +242,44 @@ yamlMetaBlock = try $ do
   -- by including --- and ..., we allow yaml blocks with just comments:
   let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
   optional blanklines
-  case Yaml.decodeEither' $ UTF8.fromString rawYaml of
-       Right (Yaml.Object hashmap) -> do
-         let alist = H.toList hashmap
-         mapM_ (\(k, v) ->
-             if ignorable k
-                then return ()
-                else do
-                  v' <- yamlToMeta v
-                  let k' = T.unpack k
-                  updateState $ \st -> st{ stateMeta' =
-                     do m <- stateMeta' st
-                        -- if there's already a value, leave it unchanged
-                        case lookupMeta k' m of
-                             Just _ -> return m
-                             Nothing -> do
-                               v'' <- v'
-                               return $ B.setMeta (T.unpack k) v'' m}
-           ) alist
-       Right Yaml.Null -> return ()
+  newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml
+  -- Since `<>` is left-biased, existing values are not touched:
+  updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF }
+  return mempty
+
+-- | Read a YAML string and convert it to pandoc metadata.
+-- String scalars in the YAML are parsed as Markdown.
+yamlToMeta :: PandocMonad m => BS.ByteString -> m Meta
+yamlToMeta bstr = do
+  let parser = do
+        meta <- yamlBsToMeta bstr
+        return $ runF meta defaultParserState
+  parsed <- readWithM parser def ""
+  case parsed of
+    Right result -> return result
+    Left e       -> throwError e
+
+yamlBsToMeta :: PandocMonad m => BS.ByteString -> MarkdownParser m (F Meta)
+yamlBsToMeta bstr = do
+  pos <- getPosition
+  case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
+       Right ((YAML.Doc (YAML.Mapping _ o)):_) -> (fmap Meta) <$> yamlMap o
+       Right [] -> return . return $ mempty
+       Right [YAML.Doc (YAML.Scalar YAML.SNull)] -> return . return $ mempty
        Right _ -> do
-                   logMessage $
-                      CouldNotParseYamlMetadata "not an object"
-                      pos
-                   return ()
+                  logMessage $
+                     CouldNotParseYamlMetadata "not an object"
+                     pos
+                  return . return $ mempty
        Left err' -> do
-                case err' of
-                   InvalidYaml (Just YamlParseException{
-                               yamlProblem = problem
-                             , yamlContext = _ctxt
-                             , yamlProblemMark = Yaml.YamlMark {
-                                   yamlLine = yline
-                                 , yamlColumn = ycol
-                             }}) ->
-                        logMessage $ CouldNotParseYamlMetadata
-                           problem (setSourceLine
-                           (setSourceColumn pos
-                              (sourceColumn pos + ycol))
-                           (sourceLine pos + 1 + yline))
-                   _ -> logMessage $ CouldNotParseYamlMetadata
-                           (show err') pos
-                return ()
-  return mempty
+                    logMessage $ CouldNotParseYamlMetadata
+                                 err' pos
+                    return . return $ mempty
 
--- ignore fields ending with _
-ignorable :: Text -> Bool
-ignorable t = (T.pack "_") `T.isSuffixOf` t
+nodeToKey :: Monad m => YAML.Node -> m Text
+nodeToKey (YAML.Scalar (YAML.SStr t))       = return t
+nodeToKey (YAML.Scalar (YAML.SUnknown _ t)) = return t
+nodeToKey _                                 = fail "Non-string key in YAML mapping"
 
 toMetaValue :: PandocMonad m
             => Text -> MarkdownParser m (F MetaValue)
@@ -312,34 +300,51 @@ toMetaValue x =
         -- not end in a newline, but a "block" set off with
         -- `|` or `>` will.
 
-yamlToMeta :: PandocMonad m
-           => Yaml.Value -> MarkdownParser m (F MetaValue)
-yamlToMeta (Yaml.String t) = toMetaValue t
-yamlToMeta (Yaml.Number n)
-  -- avoid decimal points for numbers that don't need them:
-  | base10Exponent n >= 0     = return $ return $ MetaString $ show
-                                $ coefficient n * (10 ^ base10Exponent n)
-  | otherwise                 = return $ return $ MetaString $ show n
-yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b
-yamlToMeta (Yaml.Array xs) = do
-  xs' <- mapM yamlToMeta (V.toList xs)
+checkBoolean :: Text -> Maybe Bool
+checkBoolean t =
+  if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE"
+     then Just True
+     else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE"
+             then Just False
+             else Nothing
+
+yamlToMetaValue :: PandocMonad m
+                => YAML.Node -> MarkdownParser m (F MetaValue)
+yamlToMetaValue (YAML.Scalar x) =
+  case x of
+       YAML.SStr t       -> toMetaValue t
+       YAML.SBool b      -> return $ return $ MetaBool b
+       YAML.SFloat d     -> return $ return $ MetaString (show d)
+       YAML.SInt i       -> return $ return $ MetaString (show i)
+       YAML.SUnknown _ t ->
+         case checkBoolean t of
+           Just b        -> return $ return $ MetaBool b
+           Nothing       -> toMetaValue t
+       YAML.SNull        -> return $ return $ MetaString ""
+yamlToMetaValue (YAML.Sequence _ xs) = do
+  xs' <- mapM yamlToMetaValue xs
   return $ do
     xs'' <- sequence xs'
     return $ B.toMetaValue xs''
-yamlToMeta (Yaml.Object o) = do
-  let alist = H.toList o
-  foldM (\m (k,v) ->
-           if ignorable k
-              then return m
-              else do
-                v' <- yamlToMeta v
-                return $ do
-                   MetaMap m' <- m
-                   v'' <- v'
-                   return (MetaMap $ M.insert (T.unpack k) v'' m'))
-        (return $ MetaMap M.empty)
-        alist
-yamlToMeta _ = return $ return $ MetaString ""
+yamlToMetaValue (YAML.Mapping _ o) = fmap B.toMetaValue <$> yamlMap o
+yamlToMetaValue _ = return $ return $ MetaString ""
+
+yamlMap :: PandocMonad m
+        => M.Map YAML.Node YAML.Node
+        -> MarkdownParser m (F (M.Map String MetaValue))
+yamlMap o = do
+    kvs <- forM (M.toList o) $ \(key, v) -> do
+             k <- nodeToKey key
+             return (k, v)
+    let kvs' = filter (not . ignorable . fst) kvs
+    (fmap M.fromList . sequence) <$> mapM toMeta kvs'
+  where
+    ignorable t = (T.pack "_") `T.isSuffixOf` t
+    toMeta (k, v) = do
+      fv <- yamlToMetaValue v
+      return $ do
+        v' <- fv
+        return (T.unpack k, v')
 
 stopLine :: PandocMonad m => MarkdownParser m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
@@ -966,7 +971,9 @@ orderedList = try $ do
                <|> return (style == Example)
   items <- fmap sequence $ many1 $ listItem fourSpaceRule
                  (orderedListStart (Just (style, delim)))
-  start' <- (start <$ guardEnabled Ext_startnum) <|> return 1
+  start' <- if style == Example
+               then return start
+               else (start <$ guardEnabled Ext_startnum) <|> return 1
   return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
 
 bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
@@ -1142,10 +1149,9 @@ rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
 rawTeXBlock = do
   guardEnabled Ext_raw_tex
   lookAhead $ try $ char '\\' >> letter
-  result <- (B.rawBlock "context" . trim . concat <$>
-                many1 ((++) <$> (rawConTeXtEnvironment <|> conTeXtCommand)
-                            <*> spnl'))
-          <|> (B.rawBlock "latex" . trim . concat <$>
+  result <- (B.rawBlock "tex" . trim . concat <$>
+                many1 ((++) <$> rawConTeXtEnvironment <*> spnl'))
+          <|> (B.rawBlock "tex" . trim . concat <$>
                 many1 ((++) <$> rawLaTeXBlock <*> spnl'))
   return $ case B.toList result of
                 [RawBlock _ cs]
@@ -1153,9 +1159,6 @@ rawTeXBlock = do
                 -- don't create a raw block for suppressed macro defs
                 _ -> return result
 
-conTeXtCommand :: PandocMonad m => MarkdownParser m String
-conTeXtCommand = oneOfStrings ["\\placeformula"]
-
 rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
 rawHtmlBlocks = do
   (TagOpen tagtype _, raw) <- htmlTag isBlockTag
@@ -1591,7 +1594,7 @@ code = try $ do
   starts <- many1 (char '`')
   skipSpaces
   result <- (trim . concat) <$>
-            many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+            manyTill (many1 (noneOf "`\n") <|> many1 (char '`') <|>
                        (char '\n' >> notFollowedBy' blankline >> return " "))
                       (try (skipSpaces >> count (length starts) (char '`') >>
                       notFollowedBy (char '`')))
@@ -1877,23 +1880,24 @@ bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
 bareURL = try $ do
   guardEnabled Ext_autolink_bare_uris
   getState >>= guard . stateAllowLinks
-  (orig, src) <- uri <|> emailAddress
+  (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
   notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
-  return $ return $ B.link src "" (B.str orig)
+  return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
 
 autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
 autoLink = try $ do
   getState >>= guard . stateAllowLinks
   char '<'
-  (orig, src) <- uri <|> emailAddress
+  (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
   -- in rare cases, something may remain after the uri parser
   -- is finished, because the uri parser tries to avoid parsing
   -- final punctuation.  for example:  in `<http://hi---there>`,
   -- the URI parser will stop before the dashes.
   extra <- fromEntities <$> manyTill nonspaceChar (char '>')
-  attr  <- option nullAttr $ try $
+  attr  <- option ("", [cls], []) $ try $
             guardEnabled Ext_link_attributes >> attributes
-  return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
+  return $ return $ B.linkWith attr (src ++ escapeURI extra) ""
+                     (B.str $ orig ++ extra)
 
 image :: PandocMonad m => MarkdownParser m (F Inlines)
 image = try $ do
@@ -2037,9 +2041,9 @@ emoji = try $ do
   char ':'
   emojikey <- many1 (oneOf emojiChars)
   char ':'
-  case M.lookup emojikey emojis of
-       Just s  -> return (return (B.str s))
-       Nothing -> mzero
+  case emojiToInline emojikey of
+    Just i -> return (return $ B.singleton i)
+    Nothing -> mzero
 
 -- Citations
 
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index fe6b3698c..134598c07 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -35,34 +35,32 @@ TODO:
 - Page breaks (five "*")
 - Org tables
 - table.el tables
-- Images with attributes (floating and width)
 - <cite> tag
 -}
 module Text.Pandoc.Readers.Muse (readMuse) where
 
 import Prelude
 import Control.Monad
+import Control.Monad.Reader
 import Control.Monad.Except (throwError)
 import Data.Bifunctor
-import Data.Char (isLetter)
+import Data.Char (isAlphaNum)
 import Data.Default
-import Data.List (stripPrefix, intercalate)
+import Data.List (intercalate)
 import Data.List.Split (splitOn)
 import qualified Data.Map as M
 import qualified Data.Set as Set
-import Data.Maybe (fromMaybe, isNothing)
+import Data.Maybe (fromMaybe, isNothing, maybeToList)
 import Data.Text (Text, unpack)
-import System.FilePath (takeExtension)
-import Text.HTML.TagSoup
 import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad (..))
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (F)
-import Text.Pandoc.Readers.HTML (htmlTag)
-import Text.Pandoc.Shared (crFilter, underlineSpan)
+import Text.Pandoc.Parsing hiding (F, enclosed)
+import Text.Pandoc.Shared (crFilter, underlineSpan, mapLeft)
 
 -- | Read Muse from an input string and return a Pandoc document.
 readMuse :: PandocMonad m
@@ -70,7 +68,8 @@ readMuse :: PandocMonad m
          -> Text
          -> m Pandoc
 readMuse opts s = do
-  res <- readWithM parseMuse def{ museOptions = opts } (unpack (crFilter s))
+  let input = crFilter s
+  res <- mapLeft (PandocParsecError $ unpack input) `liftM` runReaderT (runParserT parseMuse def{ museOptions = opts } "source" input) def
   case res of
        Left e  -> throwError e
        Right d -> return d
@@ -84,7 +83,6 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
                            , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
                            , museLogMessages :: [LogMessage]
                            , museNotes :: M.Map String (SourcePos, F Blocks)
-                           , museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
                            , museInPara :: Bool -- ^ True when looking for a paragraph terminator
                            }
 
@@ -96,11 +94,17 @@ instance Default MuseState where
                   , museLastStrPos = Nothing
                   , museLogMessages = []
                   , museNotes = M.empty
-                  , museInLink = False
                   , museInPara = False
                   }
 
-type MuseParser = ParserT String MuseState
+data MuseEnv =
+  MuseEnv { museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
+          }
+
+instance Default MuseEnv where
+  def = MuseEnv { museInLink = False }
+
+type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
 
 instance HasReaderOptions MuseState where
   extractReaderOptions = museOptions
@@ -125,11 +129,9 @@ instance HasLogMessages MuseState where
 parseMuse :: PandocMonad m => MuseParser m Pandoc
 parseMuse = do
   many directive
-  blocks <- parseBlocks
+  blocks <- (:) <$> parseBlocks <*> many parseSection
   st <- getState
-  let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
-                     meta <- museMeta st
-                     return $ Pandoc meta bs) st
+  let doc = runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st
   reportLogMessages
   return doc
 
@@ -144,9 +146,8 @@ commonPrefix (x:xs) (y:ys)
 
 -- | Trim up to one newline from the beginning of the string.
 lchop :: String -> String
-lchop s = case s of
-                    '\n':ss -> ss
-                    _       -> s
+lchop ('\n':xs) = xs
+lchop s = s
 
 -- | Trim up to one newline from the end of the string.
 rchop :: String -> String
@@ -165,12 +166,19 @@ atStart p = do
   guard $ museLastStrPos st /= Just pos
   p
 
+firstColumn :: PandocMonad m => MuseParser m ()
+firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
+
 -- * Parsers
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
 eol :: Stream s m Char => ParserT s st m ()
 eol = void newline <|> eof
 
+getIndent :: PandocMonad m
+          => MuseParser m Int
+getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition
+
 someUntil :: (Stream s m t)
           => ParserT s u m a
           -> ParserT s u m b
@@ -179,28 +187,21 @@ someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end
 
 -- ** HTML parsers
 
--- | Parse HTML tag, returning its attributes and literal contents.
-htmlElement :: PandocMonad m
-            => String -- ^ Tag name
-            -> MuseParser m (Attr, String)
-htmlElement tag = try $ do
-  (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
-  content <- manyTill anyChar endtag
-  return (htmlAttrToPandoc attr, content)
+openTag :: PandocMonad m => String -> MuseParser m [(String, String)]
+openTag tag = try $
+  char '<' *> string tag *> manyTill attr (char '>')
   where
-    endtag = void $ htmlTag (~== TagClose tag)
+    attr = try $ (,)
+      <$  many1 spaceChar
+      <*> many1 (noneOf "=\n")
+      <*  string "=\""
+      <*> manyTill (noneOf "\"") (char '"')
 
-htmlBlock :: PandocMonad m
-          => String -- ^ Tag name
-          -> MuseParser m (Attr, String)
-htmlBlock tag = try $ do
-  many spaceChar
-  res <- htmlElement tag
-  manyTill spaceChar eol
-  return res
+closeTag :: PandocMonad m => String -> MuseParser m ()
+closeTag tag = try $ string "</" *> string tag *> void (char '>')
 
 -- | Convert HTML attributes to Pandoc 'Attr'
-htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc :: [(String, String)] -> Attr
 htmlAttrToPandoc attrs = (ident, classes, keyvals)
   where
     ident   = fromMaybe "" $ lookup "id" attrs
@@ -211,15 +212,12 @@ parseHtmlContent :: PandocMonad m
                  => String -- ^ Tag name
                  -> MuseParser m (Attr, F Blocks)
 parseHtmlContent tag = try $ do
-  many spaceChar
-  pos <- getPosition
-  (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+  indent <- getIndent
+  attr <- openTag tag
   manyTill spaceChar eol
-  content <- parseBlocksTill $ try $ count (sourceColumn pos - 1) spaceChar >> endtag
+  content <- parseBlocksTill $ try $ count indent spaceChar *> closeTag tag
   manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline
   return (htmlAttrToPandoc attr, content)
-  where
-    endtag = void $ htmlTag (~== TagClose tag)
 
 -- ** Directive parsers
 
@@ -228,21 +226,19 @@ parseDirectiveKey :: PandocMonad m => MuseParser m String
 parseDirectiveKey = char '#' *> many (letter <|> char '-')
 
 parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines)
-parseEmacsDirective = do
-  key <- parseDirectiveKey
-  spaceChar
-  value <- trimInlinesF . mconcat <$> manyTill (choice inlineList) eol
-  return (key, value)
+parseEmacsDirective = (,)
+  <$> parseDirectiveKey
+  <*  spaceChar
+  <*> (trimInlinesF . mconcat <$> manyTill inline' eol)
 
 parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
-parseAmuseDirective = do
-  key <- parseDirectiveKey
-  many1 spaceChar
-  value <- trimInlinesF . mconcat <$> many1Till inline endOfDirective
-  many blankline
-  return (key, value)
+parseAmuseDirective = (,)
+  <$> parseDirectiveKey
+  <*  many1 spaceChar
+  <*> (trimInlinesF . mconcat <$> many1Till inline endOfDirective)
+  <*  many blankline
   where
-    endOfDirective = lookAhead $ eof <|> try (newline >> (void blankline <|> void parseDirectiveKey))
+    endOfDirective = lookAhead $ eof <|> try (newline *> (void blankline <|> void parseDirectiveKey))
 
 directive :: PandocMonad m => MuseParser m ()
 directive = do
@@ -254,17 +250,20 @@ directive = do
 
 -- ** Block parsers
 
+-- | Parse section contents until EOF or next header
 parseBlocks :: PandocMonad m
             => MuseParser m (F Blocks)
 parseBlocks =
   try (parseEnd <|>
+       nextSection <|>
        blockStart <|>
        listStart <|>
        paraStart)
   where
+    nextSection = mempty <$ lookAhead headingStart
     parseEnd = mempty <$ eof
-    blockStart = ((B.<>) <$> (emacsHeading <|> blockElements <|> emacsNoteBlock)
-                         <*> parseBlocks) <|> (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks)
+    blockStart = (B.<>) <$> (blockElements <|> emacsNoteBlock)
+                        <*> parseBlocks
     listStart = do
       updateState (\st -> st { museInPara = False })
       uncurry (B.<>) <$> (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks)
@@ -273,6 +272,13 @@ parseBlocks =
       uncurry (B.<>) . first (p indent) <$> paraUntil parseBlocks
       where p indent = if indent >= 2 && indent < 6 then fmap B.blockQuote else id
 
+-- | Parse section that starts with a header
+parseSection :: PandocMonad m
+             => MuseParser m (F Blocks)
+parseSection =
+  ((B.<>) <$> emacsHeading <*> parseBlocks) <|>
+  (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks)
+
 parseBlocksTill :: PandocMonad m
                 => MuseParser m a
                 -> MuseParser m (F Blocks)
@@ -347,31 +353,32 @@ blockElements = do
 
 -- | Parse a line comment, starting with @;@ in the first column.
 comment :: PandocMonad m => MuseParser m (F Blocks)
-comment = try $ do
-  getPosition >>= \pos -> guard (sourceColumn pos == 1)
-  char ';'
-  optional (spaceChar >> many (noneOf "\n"))
-  eol
-  return mempty
+comment = try $ mempty
+  <$ firstColumn
+  <* char ';'
+  <* optional (spaceChar *> many (noneOf "\n"))
+  <* eol
 
 -- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters.
 separator :: PandocMonad m => MuseParser m (F Blocks)
-separator = try $ do
-  string "----"
-  many $ char '-'
-  many spaceChar
-  eol
-  return $ return B.horizontalRule
+separator = try $ pure B.horizontalRule
+  <$ string "----"
+  <* many (char '-')
+  <* many spaceChar
+  <* eol
+
+headingStart :: PandocMonad m => MuseParser m (String, Int)
+headingStart = try $ (,)
+  <$> option "" (try (parseAnchor <* manyTill spaceChar eol))
+  <*  firstColumn
+  <*> fmap length (many1 $ char '*')
+  <*  spaceChar
 
 -- | Parse a single-line heading.
 emacsHeading :: PandocMonad m => MuseParser m (F Blocks)
 emacsHeading = try $ do
   guardDisabled Ext_amuse
-  anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol)
-  getPosition >>= \pos -> guard (sourceColumn pos == 1)
-  level <- fmap length $ many1 $ char '*'
-  guard $ level <= 5
-  spaceChar
+  (anchorId, level) <- headingStart
   content <- trimInlinesF . mconcat <$> manyTill inline eol
   attr <- registerHeader (anchorId, [], []) (runF content def)
   return $ B.headerWith attr level <$> content
@@ -383,11 +390,7 @@ amuseHeadingUntil :: PandocMonad m
                   -> MuseParser m (F Blocks, a)
 amuseHeadingUntil end = try $ do
   guardEnabled Ext_amuse
-  anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol)
-  getPosition >>= \pos -> guard (sourceColumn pos == 1)
-  level <- fmap length $ many1 $ char '*'
-  guard $ level <= 5
-  spaceChar
+  (anchorId, level) <- headingStart
   (content, e) <- paraContentsUntil end
   attr <- registerHeader (anchorId, [], []) (runF content def)
   return (B.headerWith attr level <$> content, e)
@@ -395,33 +398,28 @@ amuseHeadingUntil end = try $ do
 -- | Parse an example between @{{{@ and @}}}@.
 -- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation.
 example :: PandocMonad m => MuseParser m (F Blocks)
-example = try $ do
-  string "{{{"
-  optional blankline
-  contents <- manyTill anyChar $ try (optional blankline >> string "}}}")
-  return $ return $ B.codeBlock contents
+example = try $ pure . B.codeBlock
+  <$  string "{{{"
+  <*  optional blankline
+  <*> manyTill anyChar (try (optional blankline *> string "}}}"))
 
 -- | Parse an @\<example>@ tag.
 exampleTag :: PandocMonad m => MuseParser m (F Blocks)
-exampleTag = try $ do
-  (attr, contents) <- htmlBlock "example"
-  return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents
+exampleTag = try $ fmap pure $ B.codeBlockWith
+  <$  many spaceChar
+  <*> (htmlAttrToPandoc <$> openTag "example")
+  <*> (rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop <$> manyTill anyChar (closeTag "example"))
+  <*  manyTill spaceChar eol
 
 -- | Parse a @\<literal>@ tag as a raw block.
 -- For 'RawInline' @\<literal>@ parser, see 'inlineLiteralTag'.
 literalTag :: PandocMonad m => MuseParser m (F Blocks)
-literalTag = try $ do
-  many spaceChar
-  (TagOpen _ attr, _) <- htmlTag (~== TagOpen "literal" [])
-  manyTill spaceChar eol
-  content <- manyTill anyChar endtag
-  manyTill spaceChar eol
-  return $ return $ rawBlock (htmlAttrToPandoc attr, content)
-  where
-    endtag = void $ htmlTag (~== TagClose "literal")
-    -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
-    format (_, _, kvs)        = fromMaybe "html" $ lookup "style" kvs
-    rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content
+literalTag = try $ fmap pure $ B.rawBlock
+  <$  many spaceChar
+  <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
+  <*  manyTill spaceChar eol
+  <*> (rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop <$> manyTill anyChar (closeTag "literal"))
+  <*  manyTill spaceChar eol
 
 -- | Parse @\<center>@ tag.
 -- Currently it is ignored as Pandoc cannot represent centered blocks.
@@ -459,25 +457,27 @@ playTag = do
   fmap (B.divWith ("", ["play"], [])) . snd <$> parseHtmlContent "play"
 
 verseLine :: PandocMonad m => MuseParser m (F Inlines)
-verseLine = do
-  indent <- (B.str <$> many1 (char ' ' >> pure '\160')) <|> pure mempty
-  rest <- manyTill (choice inlineList) newline
-  return $ trimInlinesF $ mconcat (pure indent : rest)
-
-verseLines :: PandocMonad m => MuseParser m (F Blocks)
-verseLines = do
-  lns <- many verseLine
-  return $ B.lineBlock <$> sequence lns
+verseLine = (<>)
+  <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' ')))
+  <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol)
 
 -- | Parse @\<verse>@ tag.
 verseTag :: PandocMonad m => MuseParser m (F Blocks)
-verseTag = do
-  (_, content) <- htmlBlock "verse"
-  parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content)
+verseTag = try $ do
+  indent <- getIndent
+  openTag "verse"
+  manyTill spaceChar eol
+  content <- sequence <$> manyTill (count indent spaceChar *> verseLine) (try $ count indent spaceChar *> closeTag "verse")
+  manyTill spaceChar eol
+  return $ B.lineBlock <$> content
 
 -- | Parse @\<comment>@ tag.
 commentTag :: PandocMonad m => MuseParser m (F Blocks)
-commentTag = htmlBlock "comment" >> return mempty
+commentTag = try $ mempty
+  <$ many spaceChar
+  <* openTag "comment"
+  <* manyTill anyChar (closeTag "comment")
+  <* manyTill spaceChar eol
 
 -- | Parse paragraph contents.
 paraContentsUntil :: PandocMonad m
@@ -485,7 +485,7 @@ paraContentsUntil :: PandocMonad m
                   -> MuseParser m (F Inlines, a)
 paraContentsUntil end = do
   updateState (\st -> st { museInPara = True })
-  (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end)
+  (l, e) <- someUntil inline $ try (manyTill spaceChar eol *> end)
   updateState (\st -> st { museInPara = False })
   return (trimInlinesF $ mconcat l, e)
 
@@ -499,9 +499,10 @@ paraUntil end = do
   first (fmap B.para) <$> paraContentsUntil end
 
 noteMarker :: PandocMonad m => MuseParser m String
-noteMarker = try $ do
-  char '['
-  (:) <$> oneOf "123456789" <*> manyTill digit (char ']')
+noteMarker = try $ (:)
+  <$  char '['
+  <*> oneOf "123456789"
+  <*> manyTill digit (char ']')
 
 -- Amusewiki version of note
 -- Parsing is similar to list item, except that note marker is used instead of list marker
@@ -541,27 +542,15 @@ emacsNoteBlock = try $ do
 -- Verse markup
 --
 
-lineVerseLine :: PandocMonad m => MuseParser m (F Inlines)
-lineVerseLine = try $ do
-  string "> "
-  indent <- many (char ' ' >> pure '\160')
-  let indentEl = if null indent then mempty else B.str indent
-  rest <- manyTill (choice inlineList) eol
-  return $ trimInlinesF $ mconcat (pure indentEl : rest)
-
-blanklineVerseLine :: PandocMonad m => MuseParser m (F Inlines)
-blanklineVerseLine = try $ do
-  char '>'
-  blankline
-  pure mempty
-
 -- | Parse a line block indicated by @\'>\'@ characters.
 lineBlock :: PandocMonad m => MuseParser m (F Blocks)
 lineBlock = try $ do
-  many spaceChar
-  col <- sourceColumn <$> getPosition
-  lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1))
+  indent <- getIndent
+  lns <- (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent)
   return $ B.lineBlock <$> sequence lns
+  where
+    blankVerseLine = try $ mempty <$ char '>' <* blankline
+    nonblankVerseLine = try (string "> ") *> verseLine
 
 -- *** List parsers
 
@@ -573,7 +562,7 @@ bulletListItemsUntil indent end = try $ do
   char '-'
   void spaceChar <|> lookAhead eol
   updateState (\st -> st { museInPara = False })
-  (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (([],) <$> end)
+  (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end)
   return (x:xs, e)
 
 -- | Parse a bullet list.
@@ -581,19 +570,9 @@ bulletListUntil :: PandocMonad m
                 => MuseParser m a
                 -> MuseParser m (F Blocks, a)
 bulletListUntil end = try $ do
-  many spaceChar
-  pos <- getPosition
-  let indent = sourceColumn pos - 1
+  indent <- getIndent
   guard $ indent /= 0
-  (items, e) <- bulletListItemsUntil indent end
-  return (B.bulletList <$> sequence items, e)
-
--- | Parses an ordered list marker and returns list attributes.
-anyMuseOrderedListMarker :: PandocMonad m => MuseParser m ListAttributes
-anyMuseOrderedListMarker = do
-  (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha
-  char '.'
-  return (start, style, Period)
+  first (fmap B.bulletList . sequence) <$> bulletListItemsUntil indent end
 
 museOrderedListMarker :: PandocMonad m
                       => ListNumberStyle
@@ -620,7 +599,7 @@ orderedListItemsUntil indent style end =
       pos <- getPosition
       void spaceChar <|> lookAhead eol
       updateState (\st -> st { museInPara = False })
-      (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (([],) <$> end)
+      (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end)
       return (x:xs, e)
 
 -- | Parse an ordered list.
@@ -628,14 +607,12 @@ orderedListUntil :: PandocMonad m
                  => MuseParser m a
                  -> MuseParser m (F Blocks, a)
 orderedListUntil end = try $ do
-  many spaceChar
-  pos <- getPosition
-  let indent = sourceColumn pos - 1
+  indent <- getIndent
   guard $ indent /= 0
-  p@(_, style, _) <- anyMuseOrderedListMarker
-  guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman]
-  (items, e) <- orderedListItemsUntil indent style end
-  return (B.orderedListWith p <$> sequence items, e)
+  (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha
+  char '.'
+  first (fmap (B.orderedListWith (start, style, Period)) . sequence)
+    <$> orderedListItemsUntil indent style end
 
 descriptionsUntil :: PandocMonad m
                   => Int
@@ -644,7 +621,7 @@ descriptionsUntil :: PandocMonad m
 descriptionsUntil indent end = do
   void spaceChar <|> lookAhead eol
   updateState (\st -> st { museInPara = False })
-  (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (([],) <$> end)
+  (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end)
   return (x:xs, e)
 
 definitionListItemsUntil :: PandocMonad m
@@ -656,8 +633,8 @@ definitionListItemsUntil indent end =
   where
     continuation = try $ do
       pos <- getPosition
-      term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (try $ string "::")
-      (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> continuation) <|> (([],) <$> end))
+      term <- trimInlinesF . mconcat <$> manyTill inline' (try $ string "::")
+      (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> continuation) <|> (([],) <$> end))
       let xx = (,) <$> term <*> sequence x
       return (xx:xs, e)
 
@@ -666,9 +643,7 @@ definitionListUntil :: PandocMonad m
                     => MuseParser m a -- ^ Terminator parser
                     -> MuseParser m (F Blocks, a)
 definitionListUntil end = try $ do
-  many spaceChar
-  pos <- getPosition
-  let indent = sourceColumn pos - 1
+  indent <- getIndent
   guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse
   first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end
 
@@ -713,7 +688,7 @@ museAppendElement element tbl =
 
 tableCell :: PandocMonad m => MuseParser m (F Blocks)
 tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd)
-  where cellEnd = try $ void (many1 spaceChar >> char '|') <|> eol
+  where cellEnd = try $ void (many1 spaceChar *> char '|') <|> eol
 
 tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement])
 tableElements = sequence <$> (tableParseElement `sepEndBy1` eol)
@@ -735,11 +710,10 @@ tableParseElement = tableParseHeader
 tableParseRow :: PandocMonad m
               => Int -- ^ Number of separator characters
               -> MuseParser m (F [Blocks])
-tableParseRow n = try $ do
-  fields <- tableCell `sepBy2` fieldSep
-  return $ sequence fields
-    where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p)
-          fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline))
+tableParseRow n = try $
+  sequence <$> (tableCell `sepBy2` fieldSep)
+    where p `sepBy2` sep = (:) <$> p <*> many1 (sep *> p)
+          fieldSep = many1 spaceChar *> count n (char '|') *> (void (many1 spaceChar) <|> void (lookAhead newline))
 
 -- | Parse a table header row.
 tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement)
@@ -755,53 +729,51 @@ tableParseFooter = fmap MuseFooterRow <$> tableParseRow 3
 
 -- | Parse table caption.
 tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement)
-tableParseCaption = try $ do
-  many spaceChar
-  string "|+"
-  fmap MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|"))
+tableParseCaption = try $ fmap MuseCaption . trimInlinesF . mconcat
+  <$  many spaceChar
+  <*  string "|+"
+  <*> many1Till inline (try $ string "+|")
 
 -- ** Inline parsers
 
-inlineList :: PandocMonad m => [MuseParser m (F Inlines)]
-inlineList = [ whitespace
-             , br
-             , anchor
-             , footnote
-             , strong
-             , strongTag
-             , emph
-             , emphTag
-             , underlined
-             , superscriptTag
-             , subscriptTag
-             , strikeoutTag
-             , verbatimTag
-             , classTag
-             , nbsp
-             , link
-             , code
-             , codeTag
-             , mathTag
-             , inlineLiteralTag
-             , str
-             , symbol
-             ]
+inline' :: PandocMonad m => MuseParser m (F Inlines)
+inline' = whitespace
+      <|> br
+      <|> anchor
+      <|> footnote
+      <|> strong
+      <|> strongTag
+      <|> emph
+      <|> emphTag
+      <|> underlined
+      <|> superscriptTag
+      <|> subscriptTag
+      <|> strikeoutTag
+      <|> verbatimTag
+      <|> classTag
+      <|> nbsp
+      <|> linkOrImage
+      <|> code
+      <|> codeTag
+      <|> mathTag
+      <|> inlineLiteralTag
+      <|> str
+      <|> symbol
+      <?> "inline"
 
 inline :: PandocMonad m => MuseParser m (F Inlines)
-inline = endline <|> choice inlineList <?> "inline"
+inline = endline <|> inline'
 
 -- | Parse a soft break.
 endline :: PandocMonad m => MuseParser m (F Inlines)
-endline = try $ do
-  newline
-  notFollowedBy blankline
-  return $ return B.softbreak
+endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline
 
 parseAnchor :: PandocMonad m => MuseParser m String
-parseAnchor = try $ do
-  getPosition >>= \pos -> guard (sourceColumn pos == 1)
-  char '#'
-  (:) <$> letter <*> many (letter <|> digit <|> char '-')
+parseAnchor = try $ (:)
+  <$  firstColumn
+  <*  char '#'
+  <*> letter
+  <*> many (letter <|> digit <|> char '-')
 
 anchor :: PandocMonad m => MuseParser m (F Inlines)
 anchor = try $ do
@@ -812,7 +784,7 @@ anchor = try $ do
 -- | Parse a footnote reference.
 footnote :: PandocMonad m => MuseParser m (F Inlines)
 footnote = try $ do
-  inLink <- museInLink <$> getState
+  inLink <- asks museInLink
   guard $ not inLink
   ref <- noteMarker
   return $ do
@@ -825,33 +797,38 @@ footnote = try $ do
         return $ B.note contents'
 
 whitespace :: PandocMonad m => MuseParser m (F Inlines)
-whitespace = try $ do
-  skipMany1 spaceChar
-  return $ return B.space
+whitespace = try $ pure B.space <$ skipMany1 spaceChar
 
 -- | Parse @\<br>@ tag.
 br :: PandocMonad m => MuseParser m (F Inlines)
-br = try $ do
-  string "<br>"
-  return $ return B.linebreak
+br = try $ pure B.linebreak <$ string "<br>"
 
 emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines)
 emphasisBetween c = try $ enclosedInlines c c
 
+-- | Parses material enclosed between start and end parsers.
+enclosed :: (Show end, Stream s  m Char) => ParserT s st m t   -- ^ start parser
+         -> ParserT s st m end  -- ^ end parser
+         -> ParserT s st m a    -- ^ content parser (to be used repeatedly)
+         -> ParserT s st m [a]
+enclosed start end parser = try $
+  start *> notFollowedBy spaceChar *> many1Till parser end
+
 enclosedInlines :: (PandocMonad m, Show a, Show b)
                 => MuseParser m a
                 -> MuseParser m b
                 -> MuseParser m (F Inlines)
-enclosedInlines start end = try $
-  trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter))
+enclosedInlines start end = try $ trimInlinesF . mconcat
+  <$> enclosed (atStart start) end inline
+  <*  notFollowedBy (satisfy isAlphaNum)
 
 -- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
 inlineTag :: PandocMonad m
           => String -- ^ Tag name
           -> MuseParser m (F Inlines)
-inlineTag tag = try $ do
-  htmlTag (~== TagOpen tag [])
-  mconcat <$> manyTill inline (void $ htmlTag (~== TagClose tag))
+inlineTag tag = try $ mconcat
+  <$  openTag tag
+  <*> manyTill inline (closeTag tag)
 
 -- | Parse strong inline markup, indicated by @**@.
 strong :: PandocMonad m => MuseParser m (F Inlines)
@@ -864,9 +841,9 @@ emph = fmap B.emph <$> emphasisBetween (char '*')
 -- | Parse underline inline markup, indicated by @_@.
 -- Supported only in Emacs Muse mode, not Text::Amuse.
 underlined :: PandocMonad m => MuseParser m (F Inlines)
-underlined = do
-  guardDisabled Ext_amuse -- Supported only by Emacs Muse
-  fmap underlineSpan <$> emphasisBetween (char '_')
+underlined = fmap underlineSpan
+  <$  guardDisabled Ext_amuse -- Supported only by Emacs Muse
+  <*> emphasisBetween (char '_')
 
 -- | Parse @\<strong>@ tag.
 strongTag :: PandocMonad m => MuseParser m (F Inlines)
@@ -890,21 +867,20 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del"
 
 -- | Parse @\<verbatim>@ tag.
 verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
-verbatimTag = return . B.text . snd <$> htmlElement "verbatim"
+verbatimTag = return . B.text
+  <$  openTag "verbatim"
+  <*> manyTill anyChar (closeTag "verbatim")
 
 -- | Parse @\<class>@ tag.
 classTag :: PandocMonad m => MuseParser m (F Inlines)
 classTag = do
-  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "class" [])
-  res <- manyTill inline (void $ htmlTag (~== TagClose "class"))
-  let classes = maybe [] words $ lookup "name" attrs
+  classes <- maybe [] words . lookup "name" <$> openTag "class"
+  res <- manyTill inline $ closeTag "class"
   return $ B.spanWith ("", classes, []) <$> mconcat res
 
 -- | Parse "~~" as nonbreaking space.
 nbsp :: PandocMonad m => MuseParser m (F Inlines)
-nbsp = try $ do
-  string "~~"
-  return $ return $ B.str "\160"
+nbsp = try $ pure (B.str "\160") <$ string "~~"
 
 -- | Parse code markup, indicated by @\'=\'@ characters.
 code :: PandocMonad m => MuseParser m (F Inlines)
@@ -914,26 +890,27 @@ code = try $ do
   guard $ not $ null contents
   guard $ head contents `notElem` " \t\n"
   guard $ last contents `notElem` " \t\n"
-  notFollowedBy $ satisfy isLetter
+  notFollowedBy $ satisfy isAlphaNum
   return $ return $ B.code contents
 
 -- | Parse @\<code>@ tag.
 codeTag :: PandocMonad m => MuseParser m (F Inlines)
-codeTag = return . uncurry B.codeWith <$> htmlElement "code"
+codeTag = fmap pure $ B.codeWith
+  <$> (htmlAttrToPandoc <$> openTag "code")
+  <*> manyTill anyChar (closeTag "code")
 
 -- | Parse @\<math>@ tag.
 -- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@
 mathTag :: PandocMonad m => MuseParser m (F Inlines)
-mathTag = return . B.math . snd <$> htmlElement "math"
+mathTag = return . B.math
+  <$  openTag "math"
+  <*> manyTill anyChar (closeTag "math")
 
 -- | Parse inline @\<literal>@ tag as a raw inline.
 inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
-inlineLiteralTag =
-  (return . rawInline) <$> htmlElement "literal"
-  where
-    -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
-    format (_, _, kvs)        = fromMaybe "html" $ lookup "style" kvs
-    rawInline (attrs, content) = B.rawInline (format attrs) content
+inlineLiteralTag = try $ fmap pure $ B.rawInline
+  <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
+  <*> manyTill anyChar (closeTag "literal")
 
 str :: PandocMonad m => MuseParser m (F Inlines)
 str = return . B.str <$> many1 alphaNum <* updateLastStrPos
@@ -942,29 +919,58 @@ symbol :: PandocMonad m => MuseParser m (F Inlines)
 symbol = return . B.str <$> count 1 nonspaceChar
 
 -- | Parse a link or image.
-link :: PandocMonad m => MuseParser m (F Inlines)
-link = try $ do
-  st <- getState
-  guard $ not $ museInLink st
-  setState $ st{ museInLink = True }
-  (url, content) <- linkText
-  updateState (\state -> state { museInLink = False })
-  return $ case stripPrefix "URL:" url of
-             Nothing -> if isImageUrl url
-                          then B.image url "" <$> fromMaybe (return mempty) content
-                          else B.link url "" <$> fromMaybe (return $ B.str url) content
-             Just url' -> B.link url' "" <$> fromMaybe (return $ B.str url') content
-    where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
-          imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
-          isImageUrl = (`elem` imageExtensions) . takeExtension
+linkOrImage :: PandocMonad m => MuseParser m (F Inlines)
+linkOrImage = try $ do
+  inLink <- asks museInLink
+  guard $ not inLink
+  local (\s -> s { museInLink = True }) (explicitLink <|> image <|> link)
 
 linkContent :: PandocMonad m => MuseParser m (F Inlines)
-linkContent = char '[' >> trimInlinesF . mconcat <$> manyTill inline (string "]")
+linkContent = trimInlinesF . mconcat
+  <$  char '['
+  <*> manyTill inline (char ']')
+
+-- | Parse a link starting with @URL:@
+explicitLink :: PandocMonad m => MuseParser m (F Inlines)
+explicitLink = try $ do
+  string "[[URL:"
+  url <- manyTill anyChar $ char ']'
+  content <- option (pure $ B.str url) linkContent
+  char ']'
+  return $ B.link url "" <$> content
 
-linkText :: PandocMonad m => MuseParser m (String, Maybe (F Inlines))
-linkText = do
+image :: PandocMonad m => MuseParser m (F Inlines)
+image = try $ do
+  string "[["
+  (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']')
+  content <- option mempty linkContent
+  char ']'
+  let widthAttr = case align of
+                    Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")]
+                    _ -> maybeToList (("width",) . (++ "%") <$> width)
+  let alignClass = case align of
+                     Just 'r' -> ["align-right"]
+                     Just 'l' -> ["align-left"]
+                     Just 'f' -> []
+                     _        -> []
+  return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content
+  where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
+        imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
+        imageExtension = choice (try . string <$> imageExtensions)
+        imageExtensionAndOptions = do
+          ext <- imageExtension
+          (width, align) <- option (Nothing, Nothing) imageAttrs
+          return (ext, width, align)
+        imageAttrs = (,)
+          <$  many1 spaceChar
+          <*> optionMaybe (many1 digit)
+          <*  many spaceChar
+          <*> optionMaybe (oneOf "rlf")
+
+link :: PandocMonad m => MuseParser m (F Inlines)
+link = try $ do
   string "[["
   url <- manyTill anyChar $ char ']'
   content <- optionMaybe linkContent
   char ']'
-  return (url, content)
+  return $ B.link url "" <$> fromMaybe (return $ B.str url) content
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
index d3db3a9e2..9e8221248 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
@@ -100,7 +100,7 @@ liftA  fun a = a >>^ fun
 -- | Duplicate a value to subsequently feed it into different arrows.
 -- Can almost always be replaced with '(&&&)', 'keepingTheValue',
 -- or even '(|||)'.
--- Aequivalent to
+-- Equivalent to
 -- > returnA &&& returnA
 duplicate :: (Arrow a) => a b (b,b)
 duplicate = arr $ join (,)
@@ -114,7 +114,7 @@ infixr 2 >>%
 
 
 -- | Duplicate a value and apply an arrow to the second instance.
--- Aequivalent to
+-- Equivalent to
 -- > \a -> duplicate >>> second a
 -- or
 -- > \a -> returnA &&& a
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 6d96897aa..e76bbf5cf 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -50,7 +50,7 @@ class (Eq nsID, Ord nsID) => NameSpaceID nsID where
   getNamespaceID   :: NameSpaceIRI
                       -> NameSpaceIRIs nsID
                       -> Maybe (NameSpaceIRIs nsID, nsID)
-  -- | Given a namespace id, lookup its IRI. May be overriden for performance.
+  -- | Given a namespace id, lookup its IRI. May be overridden for performance.
   getIRI           :: nsID
                       -> NameSpaceIRIs nsID
                       -> Maybe NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 616d9290b..45c6cd58c 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -61,7 +61,7 @@ import qualified Data.Foldable as F (Foldable, foldr)
 import Data.Maybe
 
 
--- | Aequivalent to
+-- | Equivalent to
 -- > foldr (.) id
 -- where '(.)' are 'id' are the ones from "Control.Category"
 -- and 'foldr' is the one from "Data.Foldable".
@@ -72,7 +72,7 @@ import Data.Maybe
 composition        :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a
 composition        = F.foldr (<<<) Cat.id
 
--- | Aequivalent to
+-- | Equivalent to
 -- > foldr (flip (.)) id
 -- where '(.)' are 'id' are the ones from "Control.Category"
 -- and 'foldr' is the one from "Data.Foldable".
@@ -133,9 +133,7 @@ class Lookupable a where
 -- can be used directly in almost any case.
 readLookupables :: (Lookupable a) => String -> [(a,String)]
 readLookupables s = [ (a,rest) | (word,rest) <- lex s,
-                                 let result = lookup word lookupTable,
-                                 isJust result,
-                                 let Just a = result
+                                 a <- maybeToList (lookup word lookupTable)
                     ]
 
 -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 81392e16b..2327ea908 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -261,7 +261,7 @@ convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA
 -- The resulting converter even behaves like an identity converter on the
 -- value level.
 --
--- Aequivalent to
+-- Equivalent to
 --
 -- > \v x a -> convertingExtraState v (returnV x >>> a)
 --
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index e0444559b..6a1682829 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -138,7 +138,7 @@ fontPitchReader = executeIn NsOffice "font-face-decls" (
                               lookupDefaultingAttr NsStyle "font-pitch"
                             ))
                     >>?^ ( M.fromList . foldl accumLegalPitches [] )
-                  )
+                  ) `ifFailedDo` (returnV (Right M.empty))
   where accumLegalPitches ls (Nothing,_) = ls
         accumLegalPitches ls (Just n,p)  = (n,p):ls
 
@@ -342,7 +342,7 @@ instance Read XslUnit where
   readsPrec _  _   = []
 
 -- | Rough conversion of measures into millimetres.
--- Pixels and em's are actually implementation dependant/relative measures,
+-- Pixels and em's are actually implementation dependent/relative measures,
 -- so I could not really easily calculate anything exact here even if I wanted.
 -- But I do not care about exactness right now, as I only use measures
 -- to determine if a paragraph is "indented" or not.
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 888cd9307..1c52c3477 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -40,7 +40,7 @@ import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
 import Text.Pandoc.Readers.Org.ParserState
 import Text.Pandoc.Readers.Org.Parsing
 import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
-                                       originalLang, translateLang)
+                                       originalLang, translateLang, exportsCode)
 
 import Text.Pandoc.Builder (Blocks, Inlines)
 import Text.Pandoc.Class (PandocMonad)
@@ -314,9 +314,6 @@ codeBlock blockAttrs blockType = do
    labelledBlock :: F Inlines -> F Blocks
    labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
 
-   exportsCode :: [(String, String)] -> Bool
-   exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
-
    exportsResults :: [(String, String)] -> Bool
    exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
 
@@ -743,7 +740,7 @@ latexEnd envName = try $
 
 
 --
--- Footnote defintions
+-- Footnote definitions
 --
 noteBlock :: PandocMonad m => OrgParser m (F Blocks)
 noteBlock = try $ do
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index c9465581a..7d55892fe 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-
 Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 
@@ -17,8 +16,7 @@ along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 -}
 {-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards  #-}
-{-# LANGUAGE ViewPatterns     #-}
+{-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.DocumentTree
    Copyright   : Copyright (C) 2014-2018 Albert Krewinkel
@@ -45,7 +43,7 @@ import Text.Pandoc.Readers.Org.BlockStarts
 import Text.Pandoc.Readers.Org.ParserState
 import Text.Pandoc.Readers.Org.Parsing
 
-import qualified Data.Map as Map
+import qualified Data.Set as Set
 import qualified Text.Pandoc.Builder as B
 
 --
@@ -60,7 +58,7 @@ documentTree :: PandocMonad m
 documentTree blocks inline = do
   initialBlocks <- blocks
   headlines <- sequence <$> manyTill (headline blocks inline 1) eof
-  title <- fmap (getTitle . unMeta) . orgStateMeta <$> getState
+  title <- fmap docTitle . orgStateMeta <$> getState
   return $ do
     headlines' <- headlines
     initialBlocks' <- initialBlocks
@@ -70,19 +68,11 @@ documentTree blocks inline = do
       , headlineTodoMarker = Nothing
       , headlineText = B.fromList title'
       , headlineTags = mempty
+      , headlinePlanning = emptyPlanning
       , headlineProperties = mempty
       , headlineContents = initialBlocks'
       , headlineChildren = headlines'
       }
- where
-  getTitle :: Map.Map String MetaValue -> [Inline]
-  getTitle metamap =
-    case Map.lookup "title" metamap of
-      Just (MetaInlines inlns) -> inlns
-      _                        -> []
-
-newtype Tag = Tag { fromTag :: String }
-  deriving (Show, Eq)
 
 -- | Create a tag containing the given string.
 toTag :: String -> Tag
@@ -117,6 +107,7 @@ data Headline = Headline
   , headlineTodoMarker :: Maybe TodoMarker
   , headlineText       :: Inlines
   , headlineTags       :: [Tag]
+  , headlinePlanning   :: PlanningInfo -- ^ subtree planning information
   , headlineProperties :: Properties
   , headlineContents   :: Blocks
   , headlineChildren   :: [Headline]
@@ -136,6 +127,7 @@ headline blocks inline lvl = try $ do
   title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
   tags  <- option [] headerTags
   newline
+  planning   <- option emptyPlanning planningInfo
   properties <- option mempty propertiesDrawer
   contents   <- blocks
   children   <- many (headline blocks inline (level + 1))
@@ -148,6 +140,7 @@ headline blocks inline lvl = try $ do
       , headlineTodoMarker = todoKw
       , headlineText = title'
       , headlineTags = tags
+      , headlinePlanning = planning
       , headlineProperties = properties
       , headlineContents = contents'
       , headlineChildren = children'
@@ -158,22 +151,27 @@ headline blocks inline lvl = try $ do
 
    headerTags :: Monad m => OrgParser m [Tag]
    headerTags = try $
-     let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
+     let tag = orgTagWord <* char ':'
      in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
 
 -- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
 headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
-headlineToBlocks hdln@Headline {..} = do
-  maxHeadlineLevels <- getExportSetting exportHeadlineLevels
+headlineToBlocks hdln = do
+  maxLevel <- getExportSetting exportHeadlineLevels
+  let tags = headlineTags hdln
+  let text = headlineText hdln
+  let level = headlineLevel hdln
+  shouldNotExport <- hasDoNotExportTag tags
   case () of
-    _ | any isNoExportTag headlineTags     -> return mempty
-    _ | any isArchiveTag  headlineTags     -> archivedHeadlineToBlocks hdln
-    _ | isCommentTitle headlineText        -> return mempty
-    _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln
-    _ | otherwise                          -> headlineToHeaderWithContents hdln
+    _ | shouldNotExport -> return mempty
+    _ | any isArchiveTag  tags -> archivedHeadlineToBlocks hdln
+    _ | isCommentTitle text    -> return mempty
+    _ | maxLevel <= level      -> headlineToHeaderWithList hdln
+    _ | otherwise              -> headlineToHeaderWithContents hdln
 
-isNoExportTag :: Tag -> Bool
-isNoExportTag = (== toTag "noexport")
+hasDoNotExportTag :: Monad m => [Tag] -> OrgParser m Bool
+hasDoNotExportTag tags = containsExcludedTag . orgStateExcludedTags <$> getState
+  where containsExcludedTag s = any (`Set.member` s) tags
 
 isArchiveTag :: Tag -> Bool
 isArchiveTag = (== toTag "ARCHIVE")
@@ -182,8 +180,9 @@ isArchiveTag = (== toTag "ARCHIVE")
 -- FIXME: This accesses builder internals not intended for use in situations
 -- like these.  Replace once keyword parsing is supported.
 isCommentTitle :: Inlines -> Bool
-isCommentTitle (B.toList -> (Str "COMMENT":_)) = True
-isCommentTitle _                               = False
+isCommentTitle inlns = case B.toList inlns of
+  (Str "COMMENT":_) -> True
+  _ -> False
 
 archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
 archivedHeadlineToBlocks hdln = do
@@ -194,17 +193,23 @@ archivedHeadlineToBlocks hdln = do
     ArchivedTreesHeadlineOnly -> headlineToHeader hdln
 
 headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithList hdln@Headline {..} = do
+headlineToHeaderWithList hdln = do
   maxHeadlineLevels <- getExportSetting exportHeadlineLevels
   header        <- headlineToHeader hdln
-  listElements  <- mapM headlineToBlocks headlineChildren
+  listElements  <- mapM headlineToBlocks (headlineChildren hdln)
+  planningBlock <- planningToBlock (headlinePlanning hdln)
   let listBlock  = if null listElements
                    then mempty
                    else B.orderedList listElements
-  let headerText = if maxHeadlineLevels == headlineLevel
+  let headerText = if maxHeadlineLevels == headlineLevel hdln
                    then header
                    else flattenHeader header
-  return $ headerText <> headlineContents <> listBlock
+  return . mconcat $
+    [ headerText
+    , headlineContents hdln
+    , planningBlock
+    , listBlock
+    ]
  where
    flattenHeader :: Blocks -> Blocks
    flattenHeader blks =
@@ -213,27 +218,28 @@ headlineToHeaderWithList hdln@Headline {..} = do
        _                    -> mempty
 
 headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithContents hdln@Headline {..} = do
+headlineToHeaderWithContents hdln = do
   header         <- headlineToHeader hdln
-  childrenBlocks <- mconcat <$> mapM headlineToBlocks headlineChildren
-  return $ header <> headlineContents <> childrenBlocks
+  planningBlock <- planningToBlock (headlinePlanning hdln)
+  childrenBlocks <- mconcat <$> mapM headlineToBlocks (headlineChildren hdln)
+  return $ header <> planningBlock <> headlineContents hdln <> childrenBlocks
 
 headlineToHeader :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeader Headline {..} = do
+headlineToHeader hdln = do
   exportTodoKeyword <- getExportSetting exportWithTodoKeywords
   exportTags        <- getExportSetting exportWithTags
   let todoText    = if exportTodoKeyword
-                    then case headlineTodoMarker of
+                    then case headlineTodoMarker hdln of
                       Just kw -> todoKeywordToInlines kw <> B.space
                       Nothing -> mempty
                     else mempty
-  let text        = todoText <> headlineText <>
+  let text        = todoText <> headlineText hdln <>
                     if exportTags
-                    then tagsToInlines headlineTags
+                    then tagsToInlines (headlineTags hdln)
                     else mempty
-  let propAttr    = propertiesToAttr headlineProperties
-  attr           <- registerHeader propAttr headlineText
-  return $ B.headerWith attr headlineLevel text
+  let propAttr    = propertiesToAttr (headlineProperties hdln)
+  attr           <- registerHeader propAttr (headlineText hdln)
+  return $ B.headerWith attr (headlineLevel hdln) text
 
 todoKeyword :: Monad m => OrgParser m TodoMarker
 todoKeyword = try $ do
@@ -277,9 +283,60 @@ tagsToInlines tags =
 tagSpan :: Tag -> Inlines -> Inlines
 tagSpan t = B.spanWith ("", ["tag"], [("tag-name", fromTag t)])
 
+-- | Render planning info as a block iff the respective export setting is
+-- enabled.
+planningToBlock :: Monad m => PlanningInfo -> OrgParser m Blocks
+planningToBlock planning = do
+  includePlanning <- getExportSetting exportWithPlanning
+  return $
+    if includePlanning
+    then B.plain . mconcat . intersperse B.space . filter (/= mempty) $
+         [ datumInlines planningClosed "CLOSED"
+         , datumInlines planningDeadline "DEADLINE"
+         , datumInlines planningScheduled "SCHEDULED"
+         ]
+    else mempty
+ where
+  datumInlines field name =
+    case field planning of
+      Nothing -> mempty
+      Just time ->   B.strong (B.str name <> B.str ":")
+                  <> B.space
+                  <> B.emph (B.str time)
+
+-- | An Org timestamp, including repetition marks. TODO: improve
+type Timestamp = String
+
+timestamp :: Monad m => OrgParser m Timestamp
+timestamp = try $ do
+  openChar <- oneOf "<["
+  let isActive = openChar == '<'
+  let closeChar = if isActive then '>' else ']'
+  content <- many1Till anyChar (char closeChar)
+  return (openChar : content ++ [closeChar])
+
+-- | Planning information for a subtree/headline.
+data PlanningInfo = PlanningInfo
+  { planningClosed :: Maybe Timestamp
+  , planningDeadline :: Maybe Timestamp
+  , planningScheduled :: Maybe Timestamp
+  }
 
+emptyPlanning :: PlanningInfo
+emptyPlanning = PlanningInfo Nothing Nothing Nothing
 
-
+-- | Read a single planning-related and timestamped line.
+planningInfo :: Monad m => OrgParser m PlanningInfo
+planningInfo = try $ do
+  updaters <- many1 planningDatum <* skipSpaces <* newline
+  return $ foldr ($) emptyPlanning updaters
+ where
+  planningDatum = skipSpaces *> choice
+    [ updateWith (\s p -> p { planningScheduled = Just s}) "SCHEDULED"
+    , updateWith (\d p -> p { planningDeadline = Just d}) "DEADLINE"
+    , updateWith (\c p -> p { planningClosed = Just c}) "CLOSED"
+    ]
+  updateWith fn cs = fn <$> (string cs *> char ':' *> skipSpaces *> timestamp)
 
 -- | Read a :PROPERTIES: drawer and return the key/value pairs contained
 -- within.
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index d02eb37c5..f79ee0d64 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -69,7 +69,7 @@ exportSetting = choice
   , integerSetting "H" (\val es -> es { exportHeadlineLevels = val })
   , ignoredSetting "inline"
   , ignoredSetting "num"
-  , ignoredSetting "p"
+  , booleanSetting "p" (\val es -> es { exportWithPlanning = val })
   , ignoredSetting "pri"
   , ignoredSetting "prop"
   , ignoredSetting "stat"
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 7d1568b80..a5335ca57 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker)
 import Text.Pandoc.Readers.Org.ParserState
 import Text.Pandoc.Readers.Org.Parsing
 import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
-                                       originalLang, translateLang)
+                                       originalLang, translateLang, exportsCode)
 
 import Text.Pandoc.Builder (Inlines)
 import qualified Text.Pandoc.Builder as B
@@ -510,7 +510,7 @@ anchor =  try $ do
                      <* string ">>"
                      <* skipSpaces
 
--- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'.  This mirrors
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'.  This mirrors
 -- the org function @org-export-solidify-link-text@.
 
 solidify :: String -> String
@@ -525,11 +525,13 @@ inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
 inlineCodeBlock = try $ do
   string "src_"
   lang <- many1 orgArgWordChar
-  opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
+  opts <- option [] $ try (enclosedByPair '[' ']' inlineBlockOption)
+                        <|> (mempty <$ string "[]")
   inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
   let attrClasses = [translateLang lang]
   let attrKeyVal  = originalLang lang <> opts
-  returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+  let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+  returnF $ (if exportsCode opts then codeInlineBlck else mempty)
  where
    inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
    inlineBlockOption = try $ do
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 965e33d94..cad1d7123 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -52,6 +52,7 @@ import Data.Char (toLower)
 import Data.List (intersperse)
 import Data.Maybe (fromMaybe)
 import qualified Data.Map as M
+import qualified Data.Set as Set
 import Network.HTTP (urlEncode)
 
 -- | Returns the current meta, respecting export options.
@@ -158,6 +159,7 @@ optionLine = try $ do
     "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
     "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
     "macro"    -> macroDefinition >>= updateState . registerMacro
+    "exclude_tags" -> excludedTagList >>= updateState . setExcludedTags
     "pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar
     "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
     _          -> mzero
@@ -190,6 +192,18 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
    rest            = manyTill anyChar         (eof <|> () <$ oneOf "\n\r")
    tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
 
+excludedTagList :: Monad m => OrgParser m [Tag]
+excludedTagList = do
+  skipSpaces
+  map Tag <$> many (orgTagWord <* skipSpaces) <* newline
+
+setExcludedTags :: [Tag] -> OrgParserState -> OrgParserState
+setExcludedTags tagList st =
+  let finalSet = if orgStateExcludedTagsChanged st
+                   then foldr Set.insert (orgStateExcludedTags st) tagList
+                   else Set.fromList tagList
+  in st { orgStateExcludedTags = finalSet, orgStateExcludedTagsChanged = True}
+
 setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState
 setEmphasisPreChar csMb st =
   let preChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 4cb5bb626..59478256f 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState
   , defaultOrgParserState
   , OrgParserLocal (..)
   , OrgNoteRecord
+  , Tag(..)
   , HasReaderOptions (..)
   , HasQuoteContext (..)
   , HasMacros (..)
@@ -88,6 +89,9 @@ type OrgNoteTable = [OrgNoteRecord]
 type OrgLinkFormatters = M.Map String (String -> String)
 -- | Macro expander function
 type MacroExpander = [String] -> String
+-- | Tag
+newtype Tag = Tag { fromTag :: String }
+  deriving (Show, Eq, Ord)
 
 -- | The states in which a todo item can be
 data TodoState = Todo | Done
@@ -113,6 +117,8 @@ data OrgParserState = OrgParserState
                                            -- specified here.
   , orgStateEmphasisPostChars    :: [Char] -- ^ Chars allowed at after emphasis
   , orgStateEmphasisNewlines     :: Maybe Int
+  , orgStateExcludedTags         :: Set.Set Tag
+  , orgStateExcludedTagsChanged  :: Bool
   , orgStateExportSettings       :: ExportSettings
   , orgStateHeaderMap            :: M.Map Inlines String
   , orgStateIdentifiers          :: Set.Set String
@@ -183,6 +189,8 @@ defaultOrgParserState = OrgParserState
   , orgStateEmphasisCharStack = []
   , orgStateEmphasisNewlines = Nothing
   , orgStateExportSettings = def
+  , orgStateExcludedTags = Set.singleton $ Tag "noexport"
+  , orgStateExcludedTagsChanged = False
   , orgStateHeaderMap = M.empty
   , orgStateIdentifiers = Set.empty
   , orgStateIncludeFiles = []
@@ -260,6 +268,7 @@ data ExportSettings = ExportSettings
   , exportWithAuthor       :: Bool -- ^ Include author in final meta-data
   , exportWithCreator      :: Bool -- ^ Include creator in final meta-data
   , exportWithEmail        :: Bool -- ^ Include email in final meta-data
+  , exportWithPlanning     :: Bool -- ^ Keep planning info after headlines
   , exportWithTags         :: Bool -- ^ Keep tags as part of headlines
   , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers
   }
@@ -280,6 +289,7 @@ defaultExportSettings = ExportSettings
   , exportWithAuthor = True
   , exportWithCreator = True
   , exportWithEmail = True
+  , exportWithPlanning = False
   , exportWithTags = True
   , exportWithTodoKeywords = True
   }
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index e014de65e..52a346e36 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -46,6 +46,8 @@ module Text.Pandoc.Readers.Org.Parsing
   , orgArgKey
   , orgArgWord
   , orgArgWordChar
+  , orgTagWord
+  , orgTagWordChar
   -- * Re-exports from Text.Pandoc.Parser
   , ParserContext (..)
   , many1Till
@@ -137,14 +139,13 @@ anyLine =
     <* updateLastPreCharPos
     <* updateLastForbiddenCharPos
 
--- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
--- of the state saved and restored.
+-- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character
+-- allowed before emphasised text.
 parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
 parseFromString parser str' = do
-  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
   updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
   result <- P.parseFromString parser str'
-  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+  updateState $ \s -> s { orgStateLastPreCharPos = Nothing }
   return result
 
 -- | Skip one or more tab or space characters.
@@ -221,3 +222,9 @@ orgArgWord = many1 orgArgWordChar
 -- | Chars treated as part of a word in plists.
 orgArgWordChar :: Monad m => OrgParser m Char
 orgArgWordChar = alphaNum <|> oneOf "-_"
+
+orgTagWord :: Monad m => OrgParser m String
+orgTagWord = many1 orgTagWordChar
+
+orgTagWordChar :: Monad m => OrgParser m Char
+orgTagWordChar = alphaNum <|> oneOf "@%#_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 17fe34738..71d1dd517 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Readers.Org.Shared
   , isImageFilename
   , originalLang
   , translateLang
+  , exportsCode
   ) where
 
 import Prelude
@@ -96,3 +97,6 @@ translateLang cs =
     "sh"         -> "bash"
     "sqlite"     -> "sql"
     _            -> cs
+
+exportsCode :: [(String, String)] -> Bool
+exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 71a38cf82..28fa7b83e 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -37,7 +37,7 @@ import Control.Arrow (second)
 import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
 import Control.Monad.Except (throwError)
 import Control.Monad.Identity (Identity (..))
-import Data.Char (isHexDigit, isSpace, toLower, toUpper)
+import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum)
 import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
                   nub, sort, transpose, union)
 import qualified Data.Map as M
@@ -172,6 +172,7 @@ parseRST = do
   docMinusKeys <- concat <$>
                   manyTill (referenceKey <|> anchorDef <|>
                             noteBlock <|> citationBlock <|>
+                            (snd <$> withRaw comment) <|>
                             headerBlock <|> lineClump) eof
   setInput docMinusKeys
   setPosition startPos
@@ -1089,7 +1090,7 @@ referenceKey = do
 targetURI :: Monad m => ParserT [Char] st m [Char]
 targetURI = do
   skipSpaces
-  optional newline
+  optional $ try $ newline >> notFollowedBy blankline
   contents <- trim <$>
      many1 (satisfy (/='\n')
      <|> try (newline >> many1 spaceChar >> noneOf " \t\n"))
@@ -1313,19 +1314,24 @@ table = gridTable False <|> simpleTable False <|>
 
 inline :: PandocMonad m => RSTParser m Inlines
 inline = choice [ note          -- can start with whitespace, so try before ws
-                , whitespace
                 , link
-                , str
-                , endline
                 , strong
                 , emph
                 , code
                 , subst
                 , interpretedRole
-                , smart
-                , hyphens
-                , escapedChar
-                , symbol ] <?> "inline"
+                , inlineContent ] <?> "inline"
+
+-- strings, spaces and other characters that can appear either by
+-- themselves or within inline markup
+inlineContent :: PandocMonad m => RSTParser m Inlines
+inlineContent = choice [ whitespace
+                       , str
+                       , endline
+                       , smart
+                       , hyphens
+                       , escapedChar
+                       , symbol ] <?> "inline content"
 
 parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
 parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline)
@@ -1368,11 +1374,11 @@ atStart p = do
 
 emph :: PandocMonad m => RSTParser m Inlines
 emph = B.emph . trimInlines . mconcat <$>
-         enclosed (atStart $ char '*') (char '*') inline
+         enclosed (atStart $ char '*') (char '*') inlineContent
 
 strong :: PandocMonad m => RSTParser m Inlines
 strong = B.strong . trimInlines . mconcat <$>
-          enclosed (atStart $ string "**") (try $ string "**") inline
+          enclosed (atStart $ string "**") (try $ string "**") inlineContent
 
 -- Note, this doesn't precisely implement the complex rule in
 -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
@@ -1380,7 +1386,6 @@ strong = B.strong . trimInlines . mconcat <$>
 --
 -- TODO:
 --  - Classes are silently discarded in addNewRole
---  - Lacks sensible implementation for title-reference (which is the default)
 --  - Allows direct use of the :raw: role, rST only allows inherited use.
 interpretedRole :: PandocMonad m => RSTParser m Inlines
 interpretedRole = try $ do
@@ -1390,12 +1395,12 @@ interpretedRole = try $ do
 renderRole :: PandocMonad m
            => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
 renderRole contents fmt role attr = case role of
-    "sup"  -> return $ B.superscript $ B.str contents
-    "superscript" -> return $ B.superscript $ B.str contents
-    "sub"  -> return $ B.subscript $ B.str contents
-    "subscript"  -> return $ B.subscript $ B.str contents
-    "emphasis" -> return $ B.emph $ B.str contents
-    "strong" -> return $ B.strong $ B.str contents
+    "sup"  -> return $ B.superscript $ treatAsText contents
+    "superscript" -> return $ B.superscript $ treatAsText contents
+    "sub"  -> return $ B.subscript $ treatAsText contents
+    "subscript"  -> return $ B.subscript $ treatAsText contents
+    "emphasis" -> return $ B.emph $ treatAsText contents
+    "strong" -> return $ B.strong $ treatAsText contents
     "rfc-reference" -> return $ rfcLink contents
     "RFC" -> return $ rfcLink contents
     "pep-reference" -> return $ pepLink contents
@@ -1406,7 +1411,7 @@ renderRole contents fmt role attr = case role of
     "title" -> titleRef contents
     "t" -> titleRef contents
     "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
-    "span" -> return $ B.spanWith attr $ B.str contents
+    "span" -> return $ B.spanWith attr $ treatAsText contents
     "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
     custom -> do
         customRoles <- stateRstCustomRoles <$> getState
@@ -1414,14 +1419,20 @@ renderRole contents fmt role attr = case role of
             Just (newRole, newFmt, newAttr) ->
                 renderRole contents newFmt newRole newAttr
             Nothing -> -- undefined role
-                return $ B.spanWith ("",[],[("role",role)]) (B.str contents)
+                return $ B.codeWith ("",["interpreted-text"],[("role",role)])
+                          contents
  where
-   titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
+   titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref
    rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
      where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
    pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
      where padNo = replicate (4 - length pepNo) '0' ++ pepNo
            pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+   treatAsText = B.text . handleEscapes
+   handleEscapes [] = []
+   handleEscapes ('\\':' ':cs) = handleEscapes cs
+   handleEscapes ('\\':c:cs) = c : handleEscapes cs
+   handleEscapes (c:cs) = c : handleEscapes cs
 
 addClass :: String -> Attr -> Attr
 addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues)
@@ -1445,7 +1456,18 @@ roleAfter = try $ do
   return (role,contents)
 
 unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
-unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
+unmarkedInterpretedText = try $ do
+  atStart (char '`')
+  contents <- mconcat <$> (many1
+       (  many1 (noneOf "`\\\n")
+      <|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n"))
+      <|> (string "\n" <* notFollowedBy blankline)
+      <|> try (string "`" <*
+                notFollowedBy (() <$ roleMarker) <*
+                lookAhead (satisfy isAlphaNum))
+       ))
+  char '`'
+  return contents
 
 whitespace :: PandocMonad m => RSTParser m Inlines
 whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
@@ -1480,7 +1502,7 @@ explicitLink = try $ do
   char '`'
   notFollowedBy (char '`') -- `` marks start of inline code
   label' <- trimInlines . mconcat <$>
-             manyTill (notFollowedBy (char '`') >> inline) (char '<')
+             manyTill (notFollowedBy (char '`') >> inlineContent) (char '<')
   src <- trim <$> manyTill (noneOf ">\n") (char '>')
   skipSpaces
   string "`_"
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 1f230ae7e..c3cfedcfb 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -74,9 +74,6 @@ type TWParser = ParserT [Char] ParserState
 tryMsg :: String -> TWParser m a -> TWParser m a
 tryMsg msg p = try p <?> msg
 
-skip :: TWParser m a -> TWParser m ()
-skip parser = parser >> return ()
-
 nested :: PandocMonad m => TWParser m a -> TWParser m a
 nested p = do
   nestlevel <- stateMaxNestingLevel <$>  getState
@@ -92,7 +89,7 @@ htmlElement tag = tryMsg tag $ do
   content <- manyTill anyChar (endtag <|> endofinput)
   return (htmlAttrToPandoc attr, trim content)
   where
-    endtag     = skip $ htmlTag (~== TagClose tag)
+    endtag     = void $ htmlTag (~== TagClose tag)
     endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
     trim       = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
 
@@ -114,18 +111,15 @@ parseHtmlContentWithAttrs tag parser = do
     endOfContent = try $ skipMany blankline >> skipSpaces >> eof
 
 parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a]
-parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
+parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p
 
 --
 -- main parser
 --
 
 parseTWiki :: PandocMonad m => TWParser m Pandoc
-parseTWiki = do
-  bs <- mconcat <$> many block
-  spaces
-  eof
-  return $ B.doc bs
+parseTWiki =
+  B.doc . mconcat <$> many block <* spaces <* eof
 
 
 --
@@ -158,7 +152,7 @@ separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalR
 header :: PandocMonad m => TWParser m B.Blocks
 header = tryMsg "header" $ do
   string "---"
-  level <- many1 (char '+') >>= return . length
+  level <- length <$> many1 (char '+')
   guard $ level <= 6
   classes <- option [] $ string "!!" >> return ["unnumbered"]
   skipSpaces
@@ -167,11 +161,10 @@ header = tryMsg "header" $ do
   return $ B.headerWith attr level content
 
 verbatim :: PandocMonad m => TWParser m B.Blocks
-verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
-           >>= return . (uncurry B.codeBlockWith)
+verbatim = uncurry B.codeBlockWith <$> (htmlElement "verbatim" <|> htmlElement "pre")
 
 literal :: PandocMonad m => TWParser m B.Blocks
-literal = htmlElement "literal" >>= return . rawBlock
+literal = rawBlock <$> htmlElement "literal"
   where
     format (_, _, kvs)        = fromMaybe "html" $ lookup "format" kvs
     rawBlock (attrs, content) = B.rawBlock (format attrs) content
@@ -183,7 +176,7 @@ list prefix = choice [ bulletList prefix
 
 definitionList :: PandocMonad m => String -> TWParser m B.Blocks
 definitionList prefix = tryMsg "definitionList" $ do
-  indent <- lookAhead $ string prefix *> (many1 $ string "   ") <* string "$ "
+  indent <- lookAhead $ string prefix *> many1 (string "   ") <* string "$ "
   elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
   return $ B.definitionList elements
   where
@@ -193,7 +186,7 @@ definitionList prefix = tryMsg "definitionList" $ do
       string (indent ++ "$ ") >> skipSpaces
       term <- many1Till inline $ string ": "
       line <- listItemLine indent $ string "$ "
-      return $ (mconcat term, [line])
+      return (mconcat term, [line])
 
 bulletList :: PandocMonad m => String -> TWParser m B.Blocks
 bulletList prefix = tryMsg "bulletList" $
@@ -227,25 +220,24 @@ parseListItem prefix marker = string prefix >> marker >> listItemLine prefix mar
 
 listItemLine :: (PandocMonad m, Show a)
              => String -> TWParser m a -> TWParser m B.Blocks
-listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
+listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent)
   where
     lineContent = do
       content <- anyLine
       continuation <- optionMaybe listContinuation
-      return $ filterSpaces content ++ "\n" ++ (maybe "" ("   " ++) continuation)
+      return $ filterSpaces content ++ "\n" ++ maybe "" ("   " ++) continuation
     filterSpaces = reverse . dropWhile (== ' ') . reverse
     listContinuation = notFollowedBy (string prefix >> marker) >>
                        string "   " >> lineContent
     parseContent = parseFromString' $ many1 $ nestedList <|> parseInline
-    parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
-                  return . B.plain . mconcat
+    parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList)
     nestedList = list prefix
     lastNewline = try $ char '\n' <* eof
     newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
 
 table :: PandocMonad m => TWParser m B.Blocks
 table = try $ do
-  tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
+  tableHead <- optionMaybe (unzip <$> many1Till tableParseHeader newline)
   rows <- many1 tableParseRow
   return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
   where
@@ -258,11 +250,11 @@ table = try $ do
 tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks)
 tableParseHeader = try $ do
   char '|'
-  leftSpaces <- many spaceChar >>= return . length
+  leftSpaces <- length <$> many spaceChar
   char '*'
   content <- tableColumnContent (char '*' >> skipSpaces >> char '|')
   char '*'
-  rightSpaces <- many spaceChar >>= return . length
+  rightSpaces <- length <$> many spaceChar
   optional tableEndOfRow
   return (tableAlign leftSpaces rightSpaces, content)
   where
@@ -283,13 +275,13 @@ tableEndOfRow :: PandocMonad m => TWParser m Char
 tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
 
 tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks
-tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
+tableColumnContent end = (B.plain . mconcat) <$> manyTill content (lookAhead $ try end)
   where
     content = continuation <|> inline
     continuation = try $ char '\\' >> newline >> return mempty
 
 blockQuote :: PandocMonad m => TWParser m B.Blocks
-blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
+blockQuote = (B.blockQuote . mconcat) <$> parseHtmlContent "blockquote" block
 
 noautolink :: PandocMonad m => TWParser m B.Blocks
 noautolink = do
@@ -300,15 +292,15 @@ noautolink = do
   setState $ st{ stateAllowLinks = True }
   return $ mconcat blocks
   where
-    parseContent      = parseFromString' $ many $ block
+    parseContent = parseFromString' $ many block
 
 para :: PandocMonad m => TWParser m B.Blocks
-para = many1Till inline endOfParaElement >>= return . result . mconcat
+para = (result . mconcat) <$> many1Till inline endOfParaElement
  where
    endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
    endOfInput       = try $ skipMany blankline >> skipSpaces >> eof
    endOfPara        = try $ blankline >> skipMany1 blankline
-   newBlockElement  = try $ blankline >> skip blockElements
+   newBlockElement  = try $ blankline >> void blockElements
    result content   = if F.all (==Space) content
                       then mempty
                       else B.para $ B.trimInlines content
@@ -340,7 +332,7 @@ inline = choice [ whitespace
                 ] <?> "inline"
 
 whitespace :: PandocMonad m => TWParser m B.Inlines
-whitespace = (lb <|> regsp) >>= return
+whitespace = lb <|> regsp
   where lb = try $ skipMany spaceChar >> linebreak >> return B.space
         regsp = try $ skipMany1 spaceChar >> return B.space
 
@@ -362,13 +354,13 @@ enclosed :: (Monoid b, PandocMonad m, Show a)
          => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b
 enclosed sep p = between sep (try $ sep <* endMarker) p
   where
-    endMarker   = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
+    endMarker   = lookAhead $ void endSpace <|> void (oneOf ".,!?:)|") <|> eof
     endSpace    = (spaceChar <|> newline) >> return B.space
 
 macro :: PandocMonad m => TWParser m B.Inlines
 macro = macroWithParameters <|> withoutParameters
   where
-    withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
+    withoutParameters = emptySpan <$> enclosed (char '%') (const macroName)
     emptySpan name = buildSpan name [] mempty
 
 macroWithParameters :: PandocMonad m => TWParser m B.Inlines
@@ -393,13 +385,13 @@ macroName = do
   return (first:rest)
 
 attributes :: PandocMonad m => TWParser m (String, [(String, String)])
-attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
-             return . foldr (either mkContent mkKvs) ([], [])
+attributes = foldr (either mkContent mkKvs) ([], [])
+  <$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}')
   where
     spnl                      = skipMany (spaceChar <|> newline)
     mkContent c  ([], kvs)   = (c, kvs)
     mkContent c  (rest, kvs) = (c ++ " " ++ rest, kvs)
-    mkKvs     kv (cont, rest) = (cont, (kv : rest))
+    mkKvs     kv (cont, rest) = (cont, kv : rest)
 
 attribute :: PandocMonad m => TWParser m (Either String (String, String))
 attribute = withKey <|> withoutKey
@@ -407,52 +399,50 @@ attribute = withKey <|> withoutKey
     withKey = try $ do
       key <- macroName
       char '='
-      parseValue False >>= return . (curry Right key)
-    withoutKey = try $ parseValue True >>= return . Left
-    parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities
+      curry Right key <$> parseValue False
+    withoutKey = try $ Left <$> parseValue True
+    parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces)
     withQuotes             = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
     withoutQuotes allowSpaces
-      | allowSpaces == True = many1 $ noneOf "}"
-      | otherwise           = many1 $ noneOf " }"
+      | allowSpaces = many1 $ noneOf "}"
+      | otherwise   = many1 $ noneOf " }"
 
 nestedInlines :: (Show a, PandocMonad m)
               => TWParser m a -> TWParser m B.Inlines
 nestedInlines end = innerSpace <|> nestedInline
   where
-    innerSpace   = try $ whitespace <* (notFollowedBy end)
+    innerSpace   = try $ whitespace <* notFollowedBy end
     nestedInline = notFollowedBy whitespace >> nested inline
 
 strong :: PandocMonad m => TWParser m B.Inlines
-strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
+strong = try $ B.strong <$> enclosed (char '*') nestedInlines
 
 strongHtml :: PandocMonad m => TWParser m B.Inlines
-strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
-             >>= return . B.strong . mconcat
+strongHtml = B.strong . mconcat <$> (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
 
 strongAndEmph :: PandocMonad m => TWParser m B.Inlines
-strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
+strongAndEmph = try $ B.emph . B.strong <$> enclosed (string "__") nestedInlines
 
 emph :: PandocMonad m => TWParser m B.Inlines
-emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
+emph = try $ B.emph <$> enclosed (char '_') nestedInlines
 
 emphHtml :: PandocMonad m => TWParser m B.Inlines
-emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
-           >>= return . B.emph . mconcat
+emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
 
 nestedString :: (Show a, PandocMonad m)
              => TWParser m a -> TWParser m String
-nestedString end = innerSpace <|> (count 1 nonspaceChar)
+nestedString end = innerSpace <|> count 1 nonspaceChar
   where
     innerSpace = try $ many1 spaceChar <* notFollowedBy end
 
 boldCode :: PandocMonad m => TWParser m B.Inlines
-boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
+boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString
 
 htmlComment :: PandocMonad m => TWParser m B.Inlines
 htmlComment = htmlTag isCommentTag >> return mempty
 
 code :: PandocMonad m => TWParser m B.Inlines
-code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
+code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString
 
 codeHtml :: PandocMonad m => TWParser m B.Inlines
 codeHtml = do
@@ -464,7 +454,7 @@ autoLink = try $ do
   state <- getState
   guard $ stateAllowLinks state
   (text, url) <- parseLink
-  guard $ checkLink (head $ reverse url)
+  guard $ checkLink (last url)
   return $ makeLink (text, url)
   where
     parseLink            = notFollowedBy nop >> (uri <|> emailAddress)
@@ -474,17 +464,17 @@ autoLink = try $ do
       | otherwise = isAlphaNum c
 
 str :: PandocMonad m => TWParser m B.Inlines
-str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
+str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
 
 nop :: PandocMonad m => TWParser m B.Inlines
-nop = try $ (skip exclamation <|> skip nopTag) >> followContent
+nop = try $ (void exclamation <|> void nopTag) >> followContent
   where
     exclamation   = char '!'
     nopTag        = stringAnyCase "<nop>"
-    followContent = many1 nonspaceChar >>= return . B.str . fromEntities
+    followContent = B.str . fromEntities <$> many1 nonspaceChar
 
 symbol :: PandocMonad m => TWParser m B.Inlines
-symbol = count 1 nonspaceChar >>= return . B.str
+symbol = B.str <$> count 1 nonspaceChar
 
 smart :: PandocMonad m => TWParser m B.Inlines
 smart = do
@@ -498,17 +488,16 @@ smart = do
 singleQuoted :: PandocMonad m => TWParser m B.Inlines
 singleQuoted = try $ do
   singleQuoteStart
-  withQuoteContext InSingleQuote $
-    many1Till inline singleQuoteEnd >>=
-    (return . B.singleQuoted . B.trimInlines . mconcat)
+  withQuoteContext InSingleQuote
+    (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd)
 
 doubleQuoted :: PandocMonad m => TWParser m B.Inlines
 doubleQuoted = try $ do
   doubleQuoteStart
   contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
-  (withQuoteContext InDoubleQuote $ doubleQuoteEnd >>
+  withQuoteContext InDoubleQuote (doubleQuoteEnd >>
    return (B.doubleQuoted $ B.trimInlines contents))
-   <|> (return $ (B.str "\8220") B.<> contents)
+   <|> return (B.str "\8220" B.<> contents)
 
 link :: PandocMonad m => TWParser m B.Inlines
 link = try $ do
@@ -527,5 +516,5 @@ linkText = do
   char ']'
   return (url, "", content)
   where
-    linkContent      = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
+    linkContent      = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent
     parseLinkContent = parseFromString' $ many1 inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index bc3bcaf26..4b65be347 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -540,7 +540,7 @@ wordChunk = try $ do
 str :: PandocMonad m => ParserT [Char] ParserState m Inlines
 str = do
   baseStr <- hyphenedWords
-  -- RedCloth compliance : if parsed word is uppercase and immediatly
+  -- RedCloth compliance : if parsed word is uppercase and immediately
   -- followed by parens, parens content is unconditionally word acronym
   fullStr <- option baseStr $ try $ do
     guard $ all isUpper baseStr
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index 5c7507248..8458b05e5 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -22,6 +22,7 @@ import Prelude
 import Control.Monad
 import Control.Monad.Except (throwError)
 import qualified Data.Foldable as F
+import Data.List (dropWhileEnd)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -166,7 +167,7 @@ table = try $ do
   -- return $ B.simpleTable (headers rows) $ trace ("rows: " ++ (show rows)) rows
   return $B.simpleTable (headers rows) rows
   where
-    -- The headers are as many empty srings as the number of columns
+    -- The headers are as many empty strings as the number of columns
     -- in the first row
     headers rows = map (B.plain . B.str) $replicate (length $ head rows) ""
 
@@ -319,7 +320,7 @@ listItem = choice [
 bulletItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
 bulletItem = try $ do
   prefix <- many1 $ char '*'
-  many1 $ char ' '
+  many $ char ' '
   content <- listItemLine (length prefix)
   return (LN Bullet (length prefix), B.plain content)
 
@@ -331,7 +332,7 @@ bulletItem = try $ do
 numberedItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
 numberedItem = try $ do
   prefix <- many1 $ char '#'
-  many1 $ char ' '
+  many $ char ' '
   content <- listItemLine (length prefix)
   return (LN Numbered (length prefix), B.plain content)
 
@@ -346,7 +347,7 @@ listItemLine nest = lineContent >>= parseContent
     listContinuation = string (replicate nest '+') >> lineContent
     parseContent x = do
       parsed <- parseFromString (many1 inline) x
-      return $ mconcat parsed
+      return $ mconcat $ dropWhileEnd (== B.space) parsed
 
 -- Turn the CODE macro attributes into Pandoc code block attributes.
 mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)])
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index bed49fd46..26dc934a9 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -198,7 +198,7 @@ para = try $ do
 commentBlock :: T2T Blocks
 commentBlock = try (blockMarkupArea anyLine (const mempty) "%%%") <|> comment
 
--- Seperator and Strong line treated the same
+-- Separator and Strong line treated the same
 hrule :: T2T Blocks
 hrule = try $ do
   spaces
@@ -575,8 +575,10 @@ symbol = B.str . (:[]) <$> oneOf specialChars
 getTarget :: T2T String
 getTarget = do
   mv <- lookupMeta "target" . stateMeta <$> getState
-  let MetaString target = fromMaybe (MetaString "html") mv
-  return target
+  return $ case mv of
+              Just (MetaString target)        -> target
+              Just (MetaInlines [Str target]) -> target
+              _                               -> "html"
 
 atStart :: T2T ()
 atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 824a912c3..15f0d991f 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -429,9 +429,7 @@ tableRow = try $ do
   s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar
     >> newline))
   guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|")
-  tr <- many tableCell
-  many spaceChar >> char '\n'
-  return tr
+  many tableCell <* many spaceChar <* char '\n'
 
 tableCell :: PandocMonad m => VwParser m Blocks
 tableCell = try $
@@ -451,13 +449,13 @@ ph s = try $ do
 
 noHtmlPh :: PandocMonad m => VwParser m ()
 noHtmlPh = try $
-  () <$ (many spaceChar >> string "%nohtml" >> many spaceChar
-    >> lookAhead newline)
+  () <$ many spaceChar <* string "%nohtml" <* many spaceChar
+    <* lookAhead newline
 
 templatePh :: PandocMonad m => VwParser m ()
 templatePh = try $
-  () <$ (many spaceChar >> string "%template" >>many (noneOf "\n")
-    >> lookAhead newline)
+  () <$ many spaceChar <* string "%template" <* many (noneOf "\n")
+    <* lookAhead newline
 
 -- inline parser
 
@@ -617,10 +615,8 @@ procImgurl :: String -> String
 procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s
 
 inlineMath :: PandocMonad m => VwParser m Inlines
-inlineMath = try $ do
-  char '$'
-  contents <- many1Till (noneOf "\n") (char '$')
-  return $ B.math contents
+inlineMath = try $
+  B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$')
 
 tag :: PandocMonad m => VwParser m Inlines
 tag = try $ do
author	Yan Pas <yanp.bugz@gmail.com>	2018-10-07 18:10:01 +0300
committer	Yan Pas <yanp.bugz@gmail.com>	2018-10-07 18:10:01 +0300
commit	27467189ab184c5d098e244e01f7d1bfdb0d4d45 (patch)
tree	d1fb96ebbc49ee0c4e73ef354feddd521690d545 /src/Text/Pandoc/Readers
parent	4f3dd3b1af7217214287ab886147c5e33a54774d (diff)
parent	bd8a66394bc25b52dca9ffd963a560a4ca492f9c (diff)
download	pandoc-27467189ab184c5d098e244e01f7d1bfdb0d4d45.tar.gz