28 files changed, 2351 insertions, 631 deletions
diff --git a/src/Text/Pandoc/Readers/Custom.hs b/src/Text/Pandoc/Readers/Custom.hs
new file mode 100644
index 000000000..9252a9e45
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Custom.hs
@@ -0,0 +1,83 @@
+{-# LANGUAGE LambdaCase          #-}
+{-# LANGUAGE OverloadedStrings   #-}
+{- |
+   Module      : Text.Pandoc.Readers.Custom
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Supports custom parsers written in Lua which produce a Pandoc AST.
+-}
+module Text.Pandoc.Readers.Custom ( readCustom ) where
+import Control.Exception
+import Control.Monad (when)
+import HsLua as Lua hiding (Operation (Div), render)
+import Control.Monad.IO.Class (MonadIO)
+import Text.Pandoc.Definition
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+import Text.Pandoc.Lua (Global (..), runLua, setGlobals)
+import Text.Pandoc.Lua.PandocLua
+import Text.Pandoc.Lua.Marshal.Pandoc (peekPandoc)
+import Text.Pandoc.Lua.Util (dofileWithTraceback, callWithTraceback,
+                             pcallWithTraceback)
+import Text.Pandoc.Options
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import qualified Data.Text as T
+
+-- | Convert custom markup to Pandoc.
+readCustom :: (PandocMonad m, MonadIO m, ToSources s)
+            => FilePath -> ReaderOptions -> s -> m Pandoc
+readCustom luaFile opts srcs = do
+  let globals = [ PANDOC_SCRIPT_FILE luaFile ]
+  res <- runLua $ do
+    setGlobals globals
+    stat <- dofileWithTraceback luaFile
+    -- check for error in lua script (later we'll change the return type
+    -- to handle this more gracefully):
+    when (stat /= Lua.OK)
+      Lua.throwErrorAsException
+    parseCustom
+  case res of
+    Left msg -> throw msg
+    Right doc -> return doc
+ where
+  parseCustom = do
+    let input = toSources srcs
+    getglobal "Reader"
+    push input
+    push opts
+    pcallWithTraceback 2 1 >>= \case
+      OK -> forcePeek $ peekPandoc top
+      ErrRun -> do
+        -- Caught a runtime error. Check if parsing might work if we
+        -- pass a string instead of a Sources list, then retry.
+        runPeek (peekText top) >>= \case
+          Failure {} ->
+            -- not a string error object. Bail!
+            throwErrorAsException
+          Success errmsg -> do
+            if "string expected, got pandoc Sources" `T.isInfixOf` errmsg
+              then do
+                pop 1
+                _ <- unPandocLua $ do
+                  report $ Deprecated "old Reader function signature" $
+                    T.unlines
+                    [ "Reader functions should accept a sources list; "
+                    , "functions expecting `string` input are deprecated. "
+                    , "Use `tostring` to convert the first argument to a "
+                    , "string."
+                    ]
+                getglobal "Reader"
+                push $ sourcesToText input  -- push sources as string
+                push opts
+                callWithTraceback 2 1
+                forcePeek $ peekPandoc top
+              else
+                -- nothing we can do here
+                throwErrorAsException
+      _ ->  -- not a runtime error, we won't be able to recover from that
+        throwErrorAsException
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index c49b82ccf..be90eb23e 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -19,7 +19,7 @@ import Data.Foldable (asum)
 import Data.Generics
 import Data.List (intersperse,elemIndex)
 import Data.List.NonEmpty (nonEmpty)
-import Data.Maybe (fromMaybe,mapMaybe)
+import Data.Maybe (catMaybes,fromMaybe,mapMaybe,maybeToList)
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
@@ -316,7 +316,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [ ] postcode - A postal code in an address
 [x] preface - Introductory matter preceding the first chapter of a book
 [ ] prefaceinfo - Meta-information for a Preface
-[ ] primary - The primary word or phrase under which an index term should be
+[x] primary - The primary word or phrase under which an index term should be
     sorted
 [ ] primaryie - A primary term in an index entry, not in the text
 [ ] printhistory - The printing history of a document
@@ -385,7 +385,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [o] screeninfo - Information about how a screen shot was produced
 [ ] screenshot - A representation of what the user sees or might see on a
     computer screen
-[ ] secondary - A secondary word or phrase in an index term
+[x] secondary - A secondary word or phrase in an index term
 [ ] secondaryie - A secondary term in an index entry, rather than in the text
 [x] sect1 - A top-level section of document
 [x] sect1info - Meta-information for a Sect1
@@ -461,7 +461,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] td - A table entry in an HTML table
 [x] term - The word or phrase being defined or described in a variable list
 [ ] termdef - An inline term definition
-[ ] tertiary - A tertiary word or phrase in an index term
+[x] tertiary - A tertiary word or phrase in an index term
 [ ] tertiaryie - A tertiary term in an index entry, rather than in the text
 [ ] textdata - Pointer to external text data
 [ ] textobject - A wrapper for a text description of an object and its
@@ -829,7 +829,7 @@ parseBlock (Elem e) =
         "section" -> gets dbSectionLevel >>= sect . (+1)
         "simplesect" ->
           gets dbSectionLevel >>=
-          sectWith (attrValue "id" e,["unnumbered"],[]) . (+1)
+          sectWith(attrValue "id" e) ["unnumbered"] [] . (+1)
         "refsect1" -> sect 1
         "refsect2" -> sect 2
         "refsect3" -> sect 3
@@ -907,6 +907,7 @@ parseBlock (Elem e) =
            let classes' = case attrValue "language" e of
                                 "" -> []
                                 x  -> [x]
+                ++ ["numberLines" | attrValue "linenumbering" e == "numbered"]
            return $ codeBlockWith (attrValue "id" e, classes', [])
                   $ trimNl $ strContentRecursive e
          parseBlockquote = do
@@ -993,8 +994,8 @@ parseBlock (Elem e) =
                                      (TableHead nullAttr $ toHeaderRow headrows)
                                      [TableBody nullAttr 0 [] $ map toRow bodyrows]
                                      (TableFoot nullAttr [])
-         sect n = sectWith (attrValue "id" e,[],[]) n
-         sectWith attr n = do
+         sect n = sectWith(attrValue "id" e) [] [] n
+         sectWith elId classes attrs n = do
            isbook <- gets dbBook
            let n' = if isbook || n == 0 then n + 1 else n
            headerText <- case filterChild (named "title") e `mplus`
@@ -1005,7 +1006,14 @@ parseBlock (Elem e) =
            modify $ \st -> st{ dbSectionLevel = n }
            b <- getBlocks e
            modify $ \st -> st{ dbSectionLevel = n - 1 }
-           return $ headerWith attr n' headerText <> b
+           return $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr++attrs) n' headerText <> b
+         titleabbrevElAsAttr = do
+           txt <- case filterChild (named "titleabbrev") e `mplus`
+                            (filterChild (named "info") e >>=
+                                filterChild (named "titleabbrev")) of
+                            Just t  -> Just ("titleabbrev", strContentRecursive t)
+                            Nothing -> Nothing
+           return txt
          lineItems = mapM getInlines $ filterChildren (named "line") e
          -- | Admonitions are parsed into a div. Following other Docbook tools that output HTML,
          -- we parse the optional title as a div with the @title@ class, and give the
@@ -1079,6 +1087,17 @@ elementToStr :: Content -> Content
 elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
+childElTextAsAttr :: Text -> Element -> Maybe (Text, Text)
+childElTextAsAttr n e = case findChild q e of
+        Nothing -> Nothing
+        Just childEl -> Just (n, strContentRecursive childEl)
+        where q = QName n (Just "http://docbook.org/ns/docbook") Nothing
+
+attrValueAsOptionalAttr :: Text -> Element -> Maybe (Text, Text)
+attrValueAsOptionalAttr n e = case attrValue n e of
+        "" -> Nothing
+        _ -> Just (n, attrValue n e)
+
 parseInline :: PandocMonad m => Content -> DB m Inlines
 parseInline (Text (CData _ s _)) = return $ text s
 parseInline (CRef ref) =
@@ -1093,6 +1112,28 @@ parseInline (Elem e) =
           if ident /= "" || classes /= []
             then innerInlines (spanWith (ident,classes,[]))
             else innerInlines id
+        "indexterm" -> do
+          let ident = attrValue "id" e
+          let classes = T.words $ attrValue "role" e
+          let attrs =
+                -- In DocBook, <primary>, <secondary>, <tertiary>, <see>, and <seealso>
+                -- have mixed content models. However, because we're representing these
+                -- elements in Pandoc's AST as attributes of a phrase, we flatten all
+                -- the descendant content of these elements.
+                [ childElTextAsAttr "primary" e
+                , childElTextAsAttr "secondary" e
+                , childElTextAsAttr "tertiary" e
+                , childElTextAsAttr "see" e
+                , childElTextAsAttr "seealso" e
+                , attrValueAsOptionalAttr "significance" e
+                , attrValueAsOptionalAttr "startref" e
+                , attrValueAsOptionalAttr "scope" e
+                , attrValueAsOptionalAttr "class" e
+                -- We don't do anything with the "pagenum" attribute, because these only
+                -- occur within literal <index> sections, which is not supported by Pandoc,
+                -- because Pandoc has no concept of pages.
+                ]
+          return $ spanWith (ident, ("indexterm" : classes), (catMaybes attrs)) mempty
         "equation" -> equation e displayMath
         "informalequation" -> equation e displayMath
         "inlineequation" -> equation e math
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index c06adf7e3..5c8f20c18 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -246,8 +246,8 @@ runToText _                = ""
 
 parPartToText :: ParPart -> T.Text
 parPartToText (PlainRun run)             = runToText run
-parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs
-parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs
+parPartToText (InternalHyperLink _ children) = T.concat $ map parPartToText children
+parPartToText (ExternalHyperLink _ children) = T.concat $ map parPartToText children
 parPartToText _                          = ""
 
 blacklistedCharStyles :: [CharStyleName]
@@ -322,6 +322,7 @@ runToInlines (InlineDrawing fp title alt bs ext) = do
   (lift . lift) $ P.insertMedia fp Nothing bs
   return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
 runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
+runToInlines InlineDiagram = return $ spanWith ("", ["diagram"], []) $ text "[DIAGRAM]"
 
 extentToAttr :: Extent -> Attr
 extentToAttr (Just (w, h)) =
@@ -434,18 +435,21 @@ parPartToInlines' (Drawing fp title alt bs ext) = do
   return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
 parPartToInlines' Chart =
   return $ spanWith ("", ["chart"], []) $ text "[CHART]"
-parPartToInlines' (InternalHyperLink anchor runs) = do
-  ils <- smushInlines <$> mapM runToInlines runs
+parPartToInlines' Diagram =
+  return $ spanWith ("", ["diagram"], []) $ text "[DIAGRAM]"
+parPartToInlines' (InternalHyperLink anchor children) = do
+  ils <- smushInlines <$> mapM parPartToInlines' children
   return $ link ("#" <> anchor) "" ils
-parPartToInlines' (ExternalHyperLink target runs) = do
-  ils <- smushInlines <$> mapM runToInlines runs
+parPartToInlines' (ExternalHyperLink target children) = do
+  ils <- smushInlines <$> mapM parPartToInlines' children
   return $ link target "" ils
 parPartToInlines' (PlainOMath exps) =
   return $ math $ writeTeX exps
-parPartToInlines' (Field info runs) =
+parPartToInlines' (Field info children) =
   case info of
-    HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs
-    UnknownField -> smushInlines <$> mapM runToInlines runs
+    HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children
+    PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children
+    _ -> smushInlines <$> mapM parPartToInlines' children
 parPartToInlines' NullParPart = return mempty
 
 isAnchorSpan :: Inline -> Bool
@@ -532,34 +536,36 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
 extraAttr :: (Eq (StyleName a), HasStyleName a) => a -> Attr
 extraAttr s = ("", [], [("custom-style", fromStyleName $ getStyleName s)])
 
-parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
-parStyleToTransform pPr = case pStyle pPr of
-  c@(getStyleName -> styleName):cs
-    | styleName `elem` divsToKeep -> do
-        let pPr' = pPr { pStyle = cs }
-        transform <- parStyleToTransform pPr'
-        return $ divWith ("", [normalizeToClassName styleName], []) . transform
-    | styleName `elem` listParagraphStyles -> do
-        let pPr' = pPr { pStyle = cs, indentation = Nothing}
-        transform <- parStyleToTransform pPr'
-        return $ divWith ("", [normalizeToClassName styleName], []) . transform
-    | otherwise -> do
-        let pPr' = pPr { pStyle = cs }
-        transform <- parStyleToTransform pPr'
-        styles <- asks (isEnabled Ext_styles . docxOptions)
-        return $
-          (if styles then divWith (extraAttr c) else id)
-          . (if isBlockQuote c then blockQuote else id)
-          . transform
-  []
-    | Just left <- indentation pPr >>= leftParIndent -> do
-        let pPr' = pPr { indentation = Nothing }
-            hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent
-        transform <- parStyleToTransform pPr'
-        return $ if (left - hang) > 0
-                 then blockQuote . transform
-                 else transform
-    | otherwise -> return id
+paragraphStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
+paragraphStyleToTransform pPr =
+  let stylenames = map getStyleName (pStyle pPr)
+      transform = if (`elem` listParagraphStyles) `any` stylenames || relativeIndent pPr <= 0
+                  then id
+                  else blockQuote
+  in do
+    extStylesEnabled <- asks (isEnabled Ext_styles . docxOptions)
+    return $ foldr (\parStyle transform' ->
+        (parStyleToTransform extStylesEnabled parStyle) . transform'
+      ) transform (pStyle pPr)
+
+parStyleToTransform :: Bool -> ParStyle -> Blocks -> Blocks
+parStyleToTransform extStylesEnabled parStyle@(getStyleName -> styleName)
+  | (styleName `elem` divsToKeep) || (styleName `elem` listParagraphStyles) =
+      divWith ("", [normalizeToClassName styleName], [])
+  | otherwise =
+      (if extStylesEnabled then divWith (extraAttr parStyle) else id)
+      . (if isBlockQuote parStyle then blockQuote else id)
+
+-- The relative indent is the indentation minus the indentation of the parent style.
+-- This tells us whether this paragraph in particular was indented more and thus
+-- should be considered a block quote.
+relativeIndent :: ParagraphStyle -> Integer
+relativeIndent pPr =
+  let pStyleLeft = fromMaybe 0 $ pStyleIndentation pPr >>= leftParIndent
+      pStyleHang = fromMaybe 0 $ pStyleIndentation pPr >>= hangingParIndent
+      left = fromMaybe pStyleLeft $ indentation pPr >>= leftParIndent
+      hang = fromMaybe pStyleHang $ indentation pPr >>= hangingParIndent
+  in (left - hang) - (pStyleLeft - pStyleHang)
 
 normalizeToClassName :: (FromStyleName a) => a -> T.Text
 normalizeToClassName = T.map go . fromStyleName
@@ -578,7 +584,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
       local (\s -> s{ docxInBidi = True })
         (bodyPartToBlocks (Paragraph pPr' parparts))
   | isCodeDiv pPr = do
-      transform <- parStyleToTransform pPr
+      transform <- paragraphStyleToTransform pPr
       return $
         transform $
         codeBlock $
@@ -605,7 +611,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
                           else prevParaIls <> space) <> ils'
                   handleInsertion = do
                     modify $ \s -> s {docxPrevPara = mempty}
-                    transform <- parStyleToTransform pPr'
+                    transform <- paragraphStyleToTransform pPr'
                     return $ transform $ paraOrPlain ils''
               opts <- asks docxOptions
               case (pChange pPr', readerTrackChanges opts) of
@@ -620,7 +626,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
                    , AllChanges) -> do
                       let attr = ("", ["paragraph-insertion"], addAuthorAndDate cAuthor cDate)
                           insertMark = spanWith attr mempty
-                      transform <- parStyleToTransform pPr'
+                      transform <- paragraphStyleToTransform pPr'
                       return $ transform $
                         paraOrPlain $ ils'' <> insertMark
                   (Just (TrackedChange Deletion _), AcceptChanges) -> do
@@ -632,7 +638,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
                    , AllChanges) -> do
                       let attr = ("", ["paragraph-deletion"], addAuthorAndDate cAuthor cDate)
                           insertMark = spanWith attr mempty
-                      transform <- parStyleToTransform pPr'
+                      transform <- paragraphStyleToTransform pPr'
                       return $ transform $
                         paraOrPlain $ ils'' <> insertMark
                   _ -> handleInsertion
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
index 442bc3466..5f090b6be 100644
--- a/src/Text/Pandoc/Readers/Docx/Fields.hs
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -21,8 +21,11 @@ import Text.Parsec
 import Text.Parsec.Text (Parser)
 
 type URL = T.Text
+type Anchor = T.Text
 
 data FieldInfo = HyperlinkField URL
+                -- The boolean indicates whether the field is a hyperlink.
+               | PagerefField Anchor Bool
                | UnknownField
                deriving (Show)
 
@@ -33,6 +36,8 @@ fieldInfo :: Parser FieldInfo
 fieldInfo =
   try (HyperlinkField <$> hyperlink)
   <|>
+  try ((uncurry PagerefField) <$> pageref) 
+  <|>
   return UnknownField
 
 escapedQuote :: Parser T.Text
@@ -72,3 +77,23 @@ hyperlink = do
               ("\\l", s) : _ -> farg <> "#" <> s
               _              -> farg
   return url
+
+-- See §17.16.5.45
+pagerefSwitch :: Parser (T.Text, T.Text)
+pagerefSwitch = do
+  sw <- string "\\h"
+  spaces
+  farg <- fieldArgument
+  return (T.pack sw, farg)
+
+pageref :: Parser (Anchor, Bool)
+pageref = do
+  many space
+  string "PAGEREF"
+  spaces
+  farg <- fieldArgument
+  switches <- spaces *> many pagerefSwitch
+  let isLink = case switches of
+              ("\\h", _) : _ -> True 
+              _              -> False
+  return (farg, isLink)
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index dbb16a821..87a3aebef 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , archiveToDocxWithWarnings
                                       , getStyleNames
                                       , pHeading
+                                      , pStyleIndentation
                                       , constructBogusParStyleData
                                       , leftBiasedMergeRunStyle
                                       , rowsToRowspans
@@ -92,14 +93,13 @@ data ReaderEnv = ReaderEnv { envNotes         :: Notes
                deriving Show
 
 data ReaderState = ReaderState { stateWarnings :: [T.Text]
-                               , stateFldCharState :: FldCharState
+                               , stateFldCharState :: [FldCharState]
                                }
                  deriving Show
 
 data FldCharState = FldCharOpen
                   | FldCharFieldInfo FieldInfo
-                  | FldCharContent FieldInfo [Run]
-                  | FldCharClosed
+                  | FldCharContent FieldInfo [ParPart]
                   deriving (Show)
 
 data DocxError = DocxError
@@ -194,11 +194,6 @@ data Notes = Notes NameSpaces
 data Comments = Comments NameSpaces (M.Map T.Text Element)
               deriving Show
 
-data ParIndentation = ParIndentation { leftParIndent    :: Maybe Integer
-                                     , rightParIndent   :: Maybe Integer
-                                     , hangingParIndent :: Maybe Integer}
-                      deriving Show
-
 data ChangeType = Insertion | Deletion
                 deriving Show
 
@@ -318,12 +313,13 @@ data ParPart = PlainRun Run
              | CommentStart CommentId Author (Maybe CommentDate) [BodyPart]
              | CommentEnd CommentId
              | BookMark BookMarkId Anchor
-             | InternalHyperLink Anchor [Run]
-             | ExternalHyperLink URL [Run]
+             | InternalHyperLink Anchor [ParPart]
+             | ExternalHyperLink URL [ParPart]
              | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
              | Chart                                              -- placeholder for now
+             | Diagram                                            -- placeholder for now
              | PlainOMath [Exp]
-             | Field FieldInfo [Run]
+             | Field FieldInfo [ParPart]
              | NullParPart      -- when we need to return nothing, but
                                 -- not because of an error.
              deriving Show
@@ -333,6 +329,7 @@ data Run = Run RunStyle [RunElem]
          | Endnote [BodyPart]
          | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
          | InlineChart          -- placeholder
+         | InlineDiagram        -- placeholder
            deriving Show
 
 data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen
@@ -375,7 +372,7 @@ archiveToDocxWithWarnings archive = do
                        , envDocXmlPath = docXmlPath
                        }
       rState = ReaderState { stateWarnings = []
-                           , stateFldCharState = FldCharClosed
+                           , stateFldCharState = []
                            }
       (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
   case eitherDoc of
@@ -437,6 +434,7 @@ getStyleNames = fmap getStyleName
 constructBogusParStyleData :: ParaStyleName -> ParStyle
 constructBogusParStyleData stName = ParStyle
   { headingLev = Nothing
+  , indent = Nothing
   , numInfo = Nothing
   , psParentStyle = Nothing
   , pStyleName = stName
@@ -507,9 +505,7 @@ archiveToRelationships archive docXmlPath =
 
 filePathIsMedia :: FilePath -> Bool
 filePathIsMedia fp =
-  let (dir, _) = splitFileName fp
-  in
-   (dir == "word/media/")
+  "media" `elem` splitDirectories (takeDirectory fp)
 
 lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
 lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
@@ -673,20 +669,6 @@ elemToCell ns element | isElem ns "w" "tc" element =
     return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents
 elemToCell _ _ = throwError WrongElem
 
-elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
-elemToParIndentation ns element | isElem ns "w" "ind" element =
- Just ParIndentation {
-    leftParIndent =
-       findAttrByName ns "w" "left" element >>=
-       stringToInteger
-    , rightParIndent =
-      findAttrByName ns "w" "right" element >>=
-      stringToInteger
-    , hangingParIndent =
-      findAttrByName ns "w" "hanging" element >>=
-      stringToInteger }
-elemToParIndentation _ _ = Nothing
-
 testBitMask :: Text -> Int -> Bool
 testBitMask bitMaskS n =
   case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of
@@ -699,6 +681,9 @@ pHeading = getParStyleField headingLev . pStyle
 pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text)
 pNumInfo = getParStyleField numInfo . pStyle
 
+pStyleIndentation :: ParagraphStyle -> Maybe ParIndentation
+pStyleIndentation style = (getParStyleField indent . pStyle) style
+
 elemToBodyPart :: NameSpaces -> Element -> D BodyPart
 elemToBodyPart ns element
   | isElem ns "w" "p" element
@@ -715,28 +700,31 @@ elemToBodyPart ns element
 elemToBodyPart ns element
   | isElem ns "w" "p" element = do
       parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
-      parparts <- mapD (elemToParPart ns) (elChildren element)
+      parparts' <- mapD (elemToParPart ns) (elChildren element)
+      fldCharState <- gets stateFldCharState
+      modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState}
       -- Word uses list enumeration for numbered headings, so we only
       -- want to infer a list from the styles if it is NOT a heading.
-      case pHeading parstyle of
-        Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
-                    levelInfo <- lookupLevel numId lvl <$> asks envNumbering
-                    return $ ListItem parstyle numId lvl levelInfo parparts
-        _ -> let
-          hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
-
-          hasSimpleTableField = fromMaybe False $ do
-            fldSimple <- findChildByName ns "w" "fldSimple" element
-            instr <- findAttrByName ns "w" "instr" fldSimple
-            pure ("Table" `elem` T.words instr)
-
-          hasComplexTableField = fromMaybe False $ do
-            instrText <- findElementByName ns "w" "instrText" element
-            pure ("Table" `elem` T.words (strContent instrText))
-
-          in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
-             then return $ TblCaption parstyle parparts
-             else return $ Paragraph parstyle parparts
+      let parparts = parparts' ++ (openFldCharsToParParts fldCharState) in
+        case pHeading parstyle of
+          Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
+                      levelInfo <- lookupLevel numId lvl <$> asks envNumbering
+                      return $ ListItem parstyle numId lvl levelInfo parparts
+          _ -> let
+            hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
+
+            hasSimpleTableField = fromMaybe False $ do
+              fldSimple <- findChildByName ns "w" "fldSimple" element
+              instr <- findAttrByName ns "w" "instr" fldSimple
+              pure ("Table" `elem` T.words instr)
+
+            hasComplexTableField = fromMaybe False $ do
+              instrText <- findElementByName ns "w" "instrText" element
+              pure ("Table" `elem` T.words (strContent instrText))
+
+            in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
+              then return $ TblCaption parstyle parparts
+              else return $ Paragraph parstyle parparts
 
 elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
@@ -768,14 +756,30 @@ lookupRelationship docLocation relid rels =
   where
     pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
 
+openFldCharsToParParts :: [FldCharState] -> [ParPart]
+openFldCharsToParParts [] = []
+openFldCharsToParParts (FldCharContent info children : ancestors) = case openFldCharsToParParts ancestors of
+  Field parentInfo siblings : _ -> [Field parentInfo $ siblings ++ [Field info $ reverse children]]
+  _ -> [Field info $ reverse children]
+openFldCharsToParParts (_ : ancestors) = openFldCharsToParParts ancestors
+
+emptyFldCharContents :: [FldCharState] -> [FldCharState]
+emptyFldCharContents = map
+  (\x -> case x of
+    FldCharContent info _ -> FldCharContent info []
+    _ -> x)
+
 expandDrawingId :: T.Text -> D (FilePath, B.ByteString)
 expandDrawingId s = do
   location <- asks envLocation
   target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
   case target of
     Just filepath -> do
-      bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
-      case bytes of
+      media <- asks envMedia
+      let filepath' = case filepath of
+                        ('/':rest) -> rest
+                        _ -> "word/" ++ filepath
+      case lookup filepath' media of
         Just bs -> return (filepath, bs)
         Nothing -> throwError DocxError
     Nothing -> throwError DocxError
@@ -789,44 +793,6 @@ getTitleAndAlt ns element =
   in (title, alt)
 
 elemToParPart :: NameSpaces -> Element -> D ParPart
-elemToParPart ns element
-  | isElem ns "w" "r" element
-  , Just drawingElem <- findChildByName ns "w" "drawing" element
-  , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
-  , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
-  = let (title, alt) = getTitleAndAlt ns drawingElem
-        a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
-        drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrByName ns "r" "embed"
-    in
-     case drawing of
-       Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
-       Nothing -> throwError WrongElem
--- The two cases below are an attempt to deal with images in deprecated vml format.
--- Todo: check out title and attr for deprecated format.
-elemToParPart ns element
-  | isElem ns "w" "r" element
-  , Just _ <- findChildByName ns "w" "pict" element =
-    let drawing = findElement (elemName ns "v" "imagedata") element
-                  >>= findAttrByName ns "r" "id"
-    in
-     case drawing of
-       Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
-       Nothing -> throwError WrongElem
-elemToParPart ns element
-  | isElem ns "w" "r" element
-  , Just objectElem <- findChildByName ns "w" "object" element
-  , Just shapeElem <- findChildByName ns "v" "shape" objectElem
-  , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
-  , Just drawingId <- findAttrByName ns "r" "id" imagedataElem
-  = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
--- Chart
-elemToParPart ns element
-  | isElem ns "w" "r" element
-  , Just drawingElem <- findChildByName ns "w" "drawing" element
-  , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
-  , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
-  = return Chart
 {-
 The next one is a bit complicated. fldChar fields work by first
 having a <w:fldChar fldCharType="begin"> in a run, then a run with
@@ -858,8 +824,13 @@ example (omissions and my comments in brackets):
 So we do this in a number of steps. If we encounter the fldchar begin
 tag, we start open a fldchar state variable (see state above). We add
 the instrtext to it as FieldInfo. Then we close that and start adding
-the runs when we get to separate. Then when we get to end, we produce
-the Field type with appropriate FieldInfo and Runs.
+the children when we get to separate. Then when we get to end, we produce
+the Field type with appropriate FieldInfo and ParParts.
+
+Since there can be nested fields, the fldchar state needs to be a stack,
+so we can have multiple fldchars open at the same time. When a fldchar is
+closed, we either add the resulting field to its parent or we return it if
+there is no parent.
 -}
 elemToParPart ns element
   | isElem ns "w" "r" element
@@ -867,78 +838,142 @@ elemToParPart ns element
   , Just fldCharType <- findAttrByName ns "w" "fldCharType" fldChar = do
       fldCharState <- gets stateFldCharState
       case fldCharState of
-        FldCharClosed | fldCharType == "begin" -> do
-          modify $ \st -> st {stateFldCharState = FldCharOpen}
+        _ | fldCharType == "begin" -> do
+          modify $ \st -> st {stateFldCharState = FldCharOpen : fldCharState}
+          return NullParPart
+        FldCharFieldInfo info : ancestors | fldCharType == "separate" -> do
+          modify $ \st -> st {stateFldCharState = FldCharContent info [] : ancestors}
           return NullParPart
-        FldCharFieldInfo info | fldCharType == "separate" -> do
-          modify $ \st -> st {stateFldCharState = FldCharContent info []}
+        -- Some fields have no content, since Pandoc doesn't understand any of those fields, we can just close it.
+        FldCharFieldInfo _ : ancestors | fldCharType == "end" -> do
+          modify $ \st -> st {stateFldCharState = ancestors}
           return NullParPart
-        FldCharContent info runs | fldCharType == "end" -> do
-          modify $ \st -> st {stateFldCharState = FldCharClosed}
-          return $ Field info $ reverse runs
+        [FldCharContent info children] | fldCharType == "end" -> do
+          modify $ \st -> st {stateFldCharState = []}
+          return $ Field info $ reverse children
+        FldCharContent info children : FldCharContent parentInfo siblings : ancestors | fldCharType == "end" ->
+          let parent = FldCharContent parentInfo $ (Field info (reverse children)) : siblings in do
+            modify $ \st -> st {stateFldCharState = parent : ancestors}
+            return NullParPart
         _ -> throwError WrongElem
 elemToParPart ns element
   | isElem ns "w" "r" element
   , Just instrText <- findChildByName ns "w" "instrText" element = do
       fldCharState <- gets stateFldCharState
       case fldCharState of
-        FldCharOpen -> do
+        FldCharOpen : ancestors -> do
           info <- eitherToD $ parseFieldInfo $ strContent instrText
-          modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
+          modify $ \st -> st {stateFldCharState = FldCharFieldInfo info : ancestors}
           return NullParPart
         _ -> return NullParPart
-elemToParPart ns element
+{-
+There is an open fldchar, so we calculate the element and add it to the
+children. For this we need to first change the fldchar state to an empty
+stack to avoid descendants of children simply being added to the state instead
+of to their direct parent element. This would happen in the case of a
+w:hyperlink element for example.
+-}
+elemToParPart ns element = do
+  fldCharState <- gets stateFldCharState
+  case fldCharState of
+    FldCharContent info children : ancestors -> do
+      modify $ \st -> st {stateFldCharState = []}
+      parPart <- elemToParPart' ns element `catchError` \_ -> return NullParPart
+      modify $ \st -> st{stateFldCharState = FldCharContent info (parPart : children) : ancestors}
+      return NullParPart
+    _ -> elemToParPart' ns element
+
+elemToParPart' :: NameSpaces -> Element -> D ParPart
+elemToParPart' ns element
+  | isElem ns "w" "r" element
+  , Just drawingElem <- findChildByName ns "w" "drawing" element
+  , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
+  , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
+  = let (title, alt) = getTitleAndAlt ns drawingElem
+        a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+        drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
+                  >>= findAttrByName ns "r" "embed"
+    in
+     case drawing of
+       Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
+       Nothing -> throwError WrongElem
+-- The two cases below are an attempt to deal with images in deprecated vml format.
+-- Todo: check out title and attr for deprecated format.
+elemToParPart' ns element
+  | isElem ns "w" "r" element
+  , Just _ <- findChildByName ns "w" "pict" element =
+    let drawing = findElement (elemName ns "v" "imagedata") element
+                  >>= findAttrByName ns "r" "id"
+    in
+     case drawing of
+       Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
+       Nothing -> throwError WrongElem
+elemToParPart' ns element
+  | isElem ns "w" "r" element
+  , Just objectElem <- findChildByName ns "w" "object" element
+  , Just shapeElem <- findChildByName ns "v" "shape" objectElem
+  , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
+  , Just drawingId <- findAttrByName ns "r" "id" imagedataElem
+  = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
+-- Diagram
+elemToParPart' ns element
+  | isElem ns "w" "r" element
+  , Just drawingElem <- findChildByName ns "w" "drawing" element
+  , d_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram"
+  , Just _ <- findElement (QName "relIds" (Just d_ns) (Just "dgm")) drawingElem
+  = return Diagram
+-- Chart
+elemToParPart' ns element
+  | isElem ns "w" "r" element
+  , Just drawingElem <- findChildByName ns "w" "drawing" element
+  , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
+  , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
+  = return Chart
+elemToParPart' ns element
   | isElem ns "w" "r" element = do
     run <- elemToRun ns element
-    -- we check to see if we have an open FldChar in state that we're
-    -- recording.
-    fldCharState <- gets stateFldCharState
-    case fldCharState of
-      FldCharContent info runs -> do
-        modify $ \st -> st{stateFldCharState = FldCharContent info (run : runs)}
-        return NullParPart
-      _ -> return $ PlainRun run
-elemToParPart ns element
+    return $ PlainRun run
+elemToParPart' ns element
   | Just change <- getTrackedChange ns element = do
       runs <- mapD (elemToRun ns) (elChildren element)
       return $ ChangedRuns change runs
-elemToParPart ns element
+elemToParPart' ns element
   | isElem ns "w" "bookmarkStart" element
   , Just bmId <- findAttrByName ns "w" "id" element
   , Just bmName <- findAttrByName ns "w" "name" element =
     return $ BookMark bmId bmName
-elemToParPart ns element
+elemToParPart' ns element
   | isElem ns "w" "hyperlink" element
   , Just relId <- findAttrByName ns "r" "id" element = do
     location <- asks envLocation
-    runs <- mapD (elemToRun ns) (elChildren element)
+    children <- mapD (elemToParPart ns) (elChildren element)
     rels <- asks envRelationships
     case lookupRelationship location relId rels of
       Just target ->
          case findAttrByName ns "w" "anchor" element of
-             Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
-             Nothing -> return $ ExternalHyperLink target runs
-      Nothing     -> return $ ExternalHyperLink "" runs
-elemToParPart ns element
+             Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) children
+             Nothing -> return $ ExternalHyperLink target children
+      Nothing     -> return $ ExternalHyperLink "" children
+elemToParPart' ns element
   | isElem ns "w" "hyperlink" element
   , Just anchor <- findAttrByName ns "w" "anchor" element = do
-    runs <- mapD (elemToRun ns) (elChildren element)
-    return $ InternalHyperLink anchor runs
-elemToParPart ns element
+    children <- mapD (elemToParPart ns) (elChildren element)
+    return $ InternalHyperLink anchor children
+elemToParPart' ns element
   | isElem ns "w" "commentRangeStart" element
   , Just cmtId <- findAttrByName ns "w" "id" element = do
       (Comments _ commentMap) <- asks envComments
       case M.lookup cmtId commentMap of
         Just cmtElem -> elemToCommentStart ns cmtElem
         Nothing      -> throwError WrongElem
-elemToParPart ns element
+elemToParPart' ns element
   | isElem ns "w" "commentRangeEnd" element
   , Just cmtId <- findAttrByName ns "w" "id" element =
     return $ CommentEnd cmtId
-elemToParPart ns element
+elemToParPart' ns element
   | isElem ns "m" "oMath" element =
     fmap PlainOMath (eitherToD $ readOMML $ showElement element)
-elemToParPart _ _ = throwError WrongElem
+elemToParPart' _ _ = throwError WrongElem
 
 elemToCommentStart :: NameSpaces -> Element -> D ParPart
 elemToCommentStart ns element
@@ -987,6 +1022,11 @@ childElemToRun ns element
   , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) element
   = return InlineChart
 childElemToRun ns element
+  | isElem ns "w" "drawing" element
+  , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram"
+  , Just _ <- findElement (QName "relIds" (Just c_ns) (Just "dgm")) element
+  = return InlineDiagram
+childElemToRun ns element
   | isElem ns "w" "footnoteReference" element
   , Just fnId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
@@ -1071,8 +1111,7 @@ elemToParagraphStyle ns element sty
     in ParagraphStyle
       {pStyle = mapMaybe (`M.lookup` sty) style
       , indentation =
-          findChildByName ns "w" "ind" pPr >>=
-          elemToParIndentation ns
+          getIndentation ns element
       , dropCap =
           case
             findChildByName ns "w" "framePr" pPr >>=
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index 0d7271d6a..df942579a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Docx.Parse.Styles (
   , CharStyle
   , ParaStyleId(..)
   , ParStyle(..)
+  , ParIndentation(..)
   , RunStyle(..)
   , HasStyleName
   , StyleName
@@ -37,6 +38,7 @@ module Text.Pandoc.Readers.Docx.Parse.Styles (
   , fromStyleName
   , fromStyleId
   , stringToInteger
+  , getIndentation
   , getNumInfo
   , elemToRunStyle
   , defaultRunStyle
@@ -115,7 +117,13 @@ data RunStyle = RunStyle { isBold       :: Maybe Bool
                          }
                 deriving Show
 
+data ParIndentation = ParIndentation { leftParIndent    :: Maybe Integer
+                                     , rightParIndent   :: Maybe Integer
+                                     , hangingParIndent :: Maybe Integer}
+                      deriving Show
+
 data ParStyle = ParStyle { headingLev    :: Maybe (ParaStyleName, Int)
+                         , indent        :: Maybe ParIndentation
                          , numInfo       :: Maybe (T.Text, T.Text)
                          , psParentStyle :: Maybe ParStyle
                          , pStyleName    :: ParaStyleName
@@ -290,6 +298,22 @@ getHeaderLevel ns element
   , n > 0 = Just (styleName, fromInteger n)
 getHeaderLevel _ _ = Nothing
 
+getIndentation :: NameSpaces -> Element -> Maybe ParIndentation
+getIndentation ns el = do
+  indElement <- findChildByName ns "w" "pPr" el >>=
+                findChildByName ns "w" "ind"
+  return $ ParIndentation
+    {
+      leftParIndent = findAttrByName ns "w" "left" indElement <|>
+                      findAttrByName ns "w" "start" indElement >>=
+                      stringToInteger
+    , rightParIndent = findAttrByName ns "w" "right" indElement <|>
+                       findAttrByName ns "w" "end" indElement >>=
+                       stringToInteger
+    , hangingParIndent = findAttrByName ns "w" "hanging" indElement >>=
+                         stringToInteger
+    }
+
 getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
 getElementStyleName ns el = coerce <$>
   ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
@@ -314,6 +338,7 @@ elemToParStyleData ns element parentStyle
   = Just $ ParStyle
       {
         headingLev = getHeaderLevel ns element
+      , indent = getIndentation ns element
       , numInfo = getNumInfo ns element
       , psParentStyle = parentStyle
       , pStyleName = styleName
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fdf4f28e0..8aa2646b2 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -551,7 +551,7 @@ pFigure = try $ do
   let caption = fromMaybe mempty mbcap
   case B.toList <$> mbimg of
        Just [Image attr _ (url, tit)] ->
-         return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption
+         return $ B.simpleFigureWith attr caption url tit
        _ -> mzero
 
 pCodeBlock :: PandocMonad m => TagParser m Blocks
@@ -643,7 +643,7 @@ pQ = do
   case lookup "cite" attrs of
     Just url -> do
       let uid = fromMaybe mempty $
-                   lookup "name" attrs <> lookup "id" attrs
+                   lookup "name" attrs <|> lookup "id" attrs
       let cls = maybe [] T.words $ lookup "class" attrs
       url' <- canonicalizeUrl url
       makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')])
@@ -705,20 +705,18 @@ pLineBreak = do
 
 pLink :: PandocMonad m => TagParser m Inlines
 pLink = try $ do
-  tag <- pSatisfy $ tagOpenLit "a" (const True)
+  tag@(TagOpen _ attr') <- pSatisfy $ tagOpenLit "a" (const True)
   let title = fromAttrib "title" tag
-  -- take id from id attribute if present, otherwise name
-  let uid = fromMaybe (fromAttrib "name" tag) $
-               maybeFromAttrib "id" tag
-  let cls = T.words $ fromAttrib "class" tag
+  let attr = toAttr $ filter (\(k,_) -> k /= "title" && k /= "href") attr'
   lab <- mconcat <$> manyTill inline (pCloses "a")
   -- check for href; if href, then a link, otherwise a span
   case maybeFromAttrib "href" tag of
        Nothing   ->
-         return $ extractSpaces (B.spanWith (uid, cls, [])) lab
+         return $ extractSpaces (B.spanWith attr) lab
        Just url' -> do
          url <- canonicalizeUrl url'
-         return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab
+         return $ extractSpaces
+                   (B.linkWith attr (escapeURI url) title) lab
 
 pImage :: PandocMonad m => TagParser m Inlines
 pImage = do
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index bd8d7c96c..a8cdf1de2 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -30,11 +30,11 @@ module Text.Pandoc.Readers.HTML.Parsing
   )
 where
 
-import Control.Monad (guard, void, mzero)
+import Control.Monad (void, mzero, mplus)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import Text.HTML.TagSoup
-  ( Attribute, Tag (..), isTagText, isTagPosition, isTagOpen, isTagClose, (~==) )
+  ( Attribute, Tag (..), isTagPosition, isTagOpen, isTagClose, (~==) )
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
 import Text.Pandoc.Definition (Attr)
 import Text.Pandoc.Parsing
@@ -118,9 +118,11 @@ pCloses tagtype = try $ do
        _ -> mzero
 
 pBlank :: PandocMonad m => TagParser m ()
-pBlank = try $ do
-  (TagText str) <- pSatisfy isTagText
-  guard $ T.all isSpace str
+pBlank = void $ pSatisfy isBlank
+ where
+  isBlank (TagText t) = T.all isSpace t
+  isBlank (TagComment _) = True
+  isBlank _ = False
 
 pLocation :: PandocMonad m => TagParser m ()
 pLocation = do
@@ -218,9 +220,10 @@ maybeFromAttrib _ _ = Nothing
 
 mkAttr :: [(Text, Text)] -> Attr
 mkAttr attr = (attribsId, attribsClasses, attribsKV)
-  where attribsId = fromMaybe "" $ lookup "id" attr
+  where attribsId = fromMaybe "" $ lookup "id" attr `mplus` lookup "name" attr
         attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes
-        attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+        attribsKV = filter (\(k,_) -> k /= "class" && k /= "id" && k /= "name")
+                           attr
         epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr
 
 toAttr :: [(Text, Text)] -> Attr
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 6e62e12f5..b23a2abc8 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -16,7 +16,7 @@ HTML table parser.
 module Text.Pandoc.Readers.HTML.Table (pTable) where
 
 import Control.Applicative ((<|>))
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, isJust)
 import Data.Either (lefts, rights)
 import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
@@ -27,12 +27,13 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
 import Text.Pandoc.Parsing
   ( eof, lookAhead, many, many1, manyTill, option, optional
-  , optionMaybe, skipMany, try)
+  , optionMaybe, skipMany, try )
 import Text.Pandoc.Readers.HTML.Parsing
 import Text.Pandoc.Readers.HTML.Types (TagParser)
 import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
+import Control.Monad (guard)
 
 -- | Parses a @<col>@ element, returning the column's width.
 -- An Either value is used:  Left i means a "relative length" with
@@ -183,11 +184,13 @@ pTableBody :: PandocMonad m
            -> TagParser m TableBody
 pTableBody block = try $ do
   skipMany pBlank
-  attribs <- option [] $ getAttribs <$> pSatisfy (matchTagOpen "tbody" [])
-             <* skipMany pBlank
+  mbattribs <- option Nothing $ Just . getAttribs <$>
+                 pSatisfy (matchTagOpen "tbody" []) <* skipMany pBlank
   bodyheads <- many (pHeaderRow block)
-  (rowheads, rows) <- unzip <$> many1 (pRow block <* skipMany pBlank)
+  (rowheads, rows) <- unzip <$> many (pRow block <* skipMany pBlank)
   optional $ pSatisfy (matchTagClose "tbody")
+  guard $ isJust mbattribs || not (null bodyheads && null rows)
+  let attribs = fromMaybe [] mbattribs
   return $ TableBody (toAttr attribs) (foldr max 0 rowheads) bodyheads rows
   where
     getAttribs (TagOpen _ attribs) = attribs
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index cd1093109..8e742a888 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -19,6 +19,7 @@ import Data.Char (isDigit)
 import Data.Maybe (fromMaybe)
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Text.Pandoc.Options
+import Control.Applicative ((<|>))
 import qualified Data.Scientific as Scientific
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Logging
@@ -76,7 +77,10 @@ cellToBlocks opts lang c = do
   let Source ts = cellSource c
   let source = mconcat ts
   let kvs = jsonMetaToPairs (cellMetadata c)
-  let attachments = maybe mempty M.toList $ cellAttachments c
+  let attachments = case cellAttachments c of
+                      Nothing -> mempty
+                      Just (MimeAttachments m) -> M.toList m
+  let ident = fromMaybe mempty $ cellId c
   mapM_ addAttachment attachments
   case cellType c of
     Ipynb.Markdown -> do
@@ -85,29 +89,34 @@ cellToBlocks opts lang c = do
                else do
                  Pandoc _ bs <- walk fixImage <$> readMarkdown opts source
                  return bs
-      return $ B.divWith ("",["cell","markdown"],kvs)
+      return $ B.divWith (ident,["cell","markdown"],kvs)
              $ B.fromList bs
     Ipynb.Heading lev -> do
       Pandoc _ bs <- readMarkdown opts
         (T.replicate lev "#" <> " " <> source)
-      return $ B.divWith ("",["cell","markdown"],kvs)
+      return $ B.divWith (ident,["cell","markdown"],kvs)
              $ B.fromList bs
     Ipynb.Raw -> do
       -- we use ipynb to indicate no format given (a wildcard in nbformat)
-      let format = fromMaybe "ipynb" $ lookup "format" kvs
+      let format = fromMaybe "ipynb" $ lookup "raw_mimetype" kvs <|> lookup "format" kvs
       let format' =
             case format of
-              "text/html"       -> "html"
-              "text/latex"      -> "latex"
-              "application/pdf" -> "latex"
-              "text/markdown"   -> "markdown"
-              "text/x-rsrt"     -> "rst"
-              _                 -> format
-      return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source
+              "text/html"             -> "html"
+              "slides"                -> "html"
+              "text/latex"            -> "latex"
+              "application/pdf"       -> "latex"
+              "pdf"                   -> "latex"
+              "text/markdown"         -> "markdown"
+              "text/x-rst"            -> "rst"
+              "text/restructuredtext" -> "rst"
+              "text/asciidoc"         -> "asciidoc"
+              _                       -> format
+      return $ B.divWith (ident,["cell","raw"],kvs)
+             $ B.rawBlock format' source
     Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do
       outputBlocks <- mconcat <$> mapM outputToBlock outputs
       let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec
-      return $ B.divWith ("",["cell","code"],kvs') $
+      return $ B.divWith (ident,["cell","code"],kvs') $
         B.codeBlockWith ("",[lang],[]) source
         <> outputBlocks
 
@@ -156,7 +165,7 @@ outputToBlock Err{ errName = ename,
 -- the output format.
 handleData :: PandocMonad m
            => JSONMeta -> MimeBundle -> m B.Blocks
-handleData metadata (MimeBundle mb) =
+handleData (JSONMeta metadata) (MimeBundle mb) =
   mconcat <$> mapM dataBlock (M.toList mb)
 
   where
@@ -192,6 +201,9 @@ handleData metadata (MimeBundle mb) =
     dataBlock ("text/latex", TextualData t)
       = return $ B.rawBlock "latex" t
 
+    dataBlock ("text/markdown", TextualData t)
+      = return $ B.rawBlock "markdown" t
+
     dataBlock ("text/plain", TextualData t) =
       return $ B.codeBlock t
 
@@ -201,7 +213,7 @@ handleData metadata (MimeBundle mb) =
     dataBlock _ = return mempty
 
 jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue
-jsonMetaToMeta = M.map valueToMetaValue
+jsonMetaToMeta (JSONMeta m) = M.map valueToMetaValue m
   where
     valueToMetaValue :: Value -> MetaValue
     valueToMetaValue x@Object{} =
@@ -220,11 +232,11 @@ jsonMetaToMeta = M.map valueToMetaValue
     valueToMetaValue Aeson.Null = MetaString ""
 
 jsonMetaToPairs :: JSONMeta -> [(Text, Text)]
-jsonMetaToPairs = M.toList . M.map
+jsonMetaToPairs (JSONMeta m) = M.toList . M.map
   (\case
       String t
         | not (T.all isDigit t)
         , t /= "true"
         , t /= "false"
                  -> t
-      x          -> T.pack $ UTF8.toStringLazy $ Aeson.encode x)
+      x          -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) $ m
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 9cdbf1611..37e0d13bc 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -35,6 +35,7 @@ import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
 import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import qualified Data.Foldable as DF
 
 type JATS m = StateT JATSState m
 
@@ -226,9 +227,19 @@ parseBlock (Elem e) =
                                           mapM getInlines
                                           (filterChildren (const True) t)
                                         Nothing -> return mempty
-                         img <- getGraphic (Just (capt, attrValue "id" e)) g
-                         return $ para img
+
+                         let figAttributes = DF.toList $
+                              ("alt", ) . strContent <$>
+                              filterChild (named "alt-text") e
+
+                         return $ simpleFigureWith
+                          (attrValue "id" e, [], figAttributes)
+                          capt
+                          (attrValue "href" g)
+                          (attrValue "title" g)
+
                   _   -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e
+
          parseTable = do
                       let isCaption x = named "title" x || named "caption" x
                       capt <- case filterChild isCaption e of
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 27c018e73..20a2db76b 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -390,8 +390,8 @@ inlineCommands = M.unions
                                unescapeURL .
                                removeDoubleQuotes $ untokenize src)
     -- hyperref
-    , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
-                    bracedUrl)
+    , ("url", (\url -> linkWith ("",["uri"],[]) url "" (str url))
+                        . unescapeURL . untokenize <$> bracedUrl)
     , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
     , ("href", do url <- bracedUrl
                   sp
@@ -893,7 +893,7 @@ blockCommands = M.fromList
          addMeta "bibliography" . splitBibs . untokenize))
    , ("addbibresource", mempty <$ (skipopts *> braced >>=
          addMeta "bibliography" . splitBibs . untokenize))
-   , ("endinput", mempty <$ skipMany anyTok)
+   , ("endinput", mempty <$ skipSameFileToks)
    -- includes
    , ("lstinputlisting", inputListing)
    , ("inputminted", inputMinted)
@@ -924,6 +924,10 @@ blockCommands = M.fromList
    , ("epigraph", epigraph)
    ]
 
+skipSameFileToks :: PandocMonad m => LP m ()
+skipSameFileToks = do
+    pos <- getPosition
+    skipMany $ infile (sourceName pos)
 
 environments :: PandocMonad m => M.Map Text (LP m Blocks)
 environments = M.union (tableEnvironments blocks inline) $
@@ -970,6 +974,7 @@ environments = M.union (tableEnvironments blocks inline) $
    , ("toggletrue", braced >>= setToggle True)
    , ("togglefalse", braced >>= setToggle False)
    , ("iftoggle", try $ ifToggle >> block)
+   , ("CSLReferences", braced >> braced >> env "CSLReferences" blocks)
    ]
 
 filecontents :: PandocMonad m => LP m Blocks
@@ -1109,24 +1114,28 @@ figure = try $ do
 
 addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
 addImageCaption = walkM go
-  where go (Image attr@(_, cls, kvs) alt (src,tit))
+  where go p@(Para [Image attr@(_, cls, kvs) _ (src, tit)])
             | not ("fig:" `T.isPrefixOf` tit) = do
           st <- getState
-          let (alt', tit') = case sCaption st of
-                               Just ils -> (toList ils, "fig:" <> tit)
-                               Nothing  -> (alt, tit)
-              attr' = case sLastLabel st of
-                        Just lab -> (lab, cls, kvs)
-                        Nothing  -> attr
-          case attr' of
-               ("", _, _)    -> return ()
-               (ident, _, _) -> do
-                  num <- getNextNumber sLastFigureNum
-                  setState
-                    st{ sLastFigureNum = num
-                      , sLabels = M.insert ident
-                                 [Str (renderDottedNum num)] (sLabels st) }
-          return $ Image attr' alt' (src, tit')
+          case sCaption st of
+            Nothing -> return p
+            Just figureCaption -> do
+              let mblabel = sLastLabel st
+              let attr' = case mblabel of
+                            Just lab -> (lab, cls, kvs)
+                            Nothing  -> attr
+              case attr' of
+                   ("", _, _)    -> return ()
+                   (ident, _, _) -> do
+                      num <- getNextNumber sLastFigureNum
+                      setState
+                        st{ sLastFigureNum = num
+                          , sLabels = M.insert ident
+                                     [Str (renderDottedNum num)] (sLabels st) }
+
+              return $ SimpleFigure attr'
+                       (maybe id removeLabel mblabel (B.toList figureCaption))
+                       (src, tit)
         go x = return x
 
 coloredBlock :: PandocMonad m => Text -> LP m Blocks
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 7b8bca4af..5938096fd 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1, try)
+                            option, many1)
 import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList, fromMaybe)
@@ -56,8 +56,7 @@ dolabel = do
   let refstr = untokenize v
   updateState $ \st ->
     st{ sLastLabel = Just refstr }
-  return $ spanWith (refstr,[],[("label", refstr)])
-    $ inBrackets $ str $ untokenize v
+  return $ spanWith (refstr,[],[("label", refstr)]) mempty
 
 doref :: PandocMonad m => Text -> LP m Inlines
 doref cls = do
@@ -160,8 +159,8 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
 
 accentWith :: PandocMonad m
            => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
-accentWith tok combiningAccent fallBack = try $ do
-  ils <- tok
+accentWith tok combiningAccent fallBack = do
+  ils <- option mempty tok
   case toList ils of
        (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
          -- try to normalize to the combined character:
@@ -339,6 +338,7 @@ refCommands = M.fromList
   , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
   , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
   , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
+  , ("autoref", rawInlineOr "autoref" $ doref "autoref") -- from hyperref.sty
   ]
 
 acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
index 5495a8e74..d40277eb5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -15,6 +15,8 @@ import Control.Applicative ((<|>), optional)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.List.NonEmpty as NonEmpty
+import Data.List.NonEmpty (NonEmpty(..))
 
 macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
 macroDef constructor = do
@@ -23,51 +25,91 @@ macroDef constructor = do
       guardDisabled Ext_latex_macros)
      <|> return mempty
   where commandDef = do
-          nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
+          nameMacroPairs <- newcommand <|>
+            checkGlobal (letmacro <|> edefmacro <|> defmacro <|> newif)
           guardDisabled Ext_latex_macros <|>
-           mapM_ (\(name, macro') ->
-                   updateState (\s -> s{ sMacros = M.insert name macro'
-                                          (sMacros s) })) nameMacroPairs
+            mapM_ insertMacro nameMacroPairs
         environmentDef = do
           mbenv <- newenvironment
           case mbenv of
             Nothing -> return ()
             Just (name, macro1, macro2) ->
               guardDisabled Ext_latex_macros <|>
-                do updateState $ \s -> s{ sMacros =
-                    M.insert name macro1 (sMacros s) }
-                   updateState $ \s -> s{ sMacros =
-                    M.insert ("end" <> name) macro2 (sMacros s) }
+                do insertMacro (name, macro1)
+                   insertMacro ("end" <> name, macro2)
         -- @\newenvironment{envname}[n-args][default]{begin}{end}@
         -- is equivalent to
         -- @\newcommand{\envname}[n-args][default]{begin}@
         -- @\newcommand{\endenvname}@
 
+insertMacro :: PandocMonad m => (Text, Macro) -> LP m ()
+insertMacro (name, macro'@(Macro GlobalScope _ _ _ _)) =
+  updateState $ \s ->
+     s{ sMacros = NonEmpty.map (M.insert name macro') (sMacros s) }
+insertMacro (name, macro'@(Macro GroupScope _ _ _ _)) =
+  updateState $ \s ->
+     s{ sMacros = M.insert name macro' (NonEmpty.head (sMacros s)) :|
+                      NonEmpty.tail (sMacros s) }
+
+lookupMacro :: PandocMonad m => Text -> LP m Macro
+lookupMacro name = do
+   macros :| _ <- sMacros <$> getState
+   case M.lookup name macros of
+     Just m -> return m
+     Nothing -> fail "Macro not found"
+
 letmacro :: PandocMonad m => LP m [(Text, Macro)]
 letmacro = do
   controlSeq "let"
-  (name, contents) <- withVerbatimMode $ do
+  withVerbatimMode $ do
     Tok _ (CtrlSeq name) _ <- anyControlSeq
     optional $ symbol '='
     spaces
     -- we first parse in verbatim mode, and then expand macros,
     -- because we don't want \let\foo\bar to turn into
     -- \let\foo hello if we have previously \def\bar{hello}
+    target <- anyControlSeq <|> singleChar
+    case target of
+      (Tok _ (CtrlSeq name') _) ->
+         (do m <- lookupMacro name'
+             pure [(name, m)])
+         <|> pure [(name,
+                    Macro GroupScope ExpandWhenDefined [] Nothing [target])]
+      _ -> pure [(name, Macro GroupScope ExpandWhenDefined [] Nothing [target])]
+
+checkGlobal :: PandocMonad m => LP m [(Text, Macro)] -> LP m [(Text, Macro)]
+checkGlobal p =
+  (controlSeq "global" *>
+      (map (\(n, Macro _ expand arg optarg contents) ->
+                (n, Macro GlobalScope expand arg optarg contents)) <$> p))
+   <|> p
+
+edefmacro :: PandocMonad m => LP m [(Text, Macro)]
+edefmacro = do
+  scope <- (GroupScope <$ controlSeq "edef")
+       <|> (GlobalScope <$ controlSeq "xdef")
+  (name, contents) <- withVerbatimMode $ do
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    -- we first parse in verbatim mode, and then expand macros,
+    -- because we don't want \let\foo\bar to turn into
+    -- \let\foo hello if we have previously \def\bar{hello}
     contents <- bracedOrToken
     return (name, contents)
-  contents' <- doMacros' 0 contents
-  return [(name, Macro ExpandWhenDefined [] Nothing contents')]
+  -- expand macros
+  contents' <- parseFromToks (many anyTok) contents
+  return [(name, Macro scope ExpandWhenDefined [] Nothing contents')]
 
 defmacro :: PandocMonad m => LP m [(Text, Macro)]
 defmacro = do
   -- we use withVerbatimMode, because macros are to be expanded
   -- at point of use, not point of definition
-  controlSeq "def"
+  scope <- (GroupScope <$ controlSeq "def")
+       <|> (GlobalScope <$ controlSeq "gdef")
   withVerbatimMode $ do
     Tok _ (CtrlSeq name) _ <- anyControlSeq
     argspecs <- many (argspecArg <|> argspecPattern)
     contents <- bracedOrToken
-    return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+    return [(name, Macro scope ExpandWhenUsed argspecs Nothing contents)]
 
 -- \newif\iffoo' defines:
 -- \iffoo to be \iffalse
@@ -82,16 +124,16 @@ newif = do
     -- \def\footrue{\def\iffoo\iftrue}
     -- \def\foofalse{\def\iffoo\iffalse}
     let base = T.drop 2 name
-    return [ (name, Macro ExpandWhenUsed [] Nothing
+    return [ (name, Macro GroupScope ExpandWhenUsed [] Nothing
                     [Tok pos (CtrlSeq "iffalse") "\\iffalse"])
            , (base <> "true",
-                   Macro ExpandWhenUsed [] Nothing
+                   Macro GroupScope ExpandWhenUsed [] Nothing
                    [ Tok pos (CtrlSeq "def") "\\def"
                    , Tok pos (CtrlSeq name) ("\\" <> name)
                    , Tok pos (CtrlSeq "iftrue") "\\iftrue"
                    ])
            , (base <> "false",
-                   Macro ExpandWhenUsed [] Nothing
+                   Macro GroupScope ExpandWhenUsed [] Nothing
                    [ Tok pos (CtrlSeq "def") "\\def"
                    , Tok pos (CtrlSeq name) ("\\" <> name)
                    , Tok pos (CtrlSeq "iffalse") "\\iffalse"
@@ -138,14 +180,13 @@ newcommand = do
                  : (contents' ++
                    [ Tok pos Symbol "}", Tok pos Symbol "}" ])
               _                     -> contents'
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-        Just macro
-          | mtype == "newcommand" -> do
-              report $ MacroAlreadyDefined txt pos
-              return [(name, macro)]
-          | mtype == "providecommand" -> return [(name, macro)]
-        _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
+    let macro = Macro GroupScope ExpandWhenUsed argspecs optarg contents
+    (do lookupMacro name
+        case mtype of
+          "providecommand" -> return []
+          "renewcommand" -> return [(name, macro)]
+          _ -> [] <$ report (MacroAlreadyDefined txt pos))
+      <|> pure [(name, macro)]
 
 newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
 newenvironment = do
@@ -164,17 +205,23 @@ newenvironment = do
     let argspecs = map (\i -> ArgNum i) [1..numargs]
     startcontents <- spaces >> bracedOrToken
     endcontents <- spaces >> bracedOrToken
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-         Just _
-           | mtype == "newenvironment" -> do
-               report $ MacroAlreadyDefined name pos
-               return Nothing
-           | mtype == "provideenvironment" ->
-               return Nothing
-         _ -> return $ Just (name,
-                      Macro ExpandWhenUsed argspecs optarg startcontents,
-                      Macro ExpandWhenUsed [] Nothing endcontents)
+    -- we need the environment to be in a group so macros defined
+    -- inside behave correctly:
+    let bg = Tok pos (CtrlSeq "bgroup") "\\bgroup "
+    let eg = Tok pos (CtrlSeq "egroup") "\\egroup "
+    let result = (name,
+                    Macro GroupScope ExpandWhenUsed argspecs optarg
+                      (bg:startcontents),
+                    Macro GroupScope ExpandWhenUsed [] Nothing
+                      (endcontents ++ [eg]))
+    (do lookupMacro name
+        case mtype of
+          "provideenvironment" -> return Nothing
+          "renewenvironment" -> return (Just result)
+          _ -> do
+             report $ MacroAlreadyDefined name pos
+             return Nothing)
+      <|> return (Just result)
 
 bracketedNum :: PandocMonad m => LP m Int
 bracketedNum = do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs
index 5b49a0376..01edce7ed 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Math.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs
@@ -142,14 +142,15 @@ newtheorem inline = do
 theoremEnvironment :: PandocMonad m
                    => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks
 theoremEnvironment blocks opt name = do
+  resetCaption
   tmap <- sTheoremMap <$> getState
   case M.lookup name tmap of
     Nothing -> mzero
     Just tspec -> do
        optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
-       mblabel <- option Nothing $ Just . untokenize <$>
-                   try (spaces >> controlSeq "label" >> spaces >> braced)
        bs <- env name blocks
+       mblabel <- sLastLabel <$> getState
+
        number <-
          if theoremNumber tspec
             then do
@@ -169,9 +170,7 @@ theoremEnvironment blocks opt name = do
                  Just ident ->
                    updateState $ \s ->
                      s{ sLabels = M.insert ident
-                         (B.toList $
-                           theoremName tspec <> "\160" <>
-                           str (renderDottedNum num)) (sLabels s) }
+                         (B.toList $ str (renderDottedNum num)) (sLabels s) }
                  Nothing -> return ()
                return $ space <> B.text (renderDottedNum num)
             else return mempty
@@ -181,13 +180,14 @@ theoremEnvironment blocks opt name = do
                          RemarkStyle     -> B.emph
        let title = titleEmph (theoremName tspec <> number)
                       <> optTitle <> "." <> space
-       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
+       return $ divWith (fromMaybe "" mblabel, [name], [])
+              $ addTitle title
+              $ maybe id removeLabel mblabel
               $ case theoremStyle tspec of
                   PlainStyle -> walk italicize bs
                   _          -> bs
 
 
-
 proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks
 proof blocks opt = do
   title <- option (B.text "Proof") opt
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 9dac4d6ef..9eb4a0cbc 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -45,6 +45,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , isNewlineTok
   , isWordTok
   , isArgTok
+  , infile
   , spaces
   , spaces1
   , tokTypeIn
@@ -89,6 +90,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , resetCaption
   , env
   , addMeta
+  , removeLabel
   ) where
 
 import Control.Applicative (many, (<|>))
@@ -102,6 +104,9 @@ import qualified Data.IntMap as IntMap
 import qualified Data.Map as M
 import qualified Data.Set as Set
 import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import Data.List.NonEmpty (NonEmpty(..))
+import qualified Data.List.NonEmpty as NonEmpty
 import qualified Data.Text as T
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
@@ -115,6 +120,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Shared
 import Text.Parsec.Pos
+import Text.Pandoc.Walk
 
 newtype DottedNum = DottedNum [Int]
   deriving (Show, Eq)
@@ -146,7 +152,7 @@ data TheoremSpec =
 data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sMeta          :: Meta
                             , sQuoteContext  :: QuoteContext
-                            , sMacros        :: M.Map Text Macro
+                            , sMacros        :: NonEmpty (M.Map Text Macro)
                             , sContainers    :: [Text]
                             , sLogMessages   :: [LogMessage]
                             , sIdentifiers   :: Set.Set Text
@@ -173,7 +179,7 @@ defaultLaTeXState :: LaTeXState
 defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sMeta          = nullMeta
                               , sQuoteContext  = NoQuote
-                              , sMacros        = M.empty
+                              , sMacros        = M.empty :| []
                               , sContainers    = []
                               , sLogMessages   = []
                               , sIdentifiers   = Set.empty
@@ -220,8 +226,9 @@ instance HasIncludeFiles LaTeXState where
   dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
 
 instance HasMacros LaTeXState where
-  extractMacros  st  = sMacros st
-  updateMacros f st  = st{ sMacros = f (sMacros st) }
+  extractMacros  st  = NonEmpty.head $ sMacros st
+  updateMacros f st  = st{ sMacros = f (NonEmpty.head (sMacros st))
+                                     :| NonEmpty.tail (sMacros st) }
 
 instance HasReaderOptions LaTeXState where
   extractReaderOptions = sOptions
@@ -254,7 +261,7 @@ rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
-  let lstate' = lstate { sMacros = extractMacros pstate }
+  let lstate' = lstate { sMacros = extractMacros pstate :| [] }
   let setStartPos = case toks of
                       Tok pos _ _ : _ -> setPosition pos
                       _ -> return ()
@@ -267,14 +274,14 @@ rawLaTeXParser toks retokenize parser valParser = do
        Right (endpos, toks') -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
-                                        ts <- many (satisfyTok (const True))
+                                        ts <- many anyTok
                                         setInput ts
                                       rawparser)
                         lstate' "chunk" toks'
          case res of
               Left _    -> mzero
               Right ((val, raw), st) -> do
-                updateState (updateMacros (sMacros st <>))
+                updateState (updateMacros ((NonEmpty.head (sMacros st)) <>))
                 let skipTilPos stopPos = do
                       anyChar
                       pos <- getPosition
@@ -296,10 +303,10 @@ rawLaTeXParser toks retokenize parser valParser = do
 applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
             => Text -> ParserT Sources s m Text
 applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
-   do let retokenize = untokenize <$> many (satisfyTok (const True))
+   do let retokenize = untokenize <$> many anyTok
       pstate <- getState
       let lstate = def{ sOptions = extractReaderOptions pstate
-                      , sMacros  = extractMacros pstate }
+                      , sMacros  = extractMacros pstate :| [] }
       res <- runParserT retokenize lstate "math" (tokenize "math" s)
       case res of
            Left e   -> Prelude.fail (show e)
@@ -552,10 +559,10 @@ doMacros' n inp =
     handleMacros n' spos name ts = do
       when (n' > 20)  -- detect macro expansion loops
         $ throwError $ PandocMacroLoop name
-      macros <- sMacros <$> getState
+      (macros :| _ ) <- sMacros <$> getState
       case M.lookup name macros of
            Nothing -> trySpecialMacro name ts
-           Just (Macro expansionPoint argspecs optarg newtoks) -> do
+           Just (Macro _scope expansionPoint argspecs optarg newtoks) -> do
              let getargs' = do
                    args <-
                      (case expansionPoint of
@@ -642,6 +649,9 @@ isArgTok :: Tok -> Bool
 isArgTok (Tok _ (Arg _) _) = True
 isArgTok _                 = False
 
+infile :: PandocMonad m => SourceName -> LP m Tok
+infile reference = satisfyTok (\(Tok source _ _) -> (sourceName source) == reference)
+
 spaces :: PandocMonad m => LP m ()
 spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
 
@@ -745,10 +755,22 @@ primEscape = do
 bgroup :: PandocMonad m => LP m Tok
 bgroup = try $ do
   optional sp
-  symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+  t <- symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+  -- Add a copy of the macro table to the top of the macro stack,
+  -- private for this group. We inherit all the macros defined in
+  -- the parent group.
+  updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s))
+                                                 (sMacros s) }
+  return t
+
 
 egroup :: PandocMonad m => LP m Tok
-egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+egroup = do
+  t <- symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+  -- remove the group's macro table from the stack
+  updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $
+      NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) }
+  return t
 
 grouped :: (PandocMonad m,  Monoid a) => LP m a -> LP m a
 grouped parser = try $ do
@@ -921,6 +943,9 @@ getRawCommand name txt = do
              void $ count 4 braced
            "def" ->
              void $ manyTill anyTok braced
+           "vadjust" ->
+             void (manyTill anyTok braced) <|>
+                void (satisfyTok isPreTok) -- see #7531
            _ | isFontSizeCommand name -> return ()
              | otherwise -> do
                skipopts
@@ -928,6 +953,10 @@ getRawCommand name txt = do
                void $ many braced
   return $ txt <> untokenize rawargs
 
+isPreTok :: Tok -> Bool
+isPreTok (Tok _ Word "pre") = True
+isPreTok _ = False
+
 isDigitTok :: Tok -> Bool
 isDigitTok (Tok _ Word t) = T.all isDigit t
 isDigitTok _              = False
@@ -1017,7 +1046,16 @@ resetCaption = updateState $ \st -> st{ sCaption   = Nothing
                                       , sLastLabel = Nothing }
 
 env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
+env name p = do
+  -- environments are groups as far as macros are concerned,
+  -- so we need a local copy of the macro table (see above, bgroup, egroup):
+  updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s))
+                                                 (sMacros s) }
+  result <- p
+  updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $
+      NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) }
+  end_ name
+  return result
 
 tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
 tokWith inlineParser = try $ spaces >>
@@ -1031,3 +1069,16 @@ tokWith inlineParser = try $ spaces >>
 addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
 addMeta field val = updateState $ \st ->
    st{ sMeta = addMetaField field val $ sMeta st }
+
+-- remove label spans to avoid duplicated identifier
+removeLabel :: Walkable [Inline] a => Text -> a -> a
+removeLabel lbl = walk go
+ where
+  go (Span (_,_,kvs) _ : rest)
+    | Just lbl' <- lookup "label" kvs
+    , lbl' == lbl = go (dropWhile isSpaceOrSoftBreak rest)
+  go (x:xs) = x : go xs
+  go [] = []
+  isSpaceOrSoftBreak Space = True
+  isSpaceOrSoftBreak SoftBreak = True
+  isSpaceOrSoftBreak _ = False
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index b8bf0ce7f..e4738a763 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -23,11 +23,15 @@ siunitxCommands :: PandocMonad m
                  => LP m Inlines -> M.Map Text (LP m Inlines)
 siunitxCommands tok = M.fromList
   [ ("si", dosi tok)
+  , ("unit", dosi tok) -- v3 version of si
   , ("SI", doSI tok)
+  , ("qty", doSI tok) -- v3 version of SI
   , ("SIrange", doSIrange True tok)
+  , ("qtyrange", doSIrange True tok) -- v3 version of SIrange
+  , ("SIlist", doSIlist tok)
+  , ("qtylist", doSIlist tok) -- v3 version of SIlist
   , ("numrange", doSIrange False tok)
   , ("numlist", doSInumlist)
-  , ("SIlist", doSIlist tok)
   , ("num", doSInum)
   , ("ang", doSIang)
   ]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
index f56728fe1..7d5c4f265 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -368,7 +368,9 @@ addTableCaption = walkM go
                         ((_,classes,kvs), Just ident) ->
                            (ident,classes,kvs)
                         _ -> attr
-          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+          return $ addAttrDiv attr'
+                 $ maybe id removeLabel mblabel
+                 $ Table nullAttr capt spec th tb tf
         go x = return x
 
 -- TODO: For now we add a Div to contain table attributes, since
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index c20b72bc5..a4eae56db 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -15,6 +15,7 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
                                        , Macro(..)
                                        , ArgSpec(..)
                                        , ExpansionPoint(..)
+                                       , MacroScope(..)
                                        , SourcePos
                                        )
 where
@@ -43,7 +44,10 @@ tokToText (Tok _ _ t) = t
 data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
      deriving (Eq, Ord, Show)
 
-data Macro = Macro ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok]
+data MacroScope = GlobalScope | GroupScope
+  deriving (Eq, Ord, Show)
+
+data Macro = Macro MacroScope ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok]
      deriving Show
 
 data ArgSpec = ArgNum Int | Pattern [Tok]
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2dc7ddf52..b5017a433 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts    #-}
 {-# LANGUAGE TupleSections       #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ViewPatterns        #-}
@@ -21,14 +22,14 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
+import Text.DocLayout (realLength)
 import Data.List (transpose, elemIndex, sortOn, foldl')
-import Data.List.NonEmpty (NonEmpty(..))
 import qualified Data.Map as M
 import Data.Maybe
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as BS
 import System.FilePath (addExtension, takeExtension, takeDirectory)
 import qualified System.FilePath.Windows as Windows
 import qualified System.FilePath.Posix as Posix
@@ -39,6 +40,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), report)
 import Text.Pandoc.Definition as Pandoc
 import Text.Pandoc.Emoji (emojiToInline)
 import Text.Pandoc.Error
+import Safe.Foldable (maximumBounded)
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Walk (walk)
@@ -72,14 +74,12 @@ readMarkdown opts s = do
 yamlToMeta :: PandocMonad m
            => ReaderOptions
            -> Maybe FilePath
-           -> BL.ByteString
+           -> BS.ByteString
            -> m Meta
 yamlToMeta opts mbfp bstr = do
   let parser = do
         oldPos <- getPosition
-        case mbfp of
-          Nothing -> return ()
-          Just fp -> setPosition $ initialPos fp
+        setPosition $ initialPos (fromMaybe "" mbfp)
         meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
         setPosition oldPos
         return $ runF meta defaultParserState
@@ -95,7 +95,7 @@ yamlToRefs :: PandocMonad m
            => (Text -> Bool)
            -> ReaderOptions
            -> Maybe FilePath
-           -> BL.ByteString
+           -> BS.ByteString
            -> m [MetaValue]
 yamlToRefs idpred opts mbfp bstr = do
   let parser = do
@@ -198,6 +198,7 @@ inlinesInBalancedBrackets =
         go openBrackets =
           (() <$ (escapedChar <|>
                 code <|>
+                math <|>
                 rawHtmlInline <|>
                 rawLaTeXInline') >> go openBrackets)
           <|>
@@ -326,6 +327,7 @@ referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
 referenceKey = try $ do
   pos <- getPosition
   skipNonindentSpaces
+  notFollowedBy (void cite)
   (_,raw) <- reference
   char ':'
   skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
@@ -829,7 +831,7 @@ listLineCommon :: PandocMonad m => MarkdownParser m Text
 listLineCommon = T.concat <$> manyTill
               (  many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
              <|> fmap snd (withRaw code)
-             <|> fmap snd (htmlTag isCommentTag)
+             <|> fmap (renderTags . (:[]) . fst) (htmlTag isCommentTag)
              <|> countChar 1 anyChar
               ) newline
 
@@ -1013,19 +1015,18 @@ normalDefinitionList = do
 para :: PandocMonad m => MarkdownParser m (F Blocks)
 para = try $ do
   exts <- getOption readerExtensions
-  let implicitFigures x
-       | extensionEnabled Ext_implicit_figures exts = do
-         x' <- x
-         case B.toList x' of
-               [Image attr alt (src,tit)]
-                 | not (null alt) ->
-                    -- the fig: at beginning of title indicates a figure
-                    return $ B.singleton
-                           $ Image attr alt (src, "fig:" <> tit)
-               _ -> return x'
-       | otherwise = x
-  result <- implicitFigures . trimInlinesF <$> inlines1
-  option (B.plain <$> result)
+
+  result <- trimInlinesF <$> inlines1
+  let figureOr constr inlns =
+        case B.toList inlns of
+          [Image attr figCaption (src, tit)]
+            | extensionEnabled Ext_implicit_figures exts
+            , not (null figCaption) -> do
+                B.simpleFigureWith attr (B.fromList figCaption) src tit
+
+          _ -> constr inlns
+
+  option (figureOr B.plain <$> result)
     $ try $ do
             newline
             (mempty <$ blanklines)
@@ -1047,7 +1048,7 @@ para = try $ do
                      if divLevel > 0
                         then lookAhead divFenceEnd
                         else mzero
-            return $ B.para <$> result
+            return $ figureOr B.para <$> result
 
 plain :: PandocMonad m => MarkdownParser m (F Blocks)
 plain = fmap B.plain . trimInlinesF <$> inlines1
@@ -1124,7 +1125,12 @@ rawHtmlBlocks = do
   let selfClosing = "/>" `T.isSuffixOf` raw
   -- we don't want '<td>    text' to be a code block:
   skipMany spaceChar
-  indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
+  tabStop <- getOption readerTabStop
+  indentlevel <- option 0 $
+                 do blankline
+                    sum <$> many ( (1 <$ char ' ')
+                                   <|>
+                                   (tabStop <$ char '\t') )
   -- try to find closing tag
   -- we set stateInHtmlBlock so that closing tags that can be either block or
   -- inline will not be parsed as inline tags
@@ -1355,26 +1361,30 @@ pipeTable = try $ do
   nonindentSpaces
   lookAhead nonspaceChar
   (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
-  let heads' = take (length aligns) <$> heads
+  let cellContents = parseFromString' pipeTableCell . trim
+  let numcols = length aligns
+  let heads' = take numcols heads
   lines' <- many pipeTableRow
-  let lines'' = map (take (length aligns) <$>) lines'
-  let maxlength = maximum $
-       fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'')
-  numColumns <- getOption readerColumns
-  let widths = if maxlength > numColumns
+  let lines'' = map (take numcols) lines'
+  let lineWidths = map (sum . map realLength) (heads' : lines'')
+  columns <- getOption readerColumns
+  -- add numcols + 1 for the pipes themselves
+  let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1) > columns
                   then map (\len ->
                          fromIntegral len / fromIntegral (sum seplengths))
                          seplengths
                   else replicate (length aligns) 0.0
-  return (aligns, widths, toHeaderRow <$> heads', map toRow <$> sequence lines'')
+  (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads'
+  (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines''
+  return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows)
 
 sepPipe :: PandocMonad m => MarkdownParser m ()
 sepPipe = try $ do
   char '|' <|> char '+'
   notFollowedBy blankline
 
--- parse a row, also returning probable alignments for org-table cells
-pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
+-- parse a row, returning raw cell contents
+pipeTableRow :: PandocMonad m => MarkdownParser m [Text]
 pipeTableRow = try $ do
   scanForPipe
   skipMany spaceChar
@@ -1382,13 +1392,11 @@ pipeTableRow = try $ do
   -- split into cells
   let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
        <|> void (noneOf "|\n\r")
-  let cellContents = withRaw (many chunk) >>=
-        parseFromString' pipeTableCell . trim . snd
-  cells <- cellContents `sepEndBy1` char '|'
+  cells <- (snd <$> withRaw (many chunk)) `sepEndBy1` char '|'
   -- surrounding pipes needed for a one-column table:
   guard $ not (length cells == 1 && not openPipe)
   blankline
-  return $ sequence cells
+  return cells
 
 pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
 pipeTableCell =
@@ -1692,21 +1700,29 @@ strikeout = fmap B.strikeout <$>
 
 superscript :: PandocMonad m => MarkdownParser m (F Inlines)
 superscript = do
-  guardEnabled Ext_superscript
   fmap B.superscript <$> try (do
     char '^'
-    mconcat <$> many1Till (do notFollowedBy spaceChar
-                              notFollowedBy newline
-                              inline) (char '^'))
+    mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript))
+      where regularSuperscript = many1Till (do guardEnabled Ext_superscript
+                                               notFollowedBy spaceChar
+                                               notFollowedBy newline
+                                               inline) (char '^')
+            mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts
+                                     result <- T.pack <$> many1 alphaNum
+                                     return $ return $ return $ B.str result
 
 subscript :: PandocMonad m => MarkdownParser m (F Inlines)
 subscript = do
-  guardEnabled Ext_subscript
   fmap B.subscript <$> try (do
     char '~'
-    mconcat <$> many1Till (do notFollowedBy spaceChar
-                              notFollowedBy newline
-                              inline) (char '~'))
+    mconcat <$> (try regularSubscript <|> mmdShortSubscript))
+      where regularSubscript = many1Till (do guardEnabled Ext_subscript
+                                             notFollowedBy spaceChar
+                                             notFollowedBy newline
+                                             inline) (char '~')
+            mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts
+                                   result <- T.pack <$> many1 alphaNum
+                                   return $ return $ return $ B.str result
 
 whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
 whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
@@ -1768,7 +1784,6 @@ endline = try $ do
 reference :: PandocMonad m => MarkdownParser m (F Inlines, Text)
 reference = do
   guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^")
-  guardDisabled Ext_citations <|> notFollowedBy' (string "[@")
   withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
 
 parenthesizedChars :: PandocMonad m => MarkdownParser m Text
@@ -2187,6 +2202,7 @@ normalCite = try $ do
   citations <- citeList
   spnl
   char ']'
+  notFollowedBy (oneOf "{([")  -- not a link or a bracketed span
   return citations
 
 suffix :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -2200,7 +2216,7 @@ suffix = try $ do
 
 prefix :: PandocMonad m => MarkdownParser m (F Inlines)
 prefix = trimInlinesF . mconcat <$>
-  manyTill inline (char ']'
+  manyTill (notFollowedBy (char ';') >> inline) (char ']'
    <|> lookAhead
          (try $ do optional (try (char ';' >> spnl))
                    citeKey True
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 825e4a2eb..9348a8053 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -201,7 +201,12 @@ para = do
   contents <- trimInlines . mconcat <$> many1 inline
   if F.all (==Space) contents
      then return mempty
-     else return $ B.para contents
+     else case B.toList contents of
+         -- For the MediaWiki format all images are considered figures
+         [Image attr figureCaption (src, title)] ->
+             return $ B.simpleFigureWith
+                 attr (B.fromList figureCaption) src title
+         _ -> return $ B.para contents
 
 table :: PandocMonad m => MWParser m Blocks
 table = do
@@ -631,7 +636,7 @@ image = try $ do
   let attr = ("", [], kvs)
   caption <-   (B.str fname <$ sym "]]")
            <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
-  return $ B.imageWith attr fname ("fig:" <> stringify caption) caption
+  return $ B.imageWith attr fname (stringify caption) caption
 
 imageOption :: PandocMonad m => MWParser m Text
 imageOption = try $ char '|' *> opt
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index cbc523b25..7991dca5c 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -17,102 +17,62 @@ module Text.Pandoc.Readers.Metadata (
   yamlMetaBlock,
   yamlMap ) where
 
-import Control.Monad
+
 import Control.Monad.Except (throwError)
-import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as B
 import qualified Data.Map as M
-import Data.Maybe
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.YAML as YAML
-import qualified Data.YAML.Event as YE
+import qualified Data.Yaml as Yaml
+import Data.Aeson (Value(..), Object, Result(..), fromJSON, (.:?), withObject)
+import Data.Aeson.Types (parse)
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
-import Text.Pandoc.Definition
+import Text.Pandoc.Definition hiding (Null)
 import Text.Pandoc.Error
-import Text.Pandoc.Parsing hiding (tableWith)
-import Text.Pandoc.Shared
-import qualified Data.Text.Lazy as TL
+import Text.Pandoc.Parsing hiding (tableWith, parse)
+
+
 import qualified Text.Pandoc.UTF8 as UTF8
 
 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
              => ParserT Sources st m (Future st MetaValue)
-             -> BL.ByteString
+             -> B.ByteString
              -> ParserT Sources st m (Future st Meta)
 yamlBsToMeta pMetaValue bstr = do
-  case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
-       Right (YAML.Doc (YAML.Mapping _ _ o):_)
-                -> fmap Meta <$> yamlMap pMetaValue o
+  case Yaml.decodeAllEither' bstr of
+       Right (Object o:_) -> fmap Meta <$> yamlMap pMetaValue o
        Right [] -> return . return $ mempty
-       Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
-                -> return . return $ mempty
-       -- the following is what we get from a comment:
-       Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))]
-                -> return . return $ mempty
+       Right [Null] -> return . return $ mempty
        Right _  -> Prelude.fail "expected YAML object"
-       Left (yamlpos, err')
-                -> do pos <- getPosition
-                      setPosition $ incSourceLine
-                            (setSourceColumn pos (YE.posColumn yamlpos))
-                            (YE.posLine yamlpos - 1)
-                      Prelude.fail err'
-
-fakePos :: YAML.Pos
-fakePos = YAML.Pos (-1) (-1) 1 0
-
-lookupYAML :: Text
-           -> YAML.Node YE.Pos
-           -> Maybe (YAML.Node YE.Pos)
-lookupYAML t (YAML.Mapping _ _ m) =
-  M.lookup (YAML.Scalar fakePos (YAML.SUnknown YE.untagged t)) m
-    `mplus`
-    M.lookup (YAML.Scalar fakePos (YAML.SStr t)) m
-lookupYAML _ _ = Nothing
+       Left err' -> do
+         throwError $ PandocParseError
+                    $ T.pack $ Yaml.prettyPrintParseException err'
 
 -- Returns filtered list of references.
 yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
              => ParserT Sources st m (Future st MetaValue)
              -> (Text -> Bool) -- ^ Filter for id
-             -> BL.ByteString
+             -> B.ByteString
              -> ParserT Sources st m (Future st [MetaValue])
 yamlBsToRefs pMetaValue idpred bstr =
-  case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
-       Right (YAML.Doc o@YAML.Mapping{}:_)
-                -> case lookupYAML "references" o of
-                     Just (YAML.Sequence _ _ ns) -> do
-                       let g n = case lookupYAML "id" n of
-                                    Just n' ->
-                                      case nodeToKey n' of
-                                        Nothing -> False
-                                        Just t -> idpred t ||
-                                          case lookupYAML "other-ids" n of
-                                            Just (YAML.Sequence _ _ ns') ->
-                                              let ts' = mapMaybe nodeToKey ns'
-                                               in any idpred ts'
-                                            _ -> False
-                                    Nothing   -> False
-                       sequence <$>
-                         mapM (yamlToMetaValue pMetaValue) (filter g ns)
-                     Just _ ->
-                       Prelude.fail "expecting sequence in 'references' field"
-                     Nothing ->
-                       Prelude.fail "expecting 'references' field"
-
-       Right [] -> return . return $ mempty
-       Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
-                -> return . return $ mempty
-       Right _  -> Prelude.fail "expecting YAML object"
-       Left (yamlpos, err')
-                -> do pos <- getPosition
-                      setPosition $ incSourceLine
-                            (setSourceColumn pos (YE.posColumn yamlpos))
-                            (YE.posLine yamlpos - 1)
-                      Prelude.fail err'
-
-
-nodeToKey :: YAML.Node YE.Pos -> Maybe Text
-nodeToKey (YAML.Scalar _ (YAML.SStr t))       = Just t
-nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
-nodeToKey _                                   = Nothing
+  case Yaml.decodeAllEither' bstr of
+       Right (Object m : _) -> do
+         let isSelected (String t) = idpred t
+             isSelected _ = False
+         let hasSelectedId (Object o) =
+               case parse (withObject "ref" (.:? "id")) (Object o) of
+                 Success (Just id') -> isSelected id'
+                 _ -> False
+             hasSelectedId _ = False
+         case parse (withObject "metadata" (.:? "references")) (Object m) of
+           Success (Just refs) -> sequence <$>
+                 mapM (yamlToMetaValue pMetaValue) (filter hasSelectedId refs)
+           _ -> return $ return []
+       Right _ -> return . return $ []
+       Left err' -> do
+         throwError $ PandocParseError
+                    $ T.pack $ Yaml.prettyPrintParseException err'
 
 normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
                    => ParserT Sources st m (Future st MetaValue)
@@ -133,47 +93,36 @@ normalizeMetaValue pMetaValue x =
         isSpaceChar '\t' = True
         isSpaceChar _    = False
 
-checkBoolean :: Text -> Maybe Bool
-checkBoolean t
-  | t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE" = Just True
-  | t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False
-  | otherwise = Nothing
-
 yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
                 => ParserT Sources st m (Future st MetaValue)
-                -> YAML.Node YE.Pos
+                -> Value
                 -> ParserT Sources st m (Future st MetaValue)
-yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
-  case x of
-       YAML.SStr t       -> normalizeMetaValue pMetaValue t
-       YAML.SBool b      -> return $ return $ MetaBool b
-       YAML.SFloat d     -> return $ return $ MetaString $ tshow d
-       YAML.SInt i       -> return $ return $ MetaString $ tshow i
-       YAML.SUnknown _ t ->
-         case checkBoolean t of
-           Just b        -> return $ return $ MetaBool b
-           Nothing       -> normalizeMetaValue pMetaValue t
-       YAML.SNull        -> return $ return $ MetaString ""
-
-yamlToMetaValue pMetaValue (YAML.Sequence _ _ xs) =
-  fmap MetaList . sequence
-  <$> mapM (yamlToMetaValue pMetaValue) xs
-yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
-  fmap MetaMap <$> yamlMap pMetaValue o
-yamlToMetaValue _ _ = return $ return $ MetaString ""
+yamlToMetaValue pMetaValue v =
+  case v of
+       String t -> normalizeMetaValue pMetaValue t
+       Bool b -> return $ return $ MetaBool b
+       Number d -> normalizeMetaValue pMetaValue $
+         case fromJSON v of
+           Success (x :: Int) -> tshow x
+           _ -> tshow d
+       Null -> return $ return $ MetaString ""
+       Array{} -> do
+         case fromJSON v of
+           Error err' -> throwError $ PandocParseError $ T.pack err'
+           Success xs -> fmap MetaList . sequence <$>
+                          mapM (yamlToMetaValue pMetaValue) xs
+       Object o -> fmap MetaMap <$> yamlMap pMetaValue o
 
 yamlMap :: (PandocMonad m, HasLastStrPosition st)
         => ParserT Sources st m (Future st MetaValue)
-        -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
+        -> Object
         -> ParserT Sources st m (Future st (M.Map Text MetaValue))
 yamlMap pMetaValue o = do
-    kvs <- forM (M.toList o) $ \(key, v) -> do
-             k <- maybe (throwError $ PandocParseError
-                            "Non-string key in YAML mapping")
-                        return $ nodeToKey key
-             return (k, v)
-    let kvs' = filter (not . ignorable . fst) kvs
-    fmap M.fromList . sequence <$> mapM toMeta kvs'
+    case fromJSON (Object o) of
+      Error err' -> throwError $ PandocParseError $ T.pack err'
+      Success (m' :: M.Map Text Value) -> do
+        let kvs = filter (not . ignorable . fst) $ M.toList m'
+        fmap M.fromList . sequence <$> mapM toMeta kvs
   where
     ignorable t = "_" `T.isSuffixOf` t
     toMeta (k, v) = do
@@ -194,7 +143,7 @@ yamlMetaBlock parser = try $ do
   -- by including --- and ..., we allow yaml blocks with just comments:
   let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
   optional blanklines
-  yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+  yamlBsToMeta parser $ UTF8.fromText rawYaml
 
 stopLine :: Monad m => ParserT Sources st m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index f18d2f9a7..9a689b0e8 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -474,15 +474,16 @@ figure = try $ do
        figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
        figKeyVals = blockAttrKeyValues figAttrs
        attr       = (figLabel, mempty, figKeyVals)
-       figTitle   = (if isFigure then withFigPrefix else id) figName
-     in
-       B.para . B.imageWith attr imgSrc figTitle <$> figCaption
-
-   withFigPrefix :: Text -> Text
-   withFigPrefix cs =
-     if "fig:" `T.isPrefixOf` cs
-     then cs
-     else "fig:" <> cs
+     in if isFigure
+           then (\c ->
+               B.simpleFigureWith
+                   attr c imgSrc (unstackFig figName)) <$> figCaption
+           else B.para . B.imageWith attr imgSrc figName <$> figCaption
+   unstackFig :: Text -> Text
+   unstackFig figName =
+       if "fig:" `T.isPrefixOf` figName
+           then T.drop 4 figName
+           else figName
 
 -- | Succeeds if looking at the end of the current paragraph
 endOfParagraph :: Monad m => OrgParser m ()
@@ -889,7 +890,10 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do
   firstLine <- anyLineNewline
   blank <- option "" ("\n" <$ blankline)
   rest <- T.concat <$> many (listContinuation markerLength)
-  contents <- parseFromString blocks $ firstLine <> blank <> rest
+  contents <- parseFromString (do initial <- paraOrPlain <|> pure mempty
+                                  subsequent <- blocks
+                                  return $ initial <> subsequent)
+                (firstLine <> blank <> rest)
   return (maybe id (prependInlines . checkboxToInlines) box <$> contents)
 
 -- | Prepend inlines to blocks, adding them to the first paragraph or
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index 2dcbecb1d..1c4f253cc 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -41,6 +41,7 @@ documentTree :: PandocMonad m
              -> OrgParser m (F Inlines)
              -> OrgParser m (F Headline)
 documentTree blocks inline = do
+  properties <- option mempty propertiesDrawer
   initialBlocks <- blocks
   headlines <- sequence <$> manyTill (headline blocks inline 1) eof
   title <- fmap docTitle . orgStateMeta <$> getState
@@ -54,7 +55,7 @@ documentTree blocks inline = do
       , headlineText = B.fromList title'
       , headlineTags = mempty
       , headlinePlanning = emptyPlanning
-      , headlineProperties = mempty
+      , headlineProperties = properties
       , headlineContents = initialBlocks'
       , headlineChildren = headlines'
       }
@@ -163,8 +164,15 @@ unprunedHeadlineToBlocks hdln st =
   in if not usingSelectedTags ||
         any (`Set.member` orgStateSelectTags st) (headlineTags rootNode')
         then do headlineBlocks <- headlineToBlocks rootNode'
+                -- add metadata from root node :PROPERTIES:
+                updateState $ \s ->
+                  s{ orgStateMeta = foldr
+                    (\(PropertyKey k, PropertyValue v) m ->
+                        B.setMeta k v <$> m)
+                    (orgStateMeta s)
+                    (headlineProperties rootNode') }
                 -- ignore first headline, it's the document's title
-                return . drop 1 . B.toList $ headlineBlocks
+                return $ drop 1 $ B.toList headlineBlocks
         else do headlineBlocks <- mconcat <$> mapM headlineToBlocks
                                                    (headlineChildren rootNode')
                 return . B.toList $ headlineBlocks
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 6862dd71e..617f98a10 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -31,11 +31,10 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
 import Text.Pandoc.Sources (ToSources(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-
-import Control.Monad (guard, mplus, mzero, unless, void, when)
+import Safe (lastMay)
+import Control.Monad (guard, mplus, mzero, unless, when, void)
 import Control.Monad.Trans (lift)
 import Data.Char (isAlphaNum, isSpace)
-import Data.List (intersperse)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -148,31 +147,177 @@ endline = try $ do
 -- Citations
 --
 
--- The state of citations is a bit confusing due to the lack of an official
--- syntax and multiple syntaxes coexisting.  The pandocOrgCite syntax was the
--- first to be implemented here and is almost identical to Markdown's citation
--- syntax.  The org-ref package is in wide use to handle citations, but the
--- syntax is a bit limiting and not quite as simple to write.  The
--- semi-official Org-mode citation syntax is based on John MacFarlane's Pandoc
--- sytax and Org-oriented enhancements contributed by Richard Lawrence and
--- others.  It's dubbed Berkeley syntax due the place of activity of its main
--- contributors.  All this should be consolidated once an official Org-mode
--- citation syntax has emerged.
+-- We first try to parse official org-cite citations, then fall
+-- back to org-ref citations (which are still in wide use).
+
+-- | A citation in org-cite style
+orgCite :: PandocMonad m => OrgParser m (F [Citation])
+orgCite = try $ do
+  string "[cite"
+  (sty, _variants) <- citeStyle
+  char ':'
+  spnl
+  globalPref <- option mempty (try (citePrefix <* char ';'))
+  items <- citeItems
+  globalSuff <- option mempty (try (char ';' *> citeSuffix))
+  spnl
+  char ']'
+  return $ adjustCiteStyle sty .
+           addPrefixToFirstItem globalPref .
+           addSuffixToLastItem globalSuff $ items
+
+adjustCiteStyle :: CiteStyle -> (F [Citation]) -> (F [Citation])
+adjustCiteStyle sty cs = do
+  cs' <- cs
+  case cs' of
+    [] -> return []
+    (d:ds)  -- TODO needs refinement
+      -> case sty of
+         TextStyle -> return $ d{ citationMode = AuthorInText
+                                , citationSuffix = dropWhile (== Space)
+                                    (citationSuffix d)} : ds
+         NoAuthorStyle -> return $ d{ citationMode = SuppressAuthor } : ds
+         _ -> return (d:ds)
+
+addPrefixToFirstItem :: (F Inlines) -> (F [Citation]) -> (F [Citation])
+addPrefixToFirstItem aff cs = do
+  cs' <- cs
+  aff' <- aff
+  case cs' of
+    [] -> return []
+    (d:ds) -> return (d{ citationPrefix =
+                          B.toList aff' <> citationPrefix d }:ds)
+
+addSuffixToLastItem :: (F Inlines) -> (F [Citation]) -> (F [Citation])
+addSuffixToLastItem aff cs = do
+  cs' <- cs
+  aff' <- aff
+  case lastMay cs' of
+    Nothing -> return cs'
+    Just d  ->
+      return (init cs' ++ [d{ citationSuffix =
+                                citationSuffix d <> B.toList aff' }])
+
+citeItems :: PandocMonad m => OrgParser m (F [Citation])
+citeItems = sequence <$> citeItem `sepBy1` (char ';')
+
+citeItem :: PandocMonad m => OrgParser m (F Citation)
+citeItem = do
+  pref <- citePrefix
+  itemKey <- orgCiteKey
+  suff <- citeSuffix
+  return $ do
+    pre' <- pref
+    suf' <- suff
+    return Citation
+      { citationId      = itemKey
+      , citationPrefix  = B.toList pre'
+      , citationSuffix  = B.toList suf'
+      , citationMode    = NormalCitation
+      , citationNoteNum = 0
+      , citationHash    = 0
+      }
+
+orgCiteKey :: PandocMonad m => OrgParser m Text
+orgCiteKey = do
+  char '@'
+  T.pack <$> many1 (satisfy orgCiteKeyChar)
+
+orgCiteKeyChar :: Char -> Bool
+orgCiteKeyChar c =
+  isAlphaNum c || c `elem` ['.',':','?','!','`','\'','/','*','@','+','|',
+                            '(',')','{','}','<','>','&','_','^','$','#',
+                            '%','~','-']
+
+rawAffix :: PandocMonad m => Bool -> OrgParser m Text
+rawAffix isPrefix = snd <$> withRaw
+  (many
+   (affixChar
+     <|>
+     try (void (char '[' >> rawAffix isPrefix >> char ']'))))
+ where
+   affixChar = void $ satisfy $ \c ->
+     not (c == '^' || c == ';' || c == '[' || c == ']') &&
+     (not isPrefix || c /= '@')
+
+citePrefix :: PandocMonad m => OrgParser m (F Inlines)
+citePrefix =
+  rawAffix True >>= parseFromString (trimInlinesF . mconcat <$> many inline)
+
+citeSuffix :: PandocMonad m => OrgParser m (F Inlines)
+citeSuffix =
+  rawAffix False >>= parseFromString parseSuffix
+ where
+   parseSuffix = do
+     hasSpace <- option False
+              (True <$ try (spaceChar >> skipSpaces >> lookAhead nonspaceChar))
+     ils <- trimInlinesF . mconcat <$> many inline
+     return $ if hasSpace
+                 then (B.space <>) <$> ils
+                 else ils
+
+citeStyle :: PandocMonad m => OrgParser m (CiteStyle, [CiteVariant])
+citeStyle = option (DefStyle, []) $ do
+  sty <- option DefStyle $ try $ char '/' *> orgCiteStyle
+  variants <- option [] $ try $ char '/' *> orgCiteVariants
+  return (sty, variants)
+
+orgCiteStyle :: PandocMonad m => OrgParser m CiteStyle
+orgCiteStyle = choice $ map try
+  [ NoAuthorStyle <$ string "noauthor"
+  , NoAuthorStyle <$ string "na"
+  , LocatorsStyle <$ string "locators"
+  , LocatorsStyle <$ char 'l'
+  , NociteStyle <$ string "nocite"
+  , NociteStyle <$ char 'n'
+  , TextStyle <$ string "text"
+  , TextStyle <$ char 't'
+  ]
+
+orgCiteVariants :: PandocMonad m => OrgParser m [CiteVariant]
+orgCiteVariants =
+  (fullnameVariant `sepBy1` (char '-')) <|> (many1 onecharVariant)
+ where
+  fullnameVariant = choice $ map try
+     [ Bare <$ string "bare"
+     , Caps <$ string "caps"
+     , Full <$ string "full"
+     ]
+  onecharVariant = choice
+     [ Bare <$ char 'b'
+     , Caps <$ char 'c'
+     , Full <$ char 'f'
+     ]
+
+data CiteStyle =
+    NoAuthorStyle
+  | LocatorsStyle
+  | NociteStyle
+  | TextStyle
+  | DefStyle
+  deriving Show
+
+data CiteVariant =
+    Caps
+  | Bare
+  | Full
+  deriving Show
+
+
+spnl :: PandocMonad m => OrgParser m ()
+spnl =
+  skipSpaces *> optional (newline *> notFollowedBy blankline *> skipSpaces)
 
 cite :: PandocMonad m => OrgParser m (F Inlines)
-cite = try $ berkeleyCite <|> do
+cite = do
   guardEnabled Ext_citations
-  (cs, raw) <- withRaw $ choice
-               [ pandocOrgCite
+  (cs, raw) <- withRaw $ try $ choice
+               [ orgCite
                , orgRefCite
-               , berkeleyTextualCite
                ]
   return $ flip B.cite (B.text raw) <$> cs
 
--- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@).
-pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation])
-pandocOrgCite = try $
-  char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']'
+-- org-ref
 
 orgRefCite :: PandocMonad m => OrgParser m (F [Citation])
 orgRefCite = try $ choice
@@ -201,100 +346,6 @@ normalOrgRefCite = try $ do
      , citationHash    = 0
      }
 
--- | Read an Berkeley-style Org-mode citation.  Berkeley citation style was
--- develop and adjusted to Org-mode style by John MacFarlane and Richard
--- Lawrence, respectively, both philosophers at UC Berkeley.
-berkeleyCite :: PandocMonad m => OrgParser m (F Inlines)
-berkeleyCite = try $ do
-  bcl <- berkeleyCitationList
-  return $ do
-    parens <- berkeleyCiteParens <$> bcl
-    prefix <- berkeleyCiteCommonPrefix <$> bcl
-    suffix <- berkeleyCiteCommonSuffix <$> bcl
-    citationList <- berkeleyCiteCitations <$> bcl
-    return $
-      if parens
-      then toCite
-           . maybe id (alterFirst . prependPrefix) prefix
-           . maybe id (alterLast . appendSuffix) suffix
-           $ citationList
-      else maybe mempty (<> " ") prefix
-             <> toListOfCites (map toInTextMode citationList)
-             <> maybe mempty (", " <>) suffix
- where
-   toCite :: [Citation] -> Inlines
-   toCite cs = B.cite cs mempty
-
-   toListOfCites :: [Citation] -> Inlines
-   toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty)
-
-   toInTextMode :: Citation -> Citation
-   toInTextMode c = c { citationMode = AuthorInText }
-
-   alterFirst, alterLast :: (a -> a) -> [a] -> [a]
-   alterFirst _ []     = []
-   alterFirst f (c:cs) = f c : cs
-   alterLast  f = reverse . alterFirst f . reverse
-
-   prependPrefix, appendSuffix :: Inlines -> Citation -> Citation
-   prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c }
-   appendSuffix  suf c = c { citationSuffix = citationSuffix c <> B.toList suf }
-
-data BerkeleyCitationList = BerkeleyCitationList
-  { berkeleyCiteParens       :: Bool
-  , berkeleyCiteCommonPrefix :: Maybe Inlines
-  , berkeleyCiteCommonSuffix :: Maybe Inlines
-  , berkeleyCiteCitations    :: [Citation]
-  }
-berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList)
-berkeleyCitationList = try $ do
-  char '['
-  parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ]
-  char ':'
-  skipSpaces
-  commonPrefix <- optionMaybe (try $ citationListPart <* char ';')
-  citations    <- citeList
-  commonSuffix <- optionMaybe (try citationListPart)
-  char ']'
-  return (BerkeleyCitationList parens
-    <$> sequence commonPrefix
-    <*> sequence commonSuffix
-    <*> citations)
- where
-   citationListPart :: PandocMonad m => OrgParser m (F Inlines)
-   citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
-     notFollowedBy' $ citeKey False
-     notFollowedBy (oneOf ";]")
-     inline
-
-berkeleyBareTag :: PandocMonad m => OrgParser m ()
-berkeleyBareTag = try $ void berkeleyBareTag'
-
-berkeleyParensTag :: PandocMonad m => OrgParser m ()
-berkeleyParensTag = try . void $ enclosedByPair1 '(' ')' berkeleyBareTag'
-
-berkeleyBareTag' :: PandocMonad m => OrgParser m ()
-berkeleyBareTag' = try $ void (string "cite")
-
-berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
-berkeleyTextualCite = try $ do
-  (suppressAuthor, key) <- citeKey False
-  returnF . return $ Citation
-    { citationId      = key
-    , citationPrefix  = mempty
-    , citationSuffix  = mempty
-    , citationMode    = if suppressAuthor then SuppressAuthor else AuthorInText
-    , citationNoteNum = 0
-    , citationHash    = 0
-    }
-
--- The following is what a Berkeley-style bracketed textual citation parser
--- would look like.  However, as these citations are a subset of Pandoc's Org
--- citation style, this isn't used.
--- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation])
--- berkeleyBracketedTextualCite = try . (fmap head) $
---   enclosedByPair1 '[' ']' berkeleyTextualCite
-
 -- | Read a link-like org-ref style citation.  The citation includes pre and
 -- post text.  However, multiple citations are not possible due to limitations
 -- in the syntax.
@@ -345,39 +396,6 @@ orgRefCiteMode =
     , ("citeyear", SuppressAuthor)
     ]
 
-citeList :: PandocMonad m => OrgParser m (F [Citation])
-citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
-
-citation :: PandocMonad m => OrgParser m (F Citation)
-citation = try $ do
-  pref <- prefix
-  (suppress_author, key) <- citeKey False
-  suff <- suffix
-  return $ do
-    x <- pref
-    y <- suff
-    return Citation
-      { citationId      = key
-      , citationPrefix  = B.toList x
-      , citationSuffix  = B.toList y
-      , citationMode    = if suppress_author
-                          then SuppressAuthor
-                          else NormalCitation
-      , citationNoteNum = 0
-      , citationHash    = 0
-      }
- where
-   prefix = trimInlinesF . mconcat <$>
-            manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
-   suffix = try $ do
-     hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
-     skipSpaces
-     rest <- trimInlinesF . mconcat <$>
-             many (notFollowedBy (oneOf ";]") *> inline)
-     return $ if hasSpace
-              then (B.space <>) <$> rest
-              else rest
-
 footnote :: PandocMonad m => OrgParser m (F Inlines)
 footnote = try $ do
   note <- inlineNote <|> referencedNote
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index a1b21046a..ccb6744e7 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -27,13 +27,13 @@ import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared (blocksToInlines, safeRead)
+import Text.Pandoc.Network.HTTP (urlEncode)
 
 import Control.Monad (mzero, void)
 import Data.List (intercalate, intersperse)
 import Data.Map (Map)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
-import Network.HTTP (urlEncode)
 import qualified Data.Map as Map
 import qualified Data.Set as Set
 import qualified Data.Text as T
@@ -188,7 +188,7 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
    -- inefficient
    replacePlain = try $ (\x -> T.concat . flip intersperse x)
                      <$> sequence [tillSpecifier 's', rest]
-   replaceUrl   = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack)
+   replaceUrl   = try $ (\x -> T.concat . flip intersperse x . urlEncode)
                      <$> sequence [tillSpecifier 'h', rest]
    justAppend   = try $ (<>) <$> rest
 
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 3990f0cb5..88471eb0a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -466,14 +466,11 @@ includeDirective top fields body = do
   let classes =  maybe [] T.words (lookup "class" fields)
   let ident = maybe "" trimr $ lookup "name" fields
   let parser =
-       case lookup "code" fields of
+       case lookup "code" fields `mplus` lookup "literal" fields of
          Just lang ->
            (codeblock ident classes fields (trimr lang) False
             . sourcesToText) <$> getInput
-         Nothing   ->
-           case lookup "literal" fields of
-             Just _  -> B.rawBlock "rst" . sourcesToText <$> getInput
-             Nothing -> parseBlocks
+         Nothing   -> parseBlocks
   let isLiteral = isJust (lookup "code" fields `mplus` lookup "literal" fields)
   let selectLines =
         (case trim <$> lookup "end-before" fields of
@@ -728,8 +725,8 @@ directive' = do
         "figure" -> do
            (caption, legend) <- parseFromString' extractCaption body'
            let src = escapeURI $ trim top
-           return $ B.para (B.imageWith (imgAttr "figclass") src "fig:"
-                       caption) <> legend
+           return $ B.simpleFigureWith
+               (imgAttr "figclass") caption src "" <> legend
         "image" -> do
            let src = escapeURI $ trim top
            let alt = B.str $ maybe "image" trim $ lookup "alt" fields
@@ -922,14 +919,22 @@ addNewRole roleText fields = do
         (baseRole, baseFmt, baseAttr) =
                getBaseRole (parentRole, Nothing, nullAttr) customRoles
         fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
-        annotate :: [Text] -> [Text]
-        annotate = maybe id (:) $
-            if baseRole == "code"
-               then lookup "language" fields
-               else Nothing
-        attr = let (ident, classes, keyValues) = baseAttr
-        -- nub in case role name & language class are the same
-               in (ident, nub . (role :) . annotate $ classes, keyValues)
+
+        updateClasses :: [Text] -> [Text]
+        updateClasses oldClasses = let
+
+          codeLanguageClass = if baseRole == "code"
+            then maybeToList (lookup "language" fields)
+            else []
+
+          -- if no ":class:" field is given, the default is the role name
+          classFieldClasses = maybe [role] T.words (lookup "class" fields)
+
+          -- nub in case role name & language class are the same
+          in nub (classFieldClasses ++ codeLanguageClass ++ oldClasses)
+
+        attr = let (ident, baseClasses, keyValues) = baseAttr
+               in (ident, updateClasses baseClasses, keyValues)
 
     -- warn about syntax we ignore
     forM_ fields $ \(key, _) -> case key of
@@ -1158,10 +1163,11 @@ referenceNames = do
   let rn = try $ do
              string ".. _"
              ref <- quotedReferenceName
-                  <|> manyChar (  noneOf ":\n"
+                  <|> manyChar (  noneOf "\\:\n"
                               <|> try (char '\n' <*
                                        string "   " <*
                                        notFollowedBy blankline)
+                              <|> try (char '\\' *> char ':')
                               <|> try (char ':' <* lookAhead alphaNum)
                                )
              char ':'
diff --git a/src/Text/Pandoc/Readers/RTF.hs b/src/Text/Pandoc/Readers/RTF.hs
new file mode 100644
index 000000000..3938681f4
--- /dev/null
+++ b/src/Text/Pandoc/Readers/RTF.hs
@@ -0,0 +1,1351 @@
+{-# LANGUAGE FlexibleContexts  #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Readers.RTF
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane (<jgm@berkeley.edu>)
+   Stability   : alpha
+   Portability : portable
+
+Conversion of RTF documents 'Pandoc' document.
+We target version 1.5 of the RTF spec.
+-}
+module Text.Pandoc.Readers.RTF (readRTF) where
+
+import qualified Data.IntMap as IntMap
+import qualified Data.Sequence as Seq
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Data.List (find, foldl')
+import Data.Word (Word8, Word16)
+import Data.Default
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Read as TR
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), insertMedia)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing
+import Text.Pandoc.Shared (safeRead, tshow)
+import Data.Char (isAlphaNum, chr, isAscii, isLetter, isSpace, ord)
+import qualified Data.ByteString.Lazy as BL
+import Data.Digest.Pure.SHA (sha1, showDigest)
+import Data.Maybe (mapMaybe, fromMaybe)
+import Safe (lastMay, initSafe, headDef)
+-- import Debug.Trace
+
+-- TODO:
+-- [ ] more complex table features
+--
+
+-- | Read RTF from an input string and return a Pandoc document.
+readRTF  :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
+readRTF opts s = do
+  let sources = toSources s
+  parsed <- readWithM parseRTF def{ sOptions = opts } sources
+  case parsed of
+       Left e  -> throwError e
+       Right d -> return d
+
+data CharSet = ANSI | Mac | Pc | Pca
+  deriving (Show, Eq)
+
+-- first index is the list (or override) id, second is the list level
+type ListTable = IntMap.IntMap ListLevelTable
+type ListLevelTable = IntMap.IntMap ListType
+
+data RTFState = RTFState  { sOptions     :: ReaderOptions
+                          , sCharSet     :: CharSet
+                          , sGroupStack  :: [Properties]
+                          , sListStack   :: [List]
+                          , sCurrentCell :: Blocks
+                          , sTableRows   :: [TableRow] -- reverse order
+                          , sTextContent :: [(Properties, Text)]
+                          , sMetadata    :: [(Text, Inlines)]
+                          , sFontTable   :: FontTable
+                          , sStylesheet  :: Stylesheet
+                          , sListTable   :: ListTable
+                          , sListOverrideTable :: ListTable
+                          , sEatChars    :: Int
+                          } deriving (Show)
+
+instance Default RTFState where
+ def = RTFState { sOptions = def
+                , sCharSet = ANSI
+                , sGroupStack = []
+                , sListStack = []
+                , sCurrentCell = mempty
+                , sTableRows = []
+                , sTextContent = []
+                , sMetadata = []
+                , sFontTable = mempty
+                , sStylesheet = mempty
+                , sListTable = mempty
+                , sListOverrideTable = mempty
+                , sEatChars = 0
+                }
+
+type FontTable = IntMap.IntMap FontFamily
+
+data FontFamily =
+  Roman | Swiss | Modern | Script | Decor | Tech | Bidi
+  deriving (Show, Eq)
+
+data StyleType = ParagraphStyle | SectionStyle | CharStyle | TableStyle
+  deriving (Show, Eq)
+
+data Style =
+  Style { styleNum :: Int
+        , styleType :: StyleType
+        , styleBasedOn :: Maybe Int
+        , styleName :: Text
+        , styleFormatting :: [Tok]
+        } deriving (Show, Eq)
+
+type Stylesheet = IntMap.IntMap Style
+
+data PictType =
+  Emfblip | Pngblip | Jpegblip
+  deriving (Show, Eq)
+
+data Pict =
+  Pict { picType :: Maybe PictType
+       , picWidth :: Maybe Int
+       , picHeight :: Maybe Int
+       , picWidthGoal :: Maybe Int
+       , picHeightGoal :: Maybe Int
+       , picBinary :: Bool
+       , picData :: Text
+       , picName :: Text
+       , picBytes :: BL.ByteString
+       } deriving (Show, Eq)
+
+instance Default Pict where
+ def = Pict { picType = Nothing
+            , picWidth = Nothing
+            , picHeight = Nothing
+            , picWidthGoal = Nothing
+            , picHeightGoal = Nothing
+            , picBinary = False
+            , picData = mempty
+            , picName = mempty
+            , picBytes = mempty }
+
+data Properties =
+  Properties
+  { gBold :: Bool
+  , gItalic :: Bool
+  , gCaps :: Bool
+  , gDeleted :: Bool
+  , gSub :: Bool
+  , gSuper :: Bool
+  , gSmallCaps :: Bool
+  , gUnderline :: Bool
+  , gHyperlink :: Maybe Text
+  , gAnchor :: Maybe Text
+  , gImage :: Maybe Pict
+  , gFontFamily :: Maybe FontFamily
+  , gHidden :: Bool
+  , gUC :: Int -- number of ansi chars to skip after unicode char
+  , gFootnote :: Maybe Blocks
+  , gOutlineLevel :: Maybe ListLevel
+  , gListOverride :: Maybe Override
+  , gListLevel :: Maybe Int
+  , gInTable :: Bool
+  } deriving (Show, Eq)
+
+instance Default Properties where
+   def = Properties { gBold = False
+                    , gItalic = False
+                    , gCaps = False
+                    , gDeleted = False
+                    , gSub = False
+                    , gSuper = False
+                    , gSmallCaps = False
+                    , gUnderline = False
+                    , gHyperlink = Nothing
+                    , gAnchor = Nothing
+                    , gImage = Nothing
+                    , gFontFamily = Nothing
+                    , gHidden = False
+                    , gUC = 1
+                    , gFootnote = Nothing
+                    , gOutlineLevel = Nothing
+                    , gListOverride = Nothing
+                    , gListLevel = Nothing
+                    , gInTable = False
+                    }
+
+type RTFParser m = ParserT Sources RTFState m
+
+data ListType = Bullet | Ordered ListAttributes
+  deriving (Show, Eq)
+
+type Override = Int
+
+type ListLevel = Int
+
+data List =
+    List Override ListLevel ListType [Blocks]  -- items in reverse order
+    deriving (Show, Eq)
+
+newtype TableRow = TableRow [Blocks] -- cells in reverse order
+    deriving (Show, Eq)
+
+parseRTF :: PandocMonad m => RTFParser m Pandoc
+parseRTF = do
+  skipMany nl
+  toks <- many tok
+  -- return $! traceShowId toks
+  bs <- (case toks of
+          -- if we start with {\rtf1...}, parse that and ignore
+          -- what follows (which in certain cases can be non-RTF content)
+          rtftok@(Tok _ (Grouped (Tok _ (ControlWord "rtf" (Just 1)) : _))) : _
+            -> foldM processTok mempty [rtftok]
+          _ -> foldM processTok mempty toks)
+        >>= emitBlocks
+  unclosed <- closeContainers
+  let doc = B.doc $ bs <> unclosed
+  kvs <- sMetadata <$> getState
+  pure $ foldr (uncurry B.setMeta) doc kvs
+
+data Tok = Tok SourcePos TokContents
+  deriving (Show, Eq)
+
+data TokContents =
+    ControlWord Text (Maybe Int)
+  | ControlSymbol Char
+  | UnformattedText Text
+  | BinData BL.ByteString
+  | HexVal Word8
+  | Grouped [Tok]
+  deriving (Show, Eq)
+
+tok :: PandocMonad m => RTFParser m Tok
+tok = do
+  pos <- getPosition
+  Tok pos <$> ((controlThing <|> unformattedText <|> grouped) <* skipMany nl)
+ where
+  controlThing = do
+    char '\\' *>
+      ( binData
+     <|> (ControlWord <$> letterSequence <*> (parameter <* optional delimChar))
+     <|> (HexVal <$> hexVal)
+     <|> (ControlSymbol <$> anyChar) )
+  binData = try $ do
+    string "bin" <* notFollowedBy letter
+    n <- fromMaybe 0 <$> parameter
+    spaces
+    -- NOTE: We assume here that if the document contains binary
+    -- data, it will not be valid UTF-8 and hence it will have been
+    -- read as latin1, so we can recover the data in the following
+    -- way.  This is probably not completely reliable, but I don't
+    -- know if we can do better without making this reader take
+    -- a ByteString input.
+    dat <- BL.pack . map (fromIntegral . ord) <$> count n anyChar
+    return $ BinData dat
+  parameter = do
+    hyph <- string "-" <|> pure ""
+    rest <- many digit
+    let pstr = T.pack $ hyph <> rest
+    return $ safeRead pstr
+  hexVal = do
+    char '\''
+    x <- hexDigit
+    y <- hexDigit
+    return $ hexToWord (T.pack [x,y])
+  letterSequence = T.pack <$> many1 (satisfy (\c -> isAscii c && isLetter c))
+  unformattedText =
+    UnformattedText . T.pack . mconcat <$>
+      many1 (   many1 (satisfy (not . isSpecial))
+            <|> ("" <$ nl))
+  grouped = Grouped <$> (char '{' *> skipMany nl *> manyTill tok (char '}'))
+
+nl :: PandocMonad m => RTFParser m ()
+nl = void (char '\n' <|> char '\r')
+
+isSpecial :: Char -> Bool
+isSpecial '{' = True
+isSpecial '}' = True
+isSpecial '\\' = True
+isSpecial '\n' = True
+isSpecial _ = False
+
+delimChar :: PandocMonad m => RTFParser m Char
+delimChar = satisfy (\c -> not (isAlphaNum c || isSpecial c))
+
+modifyGroup :: PandocMonad m
+            => (Properties -> Properties)
+            -> RTFParser m ()
+modifyGroup f =
+  updateState $ \st ->
+    st{ sGroupStack =
+          case sGroupStack st of
+            [] -> []
+            (x:xs) -> f x : xs }
+
+addFormatting :: (Properties, Text) -> Inlines
+addFormatting (_, "\n") = B.linebreak
+addFormatting (props, _) | gHidden props = mempty
+addFormatting (props, _) | Just bs <- gFootnote props = B.note bs
+addFormatting (props, txt) =
+  (if gBold props then B.strong else id) .
+  (if gItalic props then B.emph else id) .
+  (if gDeleted props then B.strikeout else id) .
+  (if gSub props then B.subscript else id) .
+  (if gSuper props then B.superscript else id) .
+  (if gSmallCaps props then B.smallcaps else id) .
+  (if gUnderline props then B.underline else id) .
+  (case gHyperlink props of
+     Nothing -> id
+     Just linkdest -> B.link linkdest mempty) .
+  (case gAnchor props of
+     Nothing -> id
+     Just ident -> B.spanWith (ident,[],[])) .
+  (case gFontFamily props of
+     Just Modern -> B.code
+     _ -> case gImage props of
+            Just pict ->
+              let attr = ("",[],
+                         (case picWidthGoal pict of
+                           Nothing -> []
+                           Just w  -> [("width", tshow (fromIntegral w / 1440
+                                                         :: Double)
+                                          <> "in")]) ++
+                         (case picHeightGoal pict of
+                            Nothing -> []
+                            Just h -> [("height", tshow (fromIntegral h / 1440
+                                                         :: Double)
+                                          <> "in")]))
+              in  B.imageWith attr (picName pict) "" . B.text
+            Nothing -> B.text) .
+  (if gCaps props then T.toUpper else id)
+  $ txt
+
+addText :: PandocMonad m => Text -> RTFParser m ()
+addText t = do
+  gs <- sGroupStack <$> getState
+  let props = case gs of
+                (x:_) -> x
+                _ -> def
+  updateState (\s -> s{ sTextContent = (props, t) : sTextContent s })
+
+inGroup :: PandocMonad m => RTFParser m a -> RTFParser m a
+inGroup p = do
+  updateState $ \st ->
+    st{ sGroupStack =
+        case sGroupStack st of
+          [] -> [def]
+          (x:xs) -> (x:x:xs) } -- inherit current group's properties
+  result <- p
+  updateState $ \st ->
+    st{ sGroupStack =
+        case sGroupStack st of
+          [] -> [] -- should not happen
+          (_:xs) -> xs }
+  return result
+
+getStyleFormatting :: PandocMonad m => Int -> RTFParser m [Tok]
+getStyleFormatting stynum = do
+  stylesheet <- sStylesheet <$> getState
+  case IntMap.lookup stynum stylesheet of
+    Nothing -> return []
+    Just sty ->
+      case styleBasedOn sty of
+        Just i -> (<> styleFormatting sty)  <$> getStyleFormatting i
+        Nothing -> return $ styleFormatting sty
+
+isMetadataField :: Text -> Bool
+isMetadataField "title" = True
+isMetadataField "subject" = True
+isMetadataField "author" = True
+isMetadataField "manager" = True
+isMetadataField "company" = True
+isMetadataField "operator" = True
+isMetadataField "category" = True
+isMetadataField "keywords" = True
+isMetadataField "comment" = True
+isMetadataField "doccomm" = True
+isMetadataField "hlinkbase" = True
+isMetadataField "generator" = True
+isMetadataField _ = False
+
+isHeaderFooter :: Text -> Bool
+isHeaderFooter "header" = True
+isHeaderFooter "headerl" = True
+isHeaderFooter "headerr" = True
+isHeaderFooter "headerf" = True
+isHeaderFooter "footer" = True
+isHeaderFooter "footerl" = True
+isHeaderFooter "footerr" = True
+isHeaderFooter "footerf" = True
+isHeaderFooter _ = False
+
+boolParam :: Maybe Int -> Bool
+boolParam (Just 0) = False
+boolParam _ = True
+
+isUnderline :: Text -> Bool
+isUnderline "ul" = True
+isUnderline "uld" = True
+isUnderline "uldash" = True
+isUnderline "uldashd" = True
+isUnderline "uldashdd" = True
+isUnderline "uldb" = True
+isUnderline "ulth" = True
+isUnderline "ulthd" = True
+isUnderline "ulthdash" = True
+isUnderline "ulw" = True
+isUnderline "ulwave" = True
+isUnderline _ = False
+
+processTok :: PandocMonad m => Blocks -> Tok -> RTFParser m Blocks
+processTok bs (Tok pos tok') = do
+  setPosition pos
+  case tok' of
+    HexVal{} -> return ()
+    UnformattedText{} -> return ()
+    _ -> updateState $ \s -> s{ sEatChars = 0 }
+  case tok' of
+    Grouped (Tok _ (ControlSymbol '*') : toks) ->
+      bs <$ (do oldTextContent <- sTextContent <$> getState
+                processTok mempty (Tok pos (Grouped toks))
+                updateState $ \st -> st{ sTextContent = oldTextContent })
+    Grouped (Tok _ (ControlWord "fonttbl" _) : toks) -> inGroup $ do
+      updateState $ \s -> s{ sFontTable = processFontTable toks }
+      pure bs
+    Grouped (Tok _ (ControlWord "field" _) : toks) ->
+      inGroup $ handleField bs toks
+    Grouped (Tok _ (ControlWord "pict" _) : toks) ->
+      bs <$ inGroup (handlePict toks)
+    Grouped (Tok _ (ControlWord "stylesheet" _) : toks) ->
+      bs <$ inGroup (handleStylesheet toks)
+    Grouped (Tok _ (ControlWord "listtext" _) : _) -> do
+      -- eject any previous list items...sometimes TextEdit
+      -- doesn't put in a \par
+      emitBlocks bs
+    Grouped (Tok _ (ControlWord "pgdsc" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "colortbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "listtable" _) : toks) ->
+      bs <$ inGroup (handleListTable toks)
+    Grouped (Tok _ (ControlWord "listoverridetable" _) : toks) ->
+      bs <$ inGroup (handleListOverrideTable toks)
+    Grouped (Tok _ (ControlWord "wgrffmtfilter" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "themedata" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "colorschememapping" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "datastore" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "latentstyles" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "pntxta" _) : _) -> pure bs -- TODO
+    Grouped (Tok _ (ControlWord "pntxtb" _) : _) -> pure bs -- TODO
+    Grouped (Tok _ (ControlWord "xmlnstbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "filetbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "expandedcolortbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "listtables" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "revtbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "bkmkstart" _)
+             : Tok _ (UnformattedText t) : _) -> do
+      -- TODO ideally we'd put the span around bkmkstart/end, but this
+      -- is good for now:
+      modifyGroup (\g -> g{ gAnchor = Just $ T.strip t })
+      pure bs
+    Grouped (Tok _ (ControlWord "bkmkend" _) : _) -> do
+      modifyGroup (\g -> g{ gAnchor = Nothing })
+      pure bs
+    Grouped (Tok _ (ControlWord f _) : _) | isHeaderFooter f -> pure bs
+    Grouped (Tok _ (ControlWord "footnote" _) : toks) -> do
+      noteBs <- inGroup $ processDestinationToks toks
+      modifyGroup (\g -> g{ gFootnote = Just noteBs })
+      addText "*"
+      modifyGroup (\g -> g{ gFootnote = Nothing })
+      return bs
+    Grouped (Tok _ (ControlWord "info" _) : toks) ->
+      bs <$ inGroup (processDestinationToks toks)
+    Grouped (Tok _ (ControlWord f _) : toks) | isMetadataField f -> inGroup $ do
+      foldM_ processTok mempty toks
+      annotatedToks <- reverse . sTextContent <$> getState
+      updateState $ \s -> s{ sTextContent = [] }
+      let ils = B.trimInlines . mconcat $ map addFormatting annotatedToks
+      updateState $ \s -> s{ sMetadata = (f, ils) : sMetadata s }
+      pure bs
+    Grouped toks -> inGroup (foldM processTok bs toks)
+    UnformattedText t -> bs <$ do
+      -- return $! traceShowId $! (pos, t)
+      eatChars <- sEatChars <$> getState
+      case eatChars of
+        0 -> addText t
+        n | n < T.length t -> do
+             updateState $ \s -> s{ sEatChars = 0 }
+             addText (T.drop n t)
+          | otherwise -> do
+             updateState $ \s -> s{ sEatChars = n - T.length t }
+    HexVal n -> bs <$ do
+      eatChars <- sEatChars <$> getState
+      if eatChars == 0
+         then do
+           charset <- sCharSet <$> getState
+           case charset of
+             ANSI -> addText (T.singleton $ ansiToChar n)
+             Mac  -> addText (T.singleton $ macToChar n)
+             Pc   -> addText (T.singleton $ pcToChar n)
+             Pca  -> addText (T.singleton $ pcaToChar n)
+         else updateState $ \s -> s{ sEatChars = eatChars - 1 }
+    ControlWord "ansi" _ -> bs <$
+      updateState (\s -> s{ sCharSet = ANSI })
+    ControlWord "mac" _ -> bs <$
+      updateState (\s -> s{ sCharSet = Mac })
+    ControlWord "pc" _ -> bs <$
+      updateState (\s -> s{ sCharSet = Pc })
+    ControlWord "pca" _ -> bs <$
+      updateState (\s -> s{ sCharSet = Pca })
+    ControlWord "outlinelevel" mbp -> bs <$
+      modifyGroup (\g -> g{ gOutlineLevel = mbp })
+    ControlWord "ls" mbp -> bs <$
+      modifyGroup (\g -> g{ gListOverride = mbp })
+    ControlWord "ilvl" mbp -> bs <$
+      modifyGroup (\g -> g{ gListLevel = mbp })
+    ControlSymbol '\\' -> bs <$ addText "\\"
+    ControlSymbol '{' -> bs <$ addText "{"
+    ControlSymbol '}' -> bs <$ addText "}"
+    ControlSymbol '~' -> bs <$ addText "\x00a0"
+    ControlSymbol '-' -> bs <$ addText "\x00ad"
+    ControlSymbol '_' -> bs <$ addText "\x2011"
+    ControlWord "trowd" _ -> bs <$ do -- add new row
+      updateState $ \s -> s{ sTableRows = TableRow [] : sTableRows s
+                           , sCurrentCell = mempty }
+    ControlWord "cell" _ -> bs <$ do
+      new <- emitBlocks mempty
+      curCell <- (<> new) . sCurrentCell <$> getState
+      updateState $ \s -> s{ sTableRows =
+                                case sTableRows s of
+                                  TableRow cs : rs ->
+                                    TableRow (curCell : cs) : rs
+                                  [] -> [TableRow [curCell]] -- shouldn't happen
+                           , sCurrentCell = mempty }
+    ControlWord "intbl" _ -> bs <$ modifyGroup (\g -> g{ gInTable = True })
+    ControlWord "plain" _ -> bs <$ modifyGroup (const def)
+    ControlWord "lquote" _ -> bs <$ addText "\x2018"
+    ControlWord "rquote" _ -> bs <$ addText "\x2019"
+    ControlWord "ldblquote" _ -> bs <$ addText "\x201C"
+    ControlWord "rdblquote" _ -> bs <$ addText "\x201D"
+    ControlWord "emdash" _ -> bs <$ addText "\x2014"
+    ControlWord "emspace" _ -> bs <$ addText "\x2003"
+    ControlWord "enspace" _ -> bs <$ addText "\x2002"
+    ControlWord "endash" _ -> bs <$ addText "\x2013"
+    ControlWord "bullet" _ -> bs <$ addText "\x2022"
+    ControlWord "tab" _ -> bs <$ addText "\t"
+    ControlWord "line" _ -> bs <$ addText "\n"
+    ControlSymbol '\n' -> bs <$ addText "\n"
+    ControlSymbol '\r' -> bs <$ addText "\n"
+    ControlWord "uc" (Just i) -> bs <$ modifyGroup (\g -> g{ gUC = i })
+    ControlWord "cs" (Just n) -> do
+      getStyleFormatting n >>= foldM processTok bs
+    ControlWord "s" (Just n) -> do
+      getStyleFormatting n >>= foldM processTok bs
+    ControlWord "ds" (Just n) -> do
+      getStyleFormatting n >>= foldM processTok bs
+    ControlWord "f" (Just i) -> bs <$ do
+      fontTable <- sFontTable <$> getState
+      modifyGroup (\g -> g{ gFontFamily = IntMap.lookup i fontTable })
+    ControlWord "u" (Just i) -> bs <$ do
+      st <- getState
+      let curgroup = case sGroupStack st of
+                       [] -> def
+                       (x:_) -> x
+      updateState $ \s -> s{ sEatChars = gUC curgroup }
+      -- "RTF control words generally accept signed 16-bit numbers as
+      -- arguments. For this reason, Unicode values greater than 32767
+      -- must be expressed as negative numbers."
+      let codepoint :: Word16
+          codepoint = fromIntegral i
+      addText (T.singleton (chr $ fromIntegral codepoint))
+    ControlWord "caps" mbp -> bs <$
+      modifyGroup (\g -> g{ gCaps = boolParam mbp })
+    ControlWord "deleted" mbp -> bs <$
+      modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+    ControlWord "b" mbp -> bs <$
+      modifyGroup (\g -> g{ gBold = boolParam mbp })
+    ControlWord "i" mbp -> bs <$
+      modifyGroup (\g -> g{ gItalic = boolParam mbp })
+    ControlWord "sub" mbp -> bs <$
+      modifyGroup (\g -> g{ gSub = boolParam mbp })
+    ControlWord "super" mbp -> bs <$
+      modifyGroup (\g -> g{ gSuper = boolParam mbp })
+    ControlWord "up" mbp -> bs <$
+      modifyGroup (\g -> g{ gSuper = boolParam mbp })
+    ControlWord "strike" mbp -> bs <$
+      modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+    ControlWord "strikedl" mbp -> bs <$
+      modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+    ControlWord "striked" mbp -> bs <$
+      modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+    ControlWord "scaps" mbp -> bs <$
+      modifyGroup (\g -> g{ gSmallCaps = boolParam mbp })
+    ControlWord "v" mbp -> bs <$
+      modifyGroup (\g -> g{ gHidden = boolParam mbp })
+    ControlWord x mbp | isUnderline x -> bs <$
+      modifyGroup (\g -> g{ gUnderline = boolParam mbp })
+    ControlWord "ulnone" _ -> bs <$
+      modifyGroup (\g -> g{ gUnderline = False })
+    ControlWord "pard" _ -> bs <$ do
+      modifyGroup (const def)
+      getStyleFormatting 0 >>= foldM processTok bs
+    ControlWord "par" _ -> emitBlocks bs
+    _ -> pure bs
+
+processDestinationToks :: PandocMonad m => [Tok] -> RTFParser m Blocks
+processDestinationToks toks = do
+  textContent <- sTextContent <$> getState
+  liststack <- sListStack <$> getState
+  updateState $ \s -> s{ sTextContent = mempty
+                       , sListStack = [] }
+  result <- inGroup $
+              foldM processTok mempty toks >>= emitBlocks
+  unclosed <- closeContainers
+  updateState $ \s -> s{ sTextContent = textContent
+                       , sListStack = liststack }
+  return $ result <> unclosed
+
+-- close lists >= level
+closeLists :: PandocMonad m => Int -> RTFParser m Blocks
+closeLists lvl = do
+  lists <- sListStack <$> getState
+  case lists of
+    (List _ lvl' lt items : rest) | lvl' >= lvl -> do
+      let newlist = (case lt of
+                      Bullet -> B.bulletList
+                      Ordered listAttr -> B.orderedListWith listAttr)
+                    (reverse items)
+      updateState $ \s -> s{ sListStack = rest }
+      case rest of
+        [] -> do
+          updateState $ \s -> s{ sListStack = rest }
+          pure newlist
+        (List lo lvl'' lt' [] : rest') -> do -- should not happen
+          updateState $ \s -> s{ sListStack =
+               List lo lvl'' lt' [newlist] : rest' }
+          closeLists lvl
+        (List lo lvl'' lt' (i:is) : rest') -> do
+          updateState $ \s -> s{ sListStack =
+               List lo lvl'' lt' (i <> newlist : is) : rest' }
+          closeLists lvl
+    _ -> pure mempty
+
+closeTable :: PandocMonad m => RTFParser m Blocks
+closeTable = do
+  rawrows <- sTableRows <$> getState
+  if null rawrows
+     then return mempty
+     else do
+       let getCells (TableRow cs) = reverse cs
+       let rows = map getCells . reverse $ rawrows
+       updateState $ \s -> s{ sCurrentCell = mempty
+                            , sTableRows = [] }
+       return $ B.simpleTable [] rows
+
+closeContainers :: PandocMonad m => RTFParser m Blocks
+closeContainers = do
+  tbl <- closeTable
+  lists <- closeLists 0
+  return $ tbl <> lists
+
+trimFinalLineBreak :: Inlines -> Inlines
+trimFinalLineBreak ils =
+  case Seq.viewr (B.unMany ils) of
+    rest Seq.:> LineBreak -> B.Many rest
+    _ -> ils
+
+emitBlocks :: PandocMonad m => Blocks -> RTFParser m Blocks
+emitBlocks bs = do
+  annotatedToks <- reverse . sTextContent <$> getState
+  updateState $ \s -> s{ sTextContent = [] }
+  let justCode = def{ gFontFamily = Just Modern }
+  let prop = case annotatedToks of
+               [] -> def
+               ((p,_):_) -> p
+  tbl <- if gInTable prop || null annotatedToks
+            then pure mempty
+            else closeTable
+  new <-
+    case annotatedToks of
+      [] -> pure mempty
+      _ | Just lst <- gListOverride prop
+         -> do
+           let level = fromMaybe 0 $ gListLevel prop
+           listOverrideTable <- sListOverrideTable <$> getState
+           let listType = fromMaybe Bullet $
+                 IntMap.lookup lst listOverrideTable >>= IntMap.lookup level
+           lists <- sListStack <$> getState
+           -- get para contents of list item
+           let newbs = B.para . B.trimInlines . trimFinalLineBreak . mconcat $
+                        map addFormatting annotatedToks
+           case lists of
+             (List lo parentlevel _lt items : cs)
+               | lo == lst
+               , parentlevel == level
+               -- add another item to existing list
+               -> do updateState $ \s ->
+                        s{ sListStack =
+                             List lo level listType (newbs:items) : cs }
+                     pure mempty
+               | lo /= lst || level < parentlevel
+               -- close parent list and add new list
+               -> do new <- closeLists level  -- close open lists > level
+                     updateState $ \s ->
+                       s{ sListStack = List lst level listType [newbs] :
+                           sListStack s }
+                     pure new
+             _ -> do -- add new list (level > parentlevel)
+                  updateState $ \s ->
+                    s{ sListStack = List lst level listType [newbs] :
+                         sListStack s }
+                  pure mempty
+        | Just lvl <- gOutlineLevel prop
+         -> do
+            lists <- closeLists 0
+            pure $ lists <>
+                   B.header (lvl + 1)
+                   (B.trimInlines . mconcat $ map addFormatting
+                                            $ removeCommonFormatting
+                                              annotatedToks)
+        | all ((== justCode) . fst) annotatedToks
+         -> do
+            lists <- closeLists 0
+            pure $ lists <>
+                    B.codeBlock (mconcat $ map snd annotatedToks)
+        | all (T.all isSpace . snd) annotatedToks
+         -> closeLists 0
+        | otherwise -> do
+            lists <- closeLists 0
+            pure $ lists <>
+              B.para (B.trimInlines . trimFinalLineBreak . mconcat
+                $ map addFormatting annotatedToks)
+  if gInTable prop
+     then do
+       updateState $ \s -> s{ sCurrentCell = sCurrentCell s <> new }
+       pure bs
+     else do
+       pure $ bs <> tbl <> new
+
+-- Headers often have a style applied. We usually want to remove
+-- this, because headers will have their own styling in the target
+-- format.
+removeCommonFormatting :: [(Properties, Text)] -> [(Properties, Text)]
+removeCommonFormatting =
+  (\ts ->
+    if all (gBold . fst) ts
+       then map (\(p,t) -> (p{ gBold = False }, t)) ts
+       else ts) .
+  (\ts ->
+    if all (gItalic . fst) ts
+       then map (\(p,t) -> (p{ gItalic = False }, t)) ts
+       else ts)
+
+
+-- {\field{\*\fldinst{HYPERLINK "http://pandoc.org"}}{\fldrslt foo}}
+handleField :: PandocMonad m => Blocks -> [Tok] -> RTFParser m Blocks
+handleField bs
+  (Tok _
+    (Grouped
+     (Tok _ (ControlSymbol '*')
+     :Tok _ (ControlWord "fldinst" Nothing)
+     :Tok _ (Grouped (Tok _ (UnformattedText insttext):rest))
+     :_))
+  :linktoks)
+  | Just linkdest <- getHyperlink insttext
+  = do let linkdest' = case rest of
+                         (Tok _ (ControlSymbol '\\')
+                          : Tok _ (UnformattedText t)
+                          : _) | Just bkmrk <- T.stripPrefix "l" t
+                           -> "#" <> unquote bkmrk
+                         _ -> linkdest
+       modifyGroup $ \g -> g{ gHyperlink = Just linkdest' }
+       result <- foldM processTok bs linktoks
+       modifyGroup $ \g -> g{ gHyperlink = Nothing }
+       return result
+handleField bs _ = pure bs
+
+unquote :: Text -> Text
+unquote = T.dropWhile (=='"') . T.dropWhileEnd (=='"') . T.strip
+
+handleListTable :: PandocMonad m => [Tok] -> RTFParser m ()
+handleListTable toks = do
+  mapM_ handleList toks
+
+handleList :: PandocMonad m => Tok -> RTFParser m ()
+handleList (Tok _ (Grouped (Tok _ (ControlWord "list" _) : toks))) = do
+  let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks]
+  let levels = [ts | Tok _ (Grouped (Tok _ (ControlWord "listlevel" _) : ts))
+                 <- toks]
+  tbl <- foldM handleListLevel mempty (zip [0..] levels)
+  updateState $ \s -> s{ sListTable = IntMap.insert listid tbl $ sListTable s }
+handleList _ = return ()
+
+handleListLevel :: PandocMonad m
+                => ListLevelTable
+                -> (Int, [Tok])
+                -> RTFParser m ListLevelTable
+handleListLevel levelTable (lvl, toks) = do
+  let start = headDef 1
+                [n | Tok _ (ControlWord "levelstartat" (Just n)) <- toks]
+  let mbNumberStyle =
+        case [n | Tok _ (ControlWord "levelnfc" (Just n)) <- toks] of
+          [] -> Nothing
+          (0:_) -> Just Decimal
+          (1:_) -> Just UpperRoman
+          (2:_) -> Just LowerRoman
+          (3:_) -> Just UpperAlpha
+          (4:_) -> Just LowerAlpha
+          (23:_) -> Nothing
+          (255:_) -> Nothing
+          _ -> Just DefaultStyle
+  let listType = case mbNumberStyle of
+                   Nothing -> Bullet
+                   Just numStyle -> Ordered (start,numStyle,Period)
+  return $ IntMap.insert lvl listType levelTable
+
+handleListOverrideTable :: PandocMonad m => [Tok] -> RTFParser m ()
+handleListOverrideTable toks = mapM_ handleListOverride toks
+
+handleListOverride :: PandocMonad m => Tok -> RTFParser m ()
+handleListOverride
+ (Tok _ (Grouped (Tok _ (ControlWord "listoverride" _) : toks))) = do
+  let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks]
+  let lsn = headDef 0 [n | Tok _ (ControlWord "ls" (Just n)) <- toks]
+  -- TODO override stuff, esp. start num -- for now we just handle indirection
+  listTable <- sListTable <$> getState
+  case IntMap.lookup listid listTable of
+    Nothing -> return ()
+    Just tbl -> updateState $ \s ->
+                   s{ sListOverrideTable = IntMap.insert lsn tbl $
+                        sListOverrideTable s }
+handleListOverride _ = return ()
+
+handleStylesheet :: PandocMonad m => [Tok] -> RTFParser m ()
+handleStylesheet toks = do
+  let styles = mapMaybe parseStyle toks
+  updateState $ \s -> s{ sStylesheet = IntMap.fromList
+                                     $ zip (map styleNum styles) styles }
+
+parseStyle :: Tok -> Maybe Style
+parseStyle (Tok _ (Grouped toks)) = do
+  let (styType, styNum, rest) =
+        case toks of
+          Tok _ (ControlWord "s" (Just n)) : ts -> (ParagraphStyle, n, ts)
+          Tok _ (ControlWord "ds" (Just n)) : ts -> (SectionStyle, n, ts)
+          Tok _ (ControlWord "cs" (Just n)) : ts -> (CharStyle, n, ts)
+          Tok _ (ControlWord "ts" (Just n)) : ts -> (TableStyle, n, ts)
+          _ -> (ParagraphStyle, 0, toks)
+  let styName = case lastMay rest of
+                  Just (Tok _ (UnformattedText t)) -> T.dropWhileEnd (==';') t
+                  _ -> mempty
+  let isBasedOn (Tok _ (ControlWord "sbasedon" (Just _))) = True
+      isBasedOn _ = False
+  let styBasedOn = case find isBasedOn toks of
+                     Just (Tok _ (ControlWord "sbasedon" (Just i))) -> Just i
+                     _ -> Nothing
+  let isStyleControl (Tok _ (ControlWord x _)) =
+         x `elem` ["cs", "s", "ds", "additive", "sbasedon", "snext",
+                   "sautoupd", "shidden", "keycode", "alt", "shift",
+                   "ctrl", "fn"]
+      isStyleControl _ = False
+  let styFormatting = filter (not . isStyleControl) (initSafe rest)
+  return $ Style{ styleNum = styNum
+                , styleType = styType
+                , styleBasedOn = styBasedOn
+                , styleName = styName
+                , styleFormatting = styFormatting
+                }
+parseStyle _ = Nothing
+
+hexToWord  :: Text -> Word8
+hexToWord t = case TR.hexadecimal t of
+                Left _ -> 0
+                Right (x,_) -> x
+
+
+handlePict :: PandocMonad m => [Tok] -> RTFParser m ()
+handlePict toks = do
+  let pict = foldl' getPictData def toks
+  let altText = "image"
+  let bytes =
+        if picBinary pict
+           then picBytes pict
+           else BL.pack $ map hexToWord $ T.chunksOf 2 $ picData pict
+  let (mimetype, ext) =
+        case picType pict of
+          Just Emfblip -> (Just "image/x-emf", ".emf")
+          Just Pngblip -> (Just "image/png", ".png")
+          Just Jpegblip -> (Just "image/jpeg", ".jpg")
+          Nothing -> (Nothing, "")
+  case mimetype of
+    Just mt -> do
+      let pictname = showDigest (sha1 bytes) <> ext
+      insertMedia pictname (Just mt) bytes
+      modifyGroup $ \g -> g{ gImage = Just pict{ picName = T.pack pictname,
+                                                 picBytes = bytes } }
+      addText altText
+      modifyGroup $ \g -> g{ gImage = Nothing }
+    _ -> return ()
+ where
+  getPictData :: Pict -> Tok -> Pict
+  getPictData pict (Tok _ tok') =
+    case tok' of
+      ControlWord "emfblip" _-> pict{ picType = Just Emfblip }
+      ControlWord "pngblip" _-> pict{ picType = Just Pngblip }
+      ControlWord "jpegblip" _-> pict{ picType = Just Jpegblip }
+      ControlWord "picw" (Just w) -> pict{ picWidth = Just w }
+      ControlWord "pich" (Just h) -> pict{ picHeight = Just h }
+      ControlWord "picwgoal" (Just w) -> pict{ picWidthGoal = Just w }
+      ControlWord "pichgoal" (Just h) -> pict{ picHeightGoal = Just h }
+      BinData d | not (BL.null d)
+                  -> pict{ picBinary = True, picBytes = picBytes pict <> d }
+      UnformattedText t -> pict{ picData = t }
+      _ -> pict
+
+
+getHyperlink :: Text -> Maybe Text
+getHyperlink t =
+  case T.stripPrefix "HYPERLINK" (T.strip t) of
+    Nothing -> Nothing
+    Just rest -> Just $ unquote rest
+
+processFontTable :: [Tok] -> FontTable
+processFontTable = snd . foldl' go (0, mempty)
+ where
+  go (fontnum, tbl) (Tok _ tok') =
+    case tok' of
+     (ControlWord "f" (Just i)) -> (i, tbl)
+     (ControlWord "fnil" _) -> (fontnum, tbl)
+     (ControlWord "froman" _) -> (fontnum, IntMap.insert fontnum Roman tbl)
+     (ControlWord "fswiss" _) -> (fontnum, IntMap.insert fontnum Swiss tbl)
+     (ControlWord "fmodern" _) -> (fontnum, IntMap.insert fontnum Modern tbl)
+     (ControlWord "fscript" _) -> (fontnum, IntMap.insert fontnum Script tbl)
+     (ControlWord "fdecor" _) -> (fontnum, IntMap.insert fontnum Decor tbl)
+     (ControlWord "ftech" _) -> (fontnum, IntMap.insert fontnum Tech tbl)
+     (ControlWord "fbidi" _) -> (fontnum, IntMap.insert fontnum Bidi tbl)
+     (Grouped ts) -> foldl' go (fontnum, tbl) ts
+     _ -> (fontnum, tbl)
+
+
+ansiToChar :: Word8 -> Char
+ansiToChar i = chr $
+  case i of
+    128 -> 8364
+    130 -> 8218
+    131 -> 402
+    132 -> 8222
+    133 -> 8230
+    134 -> 8224
+    135 -> 8225
+    136 -> 710
+    137 -> 8240
+    138 -> 352
+    139 -> 8249
+    140 -> 338
+    142 -> 381
+    145 -> 8216
+    146 -> 8217
+    147 -> 8220
+    148 -> 8221
+    149 -> 8226
+    150 -> 8211
+    151 -> 8212
+    152 -> 732
+    153 -> 8482
+    154 -> 353
+    155 -> 8250
+    156 -> 339
+    158 -> 382
+    159 -> 376
+    173 -> 0xAD
+    _ -> fromIntegral i
+
+macToChar :: Word8 -> Char
+macToChar i = chr $
+  case i of
+    0x80 -> 0xC4
+    0x81 -> 0xC5
+    0x82 -> 0xC7
+    0x83 -> 0xC9
+    0x84 -> 0xD1
+    0x85 -> 0xD6
+    0x86 -> 0xDC
+    0x87 -> 0xE1
+    0x88 -> 0xE0
+    0x89 -> 0xE2
+    0x8A -> 0xE4
+    0x8B -> 0xE3
+    0x8C -> 0xE5
+    0x8D -> 0xE7
+    0x8E -> 0xE9
+    0x8F -> 0xE8
+    0x90 -> 0xEA
+    0x91 -> 0xEB
+    0x92 -> 0xED
+    0x93 -> 0xEC
+    0x94 -> 0xEE
+    0x95 -> 0xEF
+    0x96 -> 0xF1
+    0x97 -> 0xF3
+    0x98 -> 0xF2
+    0x99 -> 0xF4
+    0x9A -> 0xF6
+    0x9B -> 0xF5
+    0x9C -> 0xFA
+    0x9D -> 0xF9
+    0x9E -> 0xFB
+    0x9F -> 0xFC
+    0xA0 -> 0xDD
+    0xA1 -> 0xB0
+    0xA2 -> 0xA2
+    0xA3 -> 0xA3
+    0xA4 -> 0xA7
+    0xA5 -> 0xD7
+    0xA6 -> 0xB6
+    0xA7 -> 0xDF
+    0xA8 -> 0xAE
+    0xA9 -> 0xA9
+    0xAA -> 0xB2
+    0xAB -> 0xB4
+    0xAC -> 0xA8
+    0xAD -> 0xB3
+    0xAE -> 0xC6
+    0xAF -> 0xD8
+    0xB0 -> 0xB9
+    0xB1 -> 0xB1
+    0xB2 -> 0xBC
+    0xB3 -> 0xBD
+    0xB4 -> 0xA5
+    0xB5 -> 0xB5
+    0xBA -> 0xBE
+    0xBB -> 0xAA
+    0xBC -> 0xBA
+    0xBE -> 0xE6
+    0xBF -> 0xF8
+    0xC0 -> 0xBF
+    0xC1 -> 0xA1
+    0xC2 -> 0xAC
+    0xC3 -> 0x0141
+    0xC4 -> 0x0192
+    0xC5 -> 0x02CB
+    0xC7 -> 0xAB
+    0xC8 -> 0xBB
+    0xC9 -> 0xA6
+    0xCA -> 0xA0
+    0xCB -> 0xC0
+    0xCC -> 0xC3
+    0xCD -> 0xD5
+    0xCE -> 0x0152
+    0xCF -> 0x0153
+    0xD0 -> 0xAD
+    0xD4 -> 0x0142
+    0xD6 -> 0xF7
+    0xD8 -> 0xFF
+    0xD9 -> 0x0178
+    0xDB -> 0xA4
+    0xDC -> 0xD0
+    0xDD -> 0xF0
+    0xDE -> 0xDE
+    0xDF -> 0xFE
+    0xE0 -> 0xFD
+    0xE1 -> 0xB7
+    0xE5 -> 0xC2
+    0xE6 -> 0xCA
+    0xE7 -> 0xC1
+    0xE8 -> 0xCB
+    0xE9 -> 0xC8
+    0xEA -> 0xCD
+    0xEB -> 0xCE
+    0xEC -> 0xCF
+    0xED -> 0xCC
+    0xEE -> 0xD3
+    0xEF -> 0xD4
+    0xF1 -> 0xD2
+    0xF2 -> 0xDA
+    0xF3 -> 0xDB
+    0xF4 -> 0xD9
+    0xF5 -> 0x0131
+    0xF6 -> 0x02C6
+    0xF7 -> 0x02DC
+    0xF8 -> 0xAF
+    0xF9 -> 0x02D8
+    0xFA -> 0x02D9
+    0xFB -> 0x02DA
+    0xFC -> 0xB8
+    0xFD -> 0x02DD
+    0xFE -> 0x02DB
+    0xFF -> 0x02C7
+    _ -> fromIntegral i
+
+pcToChar :: Word8 -> Char
+pcToChar i = chr $
+  case i of
+    0x80 -> 0xc7
+    0x81 -> 0xfc
+    0x82 -> 0xe9
+    0x83 -> 0xe2
+    0x84 -> 0xe4
+    0x85 -> 0xe0
+    0x86 -> 0xe5
+    0x87 -> 0xe7
+    0x88 -> 0xea
+    0x89 -> 0xeb
+    0x8a -> 0xe8
+    0x8b -> 0xef
+    0x8c -> 0xee
+    0x8d -> 0xec
+    0x8e -> 0xc4
+    0x8f -> 0xc5
+    0x90 -> 0xc9
+    0x91 -> 0xe6
+    0x92 -> 0xc6
+    0x93 -> 0xf4
+    0x94 -> 0xf6
+    0x95 -> 0xf2
+    0x96 -> 0xfb
+    0x97 -> 0xf9
+    0x98 -> 0xff
+    0x99 -> 0xd6
+    0x9a -> 0xdc
+    0x9b -> 0xa2
+    0x9c -> 0xa3
+    0x9d -> 0xa5
+    0x9e -> 0x20a7
+    0x9f -> 0x0192
+    0xa0 -> 0xe1
+    0xa1 -> 0xed
+    0xa2 -> 0xf3
+    0xa3 -> 0xfa
+    0xa4 -> 0xf1
+    0xa5 -> 0xd1
+    0xa6 -> 0xaa
+    0xa7 -> 0xba
+    0xa8 -> 0xbf
+    0xa9 -> 0x2310
+    0xaa -> 0xac
+    0xab -> 0xbd
+    0xac -> 0xbc
+    0xad -> 0xa1
+    0xae -> 0xab
+    0xaf -> 0xbb
+    0xb0 -> 0x2591
+    0xb1 -> 0x2592
+    0xb2 -> 0x2593
+    0xb3 -> 0x2502
+    0xb4 -> 0x2524
+    0xb5 -> 0x2561
+    0xb6 -> 0x2562
+    0xb7 -> 0x2556
+    0xb8 -> 0x2555
+    0xb9 -> 0x2563
+    0xba -> 0x2551
+    0xbb -> 0x2557
+    0xbc -> 0x255d
+    0xbd -> 0x255c
+    0xbe -> 0x255b
+    0xbf -> 0x2510
+    0xc0 -> 0x2514
+    0xc1 -> 0x2534
+    0xc2 -> 0x252c
+    0xc3 -> 0x251c
+    0xc4 -> 0x2500
+    0xc5 -> 0x253c
+    0xc6 -> 0x255e
+    0xc7 -> 0x255f
+    0xc8 -> 0x255a
+    0xc9 -> 0x2554
+    0xca -> 0x2569
+    0xcb -> 0x2566
+    0xcc -> 0x2560
+    0xcd -> 0x2550
+    0xce -> 0x256c
+    0xcf -> 0x2567
+    0xd0 -> 0x2568
+    0xd1 -> 0x2564
+    0xd2 -> 0x2565
+    0xd3 -> 0x2559
+    0xd4 -> 0x2558
+    0xd5 -> 0x2552
+    0xd6 -> 0x2553
+    0xd7 -> 0x256b
+    0xd8 -> 0x256a
+    0xd9 -> 0x2518
+    0xda -> 0x250c
+    0xdb -> 0x2588
+    0xdc -> 0x2584
+    0xdd -> 0x258c
+    0xde -> 0x2590
+    0xdf -> 0x2580
+    0xe0 -> 0x03b1
+    0xe1 -> 0xdf
+    0xe2 -> 0x0393
+    0xe3 -> 0x03c0
+    0xe4 -> 0x03a3
+    0xe5 -> 0x03c3
+    0xe6 -> 0xb5
+    0xe7 -> 0x03c4
+    0xe8 -> 0x03a6
+    0xe9 -> 0x0398
+    0xea -> 0x03a9
+    0xeb -> 0x03b4
+    0xec -> 0x221e
+    0xed -> 0x03c6
+    0xee -> 0x03b5
+    0xef -> 0x2229
+    0xf0 -> 0x2261
+    0xf1 -> 0xb1
+    0xf2 -> 0x2265
+    0xf3 -> 0x2264
+    0xf4 -> 0x2320
+    0xf5 -> 0x2321
+    0xf6 -> 0xf7
+    0xf7 -> 0x2248
+    0xf8 -> 0xb0
+    0xf9 -> 0x2219
+    0xfa -> 0xb7
+    0xfb -> 0x221a
+    0xfc -> 0x207f
+    0xfd -> 0xb2
+    0xfe -> 0x25a0
+    0xff -> 0xa0
+    _    -> fromIntegral i
+
+pcaToChar :: Word8 -> Char
+pcaToChar i = chr $
+  case i of
+    0x80 -> 0x00c7
+    0x81 -> 0x00fc
+    0x82 -> 0x00e9
+    0x83 -> 0x00e2
+    0x84 -> 0x00e4
+    0x85 -> 0x00e0
+    0x86 -> 0x00e5
+    0x87 -> 0x00e7
+    0x88 -> 0x00ea
+    0x89 -> 0x00eb
+    0x8a -> 0x00e8
+    0x8b -> 0x00ef
+    0x8c -> 0x00ee
+    0x8d -> 0x00ec
+    0x8e -> 0x00c4
+    0x8f -> 0x00c5
+    0x90 -> 0x00c9
+    0x91 -> 0x00e6
+    0x92 -> 0x00c6
+    0x93 -> 0x00f4
+    0x94 -> 0x00f6
+    0x95 -> 0x00f2
+    0x96 -> 0x00fb
+    0x97 -> 0x00f9
+    0x98 -> 0x00ff
+    0x99 -> 0x00d6
+    0x9a -> 0x00dc
+    0x9b -> 0x00f8
+    0x9c -> 0x00a3
+    0x9d -> 0x00d8
+    0x9e -> 0x00d7
+    0x9f -> 0x0192
+    0xa0 -> 0x00e1
+    0xa1 -> 0x00ed
+    0xa2 -> 0x00f3
+    0xa3 -> 0x00fa
+    0xa4 -> 0x00f1
+    0xa5 -> 0x00d1
+    0xa6 -> 0x00aa
+    0xa7 -> 0x00ba
+    0xa8 -> 0x00bf
+    0xa9 -> 0x00ae
+    0xaa -> 0x00ac
+    0xab -> 0x00bd
+    0xac -> 0x00bc
+    0xad -> 0x00a1
+    0xae -> 0x00ab
+    0xaf -> 0x00bb
+    0xb0 -> 0x2591
+    0xb1 -> 0x2592
+    0xb2 -> 0x2593
+    0xb3 -> 0x2502
+    0xb4 -> 0x2524
+    0xb5 -> 0x00c1
+    0xb6 -> 0x00c2
+    0xb7 -> 0x00c0
+    0xb8 -> 0x00a9
+    0xb9 -> 0x2563
+    0xba -> 0x2551
+    0xbb -> 0x2557
+    0xbc -> 0x255d
+    0xbd -> 0x00a2
+    0xbe -> 0x00a5
+    0xbf -> 0x2510
+    0xc0 -> 0x2514
+    0xc1 -> 0x2534
+    0xc2 -> 0x252c
+    0xc3 -> 0x251c
+    0xc4 -> 0x2500
+    0xc5 -> 0x253c
+    0xc6 -> 0x00e3
+    0xc7 -> 0x00c3
+    0xc8 -> 0x255a
+    0xc9 -> 0x2554
+    0xca -> 0x2569
+    0xcb -> 0x2566
+    0xcc -> 0x2560
+    0xcd -> 0x2550
+    0xce -> 0x256c
+    0xcf -> 0x00a4
+    0xd0 -> 0x00f0
+    0xd1 -> 0x00d0
+    0xd2 -> 0x00ca
+    0xd3 -> 0x00cb
+    0xd4 -> 0x00c8
+    0xd5 -> 0x0131
+    0xd6 -> 0x00cd
+    0xd7 -> 0x00ce
+    0xd8 -> 0x00cf
+    0xd9 -> 0x2518
+    0xda -> 0x250c
+    0xdb -> 0x2588
+    0xdc -> 0x2584
+    0xdd -> 0x00a6
+    0xde -> 0x00cc
+    0xdf -> 0x2580
+    0xe0 -> 0x00d3
+    0xe1 -> 0x00df
+    0xe2 -> 0x00d4
+    0xe3 -> 0x00d2
+    0xe4 -> 0x00f5
+    0xe5 -> 0x00d5
+    0xe6 -> 0x00b5
+    0xe7 -> 0x00fe
+    0xe8 -> 0x00de
+    0xe9 -> 0x00da
+    0xea -> 0x00db
+    0xeb -> 0x00d9
+    0xec -> 0x00fd
+    0xed -> 0x00dd
+    0xee -> 0x00af
+    0xef -> 0x00b4
+    0xf0 -> 0x00ad
+    0xf1 -> 0x00b1
+    0xf2 -> 0x2017
+    0xf3 -> 0x00be
+    0xf4 -> 0x00b6
+    0xf5 -> 0x00a7
+    0xf6 -> 0x00f7
+    0xf7 -> 0x00b8
+    0xf8 -> 0x00b0
+    0xf9 -> 0x00a8
+    0xfa -> 0x00b7
+    0xfb -> 0x00b9
+    0xfc -> 0x00b3
+    0xfd -> 0x00b2
+    0xfe -> 0x25a0
+    0xff -> 0x00a0
+    _    -> fromIntegral i