17 files changed, 2743 insertions, 1074 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index cf1d5132e..1e119e729 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -769,7 +769,12 @@ parseBlock (Elem e) =
                                 ""   -> []
                                 x    -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ strContent e
+                  $ trimNl $ strContentRecursive e
+         strContentRecursive = strContent . (\e' -> e'{ elContent =
+                                               map elementToStr $ elContent e' })
+         elementToStr :: Content -> Content
+         elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
+         elementToStr x = x
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 09c2330fb..085ee01fc 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE PatternGuards #-}
+
 {-
 Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
 
@@ -76,48 +78,188 @@ import Codec.Archive.Zip
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Builder (text, toList)
-import Text.Pandoc.Generic (bottomUp)
-import Text.Pandoc.MIME (getMimeType)
-import Text.Pandoc.UTF8 (toString)
+import Text.Pandoc.Walk
 import Text.Pandoc.Readers.Docx.Parse
 import Text.Pandoc.Readers.Docx.Lists
-import Data.Maybe (mapMaybe, isJust, fromJust)
-import Data.List (delete, isPrefixOf, (\\), intersect)
-import qualified Data.ByteString as BS
+import Text.Pandoc.Readers.Docx.Reducible
+import Text.Pandoc.Shared
+import Text.Pandoc.MediaBag (insertMedia, MediaBag)
+import Data.Maybe (mapMaybe)
+import Data.List (delete, stripPrefix, (\\), intersect)
+import Data.Monoid
+import Text.TeXMath (writeTeX)
+import Data.Default (Default)
 import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Base64 (encode)
-import System.FilePath (combine)
+import qualified Data.Map as M
+import Control.Monad.Reader
+import Control.Monad.State
+import Control.Applicative ((<$>))
 
 readDocx :: ReaderOptions
          -> B.ByteString
-         -> Pandoc
+         -> (Pandoc, MediaBag)
 readDocx opts bytes =
   case archiveToDocx (toArchive bytes) of
-    Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
-    Nothing   -> error $ "couldn't parse docx file"
-
-runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)])
-runStyleToSpanAttr rPr = ("",
-                          mapMaybe id [
-                            if isBold rPr then (Just "strong") else Nothing,
-                            if isItalic rPr then (Just "emph") else Nothing,
-                            if isSmallCaps rPr then (Just "smallcaps") else Nothing,
-                            if isStrike rPr then (Just "strike") else Nothing,
-                            if isSuperScript rPr then (Just "superscript") else Nothing,
-                            if isSubScript rPr then (Just "subscript") else Nothing,
-                            rStyle rPr],
-                          case underline rPr of
-                            Just fmt -> [("underline", fmt)]
-                            _        -> []
-                         )
-
-parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)])
-parStyleToDivAttr pPr = ("",
-                          pStyle pPr,
-                          case indent pPr of
-                            Just n  -> [("indent", (show n))]
-                            Nothing -> []
-                         )
+    Right docx -> (Pandoc meta blks, mediaBag) where
+      (meta, blks, mediaBag) = (docxToOutput opts docx)
+    Left _   -> error $ "couldn't parse docx file"
+
+data DState = DState { docxAnchorMap :: M.Map String String
+                     , docxMediaBag      :: MediaBag }
+
+instance Default DState where
+  def = DState { docxAnchorMap = M.empty
+               , docxMediaBag  = mempty }
+
+data DEnv = DEnv { docxOptions  :: ReaderOptions
+                 , docxInHeaderBlock :: Bool }
+
+instance Default DEnv where
+  def = DEnv def False
+
+type DocxContext = ReaderT DEnv (State DState)
+
+evalDocxContext :: DocxContext a -> DEnv -> DState -> a
+evalDocxContext ctx env st = evalState (runReaderT ctx env) st
+
+concatMapM        :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+concatMapM f xs   =  liftM concat (mapM f xs)
+
+-- This is empty, but we put it in for future-proofing.
+spansToKeep :: [String]
+spansToKeep = []
+
+divsToKeep :: [String]
+divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
+
+metaStyles :: M.Map String String
+metaStyles = M.fromList [ ("Title", "title")
+                        , ("Subtitle", "subtitle")
+                        , ("Author", "author")
+                        , ("Date", "date")
+                        , ("Abstract", "abstract")]
+
+sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
+sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp))
+
+isMetaPar :: BodyPart -> Bool
+isMetaPar (Paragraph pPr _) =
+  not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
+isMetaPar _ = False
+
+isEmptyPar :: BodyPart -> Bool
+isEmptyPar (Paragraph _ parParts) =
+  all isEmptyParPart parParts
+  where
+    isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
+    isEmptyParPart _ = False
+    isEmptyElem (TextRun s) = trim s == ""
+    isEmptyElem _           = True
+isEmptyPar _ = False
+
+bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue)
+bodyPartsToMeta' [] = return M.empty
+bodyPartsToMeta' (bp : bps)
+  | (Paragraph pPr parParts) <- bp
+  , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
+  , (Just metaField) <- M.lookup c metaStyles = do
+    inlines <- parPartsToInlines parParts
+    remaining <- bodyPartsToMeta' bps
+    let
+      f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
+      f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks)
+      f m (MetaList mv) = MetaList (m : mv)
+      f m n             = MetaList [m, n]
+    return $ M.insertWith f metaField (MetaInlines inlines) remaining
+bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
+
+bodyPartsToMeta :: [BodyPart] -> DocxContext Meta
+bodyPartsToMeta bps = do
+  mp <- bodyPartsToMeta' bps
+  let mp' =
+        case M.lookup "author" mp of
+          Just mv -> M.insert "author" (fixAuthors mv) mp
+          Nothing -> mp
+  return $ Meta mp'
+
+fixAuthors :: MetaValue -> MetaValue
+fixAuthors (MetaBlocks blks) =
+  MetaList $ map g $ filter f blks
+    where f (Para _) = True
+          f _          = False
+          g (Para ils) = MetaInlines ils
+          g _          = MetaInlines []
+fixAuthors mv = mv
+
+runStyleToContainers :: RunStyle -> [Container Inline]
+runStyleToContainers rPr =
+  let spanClassToContainers :: String -> [Container Inline]
+      spanClassToContainers s | s `elem` codeSpans =
+        [Container $ (\ils -> Code ("", [], []) (concatMap ilToCode ils))]
+      spanClassToContainers s | s `elem` spansToKeep =
+        [Container $ Span ("", [s], [])]
+      spanClassToContainers _ = []
+
+      classContainers = case rStyle rPr of
+        Nothing -> []
+        Just s  -> spanClassToContainers s
+
+      formatters = map Container $ mapMaybe id
+                   [ if isBold rPr then (Just Strong) else Nothing
+                   , if isItalic rPr then (Just Emph) else Nothing
+                   , if isSmallCaps rPr then (Just SmallCaps) else Nothing
+                   , if isStrike rPr then (Just Strikeout) else Nothing
+                   , if isSuperScript rPr then (Just Superscript) else Nothing
+                   , if isSubScript rPr then (Just Subscript) else Nothing
+                   , rUnderline rPr >>=
+                     (\f -> if f == "single" then (Just Emph) else Nothing)
+                 ]
+  in
+   classContainers ++ formatters
+
+parStyleToContainers :: ParagraphStyle -> [Container Block]
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, Just n <- isHeaderClass c =
+  [Container $ \_ -> Header n ("", delete ("Heading" ++ show n) cs, []) []]
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` divsToKeep =
+  let pPr' = pPr { pStyle = cs }
+  in
+   (Container $ Div ("", [c], [])) : (parStyleToContainers pPr')
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` codeDivs =
+  -- This is a bit of a cludge. We make the codeblock from the raw
+  -- parparts in bodyPartToBlocks. But we need something to match against.
+  let pPr' = pPr { pStyle = cs }
+  in
+   (Container $ \_ -> CodeBlock ("", [], []) "") : (parStyleToContainers pPr')
+parStyleToContainers pPr | (c:cs) <- pStyle pPr,  c `elem` listParagraphDivs =
+  let pPr' = pPr { pStyle = cs, indentation = Nothing}
+  in
+   (Container $ Div ("", [c], [])) : (parStyleToContainers pPr')
+
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` blockQuoteDivs =
+  let pPr' = pPr { pStyle = cs \\ blockQuoteDivs }
+  in
+   (Container BlockQuote) : (parStyleToContainers pPr')
+parStyleToContainers pPr | (_:cs) <- pStyle pPr =
+  let pPr' = pPr { pStyle = cs}
+  in
+    parStyleToContainers pPr'
+parStyleToContainers pPr | null (pStyle pPr),
+                          Just left <- indentation pPr >>= leftParIndent,
+                          Just hang <- indentation pPr >>= hangingParIndent =
+  let pPr' = pPr { indentation = Nothing }
+  in
+   case (left - hang) > 0 of
+     True -> (Container BlockQuote) : (parStyleToContainers pPr')
+     False -> parStyleToContainers pPr'
+parStyleToContainers pPr | null (pStyle pPr),
+                          Just left <- indentation pPr >>= leftParIndent =
+  let pPr' = pPr { indentation = Nothing }
+  in
+   case left > 0 of
+     True -> (Container BlockQuote) : (parStyleToContainers pPr')
+     False -> parStyleToContainers pPr'
+parStyleToContainers _ = []
+
 
 strToInlines :: String -> [Inline]
 strToInlines = toList . text
@@ -144,115 +286,103 @@ runElemToString (Tab) = ['\t']
 runElemsToString :: [RunElem] -> String
 runElemsToString = concatMap runElemToString
 
---- We use this instead of the more general
---- Text.Pandoc.Shared.normalize for reasons of efficiency. For
---- whatever reason, `normalize` makes a run take almost twice as
---- long. (It does more, but this does what we need)
-inlineNormalize :: [Inline] -> [Inline]
-inlineNormalize [] = []
-inlineNormalize (Str "" : ils) = inlineNormalize ils
-inlineNormalize ((Str s) : (Str s') : l) =
-  inlineNormalize (Str (s++s') : l)
-inlineNormalize ((Emph ils) : (Emph ils') : l) =
-  inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Emph ils) : l) =
-  Emph (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Strong ils) : (Strong ils') : l) = 
-  inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Strong ils) : l) =
-  Strong (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) = 
-  inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Strikeout ils) : l) =
-  Strikeout (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Superscript ils) : (Superscript ils') : l) = 
-  inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Superscript ils) : l) =
-  Superscript (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Subscript ils) : (Subscript ils') : l) = 
-  inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Subscript ils) : l) =
-  Subscript (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Space : Space : l)) =
-  inlineNormalize $ (Space : l)
-inlineNormalize ((Quoted qt ils) : l) =
-  Quoted qt (inlineNormalize ils) : inlineNormalize l
-inlineNormalize ((Cite cits ils) : l) =
-  let
-    f :: Citation -> Citation
-    f (Citation s pref suff mode num hash) =
-      Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash
-  in
-   Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Link ils s) : l) =
-  Link (inlineNormalize ils) s : (inlineNormalize l)
-inlineNormalize ((Image ils s) : l) =
-  Image (inlineNormalize ils) s : (inlineNormalize l)
-inlineNormalize ((Note blks) : l) =
-  Note (map blockNormalize blks) : (inlineNormalize l)
-inlineNormalize ((Span attr ils) : l) =
-  Span attr (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize (il : l) = il : (inlineNormalize l)
-
-stripSpaces :: [Inline] -> [Inline]
-stripSpaces ils =
-  reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils
-
-blockNormalize :: Block -> Block
-blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils
-blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils
-blockNormalize (Header n attr ils) =
-  Header n attr $ stripSpaces $ inlineNormalize ils
-blockNormalize (Table ils align width hdr cells) =
-  Table (stripSpaces $ inlineNormalize ils) align width hdr cells
-blockNormalize (DefinitionList pairs) =
-  DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs
-blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks)
-blockNormalize (OrderedList attr blkslst) =
-  OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst
-blockNormalize (BulletList blkslst) =
-  BulletList $ map (\blks -> map blockNormalize blks) blkslst
-blockNormalize (Div attr blks) = Div attr (map blockNormalize blks)
-blockNormalize blk = blk
-
-runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
-runToInlines _ _ (Run rs runElems)
-  | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
-    case runStyleToSpanAttr rs == ("", [], []) of
-      True -> [Str (runElemsToString runElems)]
-      False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]]
-  | otherwise = case runStyleToSpanAttr rs == ("", [], []) of
-      True -> concatMap runElemToInlines runElems
-      False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)]
-runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) =
-  case (getFootNote fnId notes) of
-    Just bodyParts ->
-      [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
-    Nothing        ->
-      [Note [Div ("", ["footnote"], []) []]]
-runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
-  case (getEndNote fnId notes) of
-    Just bodyParts ->
-      [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
-    Nothing        ->
-      [Note [Div ("", ["endnote"], []) []]]
-
-parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
-parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
-parPartToInlines _ _ (BookMark _ anchor) =
-  [Span (anchor, ["anchor"], []) []]
-parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
-  case lookupRelationship relid rels of
-    Just target -> [Image [] (combine "word" target, "")]
-    Nothing     -> [Image [] ("", "")]
-parPartToInlines opts docx (InternalHyperLink anchor runs) =
-  [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")]
-parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) =
-  case lookupRelationship relid rels of
-    Just target ->
-      [Link (concatMap (runToInlines opts docx) runs) (target, "")]
-    Nothing ->
-      [Link (concatMap (runToInlines opts docx) runs) ("", "")]
+runToString :: Run -> String
+runToString (Run _ runElems) = runElemsToString runElems
+runToString _ = ""
+
+parPartToString :: ParPart -> String
+parPartToString (PlainRun run) = runToString run
+parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
+parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
+parPartToString _ = ""
+
+
+inlineCodeContainer :: Container Inline -> Bool
+inlineCodeContainer (Container f) = case f [] of
+  Code _ "" -> True
+  _         -> False
+inlineCodeContainer _ = False
+
+
+runToInlines :: Run -> DocxContext [Inline]
+runToInlines (Run rs runElems)
+  | any inlineCodeContainer (runStyleToContainers rs) =
+      return $
+      rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems]
+  | otherwise =
+      return $
+      rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems)
+runToInlines (Footnote bps) =
+  concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
+runToInlines (Endnote bps) =
+  concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
+runToInlines (InlineDrawing fp bs) = do
+  mediaBag <- gets docxMediaBag
+  modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
+  return [Image [] (fp, "")]
+
+
+
+
+parPartToInlines :: ParPart -> DocxContext [Inline]
+parPartToInlines (PlainRun r) = runToInlines r
+parPartToInlines (Insertion _ author date runs) = do
+  opts <- asks docxOptions
+  case readerTrackChanges opts of
+    AcceptChanges -> concatMapM runToInlines runs >>= return
+    RejectChanges -> return []
+    AllChanges    -> do
+      ils <- (concatMapM runToInlines runs)
+      return [Span
+              ("", ["insertion"], [("author", author), ("date", date)])
+              ils]
+parPartToInlines (Deletion _ author date runs) = do
+  opts <- asks docxOptions
+  case readerTrackChanges opts of
+    AcceptChanges -> return []
+    RejectChanges -> concatMapM runToInlines runs >>= return
+    AllChanges    -> do
+      ils <- concatMapM runToInlines runs
+      return [Span
+              ("", ["deletion"], [("author", author), ("date", date)])
+              ils]
+parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = return []
+parPartToInlines (BookMark _ anchor) =
+  -- We record these, so we can make sure not to overwrite
+  -- user-defined anchor links with header auto ids.
+  do
+    -- get whether we're in a header.
+    inHdrBool <- asks docxInHeaderBlock
+    -- Get the anchor map.
+    anchorMap <- gets docxAnchorMap
+    -- We don't want to rewrite if we're in a header, since we'll take
+    -- care of that later, when we make the header anchor. If the
+    -- bookmark were already in uniqueIdent form, this would lead to a
+    -- duplication. Otherwise, we check to see if the id is already in
+    -- there. Rewrite if necessary. This will have the possible effect
+    -- of rewriting user-defined anchor links. However, since these
+    -- are not defined in pandoc, it seems like a necessary evil to
+    -- avoid an extra pass.
+    let newAnchor =
+          if not inHdrBool && anchor `elem` (M.elems anchorMap)
+          then uniqueIdent [Str anchor] (M.elems anchorMap)
+          else anchor
+    unless inHdrBool
+      (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
+    return [Span (newAnchor, ["anchor"], []) []]
+parPartToInlines (Drawing fp bs) = do
+  mediaBag <- gets docxMediaBag
+  modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
+  return [Image [] (fp, "")]
+parPartToInlines (InternalHyperLink anchor runs) = do
+  ils <- concatMapM runToInlines runs
+  return [Link ils ('#' : anchor, "")]
+parPartToInlines (ExternalHyperLink target runs) = do
+  ils <- concatMapM runToInlines runs
+  return [Link ils (target, "")]
+parPartToInlines (PlainOMath exps) = do
+  return [Math InlineMath (writeTeX exps)]
+
 
 isAnchorSpan :: Inline -> Bool
 isAnchorSpan (Span (ident, classes, kvs) ils) =
@@ -265,74 +395,106 @@ isAnchorSpan _ = False
 dummyAnchors :: [String]
 dummyAnchors = ["_GoBack"]
 
-makeHeaderAnchors :: Block -> Block
-makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
-  case filter isAnchorSpan ils of
-    []   -> h
-    (x@(Span (ident, _, _) _) : xs) ->
-      case ident `elem` dummyAnchors of
-        True -> h
-        False -> Header n (ident, classes, kvs) (ils \\ (x:xs))
-    _ -> h
-makeHeaderAnchors blk = blk
-
-
-parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline]
-parPartsToInlines opts docx parparts =
-  --
-  -- We're going to skip data-uri's for now. It should be an option,
-  -- not mandatory.
-  --
-  (if False -- TODO depend on option
-      then bottomUp (makeImagesSelfContained docx)
-      else id) $
-  bottomUp spanTrim $
-  bottomUp spanCorrect $
-  bottomUp spanReduce $
-  concatMap (parPartToInlines opts docx) parparts
-
-cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block]
-cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps
-
-rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
-rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
-
-bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block
-bodyPartToBlock opts docx (Paragraph pPr parparts) =
-  Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)]
-bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
+makeHeaderAnchor :: Block -> DocxContext Block
+-- If there is an anchor already there (an anchor span in the header,
+-- to be exact), we rename and associate the new id with the old one.
+makeHeaderAnchor (Header n (_, classes, kvs) ils)
+  | (x : xs) <- filter isAnchorSpan ils
+  , (Span (ident, _, _) _) <- x
+  , notElem ident dummyAnchors =
+    do
+      hdrIDMap <- gets docxAnchorMap
+      let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+      modify $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap}
+      return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs))
+-- Otherwise we just give it a name, and register that name (associate
+-- it with itself.)
+makeHeaderAnchor (Header n (_, classes, kvs) ils) =
+  do
+    hdrIDMap <- gets docxAnchorMap
+    let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+    modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
+    return $ Header n (newIdent, classes, kvs) ils
+makeHeaderAnchor blk = return blk
+
+
+parPartsToInlines :: [ParPart] -> DocxContext [Inline]
+parPartsToInlines parparts = do
+  ils <- concatMapM parPartToInlines parparts
+  return $ reduceList $ ils
+
+cellToBlocks :: Cell -> DocxContext [Block]
+cellToBlocks (Cell bps) = concatMapM bodyPartToBlocks bps
+
+rowToBlocksList :: Row -> DocxContext [[Block]]
+rowToBlocksList (Row cells) = mapM cellToBlocks cells
+
+isBlockCodeContainer :: Container Block -> Bool
+isBlockCodeContainer (Container f) | CodeBlock _ _ <- f [] = True
+isBlockCodeContainer _ = False
+
+isHeaderContainer :: Container Block -> Bool
+isHeaderContainer (Container f) | Header _ _ _ <- f [] = True
+isHeaderContainer _ = False
+
+bodyPartToBlocks :: BodyPart -> DocxContext [Block]
+bodyPartToBlocks (Paragraph pPr parparts)
+  | any isBlockCodeContainer (parStyleToContainers pPr) =
+    let
+      otherConts = filter (not . isBlockCodeContainer) (parStyleToContainers pPr)
+    in
+     return $
+     rebuild
+     otherConts
+     [CodeBlock ("", [], []) (concatMap parPartToString parparts)]
+bodyPartToBlocks (Paragraph pPr parparts)
+  | any isHeaderContainer (parStyleToContainers pPr) = do
+    ils <- normalizeSpaces <$> local (\s -> s{docxInHeaderBlock = True})
+                                (parPartsToInlines parparts)
+    let (Container hdrFun) = head $ filter isHeaderContainer (parStyleToContainers pPr)
+        Header n attr _ = hdrFun []
+    hdr <- makeHeaderAnchor $ Header n attr ils
+    return [hdr]
+bodyPartToBlocks (Paragraph pPr parparts) = do
+  ils <- parPartsToInlines parparts >>= (return . normalizeSpaces)
+  case ils of
+    [] -> return []
+    _ -> do
+      return $
+       rebuild
+       (parStyleToContainers pPr)
+       [Para ils]
+bodyPartToBlocks (ListItem pPr numId lvl levelInfo parparts) = do
   let
-    kvs = case lookupLevel numId lvl numbering of
-      Just (_, fmt, txt, Just start) -> [ ("level", lvl)
-                                        , ("num-id", numId)
-                                        , ("format", fmt)
-                                        , ("text", txt)
-                                        , ("start", (show start))
-                                        ]
-
-      Just (_, fmt, txt, Nothing)    -> [ ("level", lvl)
-                                        , ("num-id", numId)
-                                        , ("format", fmt)
-                                        , ("text", txt)
-                                        ]
-      Nothing                        -> []
-  in
-   Div
-   ("", ["list-item"], kvs)
-   [bodyPartToBlock opts docx (Paragraph pPr parparts)]
-bodyPartToBlock _ _ (Tbl _ _ _ []) =
-  Para []
-bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
+    kvs = case levelInfo of
+      (_, fmt, txt, Just start) -> [ ("level", lvl)
+                                   , ("num-id", numId)
+                                   , ("format", fmt)
+                                   , ("text", txt)
+                                   , ("start", (show start))
+                                   ]
+
+      (_, fmt, txt, Nothing)    -> [ ("level", lvl)
+                                   , ("num-id", numId)
+                                   , ("format", fmt)
+                                   , ("text", txt)
+                                   ]
+  blks <- bodyPartToBlocks (Paragraph pPr parparts)
+  return $ [Div ("", ["list-item"], kvs) blks]
+bodyPartToBlocks (Tbl _ _ _ []) =
+  return [Para []]
+bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
   let caption = strToInlines cap
       (hdr, rows) = case firstRowFormatting look of
         True -> (Just r, rs)
         False -> (Nothing, r:rs)
-      hdrCells = case hdr of
-        Just r' -> rowToBlocksList opts docx r'
-        Nothing -> []
-      cells = map (rowToBlocksList opts docx) rows
+  hdrCells <- case hdr of
+    Just r' -> rowToBlocksList r'
+    Nothing -> return []
 
-      size = case null hdrCells of
+  cells <- mapM rowToBlocksList rows
+
+  let size = case null hdrCells of
         True -> length $ head cells
         False -> length $ hdrCells
       --
@@ -341,208 +503,49 @@ bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
       -- moment. Width information is in the TblGrid field of the Tbl,
       -- so should be possible. Alignment might be more difficult,
       -- since there doesn't seem to be a column entity in docx.
-      alignments = take size (repeat AlignDefault)
-      widths = take size (repeat 0) :: [Double]
-  in
-   Table caption alignments widths hdrCells cells
-
-makeImagesSelfContained :: Docx -> Inline -> Inline
-makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) =
-  case lookup uri media of
-    Just bs -> case getMimeType uri of
-      Just mime ->  let data_uri =
-                          "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs)
-                    in
-                     Image alt (data_uri, title)
-      Nothing  -> i
-    Nothing -> i
-makeImagesSelfContained _ inline = inline
-
-bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
-bodyToBlocks opts docx (Body bps) =
-  bottomUp removeEmptyPars $
-  map blockNormalize $
-  bottomUp spanRemove $
-  bottomUp divRemove $
-  map (makeHeaderAnchors) $
-  bottomUp divCorrect $
-  bottomUp divReduce $
-  bottomUp divCorrectPreReduce $
-  bottomUp blocksToDefinitions $
-  blocksToBullets $
-  map (bodyPartToBlock opts docx) bps
-
-docxToBlocks :: ReaderOptions -> Docx -> [Block]
-docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body
-
-spanReduce :: [Inline] -> [Inline]
-spanReduce [] = []
-spanReduce ((Span (id1, classes1, kvs1) ils1) : ils)
-  | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils)
-spanReduce (s1@(Span (id1, classes1, kvs1) ils1) :
-            s2@(Span (id2, classes2, kvs2) ils2) :
-            ils) =
-  let classes'  = classes1 `intersect` classes2
-      kvs'      = kvs1 `intersect` kvs2
-      classes1' = classes1 \\ classes'
-      kvs1'     = kvs1 \\ kvs'
-      classes2' = classes2 \\ classes'
-      kvs2'     = kvs2 \\ kvs'
-  in
-   case null classes' && null kvs' of
-     True -> s1 : (spanReduce (s2 : ils))
-     False -> let attr'  = ("", classes', kvs')
-                  attr1' = (id1, classes1', kvs1')
-                  attr2' = (id2, classes2', kvs2')
-              in
-               spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] :
-                           ils)
-spanReduce (il:ils) = il : (spanReduce ils)
+      alignments = replicate size AlignDefault
+      widths = replicate size 0 :: [Double]
+
+  return [Table caption alignments widths hdrCells cells]
+bodyPartToBlocks (OMathPara e) = do
+  return [Para [Math DisplayMath (writeTeX e)]]
+
+
+-- replace targets with generated anchors.
+rewriteLink :: Inline -> DocxContext Inline
+rewriteLink l@(Link ils ('#':target, title)) = do
+  anchorMap <- gets docxAnchorMap
+  return $ case M.lookup target anchorMap of
+    Just newTarget -> (Link ils ('#':newTarget, title))
+    Nothing        -> l
+rewriteLink il = return il
+
+bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
+bodyToOutput (Body bps) = do
+  let (metabps, blkbps) = sepBodyParts bps
+  meta <- bodyPartsToMeta metabps
+  blks <- concatMapM bodyPartToBlocks blkbps >>=
+          walkM rewriteLink
+  mediaBag <- gets docxMediaBag
+  return $ (meta,
+            blocksToDefinitions $ blocksToBullets $ blks,
+            mediaBag)
+
+docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
+docxToOutput opts (Docx (Document _ body)) =
+  let dEnv   = def { docxOptions  = opts} in
+   evalDocxContext (bodyToOutput body) dEnv def
+
 
 ilToCode :: Inline -> String
 ilToCode (Str s) = s
-ilToCode _ = ""
-
-spanRemove' :: Inline -> [Inline]
-spanRemove' s@(Span (ident, classes, _) [])
-  -- "_GoBack" is automatically inserted. We don't want to keep it.
-  | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s]
-spanRemove' (Span (_, _, kvs) ils) =
-  case lookup "underline" kvs of
-    Just val -> [Span ("", [], [("underline", val)]) ils]
-    Nothing  -> ils
-spanRemove' il = [il]
-
-spanRemove :: [Inline] -> [Inline]
-spanRemove = concatMap spanRemove'
-
-spanTrim' :: Inline -> [Inline]
-spanTrim' il@(Span _ []) = [il]
-spanTrim' il@(Span attr (il':[]))
-  | il' == Space = [Span attr [], Space]
-  | otherwise = [il]
-spanTrim' (Span attr ils)
-  | head ils == Space && last ils == Space =
-    [Space, Span attr (init $ tail ils), Space]
-  | head ils == Space = [Space, Span attr (tail ils)]
-  | last ils == Space = [Span attr (init ils), Space]
-spanTrim' il = [il]
-
-spanTrim :: [Inline] -> [Inline]
-spanTrim = concatMap spanTrim'
-
-spanCorrect' :: Inline -> [Inline]
-spanCorrect' (Span ("", [], []) ils) = ils
-spanCorrect' (Span (ident, classes, kvs) ils)
-  | "emph" `elem` classes =
-    [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils]
-  | "strong" `elem` classes =
-      [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils]
-  | "smallcaps" `elem` classes =
-      [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils]
-  | "strike" `elem` classes =
-      [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils]
-  | "superscript" `elem` classes =
-      [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils]
-  | "subscript" `elem` classes =
-      [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils]
-  | (not . null) (codeSpans `intersect` classes) =
-         [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)]
-  | otherwise =
-      [Span (ident, classes, kvs) ils]
-spanCorrect' il = [il]
-
-spanCorrect :: [Inline] -> [Inline]
-spanCorrect = concatMap spanCorrect'
-
-removeEmptyPars :: [Block] -> [Block]
-removeEmptyPars blks = filter (\b -> b /= (Para [])) blks
-
-divReduce :: [Block] -> [Block]
-divReduce [] = []
-divReduce ((Div (id1, classes1, kvs1) blks1) : blks)
-  | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks)
-divReduce (d1@(Div (id1, classes1, kvs1) blks1) :
-           d2@(Div (id2, classes2, kvs2) blks2) :
-            blks) =
-  let classes'  = classes1 `intersect` classes2
-      kvs'      = kvs1 `intersect` kvs2
-      classes1' = classes1 \\ classes'
-      kvs1'     = kvs1 \\ kvs'
-      classes2' = classes2 \\ classes'
-      kvs2'     = kvs2 \\ kvs'
-  in
-   case null classes' && null kvs' of
-     True -> d1 : (divReduce (d2 : blks))
-     False -> let attr'  = ("", classes', kvs')
-                  attr1' = (id1, classes1', kvs1')
-                  attr2' = (id2, classes2', kvs2')
-              in
-               divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] :
-                           blks)
-divReduce (blk:blks) = blk : (divReduce blks)
+ilToCode Space = " "
+ilToCode _     = ""
 
 isHeaderClass :: String -> Maybe Int
-isHeaderClass s | "Heading" `isPrefixOf` s =
-  case reads (drop (length "Heading") s) :: [(Int, String)] of
+isHeaderClass s | Just s' <- stripPrefix "Heading" s =
+  case reads s' :: [(Int, String)] of
     [] -> Nothing
     ((n, "") : []) -> Just n
     _       -> Nothing
 isHeaderClass _ = Nothing
-
-findHeaderClass :: [String] -> Maybe Int
-findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of
-  [] -> Nothing
-  n : _ -> Just n
-
-blksToInlines :: [Block] -> [Inline]
-blksToInlines (Para ils : _) = ils
-blksToInlines (Plain ils : _) = ils
-blksToInlines _ = []
-
-divCorrectPreReduce' :: Block -> [Block]
-divCorrectPreReduce' (Div (ident, classes, kvs) blks)
-  | isJust $ findHeaderClass classes =
-    let n = fromJust $ findHeaderClass classes
-    in
-    [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)]
-  | otherwise = [Div (ident, classes, kvs) blks]
-divCorrectPreReduce' blk = [blk]
-
-divCorrectPreReduce :: [Block] -> [Block]
-divCorrectPreReduce = concatMap divCorrectPreReduce'
-
-blkToCode :: Block -> String
-blkToCode (Para []) = ""
-blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils))
-blkToCode (Para ((Span (_, classes, _) ils'): ils))
-  | (not . null) (codeSpans `intersect` classes) =
-    (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
-blkToCode _ = ""
-
-divRemove' :: Block -> [Block]
-divRemove' (Div (_, _, kvs) blks) =
-  case lookup "indent" kvs of
-    Just val -> [Div ("", [], [("indent", val)]) blks]
-    Nothing  -> blks
-divRemove' blk = [blk]
-
-divRemove :: [Block] -> [Block]
-divRemove = concatMap divRemove'
-
-divCorrect' :: Block -> [Block]
-divCorrect' b@(Div (ident, classes, kvs) blks)
-  | (not . null) (blockQuoteDivs `intersect` classes) =
-    [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]]
-  | (not . null) (codeDivs `intersect` classes) =
-    [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)]
-  | otherwise =
-      case lookup "indent" kvs of
-        Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]
-        Just _   ->
-          [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]]
-        Nothing  -> [b]
-divCorrect' blk = [blk]
-
-divCorrect :: [Block] -> [Block]
-divCorrect = concatMap divCorrect'
diff --git a/src/Text/Pandoc/Readers/Docx/Fonts.hs b/src/Text/Pandoc/Readers/Docx/Fonts.hs
new file mode 100644
index 000000000..b44c71412
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Fonts.hs
@@ -0,0 +1,238 @@
+{-
+Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Docx.Fonts
+   Copyright   : Copyright (C) 2014 Matthew Pickering
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Matthew Pickering <matthewtpickering@gmail.com>
+   Stability   : alpha
+   Portability : portable
+
+Utilities to convert between font codepoints and unicode characters.
+-}
+module Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) where
+
+
+-- | Enumeration of recognised fonts
+data Font = Symbol -- ^ <http://en.wikipedia.org/wiki/Symbol_(typeface) Adobe Symbol>
+          deriving (Show, Eq)
+
+-- | Given a font and codepoint, returns the corresponding unicode
+-- character
+getUnicode :: Font -> Char -> Maybe Char
+getUnicode Symbol c = lookup c symbol
+
+-- Generated from lib/fonts/symbol.txt
+symbol :: [(Char, Char)]
+symbol =
+  [ (' ',' ')
+  , (' ','\160')
+  , ('!','!')
+  , ('"','\8704')
+  , ('#','#')
+  , ('$','\8707')
+  , ('%','%')
+  , ('&','&')
+  , ('\'','\8715')
+  , ('(','(')
+  , (')',')')
+  , ('*','\8727')
+  , ('+','+')
+  , (',',',')
+  , ('-','\8722')
+  , ('.','.')
+  , ('/','/')
+  , ('0','0')
+  , ('1','1')
+  , ('2','2')
+  , ('3','3')
+  , ('4','4')
+  , ('5','5')
+  , ('6','6')
+  , ('7','7')
+  , ('8','8')
+  , ('9','9')
+  , (':',':')
+  , (';',';')
+  , ('<','<')
+  , ('=','=')
+  , ('>','>')
+  , ('?','?')
+  , ('@','\8773')
+  , ('A','\913')
+  , ('B','\914')
+  , ('C','\935')
+  , ('D','\916')
+  , ('D','\8710')
+  , ('E','\917')
+  , ('F','\934')
+  , ('G','\915')
+  , ('H','\919')
+  , ('I','\921')
+  , ('J','\977')
+  , ('K','\922')
+  , ('L','\923')
+  , ('M','\924')
+  , ('N','\925')
+  , ('O','\927')
+  , ('P','\928')
+  , ('Q','\920')
+  , ('R','\929')
+  , ('S','\931')
+  , ('T','\932')
+  , ('U','\933')
+  , ('V','\962')
+  , ('W','\937')
+  , ('W','\8486')
+  , ('X','\926')
+  , ('Y','\936')
+  , ('Z','\918')
+  , ('[','[')
+  , ('\\','\8756')
+  , (']',']')
+  , ('^','\8869')
+  , ('_','_')
+  , ('`','\63717')
+  , ('a','\945')
+  , ('b','\946')
+  , ('c','\967')
+  , ('d','\948')
+  , ('e','\949')
+  , ('f','\966')
+  , ('g','\947')
+  , ('h','\951')
+  , ('i','\953')
+  , ('j','\981')
+  , ('k','\954')
+  , ('l','\955')
+  , ('m','\181')
+  , ('m','\956')
+  , ('n','\957')
+  , ('o','\959')
+  , ('p','\960')
+  , ('q','\952')
+  , ('r','\961')
+  , ('s','\963')
+  , ('t','\964')
+  , ('u','\965')
+  , ('v','\982')
+  , ('w','\969')
+  , ('x','\958')
+  , ('y','\968')
+  , ('z','\950')
+  , ('{','{')
+  , ('|','|')
+  , ('}','}')
+  , ('~','\8764')
+  , ('\160','\8364')
+  , ('\161','\978')
+  , ('\162','\8242')
+  , ('\163','\8804')
+  , ('\164','\8260')
+  , ('\164','\8725')
+  , ('\165','\8734')
+  , ('\166','\402')
+  , ('\167','\9827')
+  , ('\168','\9830')
+  , ('\169','\9829')
+  , ('\170','\9824')
+  , ('\171','\8596')
+  , ('\172','\8592')
+  , ('\173','\8593')
+  , ('\174','\8594')
+  , ('\175','\8595')
+  , ('\176','\176')
+  , ('\177','\177')
+  , ('\178','\8243')
+  , ('\179','\8805')
+  , ('\180','\215')
+  , ('\181','\8733')
+  , ('\182','\8706')
+  , ('\183','\8226')
+  , ('\184','\247')
+  , ('\185','\8800')
+  , ('\186','\8801')
+  , ('\187','\8776')
+  , ('\188','\8230')
+  , ('\189','\63718')
+  , ('\190','\63719')
+  , ('\191','\8629')
+  , ('\192','\8501')
+  , ('\193','\8465')
+  , ('\194','\8476')
+  , ('\195','\8472')
+  , ('\196','\8855')
+  , ('\197','\8853')
+  , ('\198','\8709')
+  , ('\199','\8745')
+  , ('\200','\8746')
+  , ('\201','\8835')
+  , ('\202','\8839')
+  , ('\203','\8836')
+  , ('\204','\8834')
+  , ('\205','\8838')
+  , ('\206','\8712')
+  , ('\207','\8713')
+  , ('\208','\8736')
+  , ('\209','\8711')
+  , ('\210','\63194')
+  , ('\211','\63193')
+  , ('\212','\63195')
+  , ('\213','\8719')
+  , ('\214','\8730')
+  , ('\215','\8901')
+  , ('\216','\172')
+  , ('\217','\8743')
+  , ('\218','\8744')
+  , ('\219','\8660')
+  , ('\220','\8656')
+  , ('\221','\8657')
+  , ('\222','\8658')
+  , ('\223','\8659')
+  , ('\224','\9674')
+  , ('\225','\9001')
+  , ('\226','\63720')
+  , ('\227','\63721')
+  , ('\228','\63722')
+  , ('\229','\8721')
+  , ('\230','\63723')
+  , ('\231','\63724')
+  , ('\232','\63725')
+  , ('\233','\63726')
+  , ('\234','\63727')
+  , ('\235','\63728')
+  , ('\236','\63729')
+  , ('\237','\63730')
+  , ('\238','\63731')
+  , ('\239','\63732')
+  , ('\241','\9002')
+  , ('\242','\8747')
+  , ('\243','\8992')
+  , ('\244','\63733')
+  , ('\245','\8993')
+  , ('\246','\63734')
+  , ('\247','\63735')
+  , ('\248','\63736')
+  , ('\249','\63737')
+  , ('\250','\63738')
+  , ('\251','\63739')
+  , ('\252','\63740')
+  , ('\253','\63741')
+  , ('\254','\63742')]
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index 68559d98b..ea195c14a 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -29,9 +29,12 @@ Functions for converting flat docx paragraphs into nested lists.
 -}
 
 module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
-                                      , blocksToDefinitions) where
+                                      , blocksToDefinitions
+                                      , listParagraphDivs
+                                      ) where
 
 import Text.Pandoc.JSON
+import Text.Pandoc.Generic (bottomUp)
 import Text.Pandoc.Shared (trim)
 import Control.Monad
 import Data.List
@@ -118,7 +121,7 @@ handleListParagraphs (
       in
        handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
 handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
-                  
+
 separateBlocks' :: Block -> [[Block]] -> [[Block]]
 separateBlocks' blk ([] : []) = [[blk]]
 separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
@@ -136,7 +139,7 @@ flatToBullets' :: Integer -> [Block] -> [Block]
 flatToBullets' _ [] = []
 flatToBullets' num xs@(b : elems)
   | getLevelN b == num = b : (flatToBullets' num elems)
-  | otherwise = 
+  | otherwise =
     let bNumId = getNumIdN b
         bLevel = getLevelN b
         (children, remaining) =
@@ -159,10 +162,9 @@ flatToBullets elems = flatToBullets' (-1) elems
 
 blocksToBullets :: [Block] -> [Block]
 blocksToBullets blks =
-  -- bottomUp removeListItemDivs $ 
+  bottomUp removeListDivs $
   flatToBullets $ (handleListParagraphs blks)
 
-
 plainParaInlines :: Block -> [Inline]
 plainParaInlines (Plain ils) = ils
 plainParaInlines (Para ils) = ils
@@ -199,10 +201,27 @@ blocksToDefinitions' [] acc (b:blks) =
 blocksToDefinitions' defAcc acc (b:blks) =
   blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
 
+removeListDivs' :: Block -> [Block]
+removeListDivs' (Div (ident, classes, kvs) blks)
+  | "list-item" `elem` classes =
+    case delete "list-item" classes of
+      [] -> blks
+      classes' -> [Div (ident, classes', kvs) $ blks]
+removeListDivs' (Div (ident, classes, kvs) blks)
+  | not $ null $ listParagraphDivs `intersect` classes =
+    case classes \\ listParagraphDivs of
+      [] -> blks
+      classes' -> [Div (ident, classes', kvs) blks]
+removeListDivs' blk = [blk]
+
+removeListDivs :: [Block] -> [Block]
+removeListDivs = concatMap removeListDivs'
+
+
 
 blocksToDefinitions :: [Block] -> [Block]
 blocksToDefinitions = blocksToDefinitions' [] []
 
-    
-    
-    
+
+
+
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 18200bcf9..1abd4bc6b 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE PatternGuards, ViewPatterns #-}
+
 {-
 Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
 
@@ -8,49 +10,46 @@ the Free Software Foundation; either version 2 of the License, or
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 -}
 
 {- |
-   Module      : Text.Pandoc.Readers.Docx.Parse
-   Copyright   : Copyright (C) 2014 Jesse Rosenthal
-   License     : GNU GPL, version 2 or above
+ Module : Text.Pandoc.Readers.Docx.Parse
+ Copyright : Copyright (C) 2014 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
 
-   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
-   Stability   : alpha
-   Portability : portable
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
 
 Conversion of docx archive into Docx haskell type
 -}
 
+module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
+                                      , Document(..)
+                                      , Body(..)
+                                      , BodyPart(..)
+                                      , TblLook(..)
+                                      , ParPart(..)
+                                      , Run(..)
+                                      , RunElem(..)
+                                      , Notes
+                                      , Numbering
+                                      , Relationship
+                                      , Media
+                                      , RunStyle(..)
+                                      , ParIndentation(..)
+                                      , ParagraphStyle(..)
+                                      , Row(..)
+                                      , Cell(..)
+                                      , archiveToDocx
+                                      ) where
 
-module Text.Pandoc.Readers.Docx.Parse (  Docx(..)
-                                       , Document(..)
-                                       , Body(..)
-                                       , BodyPart(..)
-                                       , TblLook(..)
-                                       , ParPart(..)
-                                       , Run(..)
-                                       , RunElem(..)
-                                       , Notes
-                                       , Numbering
-                                       , Relationship
-                                       , Media
-                                       , RunStyle(..)
-                                       , ParagraphStyle(..)
-                                       , Row(..)
-                                       , Cell(..)
-                                       , getFootNote
-                                       , getEndNote
-                                       , lookupLevel
-                                       , lookupRelationship
-                                       , archiveToDocx
-                                       ) where
 import Codec.Archive.Zip
 import Text.XML.Light
 import Data.Maybe
@@ -59,39 +58,244 @@ import System.FilePath
 import Data.Bits ((.|.))
 import qualified Data.ByteString.Lazy as B
 import qualified Text.Pandoc.UTF8 as UTF8
+import Control.Monad.Reader
+import Control.Applicative ((<$>), (<|>))
+import qualified Data.Map as M
+import Text.Pandoc.Compat.Except
+import Text.TeXMath.Readers.OMML (readOMML)
+import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
+import Text.TeXMath (Exp)
+import Data.Char (readLitChar, ord, chr)
+
+data ReaderEnv = ReaderEnv { envNotes         :: Notes
+                           , envNumbering     :: Numbering
+                           , envRelationships :: [Relationship]
+                           , envMedia         :: Media
+                           , envFont          :: Maybe Font
+                           }
+               deriving Show
+
+data DocxError = DocxError | WrongElem
+               deriving Show
+
+instance Error DocxError where
+  noMsg = WrongElem
+
+type D = ExceptT DocxError (Reader ReaderEnv)
+
+runD :: D a -> ReaderEnv -> Either DocxError a
+runD dx re = runReader (runExceptT dx ) re
+
+maybeToD :: Maybe a -> D a
+maybeToD (Just a) = return a
+maybeToD Nothing = throwError DocxError
+
+eitherToD :: Either a b -> D b
+eitherToD (Right b) = return b
+eitherToD (Left _)  = throwError DocxError
+
+concatMapM        :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+concatMapM f xs   =  liftM concat (mapM f xs)
+
+
+-- This is similar to `mapMaybe`: it maps a function returning the D
+-- monad over a list, and only keeps the non-erroring return values.
+mapD :: (a -> D b) -> [a] -> D [b]
+mapD f xs =
+  let handler x = (f x >>= (\y-> return [y])) `catchError` (\_ -> return [])
+  in
+   concatMapM handler xs
 
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
+type NameSpaces = [(String, String)]
 
+data Docx = Docx Document
+          deriving Show
 
-type NameSpaces = [(String, String)]
+data Document = Document NameSpaces Body
+          deriving Show
 
-data Docx = Docx Document Notes Numbering [Relationship] Media
+data Body = Body [BodyPart]
           deriving Show
 
-archiveToDocx :: Archive -> Maybe Docx
+type Media = [(FilePath, B.ByteString)]
+
+data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
+                 deriving Show
+
+data Numb = Numb String String           -- right now, only a key to an abstract num
+            deriving Show
+
+data AbstractNumb = AbstractNumb String [Level]
+                    deriving Show
+
+-- (ilvl, format, string, start)
+type Level = (String, String, String, Maybe Integer)
+
+data Relationship = Relationship (RelId, Target)
+                  deriving Show
+
+data Notes = Notes NameSpaces
+             (Maybe (M.Map String Element))
+             (Maybe (M.Map String Element))
+           deriving Show
+
+data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
+                                     , rightParIndent :: Maybe Integer
+                                     , hangingParIndent :: Maybe Integer}
+                      deriving Show
+
+data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+                                     , indentation :: Maybe ParIndentation
+                                     }
+                      deriving Show
+
+defaultParagraphStyle :: ParagraphStyle
+defaultParagraphStyle = ParagraphStyle { pStyle = []
+                                       , indentation = Nothing
+                                       }
+
+
+data BodyPart = Paragraph ParagraphStyle [ParPart]
+              | ListItem ParagraphStyle String String Level [ParPart]
+              | Tbl String TblGrid TblLook [Row]
+              | OMathPara [Exp]
+              deriving Show
+
+type TblGrid = [Integer]
+
+data TblLook = TblLook {firstRowFormatting::Bool}
+              deriving Show
+
+defaultTblLook :: TblLook
+defaultTblLook = TblLook{firstRowFormatting = False}
+
+data Row = Row [Cell]
+           deriving Show
+
+data Cell = Cell [BodyPart]
+            deriving Show
+
+data ParPart = PlainRun Run
+             | Insertion ChangeId Author ChangeDate [Run]
+             | Deletion ChangeId Author ChangeDate [Run]
+             | BookMark BookMarkId Anchor
+             | InternalHyperLink Anchor [Run]
+             | ExternalHyperLink URL [Run]
+             | Drawing FilePath B.ByteString
+             | PlainOMath [Exp]
+             deriving Show
+
+data Run = Run RunStyle [RunElem]
+         | Footnote [BodyPart]
+         | Endnote [BodyPart]
+         | InlineDrawing FilePath B.ByteString
+           deriving Show
+
+data RunElem = TextRun String | LnBrk | Tab
+             deriving Show
+
+data RunStyle = RunStyle { isBold :: Bool
+                         , isItalic :: Bool
+                         , isSmallCaps :: Bool
+                         , isStrike :: Bool
+                         , isSuperScript :: Bool
+                         , isSubScript :: Bool
+                         , rUnderline :: Maybe String
+                         , rStyle :: Maybe String }
+                deriving Show
+
+defaultRunStyle :: RunStyle
+defaultRunStyle = RunStyle { isBold = False
+                           , isItalic = False
+                           , isSmallCaps = False
+                           , isStrike = False
+                           , isSuperScript = False
+                           , isSubScript = False
+                           , rUnderline = Nothing
+                           , rStyle = Nothing
+                           }
+
+
+type Target = String
+type Anchor = String
+type URL = String
+type BookMarkId = String
+type RelId = String
+type ChangeId = String
+type Author = String
+type ChangeDate = String
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+archiveToDocx :: Archive -> Either DocxError Docx
 archiveToDocx archive = do
-  let notes = archiveToNotes archive
-      rels = archiveToRelationships archive
-      media = archiveToMedia archive
-  doc <- archiveToDocument archive
-  numbering <- archiveToNumbering archive
-  return $ Docx doc notes numbering rels media
-
-data Document = Document NameSpaces Body 
-          deriving Show
+  let notes     = archiveToNotes archive
+      numbering = archiveToNumbering archive
+      rels      = archiveToRelationships archive
+      media     = archiveToMedia archive
+      rEnv = ReaderEnv notes numbering rels media Nothing
+  doc <- runD (archiveToDocument archive) rEnv
+  return $ Docx doc
+
 
-archiveToDocument :: Archive -> Maybe Document
+archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
-  entry <- findEntryByPath "word/document.xml" zf
-  docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
-  let namespaces = mapMaybe attrToNSPair (elAttribs docElem) 
-  bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem
+  entry <- maybeToD $ findEntryByPath "word/document.xml" zf
+  docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+  bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
   body <- elemToBody namespaces bodyElem
   return $ Document namespaces body
 
-type Media = [(FilePath, B.ByteString)]
+elemToBody :: NameSpaces -> Element -> D Body
+elemToBody ns element | isElem ns "w" "body" element =
+  mapD (elemToBodyPart ns) (elChildren element) >>=
+  (\bps -> return $ Body bps)
+elemToBody _ _ = throwError WrongElem
+
+archiveToNotes :: Archive -> Notes
+archiveToNotes zf =
+  let fnElem = findEntryByPath "word/footnotes.xml" zf
+               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+      enElem = findEntryByPath "word/endnotes.xml" zf
+               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+      fn_namespaces = case fnElem of
+        Just e -> mapMaybe attrToNSPair (elAttribs e)
+        Nothing -> []
+      en_namespaces = case enElem of
+        Just e -> mapMaybe attrToNSPair (elAttribs e)
+        Nothing -> []
+      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
+      fn = fnElem >>= (elemToNotes ns "footnote")
+      en = enElem >>= (elemToNotes ns "endnote")
+  in
+   Notes ns fn en
+
+filePathIsRel :: FilePath -> Bool
+filePathIsRel fp =
+  let (dir, name) = splitFileName fp
+  in
+   (dir == "word/_rels/") && ((takeExtension name) == ".rels")
+
+relElemToRelationship :: Element -> Maybe Relationship
+relElemToRelationship element | qName (elName element) == "Relationship" =
+  do
+    relId <- findAttr (QName "Id" Nothing Nothing) element
+    target <- findAttr (QName "Target" Nothing Nothing) element
+    return $ Relationship (relId, target)
+relElemToRelationship _ = Nothing
+
+
+archiveToRelationships :: Archive -> [Relationship]
+archiveToRelationships archive =
+  let relPaths = filter filePathIsRel (filesInArchive archive)
+      entries  = mapMaybe (\f -> findEntryByPath f archive) relPaths
+      relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
+      rels =     mapMaybe relElemToRelationship $ concatMap elChildren relElems
+  in
+   rels
 
 filePathIsMedia :: FilePath -> Bool
 filePathIsMedia fp =
@@ -109,18 +313,6 @@ archiveToMedia :: Archive -> Media
 archiveToMedia zf =
   mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf))
 
-data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
-                 deriving Show
-
-data Numb = Numb String String           -- right now, only a key to an abstract num
-            deriving Show
-
-data AbstractNumb = AbstractNumb String [Level]
-                    deriving Show
-
--- (ilvl, format, string, start)
-type Level = (String, String, String, Maybe Integer)
-
 lookupLevel :: String -> String -> Numbering -> Maybe Level
 lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
   absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
@@ -148,7 +340,7 @@ absNumElemToAbsNum ns element |
     let levelElems = findChildren
                  (QName "lvl" (lookup "w" ns) (Just "w"))
                  element
-        levels = mapMaybe id $ map (levelElemToLevel ns) levelElems
+        levels = mapMaybe (levelElemToLevel ns) levelElems
     return $ AbstractNumb absNumId levels
 absNumElemToAbsNum _ _ = Nothing
 
@@ -167,8 +359,8 @@ levelElemToLevel ns element |
       return (ilvl, fmt, txt, start)
 levelElemToLevel _ _ = Nothing
 
-archiveToNumbering :: Archive -> Maybe Numbering
-archiveToNumbering zf =
+archiveToNumbering' :: Archive -> Maybe Numbering
+archiveToNumbering' zf = do
   case findEntryByPath "word/numbering.xml" zf of
     Nothing -> Just $ Numbering [] [] []
     Just entry -> do
@@ -180,321 +372,281 @@ archiveToNumbering zf =
           absNumElems = findChildren
                         (QName "abstractNum" (lookup "w" namespaces) (Just "w"))
                         numberingElem
-          nums = mapMaybe id $ map (numElemToNum namespaces) numElems
-          absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems
+          nums = mapMaybe (numElemToNum namespaces) numElems
+          absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems
       return $ Numbering namespaces nums absNums
 
-data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])])
-           deriving Show
-
-noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart])
-noteElemToNote ns element
-  | qName (elName element) `elem` ["endnote", "footnote"] &&
-    qURI (elName element) == (lookup "w" ns) =
-      do
-        noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
-        let bps = map fromJust
-                  $ filter isJust
-                  $ map (elemToBodyPart ns)
-                  $ filterChildrenName (isParOrTbl ns) element
-        return $ (noteId, bps)
-noteElemToNote _ _ = Nothing
-
-getFootNote :: String -> Notes -> Maybe [BodyPart]
-getFootNote s (Notes _ fns _) = fns >>= (lookup s)
-
-getEndNote :: String -> Notes -> Maybe [BodyPart]
-getEndNote s (Notes _ _ ens) = ens >>= (lookup s)
+archiveToNumbering :: Archive -> Numbering
+archiveToNumbering archive =
+  fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
 
-elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])]
+elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
 elemToNotes ns notetype element
-  | qName (elName element) == (notetype ++ "s") &&
-    qURI (elName element) == (lookup "w" ns) =
-      Just $ map fromJust
-      $ filter isJust
-      $ map (noteElemToNote ns)
-      $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element
+  | isElem ns "w" (notetype ++ "s") element =
+      let pairs = mapMaybe
+                  (\e -> findAttr (elemName ns "w" "id") e >>=
+                         (\a -> Just (a, e)))
+                  (findChildren (elemName ns "w" notetype) element)
+      in
+       Just $ M.fromList $ pairs
 elemToNotes _ _ _ = Nothing
 
-archiveToNotes :: Archive -> Notes
-archiveToNotes zf =
-  let fnElem = findEntryByPath "word/footnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-      enElem = findEntryByPath "word/endnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-      fn_namespaces = case fnElem of
-        Just e -> mapMaybe attrToNSPair (elAttribs e)
-        Nothing -> []
-      en_namespaces = case enElem of
-        Just e -> mapMaybe attrToNSPair (elAttribs e)
-        Nothing -> []
-      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
-      fn = fnElem >>= (elemToNotes ns "footnote")
-      en = enElem >>= (elemToNotes ns "endnote")
-  in
-   Notes ns fn en
+---------------------------------------------
+---------------------------------------------
 
+elemName :: NameSpaces -> String -> String -> QName
+elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix))
 
-data Relationship = Relationship (RelId, Target)
-                  deriving Show
+isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem ns prefix name element =
+  qName (elName element) == name &&
+  qURI (elName element) == (lookup prefix ns)
 
-lookupRelationship :: RelId -> [Relationship] -> Maybe Target
-lookupRelationship relid rels =
-  lookup relid (map (\(Relationship pair) -> pair) rels)
 
-filePathIsRel :: FilePath -> Bool
-filePathIsRel fp =
-  let (dir, name) = splitFileName fp
+elemToTblGrid :: NameSpaces -> Element -> D TblGrid
+elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
+  let cols = findChildren (elemName ns "w" "gridCol") element
   in
-   (dir == "word/_rels/") && ((takeExtension name) == ".rels")
-
-relElemToRelationship :: Element -> Maybe Relationship
-relElemToRelationship element | qName (elName element) == "Relationship" =
-  do
-    relId <- findAttr (QName "Id" Nothing Nothing) element
-    target <- findAttr (QName "Target" Nothing Nothing) element
-    return $ Relationship (relId, target)
-relElemToRelationship _ = Nothing
-  
-
-archiveToRelationships :: Archive -> [Relationship]
-archiveToRelationships archive = 
-  let relPaths = filter filePathIsRel (filesInArchive archive)
-      entries  = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths
-      relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
-      rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems
+   mapD (\e -> maybeToD (findAttr (elemName ns "w" "val") e >>= stringToInteger))
+   cols
+elemToTblGrid _ _ = throwError WrongElem
+
+elemToTblLook :: NameSpaces -> Element -> D TblLook
+elemToTblLook ns element | isElem ns "w" "tblLook" element =
+  let firstRow = findAttr (elemName ns "w" "firstRow") element
+      val = findAttr (elemName ns "w" "val") element
+      firstRowFmt =
+        case firstRow of
+          Just "1" -> True
+          Just  _  -> False
+          Nothing -> case val of
+            Just bitMask -> testBitMask bitMask 0x020
+            Nothing      -> False
   in
-   rels
-   
-data Body = Body [BodyPart]
-          deriving Show
+   return $ TblLook{firstRowFormatting = firstRowFmt}
+elemToTblLook _ _ = throwError WrongElem
 
-isParOrTbl :: NameSpaces -> QName -> Bool
-isParOrTbl ns q = qName q `elem` ["p", "tbl"] &&
-                  qURI q == (lookup "w" ns)
+elemToRow :: NameSpaces -> Element -> D Row
+elemToRow ns element | isElem ns "w" "tr" element =
+  do
+    let cellElems = findChildren (elemName ns "w" "tc") element
+    cells <- mapD (elemToCell ns) cellElems
+    return $ Row cells
+elemToRow _ _ = throwError WrongElem
 
-elemToBody :: NameSpaces -> Element ->  Maybe Body
-elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) =
-  Just $ Body
-  $ map fromJust
-  $ filter isJust
-  $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element
-elemToBody _ _ = Nothing
+elemToCell :: NameSpaces -> Element -> D Cell
+elemToCell ns element | isElem ns "w" "tc" element =
+  do
+    cellContents <- mapD (elemToBodyPart ns) (elChildren element)
+    return $ Cell cellContents
+elemToCell _ _ = throwError WrongElem
+
+elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
+elemToParIndentation ns element | isElem ns "w" "ind" element =
+  Just $ ParIndentation {
+    leftParIndent =
+       findAttr (QName "left" (lookup "w" ns) (Just "w")) element >>=
+       stringToInteger
+    , rightParIndent =
+      findAttr (QName "right" (lookup "w" ns) (Just "w")) element >>=
+      stringToInteger
+    , hangingParIndent =
+      findAttr (QName "hanging" (lookup "w" ns) (Just "w")) element >>=
+      stringToInteger}
+elemToParIndentation _ _ = Nothing
 
-isRunOrLinkOrBookmark :: NameSpaces -> QName ->  Bool
-isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] &&
-                   qURI q == (lookup "w" ns)
 
 elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-elemToNumInfo ns element
-  | qName (elName element) == "p" &&
-    qURI (elName element) == (lookup "w" ns) =
-      do
-        pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element
-        numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr
-        lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>=
-                findAttr (QName "val" (lookup "w" ns) (Just "w"))
-        numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>=
-                 findAttr (QName "val" (lookup "w" ns) (Just "w"))
-        return (numId, lvl)
+elemToNumInfo ns element  | isElem ns "w" "p" element = do
+  let pPr = findChild (elemName ns "w" "pPr") element
+      numPr = pPr >>= findChild (elemName ns "w" "numPr")
+  lvl <- numPr >>=
+         findChild (elemName ns "w" "ilvl") >>=
+         findAttr (elemName ns "w" "val")
+  numId <- numPr >>=
+           findChild (elemName ns "w" "numId") >>=
+           findAttr (elemName ns "w" "val")
+  return (numId, lvl)
 elemToNumInfo _ _ = Nothing
 
--- isBookMarkTag :: NameSpaces -> QName -> Bool
--- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] &&
---                      qURI q == (lookup "w" ns)
-
--- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark
--- parChildrenToBookmark ns (bms : bme : _)
---   | qName (elName bms) == "bookmarkStart" &&
---     qURI (elName bms) == (lookup "w" ns) &&
---     qName (elName bme) == "bookmarkEnd" &&
---     qURI (elName bme) == (lookup "w" ns) = do
---       bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms
---       bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms
---       return $ (bmId, bmName)
--- parChildrenToBookmark _ _ = Nothing
-
-elemToBodyPart :: NameSpaces -> Element ->  Maybe BodyPart
-elemToBodyPart ns element
-  | qName (elName element) == "p" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let parstyle = elemToParagraphStyle ns element
-          parparts = mapMaybe id
-                     $ map (elemToParPart ns)
-                     $ filterChildrenName (isRunOrLinkOrBookmark ns) element
-      in
-       case elemToNumInfo ns element of
-         Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts
-         Nothing -> Just $ Paragraph parstyle parparts
-  | qName (elName element) == "tbl" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let
-        caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
-                  >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w"))
-                  >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
-        grid = case
-          findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element
-          of
-            Just g -> elemToTblGrid ns g
-            Nothing -> []
-        tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
-                  >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w"))
-                  >>= elemToTblLook ns
-      in
-       Just $ Tbl
-       (fromMaybe "" caption)
-       grid
-       (fromMaybe defaultTblLook tblLook)
-       (mapMaybe (elemToRow ns) (elChildren element))
-  | otherwise = Nothing
-
-elemToTblLook :: NameSpaces -> Element -> Maybe TblLook
-elemToTblLook ns element
-  | qName (elName element) == "tblLook" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element
-          val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element
-          firstRowFmt = 
-            case firstRow of
-              Just "1" -> True
-              Just  _  -> False
-              Nothing -> case val of
-                Just bitMask -> testBitMask bitMask 0x020
-                Nothing      -> False
-      in
-       Just $ TblLook{firstRowFormatting = firstRowFmt}
-elemToTblLook _ _ = Nothing
-
 testBitMask :: String -> Int -> Bool
 testBitMask bitMaskS n =
   case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
     []            -> False
     ((n', _) : _) -> ((n' .|. n) /= 0)
 
-data ParagraphStyle = ParagraphStyle { pStyle :: [String]
-                                     , indent :: Maybe Integer
-                                     }
-                      deriving Show
-
-defaultParagraphStyle :: ParagraphStyle
-defaultParagraphStyle = ParagraphStyle { pStyle = []
-                                       , indent = Nothing
-                                       }
-
-elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
-elemToParagraphStyle ns element =
-  case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of
-    Just pPr ->
-      ParagraphStyle
-      {pStyle =
-          mapMaybe id $
-          map
-          (findAttr (QName "val" (lookup "w" ns) (Just "w")))
-          (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr)
-      , indent =
-        findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>=
-        findAttr (QName "left" (lookup "w" ns) (Just "w")) >>=
-        stringToInteger
-        }
-    Nothing -> defaultParagraphStyle
-
-
-data BodyPart = Paragraph ParagraphStyle [ParPart]
-              | ListItem ParagraphStyle String String [ParPart]
-              | Tbl String TblGrid TblLook [Row]
-
-              deriving Show
-
-type TblGrid = [Integer]
-
-data TblLook = TblLook {firstRowFormatting::Bool}
-              deriving Show
-
-defaultTblLook :: TblLook
-defaultTblLook = TblLook{firstRowFormatting = False}
-
 stringToInteger :: String -> Maybe Integer
 stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
 
-elemToTblGrid :: NameSpaces -> Element -> TblGrid
-elemToTblGrid ns element
-  | qName (elName element) == "tblGrid" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let
-        cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element
-      in
-       mapMaybe (\e ->
-                  findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e
-                  >>= stringToInteger
-                )
-       cols
-elemToTblGrid _ _ = []
-
-data Row = Row [Cell]
-           deriving Show
-
-
-elemToRow :: NameSpaces -> Element -> Maybe Row
-elemToRow ns element
-  | qName (elName element) == "tr" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let 
-        cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element
-      in
-       Just $ Row (mapMaybe (elemToCell ns) cells)
-elemToRow _ _ = Nothing
+elemToBodyPart :: NameSpaces -> Element -> D BodyPart
+elemToBodyPart ns element
+  | isElem ns "w" "p" element
+  , (c:_) <- findChildren (elemName ns "m" "oMathPara") element =
+      do
+        expsLst <- eitherToD $ readOMML $ showElement c
+        return $ OMathPara expsLst
+elemToBodyPart ns element
+  | isElem ns "w" "p" element
+  , Just (numId, lvl) <- elemToNumInfo ns element = do
+    let parstyle = elemToParagraphStyle ns element
+    parparts <- mapD (elemToParPart ns) (elChildren element)
+    num <- asks envNumbering
+    case lookupLevel numId lvl num of
+      Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
+      Nothing         -> throwError WrongElem
+elemToBodyPart ns element
+  | isElem ns "w" "p" element = do
+    let parstyle = elemToParagraphStyle ns element
+    parparts <- mapD (elemToParPart ns) (elChildren element)
+    return $ Paragraph parstyle parparts
+elemToBodyPart ns element
+  | isElem ns "w" "tbl" element = do
+    let caption' = findChild (elemName ns "w" "tblPr") element
+                   >>= findChild (elemName ns "w" "tblCaption")
+                   >>= findAttr (elemName ns "w" "val")
+        caption = (fromMaybe "" caption')
+        grid' = case findChild (elemName ns "w" "tblGrid") element of
+          Just g -> elemToTblGrid ns g
+          Nothing -> return []
+        tblLook' = case findChild (elemName ns "w" "tblPr") element >>=
+                          findChild (elemName ns "w" "tblLook")
+                     of
+                       Just l -> elemToTblLook ns l
+                       Nothing -> return defaultTblLook
+
+    grid <- grid'
+    tblLook <- tblLook'
+    rows <- mapD (elemToRow ns) (elChildren element)
+    return $ Tbl caption grid tblLook rows
+elemToBodyPart _ _ = throwError WrongElem
 
-data Cell = Cell [BodyPart]
-            deriving Show
+lookupRelationship :: RelId -> [Relationship] -> Maybe Target
+lookupRelationship relid rels =
+  lookup relid (map (\(Relationship pair) -> pair) rels)
 
-elemToCell :: NameSpaces -> Element -> Maybe Cell
-elemToCell ns element
-  | qName (elName element) == "tc" &&
-    qURI (elName element) == (lookup "w" ns) =
-      Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element))
-elemToCell _ _ = Nothing
+expandDrawingId :: String -> D (FilePath, B.ByteString)
+expandDrawingId s = do
+  target <- asks (lookupRelationship s . envRelationships)
+  case target of
+    Just filepath -> do
+      bytes <- asks (lookup (combine "word" filepath) . envMedia)
+      case bytes of
+        Just bs -> return (filepath, bs)
+        Nothing -> throwError DocxError
+    Nothing -> throwError DocxError
+
+elemToParPart :: NameSpaces -> Element -> D ParPart
+elemToParPart ns element
+  | isElem ns "w" "r" element
+  , Just _ <- findChild (elemName ns "w" "drawing") element =
+    let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+        drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
+                  >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+    in
+     case drawing of
+       Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs)
+       Nothing -> throwError WrongElem
+elemToParPart ns element
+  | isElem ns "w" "r" element =
+    elemToRun ns element >>= (\r -> return $ PlainRun r)
+elemToParPart ns element
+  | isElem ns "w" "ins" element
+  , Just cId <- findAttr (elemName ns "w" "id") element
+  , Just cAuthor <- findAttr (elemName ns "w" "author") element
+  , Just cDate <- findAttr (elemName ns "w" "date") element = do
+    runs <- mapD (elemToRun ns) (elChildren element)
+    return $ Insertion cId cAuthor cDate runs
+elemToParPart ns element
+  | isElem ns "w" "del" element
+  , Just cId <- findAttr (elemName ns "w" "id") element
+  , Just cAuthor <- findAttr (elemName ns "w" "author") element
+  , Just cDate <- findAttr (elemName ns "w" "date") element = do
+    runs <- mapD (elemToRun ns) (elChildren element)
+    return $ Deletion cId cAuthor cDate runs
+elemToParPart ns element
+  | isElem ns "w" "bookmarkStart" element
+  , Just bmId <- findAttr (elemName ns "w" "id") element
+  , Just bmName <- findAttr (elemName ns "w" "name") element =
+    return $ BookMark bmId bmName
+elemToParPart ns element
+  | isElem ns "w" "hyperlink" element
+  , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
+    runs <- mapD (elemToRun ns) (elChildren element)
+    return $ InternalHyperLink anchor runs
+elemToParPart ns element
+  | isElem ns "w" "hyperlink" element
+  , Just relId <- findAttr (elemName ns "r" "id") element = do
+    runs <- mapD (elemToRun ns) (elChildren element)
+    rels <- asks envRelationships
+    return $ case lookupRelationship relId rels of
+      Just target -> ExternalHyperLink target runs
+      Nothing     -> ExternalHyperLink "" runs
+elemToParPart ns element
+  | isElem ns "m" "oMath" element =
+    (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
+elemToParPart _ _ = throwError WrongElem
 
-data ParPart = PlainRun Run
-             | BookMark BookMarkId Anchor
-             | InternalHyperLink Anchor [Run]
-             | ExternalHyperLink RelId [Run]
-             | Drawing String
-             deriving Show
+lookupFootnote :: String -> Notes -> Maybe Element
+lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)
 
-data Run = Run RunStyle [RunElem]
-         | Footnote String 
-         | Endnote String
-           deriving Show
+lookupEndnote :: String -> Notes -> Maybe Element
+lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s)
 
-data RunElem = TextRun String | LnBrk | Tab
-             deriving Show
+elemToRun :: NameSpaces -> Element -> D Run
+elemToRun ns element
+  | isElem ns "w" "r" element
+  , Just drawingElem <- findChild (elemName ns "w" "drawing") element =
+    let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+        drawing = findElement (QName "blip" (Just a_ns) (Just "a")) drawingElem
+                  >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+    in
+     case drawing of
+       Just s -> expandDrawingId s >>=
+                 (\(fp, bs) -> return $ InlineDrawing fp bs)
+       Nothing -> throwError WrongElem
+elemToRun ns element
+  | isElem ns "w" "r" element
+  , Just ref <- findChild (elemName ns "w" "footnoteReference") element
+  , Just fnId <- findAttr (elemName ns "w" "id") ref = do
+    notes <- asks envNotes
+    case lookupFootnote fnId notes of
+      Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+                   return $ Footnote bps
+      Nothing  -> return $ Footnote []
+elemToRun ns element
+  | isElem ns "w" "r" element
+  , Just ref <- findChild (elemName ns "w" "endnoteReference") element
+  , Just enId <- findAttr (elemName ns "w" "id") ref = do
+    notes <- asks envNotes
+    case lookupEndnote enId notes of
+      Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+                   return $ Endnote bps
+      Nothing  -> return $ Endnote []
+elemToRun ns element
+  | isElem ns "w" "r" element = do
+    runElems <- elemToRunElems ns element
+    return $ Run (elemToRunStyle ns element) runElems
+elemToRun _ _ = throwError WrongElem
 
-data RunStyle = RunStyle { isBold :: Bool
-                         , isItalic :: Bool
-                         , isSmallCaps :: Bool
-                         , isStrike :: Bool
-                         , isSuperScript :: Bool
-                         , isSubScript :: Bool
-                         , underline :: Maybe String
-                         , rStyle :: Maybe String }
-                deriving Show
+elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
+elemToParagraphStyle ns element
+  | Just pPr <- findChild (elemName ns "w" "pPr") element =
+    ParagraphStyle
+      {pStyle =
+          mapMaybe
+          (findAttr (elemName ns "w" "val"))
+          (findChildren (elemName ns "w" "pStyle") pPr)
+      , indentation =
+            findChild (elemName ns "w" "ind") pPr >>=
+            elemToParIndentation ns
+      }
+elemToParagraphStyle _ _ =  defaultParagraphStyle
 
-defaultRunStyle :: RunStyle
-defaultRunStyle = RunStyle { isBold = False
-                           , isItalic = False
-                           , isSmallCaps = False
-                           , isStrike = False
-                           , isSuperScript = False
-                           , isSubScript = False
-                           , underline = Nothing
-                           , rStyle = Nothing
-                           } 
 
 elemToRunStyle :: NameSpaces -> Element -> RunStyle
-elemToRunStyle ns element =
-  case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of
-    Just rPr ->
-      RunStyle
+elemToRunStyle ns element
+  | Just rPr <- findChild (elemName ns "w" "rPr") element =
+    RunStyle
       {
         isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr
       , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr
@@ -508,100 +660,65 @@ elemToRunStyle ns element =
         (Just "subscript" ==
         (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
          findAttr (QName "val" (lookup "w" ns) (Just "w"))))
-      , underline =
+      , rUnderline =
         findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>=
         findAttr (QName "val" (lookup "w" ns) (Just "w"))
       , rStyle =
         findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>=
         findAttr (QName "val" (lookup "w" ns) (Just "w"))
         }
-    Nothing -> defaultRunStyle
+elemToRunStyle _ _ = defaultRunStyle
 
-elemToRun :: NameSpaces -> Element -> Maybe Run
-elemToRun ns element
-  | qName (elName element) == "r" &&
-    qURI (elName element) == (lookup "w" ns) =
-      case
-        findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>=
-        findAttr (QName "id" (lookup "w" ns) (Just "w"))
-      of
-        Just s -> Just $ Footnote s
-        Nothing ->
-          case
-            findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>=
-            findAttr (QName "id" (lookup "w" ns) (Just "w"))
-          of
-            Just s -> Just $ Endnote s 
-            Nothing ->  Just $
-                        Run (elemToRunStyle ns element)
-                        (elemToRunElems ns element)
-elemToRun _ _ = Nothing
-
-elemToRunElem :: NameSpaces -> Element -> Maybe RunElem
+elemToRunElem :: NameSpaces -> Element -> D RunElem
 elemToRunElem ns element
-  | qName (elName element) == "t" &&
-    qURI (elName element) == (lookup "w" ns) =
-      Just $ TextRun (strContent element)
-  | qName (elName element) == "br" &&
-    qURI (elName element) == (lookup "w" ns) =
-      Just $ LnBrk
-  | qName (elName element) == "tab" &&
-    qURI (elName element) == (lookup "w" ns) =
-      Just $ Tab
-  | otherwise = Nothing
-
-
-elemToRunElems :: NameSpaces -> Element -> [RunElem]
+  | isElem ns "w" "t" element
+    || isElem ns "w" "delText" element
+    || isElem ns "m" "t" element = do
+    let str = strContent element
+    font <- asks envFont
+    case font of
+      Nothing -> return $ TextRun str
+      Just f  -> return . TextRun $
+                  map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
+  | isElem ns "w" "br" element = return LnBrk
+  | isElem ns "w" "tab" element = return Tab
+  | isElem ns "w" "sym" element = return (getSymChar ns element)
+  | otherwise = throwError WrongElem
+  where
+    lowerFromPrivate (ord -> c)
+      | c >= ord '\xF000' = chr $ c - ord '\xF000'
+      | otherwise = chr c
+
+-- The char attribute is a hex string
+getSymChar :: NameSpaces -> Element -> RunElem
+getSymChar ns element
+  | Just s <- lowerFromPrivate <$> getCodepoint
+  , Just font <- getFont =
+  let [(char, _)] = readLitChar ("\\x" ++ s) in
+    TextRun . maybe "" (:[]) $ getUnicode font char
+  where
+    getCodepoint = findAttr (elemName ns "w" "char") element
+    getFont = stringToFont =<< findAttr (elemName ns "w" "font") element
+    lowerFromPrivate ('F':xs) = '0':xs
+    lowerFromPrivate xs = xs
+getSymChar _ _ = TextRun ""
+
+stringToFont :: String -> Maybe Font
+stringToFont "Symbol" = Just Symbol
+stringToFont _ = Nothing
+
+elemToRunElems :: NameSpaces -> Element -> D [RunElem]
 elemToRunElems ns element
-  | qName (elName element) == "r" &&
-    qURI (elName element) == (lookup "w" ns) =
-      mapMaybe (elemToRunElem ns) (elChildren element)
-  | otherwise = []
-
-elemToDrawing :: NameSpaces -> Element -> Maybe ParPart
-elemToDrawing ns element
-  | qName (elName element) == "drawing" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
-      in
-       findElement (QName "blip" (Just a_ns) (Just "a")) element
-       >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
-       >>= (\s -> Just $ Drawing s)
-elemToDrawing _ _ = Nothing
-
-
-elemToParPart :: NameSpaces -> Element -> Maybe ParPart
-elemToParPart ns element
-  | qName (elName element) == "r" &&
-    qURI (elName element) == (lookup "w" ns) =
-      case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of
-        Just drawingElem -> elemToDrawing ns drawingElem
-        Nothing -> do
-          r <- elemToRun ns element
-          return $ PlainRun r
-elemToParPart ns element
-  | qName (elName element) == "bookmarkStart" &&
-    qURI (elName element) == (lookup "w" ns) = do
-      bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
-      bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element
-      return $ BookMark bmId bmName
-elemToParPart ns element
-  | qName (elName element) == "hyperlink" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let runs = map fromJust $ filter isJust $ map (elemToRun ns)
-                 $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element
-      in
-       case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of
-         Just anchor ->
-          Just $ InternalHyperLink anchor runs
-         Nothing ->
-           case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of
-             Just relId -> Just $ ExternalHyperLink relId runs
-             Nothing    -> Nothing
-elemToParPart _ _ = Nothing
-
-type Target = String
-type Anchor = String
-type BookMarkId = String
-type RelId = String
-               
+  |  isElem ns "w" "r" element
+     || isElem ns "m" "r" element = do
+       let qualName = elemName ns "w"
+       let font = do
+                    fontElem <- findElement (qualName "rFonts") element
+                    stringToFont =<<
+                      (foldr (<|>) Nothing $
+                        map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"])
+       local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
+elemToRunElems _ _ = throwError WrongElem
+
+setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
+setFont f s = s{envFont = f}
diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
new file mode 100644
index 000000000..2dbef4131
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs
@@ -0,0 +1,183 @@
+{-# LANGUAGE OverloadedStrings, PatternGuards #-}
+
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Docx.Reducible
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Typeclass for combining adjacent blocks and inlines correctly.
+-}
+
+
+module Text.Pandoc.Readers.Docx.Reducible ((<++>),
+                                           (<+++>),
+                                           Reducible,
+                                           Container(..),
+                                           container,
+                                           innards,
+                                           reduceList,
+                                           reduceListB,
+                                           rebuild)
+       where
+
+import Text.Pandoc.Builder
+import Data.List ((\\), intersect)
+
+data Container a = Container ([a] -> a) | NullContainer
+
+instance (Eq a) => Eq (Container a) where
+  (Container x) == (Container y) = ((x []) == (y []))
+  NullContainer == NullContainer = True
+  _ == _ = False
+
+instance (Show a) => Show (Container a) where
+  show (Container x) = "Container {" ++
+                       (reverse $ drop 3 $ reverse $ show $ x []) ++
+                       "}"
+  show (NullContainer) = "NullContainer"
+
+class Reducible a where
+  (<++>) :: a -> a -> [a]
+  container :: a -> Container a
+  innards :: a -> [a]
+  isSpace :: a -> Bool
+
+(<+++>) :: (Reducible a) => Many a -> Many a -> Many a
+mr <+++> ms = fromList $ reduceList $ toList mr ++ toList ms
+
+reduceListB :: (Reducible a) => Many a -> Many a
+reduceListB = fromList . reduceList . toList
+
+reduceList' :: (Reducible a) => [a] -> [a] -> [a]
+reduceList' acc [] = acc
+reduceList' [] (x:xs) = reduceList' [x] xs
+reduceList' as (x:xs) = reduceList' (init as ++ (last as <++> x) ) xs
+
+reduceList :: (Reducible a) => [a] -> [a]
+reduceList = reduceList' []
+
+combineReducibles :: (Reducible a, Eq a) => a -> a -> [a]
+combineReducibles r s =
+  let (conts, rs) = topLevelContainers r
+      (conts', ss) = topLevelContainers s
+      shared = conts `intersect` conts'
+      remaining = conts \\ shared
+      remaining' = conts' \\ shared
+  in
+   case null shared of
+     True | (x : xs) <- reverse rs
+          , isSpace x ->
+             rebuild conts (reverse xs) ++ [x, s]
+          | (x : xs) <- ss
+          , isSpace x ->
+             [r, x] ++ rebuild conts' (xs)
+     True  -> [r,s]
+     False -> rebuild
+              shared $
+              reduceList $
+              (rebuild remaining rs) ++ (rebuild remaining' ss)
+
+instance Reducible Inline where
+  s1@(Span (id1, classes1, kvs1) ils1) <++> s2@(Span (id2, classes2, kvs2) ils2) =
+    let classes'  = classes1 `intersect` classes2
+        kvs'      = kvs1 `intersect` kvs2
+        classes1' = classes1 \\ classes'
+        kvs1'     = kvs1 \\ kvs'
+        classes2' = classes2 \\ classes'
+        kvs2'     = kvs2 \\ kvs'
+    in
+     case null classes' && null kvs' of
+       True -> [s1,s2]
+       False -> let attr'  = ("", classes', kvs')
+                    attr1' = (id1, classes1', kvs1')
+                    attr2' = (id2, classes2', kvs2')
+                    s1' = case null classes1' && null kvs1' of
+                      True -> ils1
+                      False -> [Span attr1' ils1]
+                    s2' = case null classes2' && null kvs2' of
+                      True -> ils2
+                      False -> [Span attr2' ils2]
+                in
+                 [Span attr' $ reduceList $ s1' ++ s2']
+
+  (Str x) <++> (Str y) = [Str (x++y)]
+  il <++> il' = combineReducibles il il'
+
+  container (Emph _) = Container Emph
+  container (Strong _) = Container Strong
+  container (Strikeout _) = Container Strikeout
+  container (Subscript _) = Container Subscript
+  container (Superscript _) = Container Superscript
+  container (Quoted qt _) = Container $ Quoted qt
+  container (Cite cs _) = Container $ Cite cs
+  container (Span attr _) = Container $ Span attr
+  container _ = NullContainer
+
+  innards (Emph ils) = ils
+  innards (Strong ils) = ils
+  innards (Strikeout ils) = ils
+  innards (Subscript ils) = ils
+  innards (Superscript ils) = ils
+  innards (Quoted _ ils) = ils
+  innards (Cite _ ils) = ils
+  innards (Span _ ils) = ils
+  innards _ = []
+
+  isSpace Space = True
+  isSpace _     = False
+
+instance Reducible Block where
+  (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes =
+    [Div (ident, classes, kvs) (reduceList blks), blk]
+
+  blk <++> blk' = combineReducibles blk blk'
+
+  container (BlockQuote _) = Container BlockQuote
+  container (Div attr _) = Container $ Div attr
+  container _            = NullContainer
+
+  innards (BlockQuote bs) = bs
+  innards (Div _ bs) = bs
+  innards _          = []
+
+  isSpace _          = False
+
+
+topLevelContainers' :: (Reducible a) => [a] -> ([Container a], [a])
+topLevelContainers' (r : []) = case container r of
+  NullContainer -> ([], [r])
+  _             ->
+    let (conts, inns) = topLevelContainers' (innards r)
+    in
+    ((container r) : conts, inns)
+topLevelContainers' rs = ([], rs)
+
+topLevelContainers :: (Reducible a) => a -> ([Container a], [a])
+topLevelContainers il = topLevelContainers' [il]
+
+rebuild :: [Container a] -> [a] -> [a]
+rebuild [] xs = xs
+rebuild ((Container f) : cs) xs = rebuild cs $ [f xs]
+rebuild (NullContainer : cs) xs = rebuild cs $ xs
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
new file mode 100644
index 000000000..f900c0adc
--- /dev/null
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -0,0 +1,285 @@
+{-# LANGUAGE
+   ViewPatterns
+ , StandaloneDeriving
+ , TupleSections
+ , FlexibleContexts  #-}
+
+module Text.Pandoc.Readers.EPUB
+  (readEPUB)
+  where
+
+import Text.XML.Light
+import Text.Pandoc.Definition hiding (Attr)
+import Text.Pandoc.Walk (walk, query)
+import Text.Pandoc.Generic(bottomUp)
+import Text.Pandoc.Readers.HTML (readHtml)
+import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
+import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
+import Text.Pandoc.MediaBag (MediaBag, insertMedia)
+import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
+import qualified Text.Pandoc.Builder as B
+import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry
+                         , findEntryByPath, Entry)
+import qualified Data.ByteString.Lazy as BL (ByteString)
+import System.FilePath ( takeFileName, (</>), dropFileName, normalise
+                       , dropFileName
+                       , splitFileName )
+import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import Control.Applicative ((<$>))
+import Control.Monad (guard, liftM, when)
+import Data.Monoid (mempty, (<>))
+import Data.List (isPrefixOf, isInfixOf)
+import Data.Maybe (mapMaybe, fromMaybe)
+import qualified Data.Map as M (Map, lookup, fromList, elems)
+import Control.DeepSeq.Generics (deepseq, NFData)
+
+import Debug.Trace (trace)
+
+type MIME = String
+
+type Items = M.Map String (FilePath, MIME)
+
+readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)
+readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes)
+
+runEPUB :: Except String a -> a
+runEPUB = either error id . runExcept
+
+-- Note that internal reference are aggresively normalised so that all ids
+-- are of the form "filename#id"
+--
+archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
+archiveToEPUB os archive = do
+  -- root is path to folder with manifest file in
+  (root, content) <- getManifest archive
+  meta  <- parseMeta content
+  (cover, items) <- parseManifest content
+  -- No need to collapse here as the image path is from the manifest file
+  let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
+  spine <- parseSpine items content
+  let escapedSpine = map (escapeURI . takeFileName . fst) spine
+  Pandoc _ bs <-
+      foldM' (\a b -> ((a <>) . bottomUp (prependHash escapedSpine))
+        `liftM` parseSpineElem root b) mempty spine
+  let ast = coverDoc <> (Pandoc meta bs)
+  let mediaBag = fetchImages (M.elems items) root archive ast
+  return $ (ast, mediaBag)
+  where
+    os' = os {readerParseRaw = True}
+    parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc
+    parseSpineElem (normalise -> r) (normalise -> path, mime) = do
+      when (readerTrace os) (traceM path)
+      doc <- mimeToReader mime r path
+      let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
+      return $ docSpan <> doc
+    mimeToReader :: MonadError String m => MIME -> FilePath -> FilePath ->  m Pandoc
+    mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do
+      fname <- findEntryByPathE (root </> path) archive
+      return $ fixInternalReferences path .
+                readHtml os' .
+                  UTF8.toStringLazy $
+                    fromEntry fname
+    mimeToReader s _ path
+      | s `elem` imageMimes = return $ imageToPandoc path
+      | otherwise = return $ mempty
+
+-- paths should be absolute when this function is called
+-- renameImages should do this
+fetchImages :: [(FilePath, MIME)]
+            -> FilePath -- ^ Root
+            -> Archive
+            -> Pandoc
+            -> MediaBag
+fetchImages mimes root arc (query iq -> links) =
+    foldr (uncurry3 insertMedia) mempty
+      (mapMaybe getEntry links)
+  where
+    getEntry link =
+        let abslink = root </> link in
+        (link , lookup link mimes, ) . fromEntry
+          <$> findEntryByPath abslink arc
+
+iq :: Inline -> [FilePath]
+iq (Image _ (url, _)) = [url]
+iq _ = []
+
+-- Remove relative paths
+renameImages :: FilePath -> Inline -> Inline
+renameImages root (Image a (url, b)) = Image a (collapseFilePath (root </> url), b)
+renameImages _ x = x
+
+imageToPandoc :: FilePath -> Pandoc
+imageToPandoc s = B.doc . B.para $ B.image s "" mempty
+
+imageMimes :: [String]
+imageMimes = ["image/gif", "image/jpeg", "image/png"]
+
+type CoverImage = FilePath
+
+parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items)
+parseManifest content = do
+  manifest <- findElementE (dfName "manifest") content
+  let items = findChildren (dfName "item") manifest
+  r <- mapM parseItem items
+  let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
+  return (cover, (M.fromList r))
+  where
+    findCover e = maybe False (isInfixOf "cover-image")
+                  (findAttr (emptyName "properties") e)
+    parseItem e = do
+      uid <- findAttrE (emptyName "id") e
+      href <- findAttrE (emptyName "href") e
+      mime <- findAttrE (emptyName "media-type") e
+      return (uid, (href, mime))
+
+parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MIME)]
+parseSpine is e = do
+  spine <- findElementE (dfName "spine") e
+  let itemRefs = findChildren (dfName "itemref") spine
+  mapM (mkE "parseSpine" . (flip M.lookup is)) $ mapMaybe parseItemRef itemRefs
+  where
+    parseItemRef ref = do
+      let linear = maybe True (== "yes") (findAttr (emptyName "linear") ref)
+      guard linear
+      findAttr (emptyName "idref") ref
+
+parseMeta :: MonadError String m => Element -> m Meta
+parseMeta content = do
+  meta <- findElementE (dfName "metadata") content
+  let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
+      dcspace _ = False
+  let dcs = filterChildrenName dcspace meta
+  let r = foldr parseMetaItem nullMeta dcs
+  return r
+
+-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
+parseMetaItem :: Element -> Meta -> Meta
+parseMetaItem e@(stripNamespace . elName -> field) meta =
+  addMetaField (renameMeta field) (B.str $ strContent e) meta
+
+renameMeta :: String -> String
+renameMeta "creator" = "author"
+renameMeta s = s
+
+getManifest :: MonadError String m => Archive -> m (String, Element)
+getManifest archive = do
+  metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
+  docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+  let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+  ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
+  as <- liftM ((map attrToPair) . elAttribs)
+    (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
+  manifestFile <- mkE "Root not found" (lookup "full-path" as)
+  let rootdir = dropFileName manifestFile
+  --mime <- lookup "media-type" as
+  manifest <- findEntryByPathE manifestFile archive
+  liftM ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+
+-- Fixup
+
+fixInternalReferences :: FilePath -> Pandoc -> Pandoc
+fixInternalReferences pathToFile =
+  (walk $ renameImages root)
+  . (walk normalisePath)
+  . (walk $ fixBlockIRs filename)
+  . (walk $ fixInlineIRs filename)
+  where
+    (root, escapeURI -> filename) = splitFileName pathToFile
+
+fixInlineIRs :: String -> Inline -> Inline
+fixInlineIRs s (Span as v) =
+  Span (fixAttrs s as) v
+fixInlineIRs s (Code as code) =
+  Code (fixAttrs s as) code
+fixInlineIRs s (Link t ('#':url, tit)) =
+  Link t (addHash s url, tit)
+fixInlineIRs _ v = v
+
+normalisePath :: Inline -> Inline
+normalisePath (Link t (url, tit)) =
+  let (path, uid) = span (/= '#') url in
+  Link t (takeFileName path ++ uid, tit)
+normalisePath s = s
+
+prependHash :: [String] -> Inline -> Inline
+prependHash ps l@(Link is (url, tit))
+  | or [s `isPrefixOf` url | s <- ps] =
+    Link is ('#':url, tit)
+  | otherwise = l
+prependHash _ i = i
+
+fixBlockIRs :: String -> Block -> Block
+fixBlockIRs s (Div as b) =
+  Div (fixAttrs s as) b
+fixBlockIRs s (Header i as b) =
+  Header i (fixAttrs s as) b
+fixBlockIRs s (CodeBlock as code) =
+  CodeBlock (fixAttrs s as) code
+fixBlockIRs _ b = b
+
+fixAttrs :: FilePath -> B.Attr -> B.Attr
+fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs)
+
+addHash :: String -> String -> String
+addHash _ "" = ""
+addHash s ident = s ++ "#" ++ ident
+
+removeEPUBAttrs :: [(String, String)] -> [(String, String)]
+removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
+
+isEPUBAttr :: (String, String) -> Bool
+isEPUBAttr (k, _) = "epub:" `isPrefixOf` k
+
+-- Library
+
+-- Strict version of foldM
+foldM' :: (Monad m, NFData a) => (a -> b -> m a) -> a -> [b] -> m a
+foldM' _ z [] = return z
+foldM' f z (x:xs) = do
+  z' <- f z x
+  z' `deepseq` foldM' f z' xs
+
+uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
+uncurry3 f (a, b, c) = f a b c
+
+traceM :: Monad m => String -> m ()
+traceM = flip trace (return ())
+
+-- Utility
+
+stripNamespace :: QName -> String
+stripNamespace (QName v _ _) = v
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
+attrToNSPair _ = Nothing
+
+attrToPair :: Attr -> (String, String)
+attrToPair (Attr (QName name _ _) val) = (name, val)
+
+defaultNameSpace :: Maybe String
+defaultNameSpace = Just "http://www.idpf.org/2007/opf"
+
+dfName :: String -> QName
+dfName s = QName s defaultNameSpace Nothing
+
+emptyName :: String -> QName
+emptyName s = QName s Nothing Nothing
+
+-- Convert Maybe interface to Either
+
+findAttrE :: MonadError String m => QName -> Element -> m String
+findAttrE q e = mkE "findAttr" $ findAttr q e
+
+findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry
+findEntryByPathE (normalise -> path) a =
+  mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+
+parseXMLDocE :: MonadError String m => String -> m Element
+parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+
+findElementE :: MonadError String m => QName -> Element -> m Element
+findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+
+mkE :: MonadError String m => String -> Maybe a -> m a
+mkE s = maybe (throwError s) return
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 552e8a251..1789b865f 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
 {-
 Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
@@ -40,41 +41,69 @@ import Text.HTML.TagSoup
 import Text.HTML.TagSoup.Match
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
-import Text.Pandoc.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Parsing
-import Data.Maybe ( fromMaybe, isJust )
-import Data.List ( intercalate )
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
+import Text.Pandoc.Shared ( extractSpaces, renderTags'
+                          , escapeURI, safeRead )
+import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
+                           , Extension (Ext_epub_html_exts,
+                               Ext_native_divs, Ext_native_spans))
+import Text.Pandoc.Parsing hiding ((<|>))
+import Text.Pandoc.Walk
+import Data.Maybe ( fromMaybe, isJust)
+import Data.List ( intercalate, isInfixOf )
 import Data.Char ( isDigit )
-import Control.Monad ( liftM, guard, when, mzero )
-import Control.Applicative ( (<$>), (<$), (<*) )
-import Data.Monoid
+import Control.Monad ( liftM, guard, when, mzero, void, unless )
+import Control.Arrow ((***))
+import Control.Applicative ( (<$>), (<$), (<*), (*>), (<|>))
+import Data.Monoid (mconcat, Monoid, mempty, (<>), First (..))
 import Text.Printf (printf)
 import Debug.Trace (trace)
+import Text.TeXMath (readMathML, writeTeX)
+import Data.Default (Default (..), def)
+import Control.Monad.Reader (Reader,ask, asks, local, runReader)
 
-isSpace :: Char -> Bool
-isSpace ' '  = True
-isSpace '\t' = True
-isSpace '\n' = True
-isSpace _    = False
 
 -- | Convert HTML-formatted string to 'Pandoc' document.
 readHtml :: ReaderOptions -- ^ Reader options
          -> String        -- ^ String to parse (assumes @'\n'@ line endings)
          -> Pandoc
 readHtml opts inp =
-  case runParser parseDoc def{ stateOptions = opts } "source" tags of
+  case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } [])  "source" tags of
           Left err'    -> error $ "\nError at " ++ show  err'
           Right result -> result
-    where tags = canonicalizeTags $
+    where tags = stripPrefixes . canonicalizeTags $
                    parseTagsOptions parseOptions{ optTagPosition = True } inp
           parseDoc = do
              blocks <- (fixPlains False) . mconcat <$> manyTill block eof
-             meta <- stateMeta <$> getState
-             return $ Pandoc meta (B.toList blocks)
+             meta <- stateMeta . parserState <$> getState
+             bs' <- replaceNotes (B.toList blocks)
+             return $ Pandoc meta bs'
+
+replaceNotes :: [Block] -> TagParser [Block]
+replaceNotes = walkM replaceNotes'
+
+replaceNotes' :: Inline -> TagParser Inline
+replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes
+  where
+    getNotes = noteTable <$> getState
+replaceNotes' x = return x
 
-type TagParser = Parser [Tag String] ParserState
+data HTMLState =
+  HTMLState
+  {  parserState :: ParserState,
+     noteTable :: [(String, Blocks)]
+  }
+
+data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
+                           , inChapter :: Bool -- ^ Set if in chapter section
+                           }
+
+setInChapter :: HTMLParser s a -> HTMLParser s a
+setInChapter = local (\s -> s {inChapter = True})
+
+type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
+
+type TagParser = HTMLParser [Tag String]
 
 pBody :: TagParser Blocks
 pBody = pInTags "body" block
@@ -98,7 +127,11 @@ block = do
   tr <- getOption readerTrace
   pos <- getPosition
   res <- choice
-            [ pPara
+            [ eSection
+            , eSwitch B.para block
+            , mempty <$ eFootnote
+            , mempty <$ eTOC
+            , pPara
             , pHeader
             , pBlockQuote
             , pCodeBlock
@@ -115,6 +148,63 @@ block = do
              (take 60 $ show $ B.toList res)) (return ())
   return res
 
+namespaces :: [(String, TagParser Inlines)]
+namespaces = [(mathMLNamespace, pMath True)]
+
+mathMLNamespace :: String
+mathMLNamespace = "http://www.w3.org/1998/Math/MathML"
+
+eSwitch :: Monoid a => (Inlines -> a) -> TagParser a -> TagParser a
+eSwitch constructor parser = try $ do
+  guardEnabled Ext_epub_html_exts
+  pSatisfy (~== TagOpen "switch" [])
+  cases <- getFirst . mconcat <$>
+            manyTill (First <$> (eCase <* skipMany pBlank) )
+              (lookAhead $ try $ pSatisfy (~== TagOpen "default" []))
+  skipMany pBlank
+  fallback <- pInTags "default" (skipMany pBlank *> parser <* skipMany pBlank)
+  skipMany pBlank
+  pSatisfy (~== TagClose "switch")
+  return $ maybe fallback constructor cases
+
+eCase :: TagParser (Maybe Inlines)
+eCase = do
+  skipMany pBlank
+  TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" [])
+  case (flip lookup namespaces) =<< lookup "required-namespace" attr of
+    Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank))
+    Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case"))
+
+eFootnote :: TagParser ()
+eFootnote = try $ do
+  let notes = ["footnote", "rearnote"]
+  guardEnabled Ext_epub_html_exts
+  (TagOpen tag attr) <- lookAhead $ pAnyTag
+  guard (maybe False (flip elem notes) (lookup "type" attr))
+  let ident = fromMaybe "" (lookup "id" attr)
+  content <- pInTags tag block
+  addNote ident content
+
+addNote :: String -> Blocks -> TagParser ()
+addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s)})
+
+eNoteref :: TagParser Inlines
+eNoteref = try $ do
+  guardEnabled Ext_epub_html_exts
+  TagOpen tag attr <- lookAhead $ pAnyTag
+  guard (maybe False (== "noteref") (lookup "type" attr))
+  let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr)
+  guard (not (null ident))
+  pInTags tag block
+  return $ B.rawInline "noteref" ident
+
+-- Strip TOC if there is one, better to generate again
+eTOC :: TagParser ()
+eTOC = try $ do
+  guardEnabled Ext_epub_html_exts
+  (TagOpen tag attr) <- lookAhead $ pAnyTag
+  guard (maybe False (== "toc") (lookup "type" attr))
+  void (pInTags tag block)
 
 pList :: TagParser Blocks
 pList = pBulletList <|> pOrderedList <|> pDefinitionList
@@ -128,9 +218,15 @@ pBulletList = try $ do
   -- note: if they have an <ol> or <ul> not in scope of a <li>,
   -- treat it as a list item, though it's not valid xhtml...
   skipMany nonItem
-  items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul")
+  items <- manyTill (pListItem nonItem) (pCloses "ul")
   return $ B.bulletList $ map (fixPlains True) items
 
+pListItem :: TagParser a -> TagParser Blocks
+pListItem nonItem = do
+  TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" [])
+  let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
+  (liDiv <>) <$> pInTags "li" block <* skipMany nonItem
+
 pOrderedList :: TagParser Blocks
 pOrderedList = try $ do
   TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
@@ -156,7 +252,7 @@ pOrderedList = try $ do
   -- note: if they have an <ol> or <ul> not in scope of a <li>,
   -- treat it as a list item, though it's not valid xhtml...
   skipMany nonItem
-  items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol")
+  items <- manyTill (pListItem nonItem) (pCloses "ol")
   return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
 
 pDefinitionList :: TagParser Blocks
@@ -194,14 +290,14 @@ fixPlains inList bs = if any isParaish bs'
 pRawTag :: TagParser String
 pRawTag = do
   tag <- pAnyTag
-  let ignorable x = x `elem` ["html","head","body"]
+  let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]
   if tagOpen ignorable (const True) tag || tagClose ignorable tag
      then return []
      else return $ renderTags' [tag]
 
 pDiv :: TagParser Blocks
 pDiv = try $ do
-  getOption readerParseRaw >>= guard
+  guardEnabled Ext_native_divs
   TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
   contents <- pInTags "div" block
   return $ B.divWith (mkAttr attr) contents
@@ -220,13 +316,35 @@ pHtmlBlock t = try $ do
   contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
   return $ renderTags' $ [open] ++ contents ++ [TagClose t]
 
+-- Sets chapter context
+eSection :: TagParser Blocks
+eSection = try $ do
+  let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as)
+  let sectTag = tagOpen (`elem` sectioningContent) matchChapter
+  TagOpen tag _ <- lookAhead $ pSatisfy sectTag
+  setInChapter (pInTags tag block)
+
+headerLevel :: String -> TagParser Int
+headerLevel tagtype = do
+  let level = read (drop 1 tagtype)
+  (try $ do
+    guardEnabled Ext_epub_html_exts
+    asks inChapter >>= guard
+    return (level - 1))
+    <|>
+      return level
+
+
+
+
+
 pHeader :: TagParser Blocks
 pHeader = try $ do
   TagOpen tagtype attr <- pSatisfy $
                            tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
                            (const True)
   let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
-  let level = read (drop 1 tagtype)
+  level <- headerLevel tagtype
   contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
   let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] words $ lookup "class" attr
@@ -244,7 +362,7 @@ pTable :: TagParser Blocks
 pTable = try $ do
   TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
   skipMany pBlank
-  caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank
+  caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
   -- TODO actually read these and take width information from them
   widths' <- pColgroup <|> many pCol
   head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
@@ -326,7 +444,9 @@ pCodeBlock = try $ do
 
 inline :: TagParser Inlines
 inline = choice
-           [ pTagText
+           [ eNoteref
+           , eSwitch id inline
+           , pTagText
            , pQ
            , pEmph
            , pStrong
@@ -338,6 +458,7 @@ inline = choice
            , pImage
            , pCode
            , pSpan
+           , pMath False
            , pRawHtmlInline
            ]
 
@@ -366,8 +487,8 @@ pSelfClosing f g = do
 
 pQ :: TagParser Inlines
 pQ = do
-  quoteContext <- stateQuoteContext `fmap` getState
-  let quoteType = case quoteContext of
+  context <- asks quoteContext
+  let quoteType = case context of
                        InDoubleQuote -> SingleQuote
                        _             -> DoubleQuote
   let innerQuoteContext = if quoteType == SingleQuote
@@ -406,12 +527,24 @@ pLineBreak = do
   return B.linebreak
 
 pLink :: TagParser Inlines
-pLink = try $ do
+pLink = pRelLink <|> pAnchor
+
+pAnchor :: TagParser Inlines
+pAnchor = try $ do
+  tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "id"))
+  return $ B.spanWith (fromAttrib "id" tag , [], []) mempty
+
+pRelLink :: TagParser Inlines
+pRelLink = try $ do
   tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
   let url = fromAttrib "href" tag
   let title = fromAttrib "title" tag
+  let uid = fromAttrib "id" tag
+  let spanC = case uid of
+              [] -> id
+              s  -> B.spanWith (s, [], [])
   lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
-  return $ B.link (escapeURI url) title lab
+  return $ spanC $ B.link (escapeURI url) title lab
 
 pImage :: TagParser Inlines
 pImage = do
@@ -429,7 +562,7 @@ pCode = try $ do
 
 pSpan :: TagParser Inlines
 pSpan = try $ do
-  getOption readerParseRaw >>= guard
+  guardEnabled Ext_native_spans
   TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
   contents <- pInTags "span" inline
   return $ B.spanWith (mkAttr attr) contents
@@ -442,6 +575,22 @@ pRawHtmlInline = do
      then return $ B.rawInline "html" $ renderTags' [result]
      else return mempty
 
+mathMLToTeXMath :: String -> Either String String
+mathMLToTeXMath s = writeTeX <$> readMathML s
+
+pMath :: Bool -> TagParser Inlines
+pMath inCase = try $ do
+  open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True)
+  unless (inCase) (guard (maybe False  (== mathMLNamespace) (lookup "xmlns" attr)))
+  contents <- manyTill pAnyTag (pSatisfy (~== TagClose "math"))
+  let math = mathMLToTeXMath $
+              (renderTags $ [open] ++ contents ++ [TagClose "math"])
+  let constructor =
+        maybe B.math (\x -> if (x == "inline") then B.math else B.displayMath)
+          (lookup "display" attr)
+  return $ either (const mempty)
+            (\x -> if null x then mempty else constructor x) math
+
 pInlinesInTags :: String -> (Inlines -> Inlines)
                -> TagParser Inlines
 pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
@@ -479,7 +628,8 @@ pTagText :: TagParser Inlines
 pTagText = try $ do
   (TagText str) <- pSatisfy isTagText
   st <- getState
-  case runParser (many pTagContents) st "text" str of
+  qu <- ask
+  case flip runReader qu $ runParserT (many pTagContents) st "text" str of
        Left _        -> fail $ "Could not parse `" ++ str ++ "'"
        Right result  -> return $ mconcat result
 
@@ -488,7 +638,9 @@ pBlank = try $ do
   (TagText str) <- pSatisfy isTagText
   guard $ all isSpace str
 
-pTagContents :: Parser [Char] ParserState Inlines
+type InlinesParser = HTMLParser String
+
+pTagContents :: InlinesParser Inlines
 pTagContents =
       B.displayMath <$> mathDisplay
   <|> B.math        <$> mathInline
@@ -498,12 +650,11 @@ pTagContents =
   <|> pSymbol
   <|> pBad
 
-pStr :: Parser [Char] ParserState Inlines
+pStr :: InlinesParser Inlines
 pStr = do
   result <- many1 $ satisfy $ \c ->
                      not (isSpace c) && not (isSpecial c) && not (isBad c)
-  pos <- getPosition
-  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  updateLastStrPos
   return $ B.str result
 
 isSpecial :: Char -> Bool
@@ -518,13 +669,13 @@ isSpecial '\8220' = True
 isSpecial '\8221' = True
 isSpecial _ = False
 
-pSymbol :: Parser [Char] ParserState Inlines
+pSymbol :: InlinesParser Inlines
 pSymbol = satisfy isSpecial >>= return . B.str . (:[])
 
 isBad :: Char -> Bool
 isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
 
-pBad :: Parser [Char] ParserState Inlines
+pBad :: InlinesParser Inlines
 pBad = do
   c <- satisfy isBad
   let c' = case c of
@@ -558,7 +709,7 @@ pBad = do
                 _      -> '?'
   return $ B.str [c']
 
-pSpace :: Parser [Char] ParserState Inlines
+pSpace :: InlinesParser Inlines
 pSpace = many1 (satisfy isSpace) >> return B.space
 
 --
@@ -566,8 +717,10 @@ pSpace = many1 (satisfy isSpace) >> return B.space
 --
 
 eitherBlockOrInline :: [String]
-eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
-                  "map", "area", "object"]
+eitherBlockOrInline = ["audio", "applet", "button", "iframe",
+                       "del", "ins",
+                       "progress", "map", "area", "noscript", "script",
+                       "object", "svg", "video", "source"]
 
 {-
 inlineHtmlTags :: [[Char]]
@@ -579,15 +732,17 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
 -}
 
 blockHtmlTags :: [String]
-blockHtmlTags = ["address", "article", "aside", "blockquote", "body", "button", "canvas",
+blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
+                 "blockquote", "body", "button", "canvas",
                  "caption", "center", "col", "colgroup", "dd", "dir", "div",
-                 "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer",
-                 "form", "h1", "h2", "h3", "h4",
-                 "h5", "h6", "head", "header", "hgroup", "hr", "html", "isindex", "map", "menu",
-                 "noframes", "noscript", "object", "ol", "output", "p", "pre", "progress",
-                 "section", "table", "tbody", "textarea", "thead", "tfoot", "ul", "dd",
+                 "dl", "dt", "embed", "fieldset", "figcaption", "figure",
+                 "footer", "form", "h1", "h2", "h3", "h4",
+                 "h5", "h6", "head", "header", "hgroup", "hr", "html",
+                 "isindex", "menu", "noframes", "ol", "output", "p", "pre",
+                 "section", "table", "tbody", "textarea",
+                 "thead", "tfoot", "ul", "dd",
                  "dt", "frameset", "li", "tbody", "td", "tfoot",
-                 "th", "thead", "tr", "script", "style", "svg", "video"]
+                 "th", "thead", "tr", "script", "style"]
 
 -- We want to allow raw docbook in markdown documents, so we
 -- include docbook block tags here too.
@@ -605,8 +760,11 @@ blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
                     "classsynopsis", "blockquote", "epigraph", "msgset",
                     "sidebar", "title"]
 
+epubTags :: [String]
+epubTags = ["case", "switch", "default"]
+
 blockTags :: [String]
-blockTags = blockHtmlTags ++ blockDocBookTags
+blockTags = blockHtmlTags ++ blockDocBookTags ++ epubTags
 
 isInlineTag :: Tag String -> Bool
 isInlineTag t = tagOpen isInlineTagName (const True) t ||
@@ -670,19 +828,23 @@ _ `closes` _ = False
 --- parsers for use in markdown, textile readers
 
 -- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String
+htmlInBalanced :: (Monad m)
+               => (Tag String -> Bool)
+               -> ParserT String st m String
 htmlInBalanced f = try $ do
   (TagOpen t _, tag) <- htmlTag f
   guard $ '/' `notElem` tag      -- not a self-closing tag
   let stopper = htmlTag (~== TagClose t)
-  let anytag = liftM snd $ htmlTag (const True)
+  let anytag = snd <$> htmlTag (const True)
   contents <- many $ notFollowedBy' stopper >>
                      (htmlInBalanced f <|> anytag <|> count 1 anyChar)
   endtag <- liftM snd stopper
   return $ tag ++ concat contents ++ endtag
 
 -- | Matches a tag meeting a certain condition.
-htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
+htmlTag :: Monad m
+        => (Tag String -> Bool)
+        -> ParserT [Char] st m (Tag String, String)
 htmlTag f = try $ do
   lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
   (next : _) <- getInput >>= return . canonicalizeTags . parseTags
@@ -701,7 +863,78 @@ htmlTag f = try $ do
 mkAttr :: [(String, String)] -> Attr
 mkAttr attr = (attribsId, attribsClasses, attribsKV)
   where attribsId = fromMaybe "" $ lookup "id" attr
-        attribsClasses = words $ fromMaybe "" $ lookup "class" attr
+        attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) ++ epubTypes
         attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+        epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr
+
+-- Strip namespace prefixes
+stripPrefixes :: [Tag String] -> [Tag String]
+stripPrefixes = map stripPrefix
+
+stripPrefix :: Tag String -> Tag String
+stripPrefix (TagOpen s as) =
+    TagOpen (stripPrefix' s) (map (stripPrefix' *** id) as)
+stripPrefix (TagClose s) = TagClose (stripPrefix' s)
+stripPrefix x = x
+
+stripPrefix' :: String -> String
+stripPrefix' s =
+  case span (/= ':') s of
+    (_, "") -> s
+    (_, (_:ts)) -> ts
+
+isSpace :: Char -> Bool
+isSpace ' '  = True
+isSpace '\t' = True
+isSpace '\n' = True
+isSpace '\r' = True
+isSpace _    = False
+
+-- Instances
 
+-- This signature should be more general
+-- MonadReader HTMLLocal m => HasQuoteContext st m
+instance HasQuoteContext st (Reader HTMLLocal) where
+  getQuoteContext = asks quoteContext
+  withQuoteContext q = local (\s -> s{quoteContext = q})
 
+instance HasReaderOptions HTMLState where
+    extractReaderOptions = extractReaderOptions . parserState
+
+instance Default HTMLState where
+  def = HTMLState def []
+
+instance HasMeta HTMLState where
+  setMeta s b st = st {parserState = setMeta s b $ parserState st}
+  deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
+
+instance Default HTMLLocal where
+  def = HTMLLocal NoQuote False
+
+instance HasLastStrPosition HTMLState where
+  setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
+  getLastStrPos = getLastStrPos . parserState
+
+
+-- EPUB Specific
+--
+--
+sectioningContent :: [String]
+sectioningContent = ["article", "aside", "nav", "section"]
+
+{-
+groupingContent :: [String]
+groupingContent = ["p", "hr", "pre", "blockquote", "ol"
+                  , "ul", "li", "dl", "dt", "dt", "dd"
+                  , "figure", "figcaption", "div", "main"]
+
+
+
+types :: [(String, ([String], Int))]
+types =  -- Document divisions
+   map (\s -> (s, (["section", "body"], 0)))
+    ["volume", "part", "chapter", "division"]
+  ++ -- Document section and components
+  [
+    ("abstract",  ([], 0))]
+-}
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index a3dfb7c3c..4b46c869d 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -43,11 +43,8 @@ docHToBlocks d' =
             (docHToInlines False $ headerTitle h)
     DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
     DocString _ -> inlineFallback
-    DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h)
     DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h
-    DocParagraph x -> let (ils, rest) = getInlines x
-                       in (B.para $ docHToInlines False ils)
-                          <> docHToBlocks rest
+    DocParagraph x -> B.para $ docHToInlines False x
     DocIdentifier _ -> inlineFallback
     DocIdentifierUnchecked _ -> inlineFallback
     DocModule s -> B.plain $ docHToInlines False $ DocModule s
@@ -115,40 +112,6 @@ docHToInlines isCode d' =
     DocProperty _ -> mempty
     DocExamples _ -> mempty
 
-getInlines :: DocH String Identifier -> (DocH String Identifier, DocH String Identifier)
-getInlines (DocAppend x y) = if isInline x
-                                then let (a, b) = getInlines y
-                                     in  (DocAppend x a, b)
-                                else (DocEmpty, DocAppend x y)
-getInlines x = if isInline x
-                  then (x, DocEmpty)
-                  else (DocEmpty, x)
-
-isInline :: DocH String Identifier -> Bool
-isInline d' =
-  case d' of
-    DocEmpty -> True
-    DocAppend d1 _ -> isInline d1
-    DocString _ -> True
-    DocParagraph _ -> False
-    DocIdentifier _ -> True
-    DocIdentifierUnchecked _ -> True
-    DocModule _ -> True
-    DocWarning _ -> True
-    DocEmphasis _ -> True
-    DocMonospaced _ -> True
-    DocBold _ -> True
-    DocHeader _ -> False
-    DocUnorderedList _ -> False
-    DocOrderedList _ -> False
-    DocDefList _ -> False
-    DocCodeBlock _ -> False
-    DocHyperlink _ -> True
-    DocPic _ -> True
-    DocAName _ -> True
-    DocProperty _ -> False
-    DocExamples _ -> False
-
 -- | Create an 'Example', stripping superfluous characters as appropriate
 makeExample :: String -> String -> [String] -> Blocks
 makeExample prompt expression result =
@@ -173,4 +136,3 @@ makeExample prompt expression result =
         substituteBlankLine "<BLANKLINE>" = ""
         substituteBlankLine line          = line
     coder = B.codeWith ([], ["result"], [])
-
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 97bfaa455..9f51e9a8f 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -41,7 +41,6 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
                                    mathDisplay, mathInline)
-import Text.Parsec.Prim (ParsecT, runParserT)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Char ( chr, ord )
 import Control.Monad.Trans (lift)
@@ -104,7 +103,7 @@ dimenarg = try $ do
 
 sp :: LP ()
 sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
-        <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline)
+        <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline)
 
 isLowerHex :: Char -> Bool
 isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
@@ -808,7 +807,7 @@ rawEnv name = do
 
 ----
 
-type IncludeParser = ParsecT [Char] [String] IO String
+type IncludeParser = ParserT [Char] [String] IO String
 
 -- | Replace "include" commands with file contents.
 handleIncludes :: String -> IO String
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 6c710c8ff..861f81b23 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -353,8 +353,7 @@ referenceKey = try $ do
                     notFollowedBy' referenceTitle
                     notFollowedBy' (() <$ reference)
                     many1 $ notFollowedBy space >> litChar
-  let betweenAngles = try $ char '<' >>
-                       manyTill (escapedChar' <|> litChar) (char '>')
+  let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
   src <- try betweenAngles <|> sourceURL
   tit <- option "" referenceTitle
   -- currently we just ignore MMD-style link/image attributes
@@ -571,7 +570,7 @@ attributes :: MarkdownParser Attr
 attributes = try $ do
   char '{'
   spnl
-  attrs <- many (attribute >>~ spnl)
+  attrs <- many (attribute <* spnl)
   char '}'
   return $ foldl (\x f -> f x) nullAttr attrs
 
@@ -688,7 +687,7 @@ birdTrackLine c = try $ do
 --
 
 emailBlockQuoteStart :: MarkdownParser Char
-emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ')
+emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
 
 emailBlockQuote :: MarkdownParser [String]
 emailBlockQuote = try $ do
@@ -752,7 +751,7 @@ listLine = try $ do
   notFollowedBy' (do indentSpaces
                      many spaceChar
                      listStart)
-  notFollowedBy' $ htmlTag (~== TagClose "div")
+  notFollowedByHtmlCloser
   optional (() <$ indentSpaces)
   chunks <- manyTill
               (  many1 (satisfy $ \c -> c /= '\n' && c /= '<')
@@ -781,11 +780,18 @@ listContinuation = try $ do
   blanks <- many blankline
   return $ concat result ++ blanks
 
+notFollowedByHtmlCloser :: MarkdownParser ()
+notFollowedByHtmlCloser = do
+  inHtmlBlock <- stateInHtmlBlock <$> getState
+  case inHtmlBlock of
+        Just t  -> notFollowedBy' $ htmlTag (~== TagClose t)
+        Nothing -> return ()
+
 listContinuationLine :: MarkdownParser String
 listContinuationLine = try $ do
   notFollowedBy blankline
   notFollowedBy' listStart
-  notFollowedBy' $ htmlTag (~== TagClose "div")
+  notFollowedByHtmlCloser
   optional indentSpaces
   result <- anyLine
   return $ result ++ "\n"
@@ -840,38 +846,53 @@ defListMarker = do
      else mzero
   return ()
 
-definitionListItem :: MarkdownParser (F (Inlines, [Blocks]))
-definitionListItem = try $ do
-  -- first, see if this has any chance of being a definition list:
-  lookAhead (anyLine >> optional blankline >> defListMarker)
-  term <- trimInlinesF . mconcat <$> manyTill inline newline
-  optional blankline
-  raw <- many1 defRawBlock
-  state <- getState
-  let oldContext = stateParserContext state
-  -- parse the extracted block, which may contain various block elements:
+definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks]))
+definitionListItem compact = try $ do
+  rawLine' <- anyLine
+  raw <- many1 $ defRawBlock compact
+  term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
   contents <- mapM (parseFromString parseBlocks) raw
-  updateState (\st -> st {stateParserContext = oldContext})
+  optional blanklines
   return $ liftM2 (,) term (sequence contents)
 
-defRawBlock :: MarkdownParser String
-defRawBlock = try $ do
+defRawBlock :: Bool -> MarkdownParser String
+defRawBlock compact = try $ do
+  hasBlank <- option False $ blankline >> return True
   defListMarker
   firstline <- anyLine
-  rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
-  trailing <- option "" blanklines
-  cont <- liftM concat $ many $ do
-            lns <- many1 $ notFollowedBy blankline >> indentSpaces >> anyLine
-            trl <- option "" blanklines
-            return $ unlines lns ++ trl
-  return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont
+  let dline = try
+               ( do notFollowedBy blankline
+                    if compact -- laziness not compatible with compact
+                       then () <$ indentSpaces
+                       else (() <$ indentSpaces)
+                             <|> notFollowedBy defListMarker
+                    anyLine )
+  rawlines <- many dline
+  cont <- liftM concat $ many $ try $ do
+            trailing <- option "" blanklines
+            ln <- indentSpaces >> notFollowedBy blankline >> anyLine
+            lns <- many dline
+            return $ trailing ++ unlines (ln:lns)
+  return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
+            if hasBlank || not (null cont) then "\n\n" else ""
 
 definitionList :: MarkdownParser (F Blocks)
-definitionList = do
-  guardEnabled Ext_definition_lists
-  items <- fmap sequence $ many1 definitionListItem
+definitionList = try $ do
+  lookAhead (anyLine >> optional blankline >> defListMarker)
+  compactDefinitionList <|> normalDefinitionList
+
+compactDefinitionList :: MarkdownParser (F Blocks)
+compactDefinitionList = do
+  guardEnabled Ext_compact_definition_lists
+  items <- fmap sequence $ many1 $ definitionListItem True
   return $ B.definitionList <$> fmap compactify'DL items
 
+normalDefinitionList :: MarkdownParser (F Blocks)
+normalDefinitionList = do
+  guardEnabled Ext_definition_lists
+  items <- fmap sequence $ many1 $ definitionListItem False
+  return $ B.definitionList <$> items
+
 --
 -- paragraph block
 --
@@ -914,16 +935,34 @@ htmlElement = rawVerbatimBlock
 htmlBlock :: MarkdownParser (F Blocks)
 htmlBlock = do
   guardEnabled Ext_raw_html
-  res <- (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)
-          <|> htmlBlock'
-  return $ return $ B.rawBlock "html" res
-
-htmlBlock' :: MarkdownParser String
+  try (do
+      (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
+      (guard (t `elem` ["pre","style","script"]) >>
+          (return . B.rawBlock "html") <$> rawVerbatimBlock)
+        <|> (do guardEnabled Ext_markdown_attribute
+                oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
+                markdownAttribute <-
+                   case lookup "markdown" attrs of
+                        Just "0" -> False <$ updateState (\st -> st{
+                                       stateMarkdownAttribute = False })
+                        Just _   -> True <$ updateState (\st -> st{
+                                       stateMarkdownAttribute = True })
+                        Nothing  -> return oldMarkdownAttribute
+                res <- if markdownAttribute
+                          then rawHtmlBlocks
+                          else htmlBlock'
+                updateState $ \st -> st{ stateMarkdownAttribute =
+                                         oldMarkdownAttribute }
+                return res)
+        <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
+    <|> htmlBlock'
+
+htmlBlock' :: MarkdownParser (F Blocks)
 htmlBlock' = try $ do
     first <- htmlElement
-    finalSpace <- many spaceChar
-    finalNewlines <- many newline
-    return $ first ++ finalSpace ++ finalNewlines
+    skipMany spaceChar
+    optional blanklines
+    return $ return $ B.rawBlock "html" first
 
 strictHtmlBlock :: MarkdownParser String
 strictHtmlBlock = htmlInBalanced (not . isInlineTag)
@@ -934,48 +973,36 @@ rawVerbatimBlock = try $ do
                                                   ["pre", "style", "script"])
                               (const True))
   contents <- manyTill anyChar (htmlTag (~== TagClose tag))
-  return $ open ++ contents ++ renderTags [TagClose tag]
+  return $ open ++ contents ++ renderTags' [TagClose tag]
 
 rawTeXBlock :: MarkdownParser (F Blocks)
 rawTeXBlock = do
   guardEnabled Ext_raw_tex
-  result <- (B.rawBlock "latex" <$> rawLaTeXBlock)
-        <|> (B.rawBlock "context" <$> rawConTeXtEnvironment)
+  result <- (B.rawBlock "latex" . concat <$>
+                  rawLaTeXBlock `sepEndBy1` blankline)
+        <|> (B.rawBlock "context" . concat <$>
+                  rawConTeXtEnvironment `sepEndBy1` blankline)
   spaces
   return $ return result
 
-rawHtmlBlocks :: MarkdownParser String
+rawHtmlBlocks :: MarkdownParser (F Blocks)
 rawHtmlBlocks = do
-  htmlBlocks <- many1 $ try $ do
-                          s <- rawVerbatimBlock <|> try (
-                                do (t,raw) <- htmlTag isBlockTag
-                                   guard $ t ~/= TagOpen "div" [] &&
-                                           t ~/= TagClose "div"
-                                   exts <- getOption readerExtensions
-                                   -- if open tag, need markdown="1" if
-                                   -- markdown_attributes extension is set
-                                   case t of
-                                        TagOpen _ as
-                                          | Ext_markdown_attribute `Set.member`
-                                              exts ->
-                                                if "markdown" `notElem`
-                                                   map fst as
-                                                   then mzero
-                                                   else return $
-                                                     stripMarkdownAttribute raw
-                                          | otherwise -> return raw
-                                        _ -> return raw )
-                          sps <- do sp1 <- many spaceChar
-                                    sp2 <- option "" (blankline >> return "\n")
-                                    sp3 <- many spaceChar
-                                    sp4 <- option "" blanklines
-                                    return $ sp1 ++ sp2 ++ sp3 ++ sp4
-                          -- note: we want raw html to be able to
-                          -- precede a code block, when separated
-                          -- by a blank line
-                          return $ s ++ sps
-  let combined = concat htmlBlocks
-  return $ if last combined == '\n' then init combined else combined
+  (TagOpen tagtype _, raw) <- htmlTag isBlockTag
+  -- try to find closing tag
+  -- we set stateInHtmlBlock so that closing tags that can be either block or
+  -- inline will not be parsed as inline tags
+  oldInHtmlBlock <- stateInHtmlBlock <$> getState
+  updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
+  let closer = htmlTag (\x -> x ~== TagClose tagtype)
+  contents <- mconcat <$> many (notFollowedBy' closer >> block)
+  result <-
+    (closer >>= \(_, rawcloser) -> return (
+                return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
+                contents <>
+                return (B.rawBlock "html" rawcloser)))
+      <|> return (return (B.rawBlock "html" raw) <> contents)
+  updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+  return result
 
 -- remove markdown="1" attribute
 stripMarkdownAttribute :: String -> String
@@ -1163,7 +1190,7 @@ gridPart ch = do
   return (length dashes, length dashes + 1)
 
 gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
 
 removeFinalBar :: String -> String
 removeFinalBar =
@@ -1388,8 +1415,7 @@ escapedChar = do
 ltSign :: MarkdownParser (F Inlines)
 ltSign = do
   guardDisabled Ext_raw_html
-    <|> guardDisabled Ext_markdown_in_html_blocks
-    <|> (notFollowedBy' (htmlTag isBlockTag) >> return ())
+    <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
   char '<'
   return $ return $ B.str "<"
 
@@ -1434,52 +1460,60 @@ math =  (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
 enclosure :: Char
           -> MarkdownParser (F Inlines)
 enclosure c = do
+  -- we can't start an enclosure with _ if after a string and
+  -- the intraword_underscores extension is enabled:
+  guardDisabled Ext_intraword_underscores
+    <|> guard (c == '*')
+    <|> (guard =<< notAfterString)
   cs <- many1 (char c)
   (return (B.str cs) <>) <$> whitespace
-    <|> case length cs of
+    <|> do
+        case length cs of
              3  -> three c
              2  -> two   c mempty
              1  -> one   c mempty
              _  -> return (return $ B.str cs)
 
+ender :: Char -> Int -> MarkdownParser ()
+ender c n = try $ do
+  count n (char c)
+  guard (c == '*')
+    <|> guardDisabled Ext_intraword_underscores
+    <|> notFollowedBy alphaNum
+
 -- Parse inlines til you hit one c or a sequence of two cs.
 -- If one c, emit emph and then parse two.
 -- If two cs, emit strong and then parse one.
 -- Otherwise, emit ccc then the results.
 three :: Char -> MarkdownParser (F Inlines)
 three c = do
-  contents <- mconcat <$> many (notFollowedBy (char c) >> inline)
-  (try (string [c,c,c]) >> return ((B.strong . B.emph) <$> contents))
-    <|> (try (string [c,c]) >> one c (B.strong <$> contents))
-    <|> (char c >> two c (B.emph <$> contents))
+  contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
+  (ender c 3 >> return ((B.strong . B.emph) <$> contents))
+    <|> (ender c 2 >> one c (B.strong <$> contents))
+    <|> (ender c 1 >> two c (B.emph <$> contents))
     <|> return (return (B.str [c,c,c]) <> contents)
 
 -- Parse inlines til you hit two c's, and emit strong.
 -- If you never do hit two cs, emit ** plus inlines parsed.
 two :: Char -> F Inlines -> MarkdownParser (F Inlines)
 two c prefix' = do
-  let ender = try $ string [c,c]
-  contents <- mconcat <$> many (try $ notFollowedBy ender >> inline)
-  (ender >> return (B.strong <$> (prefix' <> contents)))
+  contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
+  (ender c 2 >> return (B.strong <$> (prefix' <> contents)))
     <|> return (return (B.str [c,c]) <> (prefix' <> contents))
 
 -- Parse inlines til you hit a c, and emit emph.
 -- If you never hit a c, emit * plus inlines parsed.
 one :: Char -> F Inlines -> MarkdownParser (F Inlines)
 one c prefix' = do
-  contents <- mconcat <$> many (  (notFollowedBy (char c) >> inline)
+  contents <- mconcat <$> many (  (notFollowedBy (ender c 1) >> inline)
                            <|> try (string [c,c] >>
-                                    notFollowedBy (char c) >>
+                                    notFollowedBy (ender c 1) >>
                                     two c mempty) )
-  (char c >> return (B.emph <$> (prefix' <> contents)))
+  (ender c 1 >> return (B.emph <$> (prefix' <> contents)))
     <|> return (return (B.str [c]) <> (prefix' <> contents))
 
 strongOrEmph :: MarkdownParser (F Inlines)
-strongOrEmph =  enclosure '*' <|> (checkIntraword >> enclosure '_')
-  where  checkIntraword = do
-           exts <- getOption readerExtensions
-           when (Ext_intraword_underscores `Set.member` exts) $ do
-             guard =<< notAfterString
+strongOrEmph =  enclosure '*' <|> enclosure '_'
 
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (Show b)
@@ -1489,7 +1523,7 @@ inlinesBetween :: (Show b)
 inlinesBetween start end =
   (trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
     where inner      = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
-          innerSpace = try $ whitespace >>~ notFollowedBy' end
+          innerSpace = try $ whitespace <* notFollowedBy' end
 
 strikeout :: MarkdownParser (F Inlines)
 strikeout = fmap B.strikeout <$>
@@ -1730,7 +1764,7 @@ inBrackets parser = do
 
 spanHtml :: MarkdownParser (F Inlines)
 spanHtml = try $ do
-  guardEnabled Ext_markdown_in_html_blocks
+  guardEnabled Ext_native_spans
   (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
   contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
   let ident = fromMaybe "" $ lookup "id" attrs
@@ -1745,14 +1779,19 @@ spanHtml = try $ do
 
 divHtml :: MarkdownParser (F Blocks)
 divHtml = try $ do
-  guardEnabled Ext_markdown_in_html_blocks
+  guardEnabled Ext_native_divs
   (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
+  -- we set stateInHtmlBlock so that closing tags that can be either block or
+  -- inline will not be parsed as inline tags
+  oldInHtmlBlock <- stateInHtmlBlock <$> getState
+  updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
   bls <- option "" (blankline >> option "" blanklines)
   contents <- mconcat <$>
               many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
   closed <- option False (True <$ htmlTag (~== TagClose "div"))
   if closed
      then do
+       updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
        let ident = fromMaybe "" $ lookup "id" attrs
        let classes = maybe [] words $ lookup "class" attrs
        let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
@@ -1763,10 +1802,17 @@ divHtml = try $ do
 rawHtmlInline :: MarkdownParser (F Inlines)
 rawHtmlInline = do
   guardEnabled Ext_raw_html
+  inHtmlBlock <- stateInHtmlBlock <$> getState
+  let isCloseBlockTag t = case inHtmlBlock of
+                               Just t' -> t ~== TagClose t'
+                               Nothing -> False
   mdInHtml <- option False $
-                guardEnabled Ext_markdown_in_html_blocks >> return True
+                (    guardEnabled Ext_markdown_in_html_blocks
+                 <|> guardEnabled Ext_markdown_attribute
+                ) >> return True
   (_,result) <- htmlTag $ if mdInHtml
-                             then isInlineTag
+                             then (\x -> isInlineTag x &&
+                                         not (isCloseBlockTag x))
                              else not . isTextTag
   return $ return $ B.rawInline "html" result
 
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index f1dcce8f7..e43b8a86c 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -569,7 +569,8 @@ endline = () <$ try (newline <*
 
 imageIdentifiers :: [MWParser ()]
 imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
-    where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier"]
+    where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier",
+                         "Bild"]
 
 image :: MWParser Inlines
 image = try $ do
@@ -634,7 +635,7 @@ inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines
 inlinesBetween start end =
   (trimInlines . mconcat) <$> try (start >> many1Till inner end)
     where inner      = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
-          innerSpace = try $ whitespace >>~ notFollowedBy' end
+          innerSpace = try $ whitespace <* notFollowedBy' end
 
 emph :: MWParser Inlines
 emph = B.emph <$> nested (inlinesBetween start end)
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 7a35e2ca0..e1c29d1e8 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -41,10 +41,10 @@ import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             )
 import           Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import           Text.Pandoc.Shared (compactify', compactify'DL)
-import           Text.TeXMath (texMathToPandoc, DisplayType(..))
+import           Text.TeXMath (readTeX, writePandoc, DisplayType(..))
 
 import           Control.Applicative ( Applicative, pure
-                                     , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
+                                     , (<$>), (<$), (<*>), (<*), (*>) )
 import           Control.Arrow (first)
 import           Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
@@ -274,7 +274,7 @@ optionalAttributes parser = try $
 parseBlockAttributes :: OrgParser ()
 parseBlockAttributes = do
   attrs <- many attribute
-  () <$ mapM (uncurry parseAndAddAttribute) attrs
+  mapM_ (uncurry parseAndAddAttribute) attrs
  where
    attribute :: OrgParser (String, String)
    attribute = try $ do
@@ -341,14 +341,36 @@ verseBlock blkProp = try $ do
   fmap B.para . mconcat . intersperse (pure B.linebreak)
     <$> mapM (parseFromString parseInlines) (lines content)
 
+exportsCode :: [(String, String)] -> Bool
+exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
+                         || ("rundoc-exports", "results") `elem` attrs)
+
+exportsResults :: [(String, String)] -> Bool
+exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
+                       || ("rundoc-exports", "both") `elem` attrs
+
+followingResultsBlock :: OrgParser (Maybe String)
+followingResultsBlock =
+       optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:"
+                                     *> blankline
+                                     *> (unlines <$> many1 exampleLine))
+
 codeBlock :: BlockProperties -> OrgParser (F Blocks)
 codeBlock blkProp = do
   skipSpaces
-  (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
-  id'           <- fromMaybe "" <$> lookupBlockAttribute "name"
-  content       <- rawBlockContent blkProp
-  let codeBlck  = B.codeBlockWith ( id', classes, kv ) content
-  maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption"
+  (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+  id'               <- fromMaybe "" <$> lookupBlockAttribute "name"
+  content           <- rawBlockContent blkProp
+  resultsContent    <- followingResultsBlock
+  let includeCode    = exportsCode kv
+  let includeResults = exportsResults kv
+  let codeBlck       = B.codeBlockWith ( id', classes, kv ) content
+  labelledBlck      <- maybe (pure codeBlck)
+                             (labelDiv codeBlck)
+                             <$> lookupInlinesAttr "caption"
+  let resultBlck     = pure $ maybe mempty (exampleCode) resultsContent
+  return $ (if includeCode then labelledBlck else mempty)
+           <> (if includeResults then resultBlck else mempty)
  where
    labelDiv blk value =
        B.divWith nullAttr <$> (mappend <$> labelledBlock value
@@ -780,8 +802,12 @@ noteBlock = try $ do
 
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser (F Blocks)
-paraOrPlain = try $
-  parseInlines <**> (fmap <$> option B.plain (try $ newline *> pure B.para))
+paraOrPlain = try $ do
+  ils <- parseInlines
+  nl <- option False (newline >> return True)
+  try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >>
+           return (B.para <$> ils))
+    <|>  (return (B.plain <$> ils))
 
 inlinesTillNewline :: OrgParser (F Inlines)
 inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
@@ -1357,7 +1383,7 @@ inlineLaTeX = try $ do
   maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd
  where
    parseAsMath :: String -> Maybe Inlines
-   parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs
+   parseAsMath cs = B.fromList <$> texMathToPandoc cs
 
    parseAsInlineLaTeX :: String -> Maybe Inlines
    parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
@@ -1365,6 +1391,9 @@ inlineLaTeX = try $ do
    state :: ParserState
    state = def{ stateOptions = def{ readerParseRaw = True }}
 
+   texMathToPandoc inp = (maybeRight $ readTeX inp) >>=
+                         writePandoc DisplayInline
+
 maybeRight :: Either a b -> Maybe b
 maybeRight = either (const Nothing) Just
 
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index fa8438e70..e5eccb116 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -47,7 +47,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
 import qualified Text.Pandoc.Builder as B
 import Data.Monoid (mconcat, mempty)
 import Data.Sequence (viewr, ViewR(..))
-import Data.Char (toLower)
+import Data.Char (toLower, isHexDigit)
 
 -- | Parse reStructuredText string and return Pandoc document.
 readRST :: ReaderOptions -- ^ Reader options
@@ -460,7 +460,7 @@ listItem :: RSTParser Int
 listItem start = try $ do
   (markerLength, first) <- rawListItem start
   rest <- many (listContinuation markerLength)
-  blanks <- choice [ try (many blankline >>~ lookAhead start),
+  blanks <- choice [ try (many blankline <* lookAhead start),
                      many1 blankline ]  -- whole list must end with blank.
   -- parsing with ListItemState forces markers at beginning of lines to
   -- count as list item markers, even if not separated by blank space.
@@ -480,7 +480,7 @@ listItem start = try $ do
 
 orderedList :: RSTParser Blocks
 orderedList = try $ do
-  (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
+  (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar)
   items <- many1 (listItem (orderedListStart style delim))
   let items' = compactify' items
   return $ B.orderedListWith (start, style, delim) items'
@@ -656,9 +656,6 @@ extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
   where (ds,rest) = span isHexDigit s
         mbc = safeRead ('\'':'\\':'x':ds ++ "'")
 
-isHexDigit :: Char -> Bool
-isHexDigit c = c `elem` "0123456789ABCDEFabcdef"
-
 extractCaption :: RSTParser (Inlines, Blocks)
 extractCaption = do
   capt <- trimInlines . mconcat <$> many inline
@@ -747,7 +744,7 @@ simpleReferenceName = do
 
 referenceName :: RSTParser Inlines
 referenceName = quotedReferenceName <|>
-                (try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
+                (try $ simpleReferenceName <* lookAhead (char ':')) <|>
                 unquotedReferenceName
 
 referenceKey :: RSTParser [Char]
@@ -1076,7 +1073,7 @@ explicitLink = try $ do
 
 referenceLink :: RSTParser Inlines
 referenceLink = try $ do
-  (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~
+  (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <*
                    char '_'
   state <- getState
   let keyTable = stateKeys state
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index f03eae044..3fee3051e 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -27,7 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Conversion of TeX math to a list of 'Pandoc' inline elements.
 -}
-module Text.Pandoc.Readers.TeXMath ( readTeXMath, readTeXMath' ) where
+module Text.Pandoc.Readers.TeXMath ( texMathToInlines ) where
 
 import Text.Pandoc.Definition
 import Text.TeXMath
@@ -35,22 +35,14 @@ import Text.TeXMath
 -- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
 -- Defaults to raw formula between @$@ or @$$@ characters if entire formula
 -- can't be converted.
-readTeXMath' :: MathType
+texMathToInlines :: MathType
              -> String    -- ^ String to parse (assumes @'\n'@ line endings)
              -> [Inline]
-readTeXMath' mt inp = case texMathToPandoc dt inp of
-                           Left _    -> [Str (delim ++ inp ++ delim)]
-                           Right res -> res
+texMathToInlines mt inp =
+  case writePandoc dt `fmap` readTeX inp of
+       Right (Just ils)  -> ils
+       _                 -> [Str (delim ++ inp ++ delim)]
     where (dt, delim) = case mt of
                              DisplayMath -> (DisplayBlock, "$$")
                              InlineMath  -> (DisplayInline, "$")
 
-{-# DEPRECATED readTeXMath "Use readTeXMath' from Text.Pandoc.JSON instead" #-}
--- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
--- Defaults to raw formula between @$@ characters if entire formula
--- can't be converted.  (This is provided for backwards compatibility;
--- it is better to use @readTeXMath'@, which properly distinguishes
--- between display and inline math.)
-readTeXMath :: String    -- ^ String to parse (assumes @'\n'@ line endings)
-            -> [Inline]
-readTeXMath = readTeXMath' InlineMath
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 6d839ec1d..cd34da942 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -265,8 +265,20 @@ definitionList :: Parser [Char] ParserState Blocks
 definitionList = try $ B.definitionList <$> many1 definitionListItem
 
 -- | List start character.
-listStart :: Parser [Char] st Char
-listStart = oneOf "*#-"
+listStart :: Parser [Char] ParserState ()
+listStart = genericListStart '*'
+        <|> () <$ genericListStart '#'
+        <|> () <$ definitionListStart
+
+genericListStart :: Char -> Parser [Char] st ()
+genericListStart c = () <$ try (many1 (char c) >> whitespace)
+
+definitionListStart :: Parser [Char] ParserState Inlines
+definitionListStart = try $ do
+  char '-'
+  whitespace
+  trimInlines . mconcat <$>
+    many1Till inline (try (string ":=")) <* optional whitespace
 
 listInline :: Parser [Char] ParserState Inlines
 listInline = try (notFollowedBy newline >> inline)
@@ -278,8 +290,7 @@ listInline = try (notFollowedBy newline >> inline)
 -- break.
 definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
 definitionListItem = try $ do
-  string "- "
-  term <- mconcat <$> many1Till inline (try (whitespace >> string ":="))
+  term <- definitionListStart
   def' <- multilineDef <|> inlineDef
   return (term, def')
   where inlineDef :: Parser [Char] ParserState [Blocks]
@@ -488,7 +499,7 @@ str = do
   return $ B.str fullStr
 
 -- | Some number of space chars
-whitespace :: Parser [Char] ParserState Inlines
+whitespace :: Parser [Char] st Inlines
 whitespace = many1 spaceChar >> return B.space <?> "whitespace"
 
 -- | In Textile, an isolated endline character is a line break
@@ -616,7 +627,8 @@ simpleInline border construct = try $ do
   attr <- attributes
   body <- trimInlines . mconcat <$>
           withQuoteContext InSingleQuote
-            (manyTill inline (try border <* notFollowedBy alphaNum))
+            (manyTill (notFollowedBy newline >> inline)
+             (try border <* notFollowedBy alphaNum))
   return $ construct $
         if attr == nullAttr
            then body
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
new file mode 100644
index 000000000..3a51b9d84
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -0,0 +1,548 @@
+{-# LANGUAGE ViewPatterns #-}
+{-
+Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Txt2Tags
+   Copyright   : Copyright (C) 2014 Matthew Pickering
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Matthew Pickering <matthewtpickering@gmail.com>
+
+Conversion of txt2tags formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags
+                                    , getT2TMeta
+                                    , T2TMeta (..)
+                                    , readTxt2TagsNoMacros)
+                                    where
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder ( Inlines, Blocks, (<>)
+                           , trimInlines )
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL)
+import Text.Pandoc.Parsing hiding (space, spaces, uri, macro)
+import Control.Applicative ((<$>), (<$), (<*>), (<*), (*>))
+import Data.Char (toLower)
+import Data.List (transpose, intersperse, intercalate)
+import Data.Maybe (fromMaybe)
+import Data.Monoid (Monoid, mconcat, mempty, mappend)
+--import Network.URI (isURI) -- Not sure whether to use this function
+import Control.Monad (void, guard, when)
+import Data.Default
+import Control.Monad.Reader (Reader, runReader, asks)
+
+import Data.Time.LocalTime (getZonedTime)
+import Text.Pandoc.Compat.Directory(getModificationTime)
+import Data.Time.Format (formatTime)
+import System.Locale (defaultTimeLocale)
+import System.IO.Error (catchIOError)
+
+type T2T = ParserT String ParserState (Reader T2TMeta)
+
+-- | An object for the T2T macros meta information
+-- the contents of each field is simply substituted verbatim into the file
+data  T2TMeta = T2TMeta {
+                 date :: String -- ^ Current date
+               , mtime :: String -- ^ Last modification time of infile
+               , infile :: FilePath -- ^ Input file
+               , outfile :: FilePath -- ^ Output file
+               } deriving Show
+
+instance Default T2TMeta where
+    def = T2TMeta "" "" "" ""
+
+-- | Get the meta information required by Txt2Tags macros
+getT2TMeta :: [FilePath] -> FilePath -> IO T2TMeta
+getT2TMeta inps out = do
+    curDate <- formatTime defaultTimeLocale "%F" <$> getZonedTime
+    let getModTime = fmap (formatTime defaultTimeLocale "%F") .
+                       getModificationTime
+    curMtime <- catchIOError
+                (maximum <$> mapM getModTime inps)
+                (const (return ""))
+    return $ T2TMeta curDate curMtime (intercalate ", " inps) out
+
+-- | Read Txt2Tags from an input string returning a Pandoc document
+readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc
+readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
+
+-- | Read Txt2Tags (ignoring all macros) from an input string returning
+-- a Pandoc document
+readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc
+readTxt2TagsNoMacros = readTxt2Tags def
+
+parseT2T :: T2T Pandoc
+parseT2T = do
+  _ <- (Nothing <$ try blankline) <|> (Just <$> (count 3 anyLine))
+  config <- manyTill setting (notFollowedBy setting)
+  -- TODO: Handle settings better
+  let settings = foldr (\(k,v) -> B.setMeta k (MetaString v)) nullMeta config
+  updateState (\s -> s {stateMeta = settings})
+  body <- mconcat <$>  manyTill block eof
+  return $ Pandoc mempty (B.toList body)
+
+type Keyword = String
+type Value = String
+
+setting :: T2T (Keyword, Value)
+setting = do
+  string "%!"
+  keyword <- ignoreSpacesCap (many1 alphaNum)
+  char ':'
+  value <- ignoreSpacesCap (manyTill anyChar (newline))
+  return (keyword, value)
+
+-- Blocks
+
+parseBlocks :: T2T Blocks
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: T2T Blocks
+block = do
+  choice
+    [ mempty <$ blanklines
+    , quote
+    , hrule -- hrule must go above title
+    , title
+    , commentBlock
+    , verbatim
+    , rawBlock
+    , taggedBlock
+    , list
+    , table
+    , para
+    ]
+
+title :: T2T Blocks
+title = try $ balancedTitle '+' <|> balancedTitle '='
+
+balancedTitle :: Char -> T2T Blocks
+balancedTitle c = try $ do
+  spaces
+  level <- length <$> many1 (char c)
+  guard (level <= 5) -- Max header level 5
+  heading <- manyTill (noneOf "\n\r") (count level (char c))
+  label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-"))
+  many spaceChar *> newline
+  let attr = maybe nullAttr (\x -> (x, [], [])) label
+  return $ B.headerWith attr level (trimInlines $ B.text heading)
+
+para :: T2T Blocks
+para = try $ do
+  ils <- parseInlines
+  nl <- option False (True <$ newline)
+  option (B.plain ils) (guard nl >> notFollowedBy listStart >> return (B.para ils))
+  where
+    listStart = try bulletListStart <|> orderedListStart
+
+commentBlock :: T2T Blocks
+commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment
+
+-- Seperator and Strong line treated the same
+hrule :: T2T Blocks
+hrule = try $ do
+  spaces
+  line <- many1 (oneOf "=-_")
+  guard (length line >= 20)
+  B.horizontalRule <$ blankline
+
+quote :: T2T Blocks
+quote = try $ do
+  lookAhead tab
+  rawQuote <-  many1 (tab *> optional spaces *> anyLine)
+  contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
+  return $ B.blockQuote contents
+
+commentLine :: T2T Inlines
+commentLine = comment
+
+-- List Parsing code from Org Reader
+
+list :: T2T Blocks
+list = choice [bulletList, orderedList, definitionList]
+
+bulletList :: T2T Blocks
+bulletList = B.bulletList . compactify'
+             <$> many1 (listItem bulletListStart parseBlocks)
+
+orderedList :: T2T Blocks
+orderedList = B.orderedList . compactify'
+              <$> many1 (listItem orderedListStart parseBlocks)
+
+definitionList :: T2T Blocks
+definitionList = try $ do
+  B.definitionList . compactify'DL <$>
+    many1 (listItem definitionListStart definitionListEnd)
+
+definitionListEnd :: T2T (Inlines, [Blocks])
+definitionListEnd = (,) <$> (mconcat <$> manyTill inline newline) <*> ((:[]) <$> parseBlocks)
+
+genericListStart :: T2T Char
+                 -> T2T Int
+genericListStart listMarker = try $
+  (2+) <$> (length <$> many spaceChar
+            <* listMarker <* space <* notFollowedBy space)
+
+-- parses bullet list \start and returns its length (excl. following whitespace)
+bulletListStart :: T2T  Int
+bulletListStart = genericListStart (char '-')
+
+orderedListStart :: T2T Int
+orderedListStart = genericListStart (char '+' )
+
+definitionListStart :: T2T Int
+definitionListStart = genericListStart (char ':')
+
+-- parse raw text for one list item, excluding start marker and continuations
+listItem :: T2T Int
+         -> T2T a
+         -> T2T a
+listItem start end = try $ do
+  markerLength <- try start
+  firstLine <- anyLineNewline
+  blank <- option "" ("\n" <$ blankline)
+  rest <- concat <$> many (listContinuation markerLength)
+  parseFromString end $ firstLine ++ blank ++ rest
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int
+                 -> T2T String
+listContinuation markerLength = try $
+  notFollowedBy' (blankline >> blankline)
+  *> (mappend <$> (concat <$> many1 listLine)
+              <*> many blankline)
+ where listLine = try $ indentWith markerLength *> anyLineNewline
+
+anyLineNewline :: T2T String
+anyLineNewline = (++ "\n") <$> anyLine
+
+indentWith :: Int -> T2T String
+indentWith n = count n space
+
+-- Table
+
+table :: T2T Blocks
+table = try $ do
+  header <- fmap snd <$> option mempty (try headerRow)
+  rows <- many1 (many commentLine *> tableRow)
+  let columns = transpose rows
+  let ncolumns = length columns
+  let aligns = map (foldr1 findAlign) (map (map fst) columns)
+  let rows' = map (map snd) rows
+  let size = maximum (map length rows')
+  let rowsPadded = map (pad size) rows'
+  let headerPadded = if (not (null header)) then pad size header else mempty
+  return $ B.table mempty
+                    (zip aligns (replicate ncolumns 0.0))
+                      headerPadded rowsPadded
+
+pad :: (Show a, Monoid a) => Int -> [a] -> [a]
+pad n xs = xs ++ (replicate (n - length xs) mempty)
+
+
+findAlign :: Alignment -> Alignment -> Alignment
+findAlign x y
+  | x == y = x
+  | otherwise = AlignDefault
+
+headerRow :: T2T [(Alignment, Blocks)]
+headerRow = genericRow (string "||")
+
+tableRow :: T2T [(Alignment, Blocks)]
+tableRow = genericRow (char '|')
+
+genericRow :: T2T a -> T2T [(Alignment, Blocks)]
+genericRow start = try $ do
+  spaces *> start
+  manyTill tableCell newline <?> "genericRow"
+
+
+tableCell :: T2T (Alignment, Blocks)
+tableCell = try $ do
+  leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead
+  content <- (manyTill inline (try $ lookAhead (cellEnd)))
+  rightSpaces <- length <$> many space
+  let align =
+        case compare leftSpaces rightSpaces of
+              LT -> AlignLeft
+              EQ -> AlignCenter
+              GT -> AlignRight
+  endOfCell
+  return $ (align, B.plain (B.trimInlines $ mconcat content))
+  where
+    cellEnd = (void newline <|> (many1 space *> endOfCell))
+
+endOfCell :: T2T ()
+endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline)
+
+-- Raw area
+
+verbatim :: T2T Blocks
+verbatim = genericBlock anyLineNewline B.codeBlock "```"
+
+rawBlock :: T2T Blocks
+rawBlock = genericBlock anyLineNewline (B.para . B.str) "\"\"\""
+
+taggedBlock :: T2T Blocks
+taggedBlock = do
+  target <- getTarget
+  genericBlock anyLineNewline (B.rawBlock target) "'''"
+
+-- Generic
+
+genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
+genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s
+
+blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupArea p f s = try $ (do
+  string s *> blankline
+  f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline))))
+
+blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupLine p f s = try (f <$> (string s *> space *> p))
+
+-- Can be in either block or inline position
+comment :: Monoid a => T2T a
+comment = try $ do
+  atStart
+  notFollowedBy macro
+  mempty <$ (char '%' *> anyLine)
+
+-- Inline
+
+parseInlines :: T2T Inlines
+parseInlines = trimInlines . mconcat <$> many1 inline
+
+inline :: T2T Inlines
+inline = do
+  choice
+    [ endline
+    , macro
+    , commentLine
+    , whitespace
+    , url
+    , link
+    , image
+    , bold
+    , underline
+    , code
+    , raw
+    , tagged
+    , strike
+    , italic
+    , code
+    , str
+    , symbol
+    ]
+
+bold :: T2T Inlines
+bold = inlineMarkup inline B.strong '*' (B.str)
+
+underline :: T2T Inlines
+underline = inlineMarkup inline B.emph '_' (B.str)
+
+strike :: T2T Inlines
+strike = inlineMarkup inline B.strikeout '-' (B.str)
+
+italic :: T2T Inlines
+italic = inlineMarkup inline B.emph '/' (B.str)
+
+code :: T2T Inlines
+code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id
+
+raw :: T2T Inlines
+raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id
+
+tagged :: T2T Inlines
+tagged = do
+  target <- getTarget
+  inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id
+
+-- Parser for markup indicated by a double character.
+-- Inline markup is greedy and glued
+-- Greedy meaning ***a*** = Bold [Str "*a*"]
+-- Glued meaning that markup must be tight to content
+-- Markup can't pass newlines
+inlineMarkup :: Monoid a
+             => (T2T a) -- Content parser
+             -> (a -> Inlines) -- Constructor
+             -> Char -- Fence
+             -> (String -> a) -- Special Case to handle ******
+             -> T2T Inlines
+inlineMarkup p f c special = try $ do
+  start <- many1 (char c)
+  let l = length start
+  guard (l >= 2)
+  when (l == 2) (void $ notFollowedBy space)
+  -- We must make sure that there is no space before the start of the
+  -- closing tags
+  body <-  optionMaybe (try $ manyTill (noneOf "\n\r") $
+                (try $ lookAhead (noneOf " " >> string [c,c] )))
+  case body of
+    Just middle -> do
+      lastChar <- anyChar
+      end <- many1 (char c)
+      let parser inp = parseFromString (mconcat <$> many p) inp
+      let start' = special (drop 2 start)
+      body' <- parser (middle ++ [lastChar])
+      let end' = special (drop 2 end)
+      return $ f (start' <> body' <> end')
+    Nothing -> do -- Either bad or case such as *****
+      guard (l >= 5)
+      let body' = (replicate (l - 4) c)
+      return $ f (special body')
+
+link :: T2T Inlines
+link = try imageLink <|> titleLink
+
+-- Link with title
+titleLink :: T2T Inlines
+titleLink = try $ do
+  char '['
+  notFollowedBy space
+  tokens <- sepBy1 (many $ noneOf " ]") space
+  guard (length tokens >= 2)
+  char ']'
+  let link' = last tokens
+  guard (length link' > 0)
+  let tit = concat (intersperse " " (init tokens))
+  return $ B.link link' "" (B.text tit)
+
+-- Link with image
+imageLink :: T2T Inlines
+imageLink = try $ do
+  char '['
+  body <- image
+  many1 space
+  l <- manyTill (noneOf "\n\r ") (char ']')
+  return (B.link l "" body)
+
+macro :: T2T Inlines
+macro = try $ do
+  name <- string "%%" *> oneOfStringsCI (map fst commands)
+  optional (try $ enclosed (char '(') (char ')') anyChar)
+  lookAhead (spaceChar <|> oneOf specialChars <|> newline)
+  maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands)
+  where
+    commands = [ ("date", date), ("mtime", mtime)
+               , ("infile", infile), ("outfile", outfile)]
+
+-- raw URLs in text are automatically linked
+url :: T2T Inlines
+url = try $ do
+  (rawUrl, escapedUrl) <- (try uri <|> emailAddress)
+  return $ B.link rawUrl "" (B.str escapedUrl)
+
+uri :: T2T (String, String)
+uri = try $ do
+  address <- t2tURI
+  return (address, escapeURI address)
+
+-- The definition of a URI in the T2T source differs from the
+-- actual definition. This is a transcription of the definition in
+-- the source of v2.6
+--isT2TURI :: String -> Bool
+--isT2TURI (parse t2tURI "" -> Right _) = True
+--isT2TURI _ = False
+
+t2tURI :: T2T String
+t2tURI = do
+  start <- try ((++) <$> proto <*> urlLogin) <|> guess
+  domain <- many1 chars
+  sep <- many (char '/')
+  form' <- option mempty ((:) <$> char '?' <*> many1 form)
+  anchor' <- option mempty ((:) <$> char '#' <*> many anchor)
+  return (start ++ domain ++ sep ++ form' ++ anchor')
+  where
+    protos = ["http", "https", "ftp", "telnet", "gopher", "wais"]
+    proto = (++) <$> oneOfStrings protos <*> string "://"
+    guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23"))
+              <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.')
+    login = alphaNum <|> oneOf "_.-"
+    pass = many (noneOf " @")
+    chars = alphaNum <|> oneOf "%._/~:,=$@&+-"
+    anchor = alphaNum <|> oneOf "%._0"
+    form = chars <|> oneOf ";*"
+    urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@')
+
+
+image :: T2T Inlines
+image =  try $ do
+  -- List taken from txt2tags source
+  let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"]
+  char '['
+  path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions))
+  ext <- oneOfStrings extensions
+  char ']'
+  return $ B.image (path ++ ext) "" mempty
+
+-- Characters used in markup
+specialChars :: String
+specialChars = "%*-_/|:+"
+
+tab :: T2T Char
+tab = char '\t'
+
+space :: T2T Char
+space = char ' '
+
+spaces :: T2T String
+spaces = many space
+
+endline :: T2T Inlines
+endline = try $ do
+  newline
+  notFollowedBy blankline
+  notFollowedBy hrule
+  notFollowedBy title
+  notFollowedBy verbatim
+  notFollowedBy rawBlock
+  notFollowedBy taggedBlock
+  notFollowedBy quote
+  notFollowedBy list
+  notFollowedBy table
+  return $ B.space
+
+str :: T2T Inlines
+str = try $ do
+  B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+
+whitespace :: T2T Inlines
+whitespace = try $ B.space <$ spaceChar
+
+symbol :: T2T Inlines
+symbol = B.str . (:[]) <$> oneOf specialChars
+
+-- Utility
+
+getTarget :: T2T String
+getTarget = do
+  mv <- lookupMeta "target" . stateMeta <$> getState
+  let MetaString target = fromMaybe (MetaString "html") mv
+  return target
+
+atStart :: T2T ()
+atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
+
+ignoreSpacesCap :: T2T String -> T2T String
+ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)
+