8 files changed, 256 insertions, 176 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index eb71d8dd8..9c7c3b264 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -50,8 +50,7 @@ implemented, [-] means partially implemented):
 * Inlines
 
   - [X] Str
-  - [X] Emph (From italics. `underline` currently read as span. In
-        future, it might optionally be emph as well)
+  - [X] Emph (italics and underline both read as Emph)
   - [X] Strong
   - [X] Strikeout
   - [X] Superscript
@@ -62,16 +61,16 @@ implemented, [-] means partially implemented):
   - [X] Code (styled with `VerbatimChar`)
   - [X] Space
   - [X] LineBreak (these are invisible in Word: entered with Shift-Return)
-  - [ ] Math
+  - [X] Math
   - [X] Link (links to an arbitrary bookmark create a span with the target as
         id and "anchor" class)
-  - [-] Image (Links to path in archive. Future option for
-        data-encoded URI likely.)
+  - [X] Image
   - [X] Note (Footnotes and Endnotes are silently combined.)
 -}
 
 module Text.Pandoc.Readers.Docx
-       ( readDocx
+       ( readDocxWithWarnings
+       , readDocx
        ) where
 
 import Codec.Archive.Zip
@@ -98,14 +97,23 @@ import qualified Data.Sequence as Seq (null)
 import Text.Pandoc.Error
 import Text.Pandoc.Compat.Except
 
+readDocxWithWarnings :: ReaderOptions
+                     -> B.ByteString
+                     -> Either PandocError (Pandoc, MediaBag, [String])
+readDocxWithWarnings opts bytes
+  | Right archive <- toArchiveOrFail bytes
+  , Right (docx, warnings) <- archiveToDocxWithWarnings archive = do
+      (meta, blks, mediaBag) <- docxToOutput opts docx
+      return (Pandoc meta blks, mediaBag, warnings)
+readDocxWithWarnings _ _ =
+  Left (ParseFailure "couldn't parse docx file")
+
 readDocx :: ReaderOptions
          -> B.ByteString
          -> Either PandocError (Pandoc, MediaBag)
-readDocx opts bytes =
-  case archiveToDocx (toArchive bytes) of
-    Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag))
-                    <$> (docxToOutput opts docx)
-    Left _   -> Left (ParseFailure "couldn't parse docx file")
+readDocx opts bytes = do
+  (pandoc, mediaBag, _) <- readDocxWithWarnings opts bytes
+  return (pandoc, mediaBag)
 
 data DState = DState { docxAnchorMap :: M.Map String String
                      , docxMediaBag      :: MediaBag
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index eec8b12c9..7265ef8dd 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , Row(..)
                                       , Cell(..)
                                       , archiveToDocx
+                                      , archiveToDocxWithWarnings
                                       ) where
 import Codec.Archive.Zip
 import Text.XML.Light
@@ -60,6 +61,7 @@ import Data.Bits ((.|.))
 import qualified Data.ByteString.Lazy as B
 import qualified Text.Pandoc.UTF8 as UTF8
 import Control.Monad.Reader
+import Control.Monad.State
 import Control.Applicative ((<|>))
 import qualified Data.Map as M
 import Text.Pandoc.Compat.Except
@@ -81,16 +83,20 @@ data ReaderEnv = ReaderEnv { envNotes         :: Notes
                            }
                deriving Show
 
+data ReaderState = ReaderState { stateWarnings :: [String] }
+                 deriving Show
+                                                  
+
 data DocxError = DocxError | WrongElem
                deriving Show
 
 instance Error DocxError where
   noMsg = WrongElem
 
-type D = ExceptT DocxError (Reader ReaderEnv)
+type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState))
 
-runD :: D a -> ReaderEnv -> Either DocxError a
-runD dx re = runReader (runExceptT dx) re
+runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState)
+runD dx re rs = runState (runReaderT (runExceptT dx) re) rs
 
 maybeToD :: Maybe a -> D a
 maybeToD (Just a) = return a
@@ -257,7 +263,10 @@ type Author = String
 type ChangeDate = String
 
 archiveToDocx :: Archive -> Either DocxError Docx
-archiveToDocx archive = do
+archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
+
+archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
+archiveToDocxWithWarnings archive = do
   let notes     = archiveToNotes archive
       numbering = archiveToNumbering archive
       rels      = archiveToRelationships archive
@@ -265,8 +274,12 @@ archiveToDocx archive = do
       (styles, parstyles) = archiveToStyles archive
       rEnv =
         ReaderEnv notes numbering rels media Nothing styles parstyles InDocument
-  doc <- runD (archiveToDocument archive) rEnv
-  return $ Docx doc
+      rState = ReaderState { stateWarnings = [] }
+      (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
+  case eitherDoc of
+    Right doc -> Right (Docx doc, stateWarnings st)
+    Left e    -> Left e
+
 
 
 archiveToDocument :: Archive -> D Document
@@ -576,12 +589,14 @@ elemToBodyPart ns element
       sty <- asks envParStyles
       let parstyle = elemToParagraphStyle ns element sty
       parparts <- mapD (elemToParPart ns) (elChildren element)
-      case pNumInfo parstyle of
-       Just (numId, lvl) -> do
-         num <- asks envNumbering
-         let levelInfo = lookupLevel numId lvl num
-         return $ ListItem parstyle numId lvl levelInfo parparts
-       Nothing -> return $ Paragraph parstyle parparts
+      -- Word uses list enumeration for numbered headings, so we only
+      -- want to infer a list from the styles if it is NOT a heading.
+      case pHeading parstyle of
+        Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
+                    num <- asks envNumbering
+                    let levelInfo = lookupLevel numId lvl num
+                    return $ ListItem parstyle numId lvl levelInfo parparts
+        _ -> return $ Paragraph parstyle parparts
 elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
     let caption' = findChild (elemName ns "w" "tblPr") element
@@ -646,14 +661,14 @@ elemToParPart ns element
   | isElem ns "w" "r" element =
     elemToRun ns element >>= (\r -> return $ PlainRun r)
 elemToParPart ns element
-  | isElem ns "w" "ins" element
+  | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
   , Just cId <- findAttr (elemName ns "w" "id") element
   , Just cAuthor <- findAttr (elemName ns "w" "author") element
   , Just cDate <- findAttr (elemName ns "w" "date") element = do
     runs <- mapD (elemToRun ns) (elChildren element)
     return $ Insertion cId cAuthor cDate runs
 elemToParPart ns element
-  | isElem ns "w" "del" element
+  | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
   , Just cId <- findAttr (elemName ns "w" "id") element
   , Just cAuthor <- findAttr (elemName ns "w" "author") element
   , Just cDate <- findAttr (elemName ns "w" "date") element = do
@@ -702,36 +717,58 @@ elemToExtent drawingElem =
       getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
                     >>= findAttr (QName at Nothing Nothing) >>= safeRead
 
-elemToRun :: NameSpaces -> Element -> D Run
-elemToRun ns element
-  | isElem ns "w" "r" element
-  , Just drawingElem <- findChild (elemName ns "w" "drawing") element =
+
+childElemToRun :: NameSpaces -> Element -> D Run
+childElemToRun ns element
+  | isElem ns "w" "drawing" element =
     let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
-        drawing = findElement (QName "blip" (Just a_ns) (Just "a")) drawingElem
+        drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
                   >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
     in
      case drawing of
        Just s -> expandDrawingId s >>=
-                 (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent drawingElem)
+                 (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent element)
        Nothing -> throwError WrongElem
-elemToRun ns element
-  | isElem ns "w" "r" element
-  , Just ref <- findChild (elemName ns "w" "footnoteReference") element
-  , Just fnId <- findAttr (elemName ns "w" "id") ref = do
+childElemToRun ns element
+  | isElem ns "w" "footnoteReference" element
+  , Just fnId <- findAttr (elemName ns "w" "id") element = do
     notes <- asks envNotes
     case lookupFootnote fnId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
                    return $ Footnote bps
       Nothing  -> return $ Footnote []
-elemToRun ns element
-  | isElem ns "w" "r" element
-  , Just ref <- findChild (elemName ns "w" "endnoteReference") element
-  , Just enId <- findAttr (elemName ns "w" "id") ref = do
+childElemToRun ns element
+  | isElem ns "w" "endnoteReference" element
+  , Just enId <- findAttr (elemName ns "w" "id") element = do
     notes <- asks envNotes
     case lookupEndnote enId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
                    return $ Endnote bps
       Nothing  -> return $ Endnote []
+childElemToRun _ _ = throwError WrongElem
+
+elemToRun :: NameSpaces -> Element -> D Run
+elemToRun ns element
+  | isElem ns "w" "r" element
+  , Just altCont <- findChild (elemName ns "mc" "AlternateContent") element =
+    do let choices = findChildren (elemName ns "mc" "Choice") altCont
+           choiceChildren = map head $ filter (not . null) $ map elChildren choices
+       outputs <- mapD (childElemToRun ns) choiceChildren
+       case outputs of
+         r : _ -> return r
+         []    -> throwError WrongElem
+elemToRun ns element
+  | isElem ns "w" "r" element
+  , Just drawingElem <- findChild (elemName ns "w" "drawing") element =
+    childElemToRun ns drawingElem
+elemToRun ns element
+  | isElem ns "w" "r" element
+  , Just ref <- findChild (elemName ns "w" "footnoteReference") element =
+    childElemToRun ns ref
+elemToRun ns element
+  | isElem ns "w" "r" element
+  , Just ref <- findChild (elemName ns "w" "endnoteReference") element =
+    childElemToRun ns ref
 elemToRun ns element
   | isElem ns "w" "r" element = do
     runElems <- elemToRunElems ns element
@@ -940,3 +977,4 @@ elemToRunElems _ _ = throwError WrongElem
 
 setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
 setFont f s = s{envFont = f}
+
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 07d282708..144ba9ca2 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -19,7 +19,7 @@ import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
 import Text.Pandoc.Compat.Monoid ((<>))
 import Text.Pandoc.MIME (MimeType)
 import qualified Text.Pandoc.Builder as B
-import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry
+import Codec.Archive.Zip ( Archive (..), toArchiveOrFail, fromEntry
                          , findEntryByPath, Entry)
 import qualified Data.ByteString.Lazy as BL (ByteString)
 import System.FilePath ( takeFileName, (</>), dropFileName, normalise
@@ -39,7 +39,9 @@ import Text.Pandoc.Error
 type Items = M.Map String (FilePath, MimeType)
 
 readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)
-readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes)
+readEPUB opts bytes = case toArchiveOrFail bytes of
+  Right archive -> runEPUB $ archiveToEPUB opts $ archive
+  Left  _       -> Left $ ParseFailure "Couldn't extract ePub file"
 
 runEPUB :: Except PandocError a -> Either PandocError a
 runEPUB = runExcept
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 69df13aac..8ee5da543 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -63,7 +63,7 @@ import Debug.Trace (trace)
 import Text.TeXMath (readMathML, writeTeX)
 import Data.Default (Default (..), def)
 import Control.Monad.Reader (Reader,ask, asks, local, runReader)
-import Network.URI (isURI)
+import Network.URI (URI, parseURIReference, nonStrictRelativeTo)
 import Text.Pandoc.Error
 import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
 import Text.Pandoc.Compat.Monoid ((<>))
@@ -103,7 +103,7 @@ data HTMLState =
   HTMLState
   {  parserState :: ParserState,
      noteTable   :: [(String, Blocks)],
-     baseHref    :: Maybe String,
+     baseHref    :: Maybe URI,
      identifiers :: Set.Set String,
      headerMap   :: M.Map Inlines String
   }
@@ -145,15 +145,9 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag
                return mempty
         pBaseTag = do
           bt <- pSatisfy (~== TagOpen "base" [])
-          let baseH = fromAttrib "href" bt
-          if null baseH
-             then return mempty
-             else do
-               let baseH' = case reverse baseH of
-                                  '/':_ -> baseH
-                                  _     -> baseH ++ "/"
-               updateState $ \st -> st{ baseHref = Just baseH' }
-               return mempty
+          updateState $ \st -> st{ baseHref =
+               parseURIReference $ fromAttrib "href" bt }
+          return mempty
 
 block :: TagParser Blocks
 block = do
@@ -610,9 +604,9 @@ pLink = try $ do
   tag <- pSatisfy $ tagOpenLit "a" (const True)
   mbBaseHref <- baseHref <$> getState
   let url' = fromAttrib "href" tag
-  let url = case (isURI url', mbBaseHref) of
-                 (False, Just h) -> h ++ url'
-                 _               -> url'
+  let url = case (parseURIReference url', mbBaseHref) of
+                 (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
+                 _                   -> url'
   let title = fromAttrib "title" tag
   let uid = fromAttrib "id" tag
   let cls = words $ fromAttrib "class" tag
@@ -624,9 +618,9 @@ pImage = do
   tag <- pSelfClosing (=="img") (isJust . lookup "src")
   mbBaseHref <- baseHref <$> getState
   let url' = fromAttrib "src" tag
-  let url = case (isURI url', mbBaseHref) of
-                 (False, Just h) -> h ++ url'
-                 _               -> url'
+  let url = case (parseURIReference url', mbBaseHref) of
+                 (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
+                 _                   -> url'
   let title = fromAttrib "title" tag
   let alt = fromAttrib "alt" tag
   let uid = fromAttrib "id" tag
@@ -945,7 +939,7 @@ htmlInBalanced f = try $ do
                         (TagClose _ : TagPosition er ec : _) -> do
                           let ls = er - sr
                           let cs = ec - sc
-                          lscontents <- concat <$> count ls anyLine
+                          lscontents <- unlines <$> count ls anyLine
                           cscontents <- count cs anyChar
                           (_,closetag) <- htmlTag (~== TagClose tn)
                           return (lscontents ++ cscontents ++ closetag)
@@ -977,11 +971,20 @@ htmlTag :: Monad m
 htmlTag f = try $ do
   lookAhead (char '<')
   inp <- getInput
-  let (next : rest) = canonicalizeTags $ parseTagsOptions
-                       parseOptions{ optTagWarning = True } inp
+  let (next : _) = canonicalizeTags $ parseTagsOptions
+                       parseOptions{ optTagWarning = False } inp
   guard $ f next
+  let handleTag tagname = do
+       -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
+       -- should NOT be parsed as an HTML tag, see #2277
+       guard $ not ('.' `elem` tagname)
+       -- <https://example.org> should NOT be a tag either.
+       -- tagsoup will parse it as TagOpen "https:" [("example.org","")]
+       guard $ not (null tagname)
+       guard $ last tagname /= ':'
+       rendered <- manyTill anyChar (char '>')
+       return (next, rendered ++ ">")
   case next of
-       TagWarning _ -> fail "encountered TagWarning"
        TagComment s
          | "<!--" `isPrefixOf` inp -> do
           count (length s + 4) anyChar
@@ -989,13 +992,9 @@ htmlTag f = try $ do
           char '>'
           return (next, "<!--" ++ s ++ "-->")
          | otherwise -> fail "bogus comment mode, HTML5 parse error"
-       _            -> do
-          -- we get a TagWarning on things like
-          -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
-          -- which should NOT be parsed as an HTML tag, see #2277
-          guard $ not $ hasTagWarning rest
-          rendered <- manyTill anyChar (char '>')
-          return (next, rendered ++ ">")
+       TagOpen tagname _attr -> handleTag tagname
+       TagClose tagname -> handleTag tagname
+       _ -> mzero
 
 mkAttr :: [(String, String)] -> Attr
 mkAttr attr = (attribsId, attribsClasses, attribsKV)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 587726084..e43714526 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -122,9 +122,6 @@ inList = do
   ctx <- stateParserContext <$> getState
   guard (ctx == ListItemState)
 
-isNull :: F Inlines -> Bool
-isNull ils = B.isNull $ runF ils def
-
 spnl :: Parser [Char] st ()
 spnl = try $ do
   skipSpaces
@@ -188,31 +185,38 @@ charsInBalancedBrackets openBrackets =
 -- document structure
 --
 
-titleLine :: MarkdownParser (F Inlines)
-titleLine = try $ do
+rawTitleBlockLine :: MarkdownParser String
+rawTitleBlockLine = do
   char '%'
   skipSpaces
-  res <- many $ (notFollowedBy newline >> inline)
-             <|> try (endline >> whitespace)
-  newline
+  first <- anyLine
+  rest <- many $ try $ do spaceChar
+                          notFollowedBy blankline
+                          skipSpaces
+                          anyLine
+  return $ trim $ unlines (first:rest)
+
+titleLine :: MarkdownParser (F Inlines)
+titleLine = try $ do
+  raw <- rawTitleBlockLine
+  res <- parseFromString (many inline) raw
   return $ trimInlinesF $ mconcat res
 
 authorsLine :: MarkdownParser (F [Inlines])
 authorsLine = try $ do
-  char '%'
-  skipSpaces
-  authors <- sepEndBy (many (notFollowedBy (satisfy $ \c ->
-                                c == ';' || c == '\n') >> inline))
-                       (char ';' <|>
-                        try (newline >> notFollowedBy blankline >> spaceChar))
-  newline
-  return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors
+  raw <- rawTitleBlockLine
+  let sep = (char ';' <* spaces) <|> newline
+  let pAuthors = sepEndBy
+            (trimInlinesF . mconcat <$> many
+                 (try $ notFollowedBy sep >> inline))
+            sep
+  sequence <$> parseFromString pAuthors raw
 
 dateLine :: MarkdownParser (F Inlines)
 dateLine = try $ do
-  char '%'
-  skipSpaces
-  trimInlinesF . mconcat <$> manyTill inline newline
+  raw <- rawTitleBlockLine
+  res <- parseFromString (many inline) raw
+  return $ trimInlinesF $ mconcat res
 
 titleBlock :: MarkdownParser ()
 titleBlock = pandocTitleBlock <|> mmdTitleBlock
@@ -1354,16 +1358,18 @@ pipeTable = try $ do
   nonindentSpaces
   lookAhead nonspaceChar
   (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
+  let heads' = take (length aligns) <$> heads
   lines' <- many pipeTableRow
+  let lines'' = map (take (length aligns) <$>) lines'
   let maxlength = maximum $
-       map (\x -> length . stringify $ runF x def) (heads : lines')
+       map (\x -> length . stringify $ runF x def) (heads' : lines'')
   numColumns <- getOption readerColumns
   let widths = if maxlength > numColumns
                   then map (\len ->
                            fromIntegral (len + 1) / fromIntegral numColumns)
                              seplengths
                   else replicate (length aligns) 0.0
-  return $ (aligns, widths, heads, sequence lines')
+  return $ (aligns, widths, heads', sequence lines'')
 
 sepPipe :: MarkdownParser ()
 sepPipe = try $ do
@@ -1372,25 +1378,27 @@ sepPipe = try $ do
 
 -- parse a row, also returning probable alignments for org-table cells
 pipeTableRow :: MarkdownParser (F [Blocks])
-pipeTableRow = do
+pipeTableRow = try $ do
+  scanForPipe
   skipMany spaceChar
   openPipe <- (True <$ char '|') <|> return False
-  let cell = mconcat <$>
-                 many (notFollowedBy (blankline <|> char '|') >> inline)
-  first <- cell
-  rest <- many $ sepPipe *> cell
+  -- split into cells
+  let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
+       <|> void (noneOf "|\n\r")
+  let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
+        parseFromString pipeTableCell
+  cells <- cellContents `sepEndBy1` (char '|')
   -- surrounding pipes needed for a one-column table:
-  guard $ not (null rest && not openPipe)
-  optional (char '|')
+  guard $ not (length cells == 1 && not openPipe)
   blankline
-  let cells  = sequence (first:rest)
-  return $ do
-    cells' <- cells
-    return $ map
-        (\ils ->
-           case trimInlines ils of
-                 ils' | B.isNull ils' -> mempty
-                      | otherwise   -> B.plain $ ils') cells'
+  return $ sequence cells
+
+pipeTableCell :: MarkdownParser (F Blocks)
+pipeTableCell = do
+  result <- many inline
+  if null result
+     then return mempty
+     else return $ B.plain . mconcat <$> sequence result
 
 pipeTableHeaderPart :: Parser [Char] st (Alignment, Int)
 pipeTableHeaderPart = try $ do
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 950497992..d3cee08e2 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -225,7 +225,7 @@ table = do
                          Nothing -> 1.0
   caption <- option mempty tableCaption
   optional rowsep
-  hasheader <- option False $ True <$ (lookAhead (char '!'))
+  hasheader <- option False $ True <$ (lookAhead (skipSpaces *> char '!'))
   (cellspecs',hdr) <- unzip <$> tableRow
   let widths = map ((tableWidth *) . snd) cellspecs'
   let restwidth = tableWidth - sum widths
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index a925c1d84..68e89263c 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -59,7 +59,9 @@ readOdt _ bytes = case bytesToOdt bytes of
 
 --
 bytesToOdt :: B.ByteString -> Either PandocError Pandoc
-bytesToOdt bytes = archiveToOdt $ toArchive bytes
+bytesToOdt bytes = case toArchiveOrFail bytes of
+  Right archive -> archiveToOdt archive
+  Left _        -> Left $ ParseFailure "Couldn't parse odt file."
 
 --
 archiveToOdt :: Archive -> Either PandocError Pandoc
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 7dd611be3..5a50a8f34 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -35,6 +35,7 @@ import           Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
                                        trimInlines )
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Compat.Monoid ((<>))
+import           Text.Pandoc.Error
 import           Text.Pandoc.Options
 import qualified Text.Pandoc.Parsing as P
 import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
@@ -49,7 +50,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 import           Control.Arrow (first)
 import           Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks, local)
-import           Data.Char (isAlphaNum, toLower)
+import           Data.Char (isAlphaNum, isSpace, toLower)
 import           Data.Default
 import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
@@ -57,8 +58,6 @@ import qualified Data.Set as Set
 import           Data.Maybe (fromMaybe, isJust)
 import           Network.HTTP (urlEncode)
 
-import           Text.Pandoc.Error
-
 -- | Parse org-mode string and return a Pandoc document.
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
@@ -391,6 +390,9 @@ lookupBlockAttribute key =
 
 type BlockProperties = (Int, String)  -- (Indentation, Block-Type)
 
+updateIndent :: BlockProperties -> Int -> BlockProperties
+updateIndent (_, blkType) indent = (indent, blkType)
+
 orgBlock :: OrgParser (F Blocks)
 orgBlock = try $ do
   blockProp@(_, blkType) <- blockHeaderStart
@@ -407,11 +409,23 @@ orgBlock = try $ do
       _         -> withParsed (fmap $ divWithClass blkType)
 
 blockHeaderStart :: OrgParser (Int, String)
-blockHeaderStart = try $ (,) <$> indent <*> blockType
+blockHeaderStart = try $ (,) <$> indentation <*> blockType
  where
-  indent    = length      <$> many spaceChar
   blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
 
+indentation :: OrgParser Int
+indentation = try $ do
+  tabStop  <- getOption readerTabStop
+  s        <- many spaceChar
+  return $ spaceLength tabStop s
+
+spaceLength :: Int -> String -> Int
+spaceLength tabStop s = (sum . map charLen) s
+ where
+  charLen ' '  = 1
+  charLen '\t' = tabStop
+  charLen _    = 0
+
 withRaw'   :: (String   -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
 withRaw'   f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
 
@@ -450,7 +464,8 @@ codeBlock blkProp = do
   skipSpaces
   (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders)
   id'               <- fromMaybe "" <$> lookupBlockAttribute "name"
-  content           <- rawBlockContent blkProp
+  leadingIndent     <- lookAhead indentation
+  content           <- rawBlockContent (updateIndent blkProp leadingIndent)
   resultsContent    <- followingResultsBlock
   let includeCode    = exportsCode kv
   let includeResults = exportsResults kv
@@ -472,7 +487,7 @@ rawBlockContent (indent, blockType) = try $
   unlines . map commaEscaped <$> manyTill indentedLine blockEnder
  where
    indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
-   blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+   blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType)
 
 parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
 parsedBlockContent blkProps = try $ do
@@ -758,9 +773,13 @@ data OrgTableRow = OrgContentRow (F [Blocks])
                  | OrgAlignRow [Alignment]
                  | OrgHlineRow
 
+-- OrgTable is strongly related to the pandoc table ADT.  Using the same
+-- (i.e. pandoc-global) ADT would mean that the reader would break if the
+-- global structure was to be changed, which would be bad.  The final table
+-- should be generated using a builder function.  Column widths aren't
+-- implemented yet, so they are not tracked here.
 data OrgTable = OrgTable
-  { orgTableColumns    :: Int
-  , orgTableAlignments :: [Alignment]
+  { orgTableAlignments :: [Alignment]
   , orgTableHeader     :: [Blocks]
   , orgTableRows       :: [[Blocks]]
   }
@@ -776,7 +795,7 @@ table = try $ do
 orgToPandocTable :: OrgTable
                  -> Inlines
                  -> Blocks
-orgToPandocTable (OrgTable _ aligns heads lns) caption =
+orgToPandocTable (OrgTable aligns heads lns) caption =
   B.table caption (zip aligns $ repeat 0) heads lns
 
 tableStart :: OrgParser Char
@@ -787,18 +806,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
 
 tableContentRow :: OrgParser OrgTableRow
 tableContentRow = try $
-  OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
+  OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
 
 tableContentCell :: OrgParser (F Blocks)
 tableContentCell = try $
-  fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
-
-endOfCell :: OrgParser Char
-endOfCell = try $ char '|' <|> lookAhead newline
+  fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
 
 tableAlignRow :: OrgParser OrgTableRow
-tableAlignRow = try $
-  OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
+tableAlignRow = try $ do
+  tableStart
+  cells <- many1Till tableAlignCell newline
+  -- Empty rows are regular (i.e. content) rows, not alignment rows.
+  guard $ any (/= AlignDefault) cells
+  return $ OrgAlignRow cells
 
 tableAlignCell :: OrgParser Alignment
 tableAlignCell =
@@ -813,65 +833,61 @@ tableAlignCell =
     where emptyCell = try $ skipSpaces *> endOfCell
 
 tableAlignFromChar :: OrgParser Alignment
-tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
-                                  , char 'c' *> return AlignCenter
-                                  , char 'r' *> return AlignRight
-                                  ]
+tableAlignFromChar = try $
+  choice [ char 'l' *> return AlignLeft
+         , char 'c' *> return AlignCenter
+         , char 'r' *> return AlignRight
+         ]
 
 tableHline :: OrgParser OrgTableRow
 tableHline = try $
   OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
 
+endOfCell :: OrgParser Char
+endOfCell = try $ char '|' <|> lookAhead newline
+
 rowsToTable :: [OrgTableRow]
             -> F OrgTable
-rowsToTable = foldM (flip rowToContent) zeroTable
-  where zeroTable = OrgTable 0 mempty mempty mempty
-
-normalizeTable :: OrgTable
-               -> OrgTable
-normalizeTable (OrgTable cols aligns heads lns) =
-  let aligns' = fillColumns aligns AlignDefault
-      heads'  = if heads == mempty
-                then mempty
-                else fillColumns heads (B.plain mempty)
-      lns'    = map (`fillColumns` B.plain mempty) lns
-      fillColumns base padding = take cols $ base ++ repeat padding
-  in OrgTable cols aligns' heads' lns'
+rowsToTable = foldM rowToContent emptyTable
+ where emptyTable = OrgTable mempty mempty mempty
 
+normalizeTable :: OrgTable -> OrgTable
+normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows
+ where
+   refRow = if heads /= mempty
+            then heads
+            else if rows == mempty then mempty else head rows
+   cols = length refRow
+   fillColumns base padding = take cols $ base ++ repeat padding
+   aligns' = fillColumns aligns AlignDefault
 
 -- One or more horizontal rules after the first content line mark the previous
 -- line as a header.  All other horizontal lines are discarded.
-rowToContent :: OrgTableRow
-             -> OrgTable
-             -> F OrgTable
-rowToContent OrgHlineRow        t = maybeBodyToHeader t
-rowToContent (OrgAlignRow as)   t = setLongestRow as =<< setAligns as t
-rowToContent (OrgContentRow rf) t = do
-  rs <- rf
-  setLongestRow rs =<< appendToBody rs t
-
-setLongestRow :: [a]
-              -> OrgTable
-              -> F OrgTable
-setLongestRow rs t =
-  return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
-
-maybeBodyToHeader :: OrgTable
-                  -> F OrgTable
-maybeBodyToHeader t = case t of
-  OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
-         return t{ orgTableHeader = b , orgTableRows = [] }
-  _   -> return t
-
-appendToBody :: [Blocks]
-             -> OrgTable
+rowToContent :: OrgTable
+             -> OrgTableRow
              -> F OrgTable
-appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
+rowToContent orgTable row =
+  case row of
+    OrgHlineRow       -> return singleRowPromotedToHeader
+    OrgAlignRow as    -> return . setAligns $ as
+    OrgContentRow cs  -> appendToBody cs
+ where
+   singleRowPromotedToHeader :: OrgTable
+   singleRowPromotedToHeader = case orgTable of
+     OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
+            orgTable{ orgTableHeader = b , orgTableRows = [] }
+     _   -> orgTable
 
-setAligns :: [Alignment]
-          -> OrgTable
-          -> F OrgTable
-setAligns aligns t = return $ t{ orgTableAlignments = aligns }
+   setAligns :: [Alignment] -> OrgTable
+   setAligns aligns = orgTable{ orgTableAlignments = aligns }
+
+   appendToBody :: F [Blocks] -> F OrgTable
+   appendToBody frow = do
+     newRow <- frow
+     let oldRows = orgTableRows orgTable
+     -- NOTE: Appending with (++) is O(n) per row, i.e. O(n^2) for the whole
+     -- table.  This should be changed if performance ever becomes a problem.
+     return orgTable{ orgTableRows = oldRows ++ [newRow] }
 
 
 --
@@ -1565,14 +1581,14 @@ inlineLaTeX = try $ do
 
    parseAsMathMLSym :: String -> Maybe Inlines
    parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
-    -- dropWhileEnd would be nice here, but it's not available before base 4.5
-    where clean = reverse . dropWhile (`elem` ("{}" :: String)) . reverse . drop 1
+    -- drop initial backslash and any trailing "{}"
+    where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
 
    state :: ParserState
    state = def{ stateOptions = def{ readerParseRaw = True }}
 
-   texMathToPandoc inp = (maybeRight $ readTeX inp) >>=
-                         writePandoc DisplayInline
+   texMathToPandoc :: String -> Maybe [Inline]
+   texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
 
 maybeRight :: Either a b -> Maybe b
 maybeRight = either (const Nothing) Just
@@ -1582,11 +1598,18 @@ inlineLaTeXCommand = try $ do
   rest <- getInput
   case runParser rawLaTeXInline def "source" rest of
     Right (RawInline _ cs) -> do
-      let len = length cs
+      -- drop any trailing whitespace; it is not part of the command as far
+      -- as org mode is concerned.
+      let cmdNoSpc = dropWhileEnd isSpace cs
+      let len = length cmdNoSpc
       count len anyChar
-      return cs
+      return cmdNoSpc
     _ -> mzero
 
+-- Taken from Data.OldList; Data.List.dropWhileEnd needs base >= 4.5.
+dropWhileEnd :: (a -> Bool) -> [a] -> [a]
+dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
+
 smart :: OrgParser (F Inlines)
 smart = do
   getOption readerSmart >>= guard