7 files changed, 239 insertions, 139 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 6a799e270..56cb16b20 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,5 +1,6 @@
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
-import Data.Char (toUpper, isDigit)
+import Data.Char (toUpper)
+import Text.Pandoc.Shared (safeRead)
 import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder
@@ -11,6 +12,7 @@ import Data.Char (isSpace)
 import Control.Monad.State
 import Control.Applicative ((<$>))
 import Data.List (intersperse)
+import Data.Maybe (fromMaybe)
 
 {-
 
@@ -682,10 +684,9 @@ parseBlock (Elem e) =
                                "lowerroman" -> LowerRoman
                                "upperroman" -> UpperRoman
                                _            -> Decimal
-          let start = case attrValue "override" <$>
-                            filterElement (named "listitem") e of
-                              Just x@(_:_) | all isDigit x -> read x
-                              _                            -> 1
+          let start = fromMaybe 1 $
+                      (attrValue "override" <$> filterElement (named "listitem") e)
+                       >>= safeRead
           orderedListWith (start,listStyle,DefaultDelim)
             <$> listitems
         "variablelist" -> definitionList <$> deflistitems
@@ -779,7 +780,7 @@ parseBlock (Elem e) =
                       caption <- case filterChild isCaption e of
                                        Just t  -> getInlines t
                                        Nothing -> return mempty
-                      let e' = maybe e id $ filterChild (named "tgroup") e
+                      let e' = fromMaybe e $ filterChild (named "tgroup") e
                       let isColspec x = named "colspec" x || named "col" x
                       let colspecs = case filterChild (named "colgroup") e' of
                                            Just c -> filterChildren isColspec c
@@ -801,11 +802,14 @@ parseBlock (Elem e) =
                                                 Just "center" -> AlignCenter
                                                 _             -> AlignDefault
                       let toWidth c = case findAttr (unqual "colwidth") c of
-                                                Just w -> read $ filter (\x ->
+                                                Just w -> fromMaybe 0
+                                                   $ safeRead $ '0': filter (\x ->
                                                      (x >= '0' && x <= '9')
                                                       || x == '.') w
                                                 Nothing -> 0 :: Double
-                      let numrows = maximum $ map length bodyrows
+                      let numrows = case bodyrows of
+                                         []   -> 0
+                                         xs   -> maximum $ map length xs
                       let aligns = case colspecs of
                                      []  -> replicate numrows AlignDefault
                                      cs  -> map toAlignment cs
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 7ca554fa3..506fe7770 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -76,9 +76,18 @@ pBody :: TagParser [Block]
 pBody = pInTags "body" block
 
 pHead :: TagParser [Block]
-pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag)
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag)
   where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces
         setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t))
+        pMetaTag = do
+          mt <- pSatisfy (~== TagOpen "meta" [])
+          let name = fromAttrib "name" mt
+          if null name
+             then return []
+             else do
+               let content = fromAttrib "content" mt
+               updateState $ B.setMeta name (B.text content)
+               return []
 
 block :: TagParser [Block]
 block = choice
@@ -92,6 +101,7 @@ block = choice
             , pHead
             , pBody
             , pPlain
+            , pDiv
             , pRawHtmlBlock
             ]
 
@@ -177,6 +187,13 @@ pRawTag = do
      then return []
      else return $ renderTags' [tag]
 
+pDiv :: TagParser [Block]
+pDiv = try $ do
+  getOption readerParseRaw >>= guard
+  TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
+  contents <- pInTags "div" block
+  return [Div (mkAttr attr) contents]
+
 pRawHtmlBlock :: TagParser [Block]
 pRawHtmlBlock = do
   raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
@@ -199,7 +216,7 @@ pHeader = try $ do
   let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
   let level = read (drop 1 tagtype)
   contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof)
-  let ident = maybe "" id $ lookup "id" attr
+  let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
   return $ if bodyTitle
@@ -249,7 +266,7 @@ pCol = try $ do
   skipMany pBlank
   return $ case lookup "width" attribs of
            Just x | not (null x) && last x == '%' ->
-             maybe 0.0 id $ safeRead ('0':'.':init x)
+             fromMaybe 0.0 $ safeRead ('0':'.':init x)
            _ -> 0.0
 
 pColgroup :: TagParser [Double]
@@ -295,11 +312,7 @@ pCodeBlock = try $ do
   let result = case reverse result' of
                     '\n':_   -> init result'
                     _        -> result'
-  let attribsId = fromMaybe "" $ lookup "id" attr
-  let attribsClasses = words $ fromMaybe "" $ lookup "class" attr
-  let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
-  let attribs = (attribsId, attribsClasses, attribsKV)
-  return [CodeBlock attribs result]
+  return [CodeBlock (mkAttr attr) result]
 
 inline :: TagParser [Inline]
 inline = choice
@@ -314,6 +327,7 @@ inline = choice
            , pLink
            , pImage
            , pCode
+           , pSpan
            , pRawHtmlInline
            ]
 
@@ -397,11 +411,14 @@ pCode :: TagParser [Inline]
 pCode = try $ do
   (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
   result <- manyTill pAnyTag (pCloses open)
-  let ident = fromMaybe "" $ lookup "id" attr
-  let classes = words $ fromMaybe [] $ lookup "class" attr
-  let rest = filter (\(x,_) -> x /= "id" && x /= "class") attr
-  return [Code (ident,classes,rest)
-         $ intercalate " " $ lines $ innerText result]
+  return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result]
+
+pSpan :: TagParser [Inline]
+pSpan = try $ do
+  getOption readerParseRaw >>= guard
+  TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
+  contents <- pInTags "span" inline
+  return [Span (mkAttr attr) contents]
 
 pRawHtmlInline :: TagParser [Inline]
 pRawHtmlInline = do
@@ -459,7 +476,13 @@ pBlank = try $ do
 
 pTagContents :: Parser [Char] ParserState Inline
 pTagContents =
-  pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
+      Math InlineMath  `fmap` mathInline
+  <|> Math DisplayMath `fmap` mathDisplay
+  <|> pStr
+  <|> pSpace
+  <|> smartPunctuation pTagContents
+  <|> pSymbol
+  <|> pBad
 
 pStr :: Parser [Char] ParserState Inline
 pStr = do
@@ -474,6 +497,7 @@ isSpecial '"' = True
 isSpecial '\'' = True
 isSpecial '.' = True
 isSpecial '-' = True
+isSpecial '$' = True
 isSpecial '\8216' = True
 isSpecial '\8217' = True
 isSpecial '\8220' = True
@@ -549,7 +573,7 @@ blockHtmlTags = ["address", "article", "aside", "blockquote", "body", "button",
                  "noframes", "noscript", "object", "ol", "output", "p", "pre", "progress",
                  "section", "table", "tbody", "textarea", "thead", "tfoot", "ul", "dd",
                  "dt", "frameset", "li", "tbody", "td", "tfoot",
-                 "th", "thead", "tr", "script", "style", "video"]
+                 "th", "thead", "tr", "script", "style", "svg", "video"]
 
 -- We want to allow raw docbook in markdown documents, so we
 -- include docbook block tags here too.
@@ -648,3 +672,10 @@ htmlTag f = try $ do
        _            -> do
           rendered <- manyTill anyChar (char '>')
           return (next, rendered ++ ">")
+
+mkAttr :: [(String, String)] -> Attr
+mkAttr attr = (attribsId, attribsClasses, attribsKV)
+  where attribsId = fromMaybe "" $ lookup "id" attr
+        attribsClasses = words $ fromMaybe "" $ lookup "class" attr
+        attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index ff5b73348..51271edc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,12 +38,13 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Walk
 import Text.Pandoc.Shared
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding ((<|>), many, optional, space)
+import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
+                                   mathDisplay, mathInline)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Char ( chr, ord )
 import Control.Monad
 import Text.Pandoc.Builder
-import Data.Char (isLetter)
+import Data.Char (isLetter, isAlphaNum)
 import Control.Applicative
 import Data.Monoid
 import Data.Maybe (fromMaybe)
@@ -163,28 +164,40 @@ mathChars = concat <$>
       <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar)
        )
 
+quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines
+quoted' f starter ender = do
+  startchs <- starter
+  try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs
+
 double_quote :: LP Inlines
-double_quote = (doubleQuoted . mconcat) <$>
-  (try $ string "``" *> manyTill inline (try $ string "''"))
+double_quote =
+  (   quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
+  <|> quoted' doubleQuoted (string "“")        (void $ char '”')
+  -- the following is used by babel for localized quotes:
+  <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
+  <|> quoted' doubleQuoted (string "\"")       (void $ char '"')
+  )
 
 single_quote :: LP Inlines
-single_quote = (singleQuoted . mconcat) <$>
-  (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter))
+single_quote =
+  (  quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
+  <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
+  )
 
 inline :: LP Inlines
 inline = (mempty <$ comment)
      <|> (space  <$ sp)
      <|> inlineText
      <|> inlineCommand
-     <|> grouped inline
+     <|> inlineGroup
      <|> (char '-' *> option (str "-")
            ((char '-') *> option (str "–") (str "—" <$ char '-')))
      <|> double_quote
      <|> single_quote
-     <|> (str "“" <$ try (string "``"))  -- nb. {``} won't be caught by double_quote
      <|> (str "”" <$ try (string "''"))
-     <|> (str "‘" <$ char '`')           -- nb. {`} won't be caught by single_quote
+     <|> (str "”" <$ char '”')
      <|> (str "’" <$ char '\'')
+     <|> (str "’" <$ char '’')
      <|> (str "\160" <$ char '~')
      <|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
      <|> (mathInline  $ char '$' *> mathChars <* char '$')
@@ -199,6 +212,15 @@ inline = (mempty <$ comment)
 inlines :: LP Inlines
 inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
 
+inlineGroup :: LP Inlines
+inlineGroup = do
+  ils <- grouped inline
+  if isNull ils
+     then return mempty
+     else return $ spanWith nullAttr ils
+          -- we need the span so we can detitlecase bibtex entries;
+          -- we need to know when something is {C}apitalized
+
 block :: LP Blocks
 block = (mempty <$ comment)
     <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
@@ -364,6 +386,7 @@ inlineCommands = M.fromList $
   , ("backslash", lit "\\")
   , ("slash", lit "/")
   , ("textbf", strong <$> tok)
+  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
   , ("ldots", lit "…")
   , ("dots", lit "…")
   , ("mdots", lit "…")
@@ -434,6 +457,7 @@ inlineCommands = M.fromList $
   , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
   , ("verb", doverb)
   , ("lstinline", doverb)
+  , ("Verb", doverb)
   , ("texttt", (code . stringify . toList) <$> tok)
   , ("url", (unescapeURL <$> braced) >>= \url ->
        pure (link url "" (str url)))
@@ -518,9 +542,7 @@ inNote ils =
 
 unescapeURL :: String -> String
 unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
-  where isEscapable '%' = True
-        isEscapable '#' = True
-        isEscapable _   = False
+  where isEscapable c = c `elem` "#$%&~_^\\{}"
 unescapeURL (x:xs) = x:unescapeURL xs
 unescapeURL [] = ""
 
@@ -747,7 +769,7 @@ inlineText :: LP Inlines
 inlineText = str <$> many1 inlineChar
 
 inlineChar :: LP Char
-inlineChar = noneOf "\\$%^_&~#{}^'`-[] \t\n"
+inlineChar = noneOf "\\$%^_&~#{}^'`\"‘’“”-[] \t\n"
 
 environment :: LP Blocks
 environment = do
@@ -852,9 +874,8 @@ verbatimEnv = do
   (_,r) <- withRaw $ do
              controlSeq "begin"
              name <- braced
-             guard $ name == "verbatim" || name == "Verbatim" ||
-                     name == "lstlisting" || name == "minted" ||
-                     name == "alltt"
+             guard $ name `elem` ["verbatim", "Verbatim", "lstlisting",
+                                  "minted", "alltt"]
              verbEnv name
   rest <- getInput
   return (r,rest)
@@ -1030,14 +1051,14 @@ paragraph = do
 preamble :: LP Blocks
 preamble = mempty <$> manyTill preambleBlock beginDoc
   where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}"
-        preambleBlock =  (mempty <$ comment)
-                     <|> (mempty <$ sp)
-                     <|> (mempty <$ blanklines)
-                     <|> (mempty <$ macro)
-                     <|> blockCommand
-                     <|> (mempty <$ anyControlSeq)
-                     <|> (mempty <$ braced)
-                     <|> (mempty <$ anyChar)
+        preambleBlock =  (void comment)
+                     <|> (void sp)
+                     <|> (void blanklines)
+                     <|> (void macro)
+                     <|> (void blockCommand)
+                     <|> (void anyControlSeq)
+                     <|> (void braced)
+                     <|> (void anyChar)
 
 -------
 
@@ -1058,6 +1079,7 @@ simpleCiteArgs = try $ do
   first  <- optionMaybe $ toList <$> opt
   second <- optionMaybe $ toList <$> opt
   char '{'
+  optional sp
   keys <- manyTill citationLabel (char '}')
   let (pre, suf) = case (first  , second ) of
         (Just s , Nothing) -> (mempty, s )
@@ -1073,18 +1095,24 @@ simpleCiteArgs = try $ do
   return $ addPrefix pre $ addSuffix suf $ map conv keys
 
 citationLabel :: LP String
-citationLabel  = trim <$>
-  (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp)
+citationLabel  = optional sp *>
+  (many1 (satisfy isBibtexKeyChar)
+          <* optional sp
+          <* optional (char ',')
+          <* optional sp)
+  where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*"
 
 cites :: CitationMode -> Bool -> LP [Citation]
 cites mode multi = try $ do
   cits <- if multi
              then many1 simpleCiteArgs
              else count 1 simpleCiteArgs
-  let (c:cs) = concat cits
+  let cs = concat cits
   return $ case mode of
-        AuthorInText   -> c {citationMode = mode} : cs
-        _              -> map (\a -> a {citationMode = mode}) (c:cs)
+        AuthorInText -> case cs of
+                             (c:rest) -> c {citationMode = mode} : rest
+                             []       -> []
+        _            -> map (\a -> a {citationMode = mode}) cs
 
 citation :: String -> CitationMode -> Bool -> LP Inlines
 citation name mode multi = do
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 9b98cbc3e..3feafd362 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -215,10 +215,10 @@ pandocTitleBlock = try $ do
                  author' <- author
                  date' <- date
                  return $
-                   ( if B.isNull title' then id else B.setMeta "title" title'
-                   . if null author' then id else B.setMeta "author" author'
-                   . if B.isNull date' then id else B.setMeta "date" date' )
-                   nullMeta
+                     (if B.isNull title' then id else B.setMeta "title" title')
+                   . (if null author' then id else B.setMeta "author" author')
+                   . (if B.isNull date' then id else B.setMeta "date" date')
+                   $ nullMeta
   updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
 
 yamlMetaBlock :: MarkdownParser (F Blocks)
@@ -227,6 +227,7 @@ yamlMetaBlock = try $ do
   pos <- getPosition
   string "---"
   blankline
+  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
   rawYamlLines <- manyTill anyLine stopLine
   -- by including --- and ..., we allow yaml blocks with just comments:
   let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
@@ -443,6 +444,9 @@ block = choice [ mempty <$ blanklines
                , codeBlockFenced
                , yamlMetaBlock
                , guardEnabled Ext_latex_macros *> (macro >>= return . return)
+               -- note: bulletList needs to be before header because of
+               -- the possibility of empty list items: -
+               , bulletList
                , header
                , lhsCodeBlock
                , rawTeXBlock
@@ -453,7 +457,6 @@ block = choice [ mempty <$ blanklines
                , codeBlockIndented
                , blockQuote
                , hrule
-               , bulletList
                , orderedList
                , definitionList
                , noteBlock
@@ -698,7 +701,7 @@ bulletListStart = try $ do
   skipNonindentSpaces
   notFollowedBy' (() <$ hrule)     -- because hrules start out just like lists
   satisfy isBulletListMarker
-  spaceChar
+  spaceChar <|> lookAhead newline
   skipSpaces
 
 anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim)
@@ -726,11 +729,15 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ())
 -- parse a line of a list item (start = parser for beginning of list item)
 listLine :: MarkdownParser String
 listLine = try $ do
-  notFollowedBy blankline
   notFollowedBy' (do indentSpaces
-                     many (spaceChar)
+                     many spaceChar
                      listStart)
-  chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline
+  notFollowedBy' $ htmlTag (~== TagClose "div")
+  chunks <- manyTill
+              (  many1 (satisfy $ \c -> c /= '\n' && c /= '<')
+             <|> liftM snd (htmlTag isCommentTag)
+             <|> count 1 anyChar
+              ) newline
   return $ concat chunks
 
 -- parse raw text for one list item, excluding start marker and continuations
@@ -739,7 +746,7 @@ rawListItem :: MarkdownParser a
 rawListItem start = try $ do
   start
   first <- listLine
-  rest <- many (notFollowedBy listStart >> listLine)
+  rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
   blanks <- many blankline
   return $ unlines (first:rest) ++ blanks
 
@@ -757,6 +764,7 @@ listContinuationLine :: MarkdownParser String
 listContinuationLine = try $ do
   notFollowedBy blankline
   notFollowedBy' listStart
+  notFollowedBy' $ htmlTag (~== TagClose "div")
   optional indentSpaces
   result <- anyLine
   return $ result ++ "\n"
@@ -781,8 +789,8 @@ listItem start = try $ do
 orderedList :: MarkdownParser (F Blocks)
 orderedList = try $ do
   (start, style, delim) <- lookAhead anyOrderedListStart
-  unless ((style == DefaultStyle || style == Decimal || style == Example) &&
-          (delim == DefaultDelim || delim == Period)) $
+  unless (style `elem` [DefaultStyle, Decimal, Example] &&
+          delim `elem` [DefaultDelim, Period]) $
     guardEnabled Ext_fancy_lists
   when (style == Example) $ guardEnabled Ext_example_lists
   items <- fmap sequence $ many1 $ listItem
@@ -871,8 +879,11 @@ para = try $ do
     $ try $ do
             newline
             (blanklines >> return mempty)
-              <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
-              <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
+              <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
+              <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
+              <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
+              <|> (guardEnabled Ext_lists_without_preceding_blankline >>
+                       () <$ lookAhead listStart)
             return $ do
               result' <- result
               case B.toList result' of
@@ -891,7 +902,9 @@ plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
 --
 
 htmlElement :: MarkdownParser String
-htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
+htmlElement = rawVerbatimBlock
+          <|> strictHtmlBlock
+          <|> liftM snd (htmlTag isBlockTag)
 
 htmlBlock :: MarkdownParser (F Blocks)
 htmlBlock = do
@@ -912,8 +925,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag)
 
 rawVerbatimBlock :: MarkdownParser String
 rawVerbatimBlock = try $ do
-  (TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
-                              t == "pre" || t == "style" || t == "script")
+  (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem
+                                                  ["pre", "style", "script"])
                               (const True))
   contents <- manyTill anyChar (htmlTag (~== TagClose tag))
   return $ open ++ contents ++ renderTags [TagClose tag]
@@ -1113,12 +1126,12 @@ multilineTableHeader headless = try $ do
                      then liftM (map (:[]) . tail .
                               splitStringByIndices (init indices)) $ lookAhead anyLine
                      else return $ transpose $ map
-                           (\ln -> tail $ splitStringByIndices (init indices) ln)
+                           (tail . splitStringByIndices (init indices))
                            rawContent
   let aligns   = zipWith alignType rawHeadsList lengths
   let rawHeads = if headless
                     then replicate (length dashes) ""
-                    else map unwords rawHeadsList
+                    else map (unlines . map trim) rawHeadsList
   heads <- fmap sequence $
            mapM (parseFromString (mconcat <$> many plain)) $
              map trim rawHeads
@@ -1175,7 +1188,7 @@ gridTableHeader headless = try $ do
   -- RST does not have a notion of alignments
   let rawHeads = if headless
                     then replicate (length dashes) ""
-                    else map unwords $ transpose
+                    else map (unlines . map trim) $ transpose
                        $ map (gridTableSplitLine indices) rawContent
   heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
   return (heads, aligns, indices)
@@ -1401,39 +1414,6 @@ math :: MarkdownParser (F Inlines)
 math =  (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
      <|> (return . B.math <$> (mathInline >>= applyMacros'))
 
-mathDisplay :: MarkdownParser String
-mathDisplay =
-      (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
-  <|> (guardEnabled Ext_tex_math_single_backslash >>
-       mathDisplayWith "\\[" "\\]")
-  <|> (guardEnabled Ext_tex_math_double_backslash >>
-       mathDisplayWith "\\\\[" "\\\\]")
-
-mathDisplayWith :: String -> String -> MarkdownParser String
-mathDisplayWith op cl = try $ do
-  string op
-  many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
-
-mathInline :: MarkdownParser String
-mathInline =
-      (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
-  <|> (guardEnabled Ext_tex_math_single_backslash >>
-       mathInlineWith "\\(" "\\)")
-  <|> (guardEnabled Ext_tex_math_double_backslash >>
-       mathInlineWith "\\\\(" "\\\\)")
-
-mathInlineWith :: String -> String -> MarkdownParser String
-mathInlineWith op cl = try $ do
-  string op
-  notFollowedBy space
-  words' <- many1Till (count 1 (noneOf "\n\\")
-                   <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
-                   <|> count 1 newline <* notFollowedBy' blankline
-                       *> return " ")
-              (try $ string cl)
-  notFollowedBy digit  -- to prevent capture of $5
-  return $ concat words'
-
 -- Parses material enclosed in *s, **s, _s, or __s.
 -- Designed to avoid backtracking.
 enclosure :: Char
@@ -1450,6 +1430,7 @@ enclosure c = do
 -- Parse inlines til you hit one c or a sequence of two cs.
 -- If one c, emit emph and then parse two.
 -- If two cs, emit strong and then parse one.
+-- Otherwise, emit ccc then the results.
 three :: Char -> MarkdownParser (F Inlines)
 three c = do
   contents <- mconcat <$> many (notFollowedBy (char c) >> inline)
@@ -1474,7 +1455,7 @@ one c prefix' = do
   contents <- mconcat <$> many (  (notFollowedBy (char c) >> inline)
                            <|> try (string [c,c] >>
                                     notFollowedBy (char c) >>
-                                    two c prefix') )
+                                    two c mempty) )
   (char c >> return (B.emph <$> (prefix' <> contents)))
     <|> return (return (B.str [c]) <> (prefix' <> contents))
 
@@ -1559,8 +1540,11 @@ endline :: MarkdownParser (F Inlines)
 endline = try $ do
   newline
   notFollowedBy blankline
+  guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
   guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
   guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
+  guardEnabled Ext_backtick_code_blocks >>
+     notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
   -- parse potential list-starts differently if in a list:
   st <- getState
   when (stateParserContext st == ListItemState) $ do
@@ -1738,7 +1722,7 @@ spanHtml = try $ do
   guardEnabled Ext_markdown_in_html_blocks
   (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
   contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
-  let ident = maybe "" id $ lookup "id" attrs
+  let ident = fromMaybe "" $ lookup "id" attrs
   let classes = maybe [] words $ lookup "class" attrs
   let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
   return $ B.spanWith (ident, classes, keyvals) <$> contents
@@ -1748,7 +1732,7 @@ divHtml = try $ do
   guardEnabled Ext_markdown_in_html_blocks
   (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" [])
   contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div"))
-  let ident = maybe "" id $ lookup "id" attrs
+  let ident = fromMaybe "" $ lookup "id" attrs
   let classes = maybe [] words $ lookup "class" attrs
   let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
   return $ B.divWith (ident, classes, keyvals) <$> contents
@@ -1768,12 +1752,11 @@ rawHtmlInline = do
 cite :: MarkdownParser (F Inlines)
 cite = do
   guardEnabled Ext_citations
-  citations <- textualCite <|> (fmap (flip B.cite unknownC) <$> normalCite)
+  citations <- textualCite
+            <|> do (cs, raw) <- withRaw normalCite
+                   return $ (flip B.cite (B.text raw)) <$> cs
   return citations
 
-unknownC :: Inlines
-unknownC = B.str "???"
-
 textualCite :: MarkdownParser (F Inlines)
 textualCite = try $ do
   (_, key) <- citeKey
@@ -1784,14 +1767,18 @@ textualCite = try $ do
                       , citationNoteNum = 0
                       , citationHash    = 0
                       }
-  mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite
+  mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
   case mbrest of
-       Just rest -> return $ (flip B.cite unknownC . (first:)) <$> rest
-       Nothing   -> (fmap (flip B.cite unknownC) <$> bareloc first) <|>
-                    return (do st <- askF
-                               return $ case M.lookup key (stateExamples st) of
-                                              Just n -> B.str (show n)
-                                              _      -> B.cite [first] unknownC)
+       Just (rest, raw) ->
+         return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
+               <$> rest
+       Nothing   ->
+         (do (cs, raw) <- withRaw $ bareloc first
+             return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs)
+         <|> return (do st <- askF
+                        return $ case M.lookup key (stateExamples st) of
+                                 Just n -> B.str (show n)
+                                 _      -> B.cite [first] $ B.str $ '@':key)
 
 bareloc :: Citation -> MarkdownParser (F [Citation])
 bareloc c = try $ do
@@ -1817,11 +1804,17 @@ normalCite = try $ do
 
 citeKey :: MarkdownParser (Bool, String)
 citeKey = try $ do
+  -- make sure we're not right after an alphanumeric,
+  -- since foo@bar.baz is probably an email address
+  lastStrPos <- stateLastStrPos <$> getState
+  pos <- getPosition
+  guard $ lastStrPos /= Just pos
   suppress_author <- option False (char '-' >> return True)
   char '@'
-  first <- letter
-  let internal p = try $ p >>~ lookAhead (letter <|> digit)
-  rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/")
+  first <- letter <|> char '_'
+  let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+  let internal p = try $ p >>~ lookAhead regchar
+  rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
   let key = first:rest
   return (suppress_author, key)
 
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 2b938cd82..8d8ea0199 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
+-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
 {-
   Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
 
@@ -43,7 +44,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
 import Text.Pandoc.XML ( fromEntities )
 import Text.Pandoc.Parsing hiding ( nested )
 import Text.Pandoc.Walk ( walk )
-import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
+import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim )
 import Data.Monoid (mconcat, mempty)
 import Control.Applicative ((<$>), (<*), (*>), (<$))
 import Control.Monad
@@ -51,7 +52,9 @@ import Data.List (intersperse, intercalate, isPrefixOf )
 import Text.HTML.TagSoup
 import Data.Sequence (viewl, ViewL(..), (<|))
 import qualified Data.Foldable as F
+import qualified Data.Map as M
 import Data.Char (isDigit, isSpace)
+import Data.Maybe (fromMaybe)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
 readMediaWiki :: ReaderOptions -- ^ Reader options
@@ -62,6 +65,8 @@ readMediaWiki opts s =
                                        , mwMaxNestingLevel = 4
                                        , mwNextLinkNumber  = 1
                                        , mwCategoryLinks = []
+                                       , mwHeaderMap = M.empty
+                                       , mwIdentifierList = []
                                        }
        "source" (s ++ "\n") of
           Left err'    -> error $ "\nError:\n" ++ show err'
@@ -71,10 +76,23 @@ data MWState = MWState { mwOptions         :: ReaderOptions
                        , mwMaxNestingLevel :: Int
                        , mwNextLinkNumber  :: Int
                        , mwCategoryLinks   :: [Inlines]
+                       , mwHeaderMap       :: M.Map Inlines String
+                       , mwIdentifierList  :: [String]
                        }
 
 type MWParser = Parser [Char] MWState
 
+instance HasReaderOptions MWParser where
+  askReaderOption f = (f . mwOptions) `fmap` getState
+
+instance HasHeaderMap MWParser where
+  getHeaderMap      = fmap mwHeaderMap getState
+  putHeaderMap hm   = updateState $ \st -> st{ mwHeaderMap = hm }
+
+instance HasIdentifierList MWParser where
+  getIdentifierList   = fmap mwIdentifierList getState
+  putIdentifierList l = updateState $ \st -> st{ mwIdentifierList = l }
+
 --
 -- auxiliary functions
 --
@@ -91,7 +109,7 @@ nested p = do
   return res
 
 specialChars :: [Char]
-specialChars = "'[]<=&*{}|\""
+specialChars = "'[]<=&*{}|\":\\"
 
 spaceChars :: [Char]
 spaceChars = " \n\t"
@@ -187,7 +205,7 @@ table = do
   tableStart
   styles <- option [] parseAttrs <* blankline
   let tableWidth = case lookup "width" styles of
-                         Just w  -> maybe 1.0 id $ parseWidth w
+                         Just w  -> fromMaybe 1.0 $ parseWidth w
                          Nothing -> 1.0
   caption <- option mempty tableCaption
   optional rowsep
@@ -268,7 +286,7 @@ tableCell = try $ do
                     Just "center" -> AlignCenter
                     _             -> AlignDefault
   let width = case lookup "width" attrs of
-                    Just xs -> maybe 0.0 id $ parseWidth xs
+                    Just xs -> fromMaybe 0.0 $ parseWidth xs
                     Nothing -> 0.0
   return ((align, width), bs)
 
@@ -351,7 +369,8 @@ header = try $ do
   let lev = length eqs
   guard $ lev <= 6
   contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')
-  return $ B.header lev contents
+  attr <- registerHeader nullAttr contents
+  return $ B.headerWith attr lev contents
 
 bulletList :: MWParser Blocks
 bulletList = B.bulletList <$>
@@ -369,7 +388,7 @@ orderedList =
           spaces
           items <- many (listItem '#' <|> li)
           optional (htmlTag (~== TagClose "ol"))
-          let start = maybe 1 id $ safeRead $ fromAttrib "start" tag
+          let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
           return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items
 
 definitionList :: MWParser Blocks
@@ -380,8 +399,9 @@ defListItem = try $ do
   terms <- mconcat . intersperse B.linebreak <$> many defListTerm
   -- we allow dd with no dt, or dt with no dd
   defs  <- if B.isNull terms
-              then many1 $ listItem ':'
-              else many $ listItem ':'
+              then notFollowedBy (try $ string ":<math>") *>
+                       many1 (listItem ':')
+              else many (listItem ':')
   return (terms, defs)
 
 defListTerm  :: MWParser Inlines
@@ -462,6 +482,7 @@ inline =  whitespace
       <|> image
       <|> internalLink
       <|> externalLink
+      <|> math
       <|> inlineTag
       <|> B.singleton <$> charRef
       <|> inlineHtml
@@ -472,6 +493,16 @@ inline =  whitespace
 str :: MWParser Inlines
 str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
 
+math :: MWParser Inlines
+math = (B.displayMath . trim <$> try (char ':' >> charsInTags "math"))
+   <|> (B.math . trim <$> charsInTags "math")
+   <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
+   <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
+ where dmStart = string "\\["
+       dmEnd   = try (string "\\]")
+       mStart  = string "\\("
+       mEnd    = try (string "\\)")
+
 variable :: MWParser String
 variable = try $ do
   string "{{{"
@@ -495,7 +526,6 @@ inlineTag = do
        TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
        TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
        TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
-       TagOpen "math" _ -> B.math <$> charsInTags "math"
        TagOpen "code" _ -> B.code <$> charsInTags "code"
        TagOpen "tt" _ -> B.code <$> charsInTags "tt"
        TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
@@ -528,7 +558,7 @@ image = try $ do
   _ <- many (try $ char '|' *> imageOption)
   caption <-   (B.str fname <$ sym "]]")
            <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
-  return $ B.image fname "image" caption
+  return $ B.image fname ("fig:" ++ stringify caption) caption
 
 imageOption :: MWParser String
 imageOption =
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 1f7088f72..6bd617f7e 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -27,16 +27,30 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Conversion of TeX math to a list of 'Pandoc' inline elements.
 -}
-module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where
+module Text.Pandoc.Readers.TeXMath ( readTeXMath, readTeXMath' ) where
 
 import Text.Pandoc.Definition
 import Text.TeXMath
 
 -- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
--- Defaults to raw formula between @$@ characters if entire formula
+-- Defaults to raw formula between @$@ or @$$@ characters if entire formula
 -- can't be converted.
+readTeXMath' :: MathType
+             -> String    -- ^ String to parse (assumes @'\n'@ line endings)
+             -> [Inline]
+readTeXMath' mt inp = case texMathToPandoc dt inp of
+                           Left _    -> [Str (delim ++ inp ++ delim)]
+                           Right res -> res
+    where (dt, delim) = case mt of
+                             DisplayMath -> (DisplayBlock, "$$")
+                             InlineMath  -> (DisplayInline, "$")
+
+{-# DEPRECATED readTeXMath "Use readTeXMath' from Text.Pandoc.JSON instead" #-}
+-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
+-- Defaults to raw formula between @$@ characters if entire formula
+-- can't be converted.  (This is provided for backwards compatibility;
+-- it is better to use @readTeXMath'@, which properly distinguishes
+-- between display and inline math.)
 readTeXMath :: String    -- ^ String to parse (assumes @'\n'@ line endings)
             -> [Inline]
-readTeXMath inp = case texMathToPandoc DisplayInline inp of
-                        Left _    -> [Str ("$" ++ inp ++ "$")]
-                        Right res -> res
+readTeXMath = readTeXMath' InlineMath
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 23e07f621..93658cdea 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -594,7 +594,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo
 simpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
                 -> ([Inline] -> Inline)       -- ^ Inline constructor
                 -> Parser [Char] ParserState Inline   -- ^ content parser (to be used repeatedly)
-simpleInline border construct = surrounded border (inlineWithAttribute) >>=
+simpleInline border construct = surrounded border inlineWithAttribute >>=
                                 return . construct . normalizeSpaces
   where inlineWithAttribute = (try $ optional attributes) >> inline