9 files changed, 116 insertions, 57 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 44f67ce75..1b3269136 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -89,6 +89,7 @@ import Text.TeXMath (writeTeX)
 import Data.Default (Default)
 import qualified Data.ByteString.Lazy as B
 import qualified Data.Map as M
+import qualified Data.Set as Set
 import Control.Monad.Reader
 import Control.Monad.State
 import Data.Sequence (ViewL(..), viewl)
@@ -350,7 +351,7 @@ parPartToInlines (BookMark _ anchor) =
     -- avoid an extra pass.
     let newAnchor =
           if not inHdrBool && anchor `elem` (M.elems anchorMap)
-          then uniqueIdent [Str anchor] (M.elems anchorMap)
+          then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
           else anchor
     unless inHdrBool
       (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
@@ -393,7 +394,7 @@ makeHeaderAnchor' (Header n (_, classes, kvs) ils)
   | (c:cs) <- filter isAnchorSpan ils
   , (Span (ident, ["anchor"], _) _) <- c = do
     hdrIDMap <- gets docxAnchorMap
-    let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+    let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
     modify $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap}
     return $ Header n (newIdent, classes, kvs) (ils \\ (c:cs))
 -- Otherwise we just give it a name, and register that name (associate
@@ -401,7 +402,7 @@ makeHeaderAnchor' (Header n (_, classes, kvs) ils)
 makeHeaderAnchor' (Header n (_, classes, kvs) ils) =
   do
     hdrIDMap <- gets docxAnchorMap
-    let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+    let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
     modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
     return $ Header n (newIdent, classes, kvs) ils
 makeHeaderAnchor' blk = return blk
diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
index c93b40119..e6de2d474 100644
--- a/src/Text/Pandoc/Readers/Docx/Reducible.hs
+++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs
@@ -53,6 +53,7 @@ instance Modifiable Inlines where
       (Strikeout _)   -> Modifier strikeout
       (Superscript _) -> Modifier superscript
       (Subscript _)   -> Modifier subscript
+      (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
       (Span attr _)   -> AttrModifier spanWith attr
       _               -> NullModifier
     _ -> NullModifier
@@ -65,6 +66,7 @@ instance Modifiable Inlines where
       (Strikeout lst)   -> fromList lst
       (Superscript lst) -> fromList lst
       (Subscript lst)   -> fromList lst
+      (Link _ lst _)    -> fromList lst
       (Span _ lst)      -> fromList lst
       _        -> ils
     _          -> ils
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index a34e2fb5c..03b790d0b 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -43,7 +43,7 @@ import Text.HTML.TagSoup.Match
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
-import Text.Pandoc.Shared ( extractSpaces, renderTags'
+import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField
                           , escapeURI, safeRead, mapLeft )
 import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
                            , Extension (Ext_epub_html_exts,
@@ -68,7 +68,7 @@ import Text.Pandoc.Error
 import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
 import Text.Pandoc.Compat.Monoid ((<>))
 import Text.Parsec.Error
-
+import qualified Data.Set as Set
 
 -- | Convert HTML-formatted string to 'Pandoc' document.
 readHtml :: ReaderOptions -- ^ Reader options
@@ -77,7 +77,7 @@ readHtml :: ReaderOptions -- ^ Reader options
 readHtml opts inp =
     mapLeft (ParseFailure . getError) . flip runReader def $
       runParserT parseDoc
-        (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty)
+        (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
         "source" tags
     where tags = stripPrefixes . canonicalizeTags $
                    parseTagsOptions parseOptions{ optTagPosition = True } inp
@@ -104,7 +104,7 @@ data HTMLState =
   {  parserState :: ParserState,
      noteTable   :: [(String, Blocks)],
      baseHref    :: Maybe String,
-     identifiers :: [String],
+     identifiers :: Set.Set String,
      headerMap   :: M.Map Inlines String
   }
 
@@ -137,7 +137,11 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag
              then return mempty
              else do
                let content = fromAttrib "content" mt
-               updateState $ B.setMeta name (B.text content)
+               updateState $ \s ->
+                 let ps = parserState s in
+                 s{ parserState = ps{
+                      stateMeta = addMetaField name (B.text content)
+                                   (stateMeta ps) } }
                return mempty
         pBaseTag = do
           bt <- pSatisfy (~== TagOpen "base" [])
@@ -441,6 +445,7 @@ pTable = try $ do
   -- fail on empty table
   guard $ not $ null head' && null rows
   let isSinglePlain x = case B.toList x of
+                             []        -> True
                              [Plain _] -> True
                              _         -> False
   let isSimple = all isSinglePlain $ concat (head':rows)
@@ -925,14 +930,45 @@ htmlInBalanced :: (Monad m)
                => (Tag String -> Bool)
                -> ParserT String st m String
 htmlInBalanced f = try $ do
-  (TagOpen t _, tag) <- htmlTag f
-  guard $ not $ "/>" `isSuffixOf` tag -- not a self-closing tag
-  let stopper = htmlTag (~== TagClose t)
-  let anytag = snd <$> htmlTag (const True)
-  contents <- many $ notFollowedBy' stopper >>
-                     (htmlInBalanced f <|> anytag <|> count 1 anyChar)
-  endtag <- liftM snd stopper
-  return $ tag ++ concat contents ++ endtag
+  lookAhead (char '<')
+  inp <- getInput
+  let ts = canonicalizeTags $
+        parseTagsOptions parseOptions{ optTagWarning = True,
+                                       optTagPosition = True } inp
+  case ts of
+    (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
+       guard $ f t
+       guard $ not $ hasTagWarning (t : take 1 rest)
+       case htmlInBalanced' tn (t:rest) of
+            []  -> mzero
+            xs  -> case reverse xs of
+                        (TagClose _ : TagPosition er ec : _) -> do
+                          let ls = er - sr
+                          let cs = ec - sc
+                          lscontents <- concat <$> count ls anyLine
+                          cscontents <- count cs anyChar
+                          (_,closetag) <- htmlTag (~== TagClose tn)
+                          return (lscontents ++ cscontents ++ closetag)
+                        _ -> mzero
+    _ -> mzero
+
+htmlInBalanced' :: String
+                -> [Tag String]
+                -> [Tag String]
+htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
+  where go :: Int -> [Tag String] -> Maybe [Tag String]
+        go n (t@(TagOpen tn' _):rest) | tn' == tagname =
+              (t :) <$> go (n + 1) rest
+        go 1 (t@(TagClose tn'):_) | tn' == tagname =
+              return [t]
+        go n (t@(TagClose tn'):rest)  | tn' == tagname =
+              (t :) <$> go (n - 1) rest
+        go n (t:ts') = (t :) <$> go n ts'
+        go n [] = mzero
+
+hasTagWarning :: [Tag String] -> Bool
+hasTagWarning (TagWarning _:_) = True
+hasTagWarning _ = False
 
 -- | Matches a tag meeting a certain condition.
 htmlTag :: Monad m
@@ -941,8 +977,6 @@ htmlTag :: Monad m
 htmlTag f = try $ do
   lookAhead (char '<')
   inp <- getInput
-  let hasTagWarning (TagWarning _:_) = True
-      hasTagWarning _ = False
   let (next : rest) = canonicalizeTags $ parseTagsOptions
                        parseOptions{ optTagWarning = True } inp
   guard $ f next
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4b30725aa..9a1708331 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -422,7 +422,8 @@ inlineCommand = try $ do
            else if parseRaw
                    then return $ rawInline "latex" rawcommand
                    else return mempty
-  lookupListDefault mzero [name',name] inlineCommands
+  (lookupListDefault mzero [name',name] inlineCommands <*
+      optional (try (string "{}")))
     <|> raw
 
 unlessParseRaw :: LP ()
@@ -435,6 +436,7 @@ isBlockCommand s = s `M.member` blockCommands
 inlineEnvironments :: M.Map String (LP Inlines)
 inlineEnvironments = M.fromList
   [ ("displaymath", mathEnv id Nothing "displaymath")
+  , ("math", math <$> verbEnv "math")
   , ("equation", mathEnv id Nothing "equation")
   , ("equation*", mathEnv id Nothing "equation*")
   , ("gather", mathEnv id (Just "gathered") "gather")
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 77c3a1016..82d343243 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -36,7 +36,7 @@ import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate )
 import qualified Data.Map as M
 import Data.Scientific (coefficient, base10Exponent)
 import Data.Ord ( comparing )
-import Data.Char ( isSpace, isAlphaNum, toLower )
+import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation )
 import Data.Maybe
 import Text.Pandoc.Definition
 import Text.Pandoc.Emoji (emojis)
@@ -1554,7 +1554,7 @@ math :: MarkdownParser (F Inlines)
 math =  (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
      <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
                ((getOption readerSmart >>= guard) *> (return <$> apostrophe)
-                <* notFollowedBy space)
+                <* notFollowedBy (space <|> satisfy isPunctuation))
 
 -- Parses material enclosed in *s, **s, _s, or __s.
 -- Designed to avoid backtracking.
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index d29ec50e7..950497992 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -52,6 +52,7 @@ import Text.HTML.TagSoup
 import Data.Sequence (viewl, ViewL(..), (<|))
 import qualified Data.Foldable as F
 import qualified Data.Map as M
+import qualified Data.Set as Set
 import Data.Char (isDigit, isSpace)
 import Data.Maybe (fromMaybe)
 import Text.Printf (printf)
@@ -69,7 +70,7 @@ readMediaWiki opts s =
                                        , mwNextLinkNumber  = 1
                                        , mwCategoryLinks = []
                                        , mwHeaderMap = M.empty
-                                       , mwIdentifierList = []
+                                       , mwIdentifierList = Set.empty
                                        }
            (s ++ "\n")
 
@@ -78,7 +79,7 @@ data MWState = MWState { mwOptions         :: ReaderOptions
                        , mwNextLinkNumber  :: Int
                        , mwCategoryLinks   :: [Inlines]
                        , mwHeaderMap       :: M.Map Inlines String
-                       , mwIdentifierList  :: [String]
+                       , mwIdentifierList  :: Set.Set String
                        }
 
 type MWParser = Parser [Char] MWState
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 1f1c57646..8c475eefc 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -61,6 +61,7 @@ import           Text.Pandoc.Readers.Odt.Generic.XMLConverter
 import           Text.Pandoc.Readers.Odt.Generic.Fallible
 import           Text.Pandoc.Readers.Odt.Generic.Utils
 
+import qualified Data.Set as Set
 
 --------------------------------------------------------------------------------
 -- State
@@ -221,7 +222,7 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do
 getHeaderAnchor :: OdtReaderSafe Inlines Anchor
 getHeaderAnchor = proc title -> do
   state <- getExtraState -< ()
-  let anchor = uniqueIdent (toList title) (usedAnchors state)
+  let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state)
   modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor
 
 
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c7906618c..3f29d06ef 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -53,6 +53,7 @@ import           Data.Char (isAlphaNum, toLower)
 import           Data.Default
 import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
+import qualified Data.Set as Set
 import           Data.Maybe (fromMaybe, isJust)
 import           Network.HTTP (urlEncode)
 
@@ -144,7 +145,7 @@ data OrgParserState = OrgParserState
                       , orgStateMeta'                :: F Meta
                       , orgStateNotes'               :: OrgNoteTable
                       , orgStateParserContext        :: ParserContext
-                      , orgStateIdentifiers          :: [String]
+                      , orgStateIdentifiers          :: Set.Set String
                       , orgStateHeaderMap            :: M.Map Inlines String
                       }
 
@@ -186,7 +187,7 @@ defaultOrgParserState = OrgParserState
                         , orgStateMeta' = return nullMeta
                         , orgStateNotes' = []
                         , orgStateParserContext = NullState
-                        , orgStateIdentifiers = []
+                        , orgStateIdentifiers = Set.empty
                         , orgStateHeaderMap = M.empty
                         }
 
@@ -1238,37 +1239,37 @@ applyCustomLinkFormat link = do
     formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
     return $ maybe link ($ drop 1 rest) formatter
 
--- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind
--- of parsing.
+-- | Take a link and return a function which produces new inlines when given
+-- description inlines.
 linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF s =
+linkToInlinesF linkStr =
+  case linkStr of
+    ""      -> pure . B.link mempty ""       -- wiki link (empty by convention)
+    ('#':_) -> pure . B.link linkStr ""      -- document-local fraction
+    _       -> case cleanLinkString linkStr of
+                 (Just cleanedLink) -> if isImageFilename cleanedLink
+                                       then const . pure $ B.image cleanedLink "" ""
+                                       else pure . B.link cleanedLink ""
+                 Nothing -> internalLink linkStr  -- other internal link
+
+-- | Cleanup and canonicalize a string describing a link.  Return @Nothing@ if
+-- the string does not appear to be a link.
+cleanLinkString :: String -> Maybe String
+cleanLinkString s =
   case s of
-    ""      -> pure . B.link "" ""
-    ('#':_) -> pure . B.link s ""
-    _ | isImageFilename s     -> const . pure $ B.image s "" ""
-    _ | isFileLink s          -> pure . B.link (dropLinkType s) ""
-    _ | isUri s               -> pure . B.link s ""
-    _ | isAbsoluteFilePath s  -> pure . B.link ("file://" ++ s) ""
-    _ | isRelativeFilePath s  -> pure . B.link s ""
-    _                         -> internalLink s
-
-isFileLink :: String -> Bool
-isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s)
-
-dropLinkType :: String -> String
-dropLinkType = tail . snd . break (== ':')
-
-isRelativeFilePath :: String -> Bool
-isRelativeFilePath s = (("./" `isPrefixOf` s) || ("../" `isPrefixOf` s)) &&
-                       (':' `notElem` s)
-
-isUri :: String -> Bool
-isUri s = let (scheme, path) = break (== ':') s
-          in all (\c -> isAlphaNum c || c `elem` (".-" :: String)) scheme
-             && not (null path)
-
-isAbsoluteFilePath :: String -> Bool
-isAbsoluteFilePath = ('/' ==) . head
+    '/':_                  -> Just $ "file://" ++ s  -- absolute path
+    '.':'/':_              -> Just s                 -- relative path
+    '.':'.':'/':_          -> Just s                 -- relative path
+    -- Relative path or URL (file schema)
+    'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
+    _ | isUrl s            -> Just s                 -- URL
+    _                      -> Nothing
+ where
+   isUrl :: String -> Bool
+   isUrl cs =
+     let (scheme, path) = break (== ':') cs
+     in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+          && not (null path)
 
 isImageFilename :: String -> Bool
 isImageFilename filename =
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index dd1d289a3..6f64540f8 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -583,7 +583,18 @@ code2 = do
 
 -- | Html / CSS attributes
 attributes :: Parser [Char] ParserState Attr
-attributes = (foldl (flip ($)) ("",[],[])) `fmap` many attribute
+attributes = (foldl (flip ($)) ("",[],[])) <$>
+  try (do special <- option id specialAttribute
+          attrs <- many attribute
+          return (special : attrs))
+
+specialAttribute :: Parser [Char] ParserState (Attr -> Attr)
+specialAttribute = do
+  alignStr <- ("center" <$ char '=') <|>
+    ("justify" <$ try (string "<>")) <|>
+    ("right" <$ char '>') <|>
+    ("left" <$ char '<')
+  return $ addStyle ("text-align:" ++ alignStr)
 
 attribute :: Parser [Char] ParserState (Attr -> Attr)
 attribute = classIdAttr <|> styleAttr <|> langAttr
@@ -602,7 +613,13 @@ classIdAttr = try $ do -- (class class #id)
 styleAttr :: Parser [Char] ParserState (Attr -> Attr)
 styleAttr = do
   style <- try $ enclosed (char '{') (char '}') anyChar'
-  return $ \(id',classes,keyvals) -> (id',classes,("style",style):keyvals)
+  return $ addStyle style
+
+addStyle :: String -> Attr -> Attr
+addStyle style (id',classes,keyvals) =
+  (id',classes,keyvals')
+  where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
+        style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals]
 
 langAttr :: Parser [Char] ParserState (Attr -> Attr)
 langAttr = do