5 files changed, 91 insertions, 76 deletions
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index fb3cfa72a..8d9caa6e8 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -572,10 +572,10 @@ downloadOrRead s = do
             Nothing -> openURL s' -- will throw error
     (Nothing, s') ->
        case parseURI s' of  -- requires absolute URI
-            -- We don't want to treat C:/ as a scheme:
-            Just u' | length (uriScheme u') > 2 -> openURL (show u')
             Just u' | uriScheme u' == "file:" ->
                  readLocalFile $ uriPathToPath (uriPath u')
+            -- We don't want to treat C:/ as a scheme:
+            Just u' | length (uriScheme u') > 2 -> openURL (show u')
             _ -> readLocalFile fp -- get from local file system
    where readLocalFile f = do
              resourcePath <- getResourcePath
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 19488c9bc..ee0fe3efb 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -11,12 +11,13 @@
 Mime type lookup for ODT writer.
 -}
 module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef,
-                          extensionFromMimeType )where
+                          extensionFromMimeType, mediaCategory ) where
 import Prelude
 import Data.Char (toLower)
 import Data.List (isPrefixOf, isSuffixOf)
+import Data.List.Split (splitOn)
 import qualified Data.Map as M
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, listToMaybe)
 import System.FilePath
 
 type MimeType = String
@@ -42,15 +43,25 @@ extensionFromMimeType mimetype =
   M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes
   -- note:  we just look up the basic mime type, dropping the content-encoding etc.
 
+-- | Determine general media category for file path, e.g.
+--
+-- prop> mediaCategory "foo.jpg" = Just "image"
+mediaCategory :: FilePath -> Maybe String
+mediaCategory fp = getMimeType fp >>= listToMaybe . splitOn "/"
+
 reverseMimeTypes :: M.Map MimeType String
 reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList
 
 mimeTypes :: M.Map String MimeType
 mimeTypes = M.fromList mimeTypesList
 
+-- | Collection of common mime types.
+-- Except for first entry, list borrowed from
+-- <https://github.com/Happstack/happstack-server/blob/master/src/Happstack/Server/FileServe/BuildingBlocks.hs happstack-server>
 mimeTypesList :: [(String, MimeType)]
-mimeTypesList = -- List borrowed from happstack-server.
-           [("gz","application/x-gzip")
+mimeTypesList =
+           [("cpt","image/x-corelphotopaint")
+           ,("gz","application/x-gzip")
            ,("cabal","application/x-cabal")
            ,("%","application/x-trash")
            ,("323","text/h323")
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 474bda3de..78b377993 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE FlexibleContexts      #-}
 {-# LANGUAGE FlexibleInstances     #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
@@ -94,13 +95,14 @@ readHtml opts inp = do
     Left  err -> throwError $ PandocParseError $ getError err
 
 replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
-replaceNotes = walkM replaceNotes'
+replaceNotes bs = do
+  st <- getState
+  return $ walk (replaceNotes' (noteTable st)) bs
 
-replaceNotes' :: PandocMonad m => Inline -> TagParser m Inline
-replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes
-  where
-    getNotes = noteTable <$> getState
-replaceNotes' x = return x
+replaceNotes' :: [(String, Blocks)] -> Inline -> Inline
+replaceNotes' noteTbl (RawInline (Format "noteref") ref) =
+  maybe (Str "") (Note . B.toList) $ lookup ref noteTbl
+replaceNotes' _ x = x
 
 data HTMLState =
   HTMLState
@@ -129,7 +131,7 @@ type TagParser m = HTMLParser m [Tag Text]
 
 pHtml :: PandocMonad m => TagParser m Blocks
 pHtml = try $ do
-  (TagOpen "html" attr) <- lookAhead pAnyTag
+  (TagOpen "html" attr) <- lookAhead pAny
   for_ (lookup "lang" attr) $
     updateState . B.setMeta "lang" . B.text . T.unpack
   pInTags "html" block
@@ -138,7 +140,7 @@ pBody :: PandocMonad m => TagParser m Blocks
 pBody = pInTags "body" block
 
 pHead :: PandocMonad m => TagParser m Blocks
-pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag)
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
   where pTitle = pInTags "title" inline >>= setTitle . trimInlines
         setTitle t = mempty <$ updateState (B.setMeta "title" t)
         pMetaTag = do
@@ -216,15 +218,16 @@ eCase = do
   let attr = toStringAttr attr'
   case flip lookup namespaces =<< lookup "required-namespace" attr of
     Just p -> Just <$> pInTags "case" (skipMany pBlank *> p <* skipMany pBlank)
-    Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (matchTagClose "case"))
+    Nothing -> Nothing <$ manyTill pAny (pSatisfy (matchTagClose "case"))
 
 eFootnote :: PandocMonad m => TagParser m ()
 eFootnote = try $ do
   let notes = ["footnote", "rearnote"]
   guardEnabled Ext_epub_html_exts
-  (TagOpen tag attr') <- lookAhead pAnyTag
+  (TagOpen tag attr') <- lookAhead pAny
   let attr = toStringAttr attr'
-  guard $ maybe False (`elem` notes) (lookup "type" attr)
+  guard $ maybe False (`elem` notes)
+          (lookup "type" attr <|> lookup "epub:type" attr)
   let ident = fromMaybe "" (lookup "id" attr)
   content <- pInTags tag block
   addNote ident content
@@ -235,20 +238,26 @@ addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s})
 eNoteref :: PandocMonad m => TagParser m Inlines
 eNoteref = try $ do
   guardEnabled Ext_epub_html_exts
-  TagOpen tag attr' <- lookAhead pAnyTag
-  let attr = toStringAttr attr'
-  guard $ lookup "type" attr == Just "noteref"
-  let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr)
-  guard (not (null ident))
-  pInTags tag block
+  TagOpen tag attr <-
+    pSatisfy (\case
+                 TagOpen _ as
+                    -> (lookup "type" as <|> lookup "epub:type" as)
+                        == Just "noteref"
+                 _  -> False)
+  ident <- case T.unpack <$> lookup "href" attr of
+             Just ('#':rest) -> return rest
+             _ -> mzero
+  _ <- manyTill pAny (pSatisfy (\case
+                                   TagClose t -> t == tag
+                                   _          -> False))
   return $ B.rawInline "noteref" ident
 
 -- Strip TOC if there is one, better to generate again
 eTOC :: PandocMonad m => TagParser m ()
 eTOC = try $ do
   guardEnabled Ext_epub_html_exts
-  (TagOpen tag attr) <- lookAhead pAnyTag
-  guard $ lookup "type" attr == Just "toc"
+  (TagOpen tag attr) <- lookAhead pAny
+  guard $ (lookup "type" attr <|> lookup "epub:type" attr) == Just "toc"
   void (pInTags tag block)
 
 pList :: PandocMonad m => TagParser m Blocks
@@ -357,7 +366,7 @@ fixPlains inList bs = if any isParaish bs'
 
 pRawTag :: PandocMonad m => TagParser m Text
 pRawTag = do
-  tag <- pAnyTag
+  tag <- pAny
   let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]
   if tagOpen ignorable (const True) tag || tagClose ignorable tag
      then return mempty
@@ -414,13 +423,14 @@ ignore raw = do
 pHtmlBlock :: PandocMonad m => Text -> TagParser m Text
 pHtmlBlock t = try $ do
   open <- pSatisfy (matchTagOpen t [])
-  contents <- manyTill pAnyTag (pSatisfy (matchTagClose t))
+  contents <- manyTill pAny (pSatisfy (matchTagClose t))
   return $ renderTags' $ [open] <> contents <> [TagClose t]
 
 -- Sets chapter context
 eSection :: PandocMonad m => TagParser m Blocks
 eSection = try $ do
-  let matchChapter as = maybe False (T.isInfixOf "chapter") (lookup "type" as)
+  let matchChapter as = maybe False (T.isInfixOf "chapter")
+                        (lookup "type" as <|> lookup "epub:type" as)
   let sectTag = tagOpen (`elem` sectioningContent) matchChapter
   TagOpen tag _ <- lookAhead $ pSatisfy sectTag
   setInChapter (pInTags tag block)
@@ -439,7 +449,8 @@ headerLevel tagtype =
 
 eTitlePage :: PandocMonad m => TagParser m ()
 eTitlePage = try $ do
-  let isTitlePage as = maybe False (T.isInfixOf "titlepage") (lookup "type" as)
+  let isTitlePage as = maybe False (T.isInfixOf "titlepage")
+       (lookup "type" as <|> lookup "epub:type" as)
   let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section")
                           isTitlePage
   TagOpen tag _ <- lookAhead $ pSatisfy groupTag
@@ -605,7 +616,7 @@ pCodeBlock :: PandocMonad m => TagParser m Blocks
 pCodeBlock = try $ do
   TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
   let attr = toStringAttr attr'
-  contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
+  contents <- manyTill pAny (pCloses "pre" <|> eof)
   let rawText = concatMap tagToString contents
   -- drop leading newline if any
   let result' = case rawText of
@@ -658,8 +669,8 @@ pSat f = do
 pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
 pSatisfy f = try $ optional pLocation >> pSat f
 
-pAnyTag :: PandocMonad m => TagParser m (Tag Text)
-pAnyTag = pSatisfy (const True)
+pAny :: PandocMonad m => TagParser m (Tag Text)
+pAny = pSatisfy (const True)
 
 pSelfClosing :: PandocMonad m
              => (Text -> Bool) -> ([Attribute Text] -> Bool)
@@ -766,7 +777,7 @@ pCode :: PandocMonad m => TagParser m Inlines
 pCode = try $ do
   (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
   let attr = toStringAttr attr'
-  result <- manyTill pAnyTag (pCloses open)
+  result <- manyTill pAny (pCloses open)
   return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $
            innerText result
 
@@ -813,7 +824,7 @@ pScriptMath = try $ do
                       -> return $ "display" `T.isSuffixOf` x
                     _ -> mzero
   contents <- T.unpack . innerText <$>
-                manyTill pAnyTag (pSatisfy (matchTagClose "script"))
+                manyTill pAny (pSatisfy (matchTagClose "script"))
   return $ (if isdisplay then B.displayMath else B.math) contents
 
 pMath :: PandocMonad m => Bool -> TagParser m Inlines
@@ -824,7 +835,7 @@ pMath inCase = try $ do
   let attr = toStringAttr attr'
   unless inCase $
     guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr))
-  contents <- manyTill pAnyTag (pSatisfy (matchTagClose "math"))
+  contents <- manyTill pAny (pSatisfy (matchTagClose "math"))
   case mathMLToTeXMath (T.unpack $ renderTags $
           [open] <> contents <> [TagClose "math"]) of
        Left _   -> return $ B.spanWith ("",["math"],attr) $ B.text $
@@ -867,7 +878,7 @@ pCloses :: PandocMonad m => Text -> TagParser m ()
 pCloses tagtype = try $ do
   t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag
   case t of
-       (TagClose t') | t' == tagtype -> void pAnyTag
+       (TagClose t') | t' == tagtype -> void pAny
        (TagOpen t' _) | t' `closes` tagtype -> return ()
        (TagClose "ul") | tagtype == "li" -> return ()
        (TagClose "ol") | tagtype == "li" -> return ()
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index b0f6adecc..062ab19ed 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -793,13 +793,8 @@ pandocToEPUB version opts doc = do
                                    [("id","toc")]) $
                     [ unode "h1" ! [("id","toc-title")] $ tocTitle
                     , unode "ol" ! [("class","toc")] $ tocBlocks ]]
-  let landmarks = if epub3
-                     then [RawBlock (Format "html") $ ppElement $
-                            unode "nav" ! [("epub:type","landmarks")
-                                          ,("id","landmarks")
-                                          ,("hidden","hidden")] $
-                            [ unode "ol" $
-                              [ unode "li"
+  let landmarkItems = if epub3
+                         then [ unode "li"
                                 [ unode "a" ! [("href", "text/cover.xhtml")
                                               ,("epub:type", "cover")] $
                                   "Cover"] |
@@ -811,9 +806,15 @@ pandocToEPUB version opts doc = do
                                     "Table of contents"
                                 ] | writerTableOfContents opts
                               ]
-                            ]
+                         else []
+  let landmarks = if null landmarkItems
+                     then []
+                     else [RawBlock (Format "html") $ ppElement $
+                            unode "nav" ! [("epub:type","landmarks")
+                                          ,("id","landmarks")
+                                          ,("hidden","hidden")] $
+                            [ unode "ol" landmarkItems ]
                           ]
-                     else []
   navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):
                      cssvars False ++ vars }
             (Pandoc (setMeta "title"
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 2c386b465..ca44583ab 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -65,18 +65,21 @@ import Text.Blaze.Internal (preEscapedString, preEscapedText)
 #endif
 #if MIN_VERSION_blaze_html(0,5,1)
 import qualified Text.Blaze.XHtml5 as H5
+import qualified Text.Blaze.XHtml5.Attributes as A5
 #else
 import qualified Text.Blaze.Html5 as H5
+import qualified Text.Blaze.Html5.Attributes as A5
 #endif
 import Control.Monad.Except (throwError)
 import Data.Aeson (Value)
-import System.FilePath (takeBaseName, takeExtension)
+import System.FilePath (takeBaseName)
 import Text.Blaze.Html.Renderer.Text (renderHtml)
 import qualified Text.Blaze.XHtml1.Transitional as H
 import qualified Text.Blaze.XHtml1.Transitional.Attributes as A
 import Text.Pandoc.Class (PandocMonad, report, runPure)
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
+import Text.Pandoc.MIME (mediaCategory)
 import Text.TeXMath
 import Text.XML.Light (elChildren, unode, unqual)
 import qualified Text.XML.Light as XML
@@ -665,23 +668,11 @@ dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height
                (Just x)         -> [("style", show dir ++ ":" ++ show x)]
                Nothing          -> []
 
-imageExts :: [String]
-imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf",
-              "gif", "ico", "ief", "jng", "jpg", "jpeg", "nef", "orf", "pat", "pbm",
-              "pcx", "pgm", "png", "pnm", "ppm", "psd", "ras", "rgb", "svg", "tiff",
-              "wbmp", "xbm", "xpm", "xwd" ]
-
-treatAsImage :: FilePath -> Bool
-treatAsImage fp =
-  let path = maybe fp uriPath (parseURIReference fp)
-      ext  = map toLower $ drop 1 $ takeExtension path
-  in  null ext || ext `elem` imageExts
-
 figure :: PandocMonad m
        => WriterOptions -> Attr -> [Inline] -> (String, String)
        -> StateT WriterState m Html
 figure opts attr txt (s,tit) = do
-  img <- inlineToHtml opts (Image attr txt (s,tit))
+  img <- inlineToHtml opts (Image attr [Str ""] (s,tit))
   html5 <- gets stHtml5
   let tocapt = if html5
                   then H5.figcaption
@@ -1135,8 +1126,8 @@ inlineToHtml opts inline = do
                         return $ if null tit
                                     then link'
                                     else link' ! A.title (toValue tit)
-    (Image attr txt (s,tit)) | treatAsImage s -> do
-                        let alternate' = stringify txt
+    (Image attr txt (s,tit)) -> do
+                        let alternate = stringify txt
                         slideVariant <- gets stSlideVariant
                         let isReveal = slideVariant == RevealJsSlides
                         attrs <- imgAttrsToHtml opts attr
@@ -1146,22 +1137,23 @@ inlineToHtml opts inline = do
                                   then customAttribute "data-src" $ toValue s
                                   else A.src $ toValue s) :
                               [A.title $ toValue tit | not (null tit)] ++
-                              [A.alt $ toValue alternate' | not (null txt)] ++
-                              attrs
-                        let tag = if html5 then H5.img else H.img
-                        return $ foldl (!) tag attributes
-                        -- note:  null title included, as in Markdown.pl
-    (Image attr _ (s,tit)) -> do
-                        slideVariant <- gets stSlideVariant
-                        let isReveal = slideVariant == RevealJsSlides
-                        attrs <- imgAttrsToHtml opts attr
-                        let attributes =
-                              (if isReveal
-                                  then customAttribute "data-src" $ toValue s
-                                  else A.src $ toValue s) :
-                              [A.title $ toValue tit | not (null tit)] ++
                               attrs
-                        return $ foldl (!) H5.embed attributes
+                            imageTag = (if html5 then H5.img else H.img
+                              , [A.alt $ toValue alternate | not (null txt)] )
+                            mediaTag tg fallbackTxt =
+                              let linkTxt = if null txt
+                                            then fallbackTxt
+                                            else alternate
+                              in (tg $ H.a ! A.href (toValue s) $ toHtml linkTxt
+                                 , [A5.controls ""] )
+                            normSrc = maybe s uriPath (parseURIReference s)
+                            (tag, specAttrs) = case mediaCategory normSrc of
+                              Just "image" -> imageTag
+                              Just "video" -> mediaTag H5.video "Video"
+                              Just "audio" -> mediaTag H5.audio "Audio"
+                              Just _       -> (H5.embed, [])
+                              _            -> imageTag
+                        return $ foldl (!) tag $ attributes ++ specAttrs
                         -- note:  null title included, as in Markdown.pl
     (Note contents) -> do
                         notes <- gets stNotes