diff options
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Class.hs | 4 | ||||
| -rw-r--r-- | src/Text/Pandoc/MIME.hs | 19 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 71 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 19 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 54 | 
5 files changed, 91 insertions, 76 deletions
| diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index fb3cfa72a..8d9caa6e8 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -572,10 +572,10 @@ downloadOrRead s = do              Nothing -> openURL s' -- will throw error      (Nothing, s') ->         case parseURI s' of  -- requires absolute URI -            -- We don't want to treat C:/ as a scheme: -            Just u' | length (uriScheme u') > 2 -> openURL (show u')              Just u' | uriScheme u' == "file:" ->                   readLocalFile $ uriPathToPath (uriPath u') +            -- We don't want to treat C:/ as a scheme: +            Just u' | length (uriScheme u') > 2 -> openURL (show u')              _ -> readLocalFile fp -- get from local file system     where readLocalFile f = do               resourcePath <- getResourcePath diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 19488c9bc..ee0fe3efb 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -11,12 +11,13 @@  Mime type lookup for ODT writer.  -}  module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef, -                          extensionFromMimeType )where +                          extensionFromMimeType, mediaCategory ) where  import Prelude  import Data.Char (toLower)  import Data.List (isPrefixOf, isSuffixOf) +import Data.List.Split (splitOn)  import qualified Data.Map as M -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, listToMaybe)  import System.FilePath  type MimeType = String @@ -42,15 +43,25 @@ extensionFromMimeType mimetype =    M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes    -- note:  we just look up the basic mime type, dropping the content-encoding etc. +-- | Determine general media category for file path, e.g. +-- +-- prop> mediaCategory "foo.jpg" = Just "image" +mediaCategory :: FilePath -> Maybe String +mediaCategory fp = getMimeType fp >>= listToMaybe . splitOn "/" +  reverseMimeTypes :: M.Map MimeType String  reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList  mimeTypes :: M.Map String MimeType  mimeTypes = M.fromList mimeTypesList +-- | Collection of common mime types. +-- Except for first entry, list borrowed from +-- <https://github.com/Happstack/happstack-server/blob/master/src/Happstack/Server/FileServe/BuildingBlocks.hs happstack-server>  mimeTypesList :: [(String, MimeType)] -mimeTypesList = -- List borrowed from happstack-server. -           [("gz","application/x-gzip") +mimeTypesList = +           [("cpt","image/x-corelphotopaint") +           ,("gz","application/x-gzip")             ,("cabal","application/x-cabal")             ,("%","application/x-trash")             ,("323","text/h323") diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 474bda3de..78b377993 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,5 @@  {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE LambdaCase        #-}  {-# LANGUAGE FlexibleContexts      #-}  {-# LANGUAGE FlexibleInstances     #-}  {-# LANGUAGE MultiParamTypeClasses #-} @@ -94,13 +95,14 @@ readHtml opts inp = do      Left  err -> throwError $ PandocParseError $ getError err  replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] -replaceNotes = walkM replaceNotes' +replaceNotes bs = do +  st <- getState +  return $ walk (replaceNotes' (noteTable st)) bs -replaceNotes' :: PandocMonad m => Inline -> TagParser m Inline -replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes -  where -    getNotes = noteTable <$> getState -replaceNotes' x = return x +replaceNotes' :: [(String, Blocks)] -> Inline -> Inline +replaceNotes' noteTbl (RawInline (Format "noteref") ref) = +  maybe (Str "") (Note . B.toList) $ lookup ref noteTbl +replaceNotes' _ x = x  data HTMLState =    HTMLState @@ -129,7 +131,7 @@ type TagParser m = HTMLParser m [Tag Text]  pHtml :: PandocMonad m => TagParser m Blocks  pHtml = try $ do -  (TagOpen "html" attr) <- lookAhead pAnyTag +  (TagOpen "html" attr) <- lookAhead pAny    for_ (lookup "lang" attr) $      updateState . B.setMeta "lang" . B.text . T.unpack    pInTags "html" block @@ -138,7 +140,7 @@ pBody :: PandocMonad m => TagParser m Blocks  pBody = pInTags "body" block  pHead :: PandocMonad m => TagParser m Blocks -pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)    where pTitle = pInTags "title" inline >>= setTitle . trimInlines          setTitle t = mempty <$ updateState (B.setMeta "title" t)          pMetaTag = do @@ -216,15 +218,16 @@ eCase = do    let attr = toStringAttr attr'    case flip lookup namespaces =<< lookup "required-namespace" attr of      Just p -> Just <$> pInTags "case" (skipMany pBlank *> p <* skipMany pBlank) -    Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (matchTagClose "case")) +    Nothing -> Nothing <$ manyTill pAny (pSatisfy (matchTagClose "case"))  eFootnote :: PandocMonad m => TagParser m ()  eFootnote = try $ do    let notes = ["footnote", "rearnote"]    guardEnabled Ext_epub_html_exts -  (TagOpen tag attr') <- lookAhead pAnyTag +  (TagOpen tag attr') <- lookAhead pAny    let attr = toStringAttr attr' -  guard $ maybe False (`elem` notes) (lookup "type" attr) +  guard $ maybe False (`elem` notes) +          (lookup "type" attr <|> lookup "epub:type" attr)    let ident = fromMaybe "" (lookup "id" attr)    content <- pInTags tag block    addNote ident content @@ -235,20 +238,26 @@ addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s})  eNoteref :: PandocMonad m => TagParser m Inlines  eNoteref = try $ do    guardEnabled Ext_epub_html_exts -  TagOpen tag attr' <- lookAhead pAnyTag -  let attr = toStringAttr attr' -  guard $ lookup "type" attr == Just "noteref" -  let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr) -  guard (not (null ident)) -  pInTags tag block +  TagOpen tag attr <- +    pSatisfy (\case +                 TagOpen _ as +                    -> (lookup "type" as <|> lookup "epub:type" as) +                        == Just "noteref" +                 _  -> False) +  ident <- case T.unpack <$> lookup "href" attr of +             Just ('#':rest) -> return rest +             _ -> mzero +  _ <- manyTill pAny (pSatisfy (\case +                                   TagClose t -> t == tag +                                   _          -> False))    return $ B.rawInline "noteref" ident  -- Strip TOC if there is one, better to generate again  eTOC :: PandocMonad m => TagParser m ()  eTOC = try $ do    guardEnabled Ext_epub_html_exts -  (TagOpen tag attr) <- lookAhead pAnyTag -  guard $ lookup "type" attr == Just "toc" +  (TagOpen tag attr) <- lookAhead pAny +  guard $ (lookup "type" attr <|> lookup "epub:type" attr) == Just "toc"    void (pInTags tag block)  pList :: PandocMonad m => TagParser m Blocks @@ -357,7 +366,7 @@ fixPlains inList bs = if any isParaish bs'  pRawTag :: PandocMonad m => TagParser m Text  pRawTag = do -  tag <- pAnyTag +  tag <- pAny    let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]    if tagOpen ignorable (const True) tag || tagClose ignorable tag       then return mempty @@ -414,13 +423,14 @@ ignore raw = do  pHtmlBlock :: PandocMonad m => Text -> TagParser m Text  pHtmlBlock t = try $ do    open <- pSatisfy (matchTagOpen t []) -  contents <- manyTill pAnyTag (pSatisfy (matchTagClose t)) +  contents <- manyTill pAny (pSatisfy (matchTagClose t))    return $ renderTags' $ [open] <> contents <> [TagClose t]  -- Sets chapter context  eSection :: PandocMonad m => TagParser m Blocks  eSection = try $ do -  let matchChapter as = maybe False (T.isInfixOf "chapter") (lookup "type" as) +  let matchChapter as = maybe False (T.isInfixOf "chapter") +                        (lookup "type" as <|> lookup "epub:type" as)    let sectTag = tagOpen (`elem` sectioningContent) matchChapter    TagOpen tag _ <- lookAhead $ pSatisfy sectTag    setInChapter (pInTags tag block) @@ -439,7 +449,8 @@ headerLevel tagtype =  eTitlePage :: PandocMonad m => TagParser m ()  eTitlePage = try $ do -  let isTitlePage as = maybe False (T.isInfixOf "titlepage") (lookup "type" as) +  let isTitlePage as = maybe False (T.isInfixOf "titlepage") +       (lookup "type" as <|> lookup "epub:type" as)    let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section")                            isTitlePage    TagOpen tag _ <- lookAhead $ pSatisfy groupTag @@ -605,7 +616,7 @@ pCodeBlock :: PandocMonad m => TagParser m Blocks  pCodeBlock = try $ do    TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])    let attr = toStringAttr attr' -  contents <- manyTill pAnyTag (pCloses "pre" <|> eof) +  contents <- manyTill pAny (pCloses "pre" <|> eof)    let rawText = concatMap tagToString contents    -- drop leading newline if any    let result' = case rawText of @@ -658,8 +669,8 @@ pSat f = do  pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)  pSatisfy f = try $ optional pLocation >> pSat f -pAnyTag :: PandocMonad m => TagParser m (Tag Text) -pAnyTag = pSatisfy (const True) +pAny :: PandocMonad m => TagParser m (Tag Text) +pAny = pSatisfy (const True)  pSelfClosing :: PandocMonad m               => (Text -> Bool) -> ([Attribute Text] -> Bool) @@ -766,7 +777,7 @@ pCode :: PandocMonad m => TagParser m Inlines  pCode = try $ do    (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)    let attr = toStringAttr attr' -  result <- manyTill pAnyTag (pCloses open) +  result <- manyTill pAny (pCloses open)    return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $             innerText result @@ -813,7 +824,7 @@ pScriptMath = try $ do                        -> return $ "display" `T.isSuffixOf` x                      _ -> mzero    contents <- T.unpack . innerText <$> -                manyTill pAnyTag (pSatisfy (matchTagClose "script")) +                manyTill pAny (pSatisfy (matchTagClose "script"))    return $ (if isdisplay then B.displayMath else B.math) contents  pMath :: PandocMonad m => Bool -> TagParser m Inlines @@ -824,7 +835,7 @@ pMath inCase = try $ do    let attr = toStringAttr attr'    unless inCase $      guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr)) -  contents <- manyTill pAnyTag (pSatisfy (matchTagClose "math")) +  contents <- manyTill pAny (pSatisfy (matchTagClose "math"))    case mathMLToTeXMath (T.unpack $ renderTags $            [open] <> contents <> [TagClose "math"]) of         Left _   -> return $ B.spanWith ("",["math"],attr) $ B.text $ @@ -867,7 +878,7 @@ pCloses :: PandocMonad m => Text -> TagParser m ()  pCloses tagtype = try $ do    t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag    case t of -       (TagClose t') | t' == tagtype -> void pAnyTag +       (TagClose t') | t' == tagtype -> void pAny         (TagOpen t' _) | t' `closes` tagtype -> return ()         (TagClose "ul") | tagtype == "li" -> return ()         (TagClose "ol") | tagtype == "li" -> return () diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index b0f6adecc..062ab19ed 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -793,13 +793,8 @@ pandocToEPUB version opts doc = do                                     [("id","toc")]) $                      [ unode "h1" ! [("id","toc-title")] $ tocTitle                      , unode "ol" ! [("class","toc")] $ tocBlocks ]] -  let landmarks = if epub3 -                     then [RawBlock (Format "html") $ ppElement $ -                            unode "nav" ! [("epub:type","landmarks") -                                          ,("id","landmarks") -                                          ,("hidden","hidden")] $ -                            [ unode "ol" $ -                              [ unode "li" +  let landmarkItems = if epub3 +                         then [ unode "li"                                  [ unode "a" ! [("href", "text/cover.xhtml")                                                ,("epub:type", "cover")] $                                    "Cover"] | @@ -811,9 +806,15 @@ pandocToEPUB version opts doc = do                                      "Table of contents"                                  ] | writerTableOfContents opts                                ] -                            ] +                         else [] +  let landmarks = if null landmarkItems +                     then [] +                     else [RawBlock (Format "html") $ ppElement $ +                            unode "nav" ! [("epub:type","landmarks") +                                          ,("id","landmarks") +                                          ,("hidden","hidden")] $ +                            [ unode "ol" landmarkItems ]                            ] -                     else []    navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):                       cssvars False ++ vars }              (Pandoc (setMeta "title" diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 2c386b465..ca44583ab 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -65,18 +65,21 @@ import Text.Blaze.Internal (preEscapedString, preEscapedText)  #endif  #if MIN_VERSION_blaze_html(0,5,1)  import qualified Text.Blaze.XHtml5 as H5 +import qualified Text.Blaze.XHtml5.Attributes as A5  #else  import qualified Text.Blaze.Html5 as H5 +import qualified Text.Blaze.Html5.Attributes as A5  #endif  import Control.Monad.Except (throwError)  import Data.Aeson (Value) -import System.FilePath (takeBaseName, takeExtension) +import System.FilePath (takeBaseName)  import Text.Blaze.Html.Renderer.Text (renderHtml)  import qualified Text.Blaze.XHtml1.Transitional as H  import qualified Text.Blaze.XHtml1.Transitional.Attributes as A  import Text.Pandoc.Class (PandocMonad, report, runPure)  import Text.Pandoc.Error  import Text.Pandoc.Logging +import Text.Pandoc.MIME (mediaCategory)  import Text.TeXMath  import Text.XML.Light (elChildren, unode, unqual)  import qualified Text.XML.Light as XML @@ -665,23 +668,11 @@ dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height                 (Just x)         -> [("style", show dir ++ ":" ++ show x)]                 Nothing          -> [] -imageExts :: [String] -imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", -              "gif", "ico", "ief", "jng", "jpg", "jpeg", "nef", "orf", "pat", "pbm", -              "pcx", "pgm", "png", "pnm", "ppm", "psd", "ras", "rgb", "svg", "tiff", -              "wbmp", "xbm", "xpm", "xwd" ] - -treatAsImage :: FilePath -> Bool -treatAsImage fp = -  let path = maybe fp uriPath (parseURIReference fp) -      ext  = map toLower $ drop 1 $ takeExtension path -  in  null ext || ext `elem` imageExts -  figure :: PandocMonad m         => WriterOptions -> Attr -> [Inline] -> (String, String)         -> StateT WriterState m Html  figure opts attr txt (s,tit) = do -  img <- inlineToHtml opts (Image attr txt (s,tit)) +  img <- inlineToHtml opts (Image attr [Str ""] (s,tit))    html5 <- gets stHtml5    let tocapt = if html5                    then H5.figcaption @@ -1135,8 +1126,8 @@ inlineToHtml opts inline = do                          return $ if null tit                                      then link'                                      else link' ! A.title (toValue tit) -    (Image attr txt (s,tit)) | treatAsImage s -> do -                        let alternate' = stringify txt +    (Image attr txt (s,tit)) -> do +                        let alternate = stringify txt                          slideVariant <- gets stSlideVariant                          let isReveal = slideVariant == RevealJsSlides                          attrs <- imgAttrsToHtml opts attr @@ -1146,22 +1137,23 @@ inlineToHtml opts inline = do                                    then customAttribute "data-src" $ toValue s                                    else A.src $ toValue s) :                                [A.title $ toValue tit | not (null tit)] ++ -                              [A.alt $ toValue alternate' | not (null txt)] ++ -                              attrs -                        let tag = if html5 then H5.img else H.img -                        return $ foldl (!) tag attributes -                        -- note:  null title included, as in Markdown.pl -    (Image attr _ (s,tit)) -> do -                        slideVariant <- gets stSlideVariant -                        let isReveal = slideVariant == RevealJsSlides -                        attrs <- imgAttrsToHtml opts attr -                        let attributes = -                              (if isReveal -                                  then customAttribute "data-src" $ toValue s -                                  else A.src $ toValue s) : -                              [A.title $ toValue tit | not (null tit)] ++                                attrs -                        return $ foldl (!) H5.embed attributes +                            imageTag = (if html5 then H5.img else H.img +                              , [A.alt $ toValue alternate | not (null txt)] ) +                            mediaTag tg fallbackTxt = +                              let linkTxt = if null txt +                                            then fallbackTxt +                                            else alternate +                              in (tg $ H.a ! A.href (toValue s) $ toHtml linkTxt +                                 , [A5.controls ""] ) +                            normSrc = maybe s uriPath (parseURIReference s) +                            (tag, specAttrs) = case mediaCategory normSrc of +                              Just "image" -> imageTag +                              Just "video" -> mediaTag H5.video "Video" +                              Just "audio" -> mediaTag H5.audio "Audio" +                              Just _       -> (H5.embed, []) +                              _            -> imageTag +                        return $ foldl (!) tag $ attributes ++ specAttrs                          -- note:  null title included, as in Markdown.pl      (Note contents) -> do                          notes <- gets stNotes | 
