From fce0a60f0a85d6c3a9e7633074ecd781af08c75b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 25 Jan 2017 21:51:26 +0100 Subject: Provide explicit separate functions for HTML 4 and 5. * Text.Pandoc.Writers.HTML: removed writeHtml, writeHtmlString, added writeHtml4, writeHtml4String, writeHtml5, writeHtml5String. * Removed writerHtml5 from WriterOptions. * Renamed default.html template to default.html4. * "html" now aliases to "html5"; to get the old HTML4 behavior, you must now specify "-t html4". --- pandoc.cabal | 8 +- pandoc.hs | 10 +- src/Text/Pandoc.hs | 27 +- src/Text/Pandoc/Options.hs | 2 - src/Text/Pandoc/Templates.hs | 1 + src/Text/Pandoc/Writers/CommonMark.hs | 4 +- src/Text/Pandoc/Writers/EPUB.hs | 11 +- src/Text/Pandoc/Writers/HTML.hs | 117 +++++--- src/Text/Pandoc/Writers/Markdown.hs | 8 +- src/Text/Pandoc/Writers/OPML.hs | 4 +- tests/Tests/Old.hs | 7 +- tests/Tests/Writers/HTML.hs | 2 +- tests/lhs-test.html | 13 +- tests/lhs-test.html+lhs | 13 +- tests/tables.html | 204 ------------- tests/tables.html4 | 204 +++++++++++++ tests/tables.html5 | 204 +++++++++++++ tests/writer.html | 546 --------------------------------- tests/writer.html4 | 546 +++++++++++++++++++++++++++++++++ tests/writer.html5 | 548 ++++++++++++++++++++++++++++++++++ 20 files changed, 1633 insertions(+), 846 deletions(-) delete mode 100644 tests/tables.html create mode 100644 tests/tables.html4 create mode 100644 tests/tables.html5 delete mode 100644 tests/writer.html create mode 100644 tests/writer.html4 create mode 100644 tests/writer.html5 diff --git a/pandoc.cabal b/pandoc.cabal index 7cb292b05..97e70c830 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -36,7 +36,7 @@ Description: Pandoc is a Haskell library for converting from one markup only adding a reader or writer. Data-Files: -- templates - data/templates/default.html + data/templates/default.html4 data/templates/default.html5 data/templates/default.docbook data/templates/default.docbook5 @@ -150,7 +150,8 @@ Extra-Source-Files: tests/tables.dokuwiki tests/tables.zimwiki tests/tables.icml - tests/tables.html + tests/tables.html4 + tests/tables.html5 tests/tables.latex tests/tables.man tests/tables.plain @@ -172,7 +173,8 @@ Extra-Source-Files: tests/writer.context tests/writer.docbook tests/writer.docbook5 - tests/writer.html + tests/writer.html4 + tests/writer.html5 tests/writer.man tests/writer.markdown tests/writer.plain diff --git a/pandoc.hs b/pandoc.hs index c6faa9edf..9ee6e376b 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -111,7 +111,6 @@ convertWithOpts opts args = do , optSectionDivs = sectionDivs , optIncremental = incremental , optSelfContained = selfContained - , optHtml5 = html5 , optHtmlQTags = htmlQTags , optHighlightStyle = highlightStyle , optTopLevelDivision = topLevelDivision @@ -188,13 +187,11 @@ convertWithOpts opts args = do (if any isURI sources then "html" else "markdown") sources - "html4" -> "html" x -> x let writerName' = case map toLower writerName of [] -> defaultWriterName outputFile "epub2" -> "epub" - "html4" -> "html" x -> x let format = takeWhile (`notElem` ['+','-']) $ takeFileName writerName' -- in case path to lua script @@ -203,7 +200,7 @@ convertWithOpts opts args = do let laTeXOutput = format `elem` ["latex", "beamer"] let conTeXtOutput = format == "context" - let html5Output = format == "html5" + let html5Output = format == "html5" || format == "html" -- disabling the custom writer for now writer <- if ".lua" `isSuffixOf` format @@ -313,7 +310,6 @@ convertWithOpts opts args = do writerIdentifierPrefix = idPrefix, writerSourceURL = sourceURL, writerUserDataDir = datadir, - writerHtml5 = html5, writerHtmlQTags = htmlQTags, writerTopLevelDivision = topLevelDivision, writerListings = listings, @@ -413,7 +409,7 @@ convertWithOpts opts args = do err 43 "Error producing PDF" | otherwise -> do let htmlFormat = format `elem` - ["html","html5","s5","slidy","slideous","dzslides","revealjs"] + ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"] selfcontain = if selfContained && htmlFormat then makeSelfContained writerOptions media else return @@ -523,7 +519,6 @@ data Opt = Opt , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 , optSelfContained :: Bool -- ^ Make HTML accessible offline - , optHtml5 :: Bool -- ^ Produce HTML5 in HTML , optHtmlQTags :: Bool -- ^ Use tags in HTML , optHighlightStyle :: Maybe Style -- ^ Style to use for highlighted code , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions @@ -583,7 +578,6 @@ defaultOpts = Opt , optSectionDivs = False , optIncremental = False , optSelfContained = False - , optHtml5 = False , optHtmlQTags = False , optHighlightStyle = Just pygments , optTopLevelDivision = TopLevelDefault diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index f9e032f4f..aa4cab840 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -99,8 +99,10 @@ module Text.Pandoc , writeLaTeX , writeConTeXt , writeTexinfo - , writeHtml - , writeHtmlString + , writeHtml4 + , writeHtml4String + , writeHtml5 + , writeHtml5String , writeICML , writeDocbook , writeOPML @@ -281,23 +283,21 @@ writers = [ ,("epub3" , ByteStringWriter $ \o -> writeEPUB o{ writerEpubVersion = Just EPUB3 }) ,("fb2" , StringWriter writeFB2) - ,("html" , StringWriter writeHtmlString) - ,("html5" , StringWriter $ \o -> - writeHtmlString o{ writerHtml5 = True }) + ,("html" , StringWriter writeHtml5String) + ,("html4" , StringWriter writeHtml4String) + ,("html5" , StringWriter writeHtml5String) ,("icml" , StringWriter writeICML) ,("s5" , StringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = S5Slides - , writerTableOfContents = False }) + writeHtml4String o{ writerSlideVariant = S5Slides + , writerTableOfContents = False }) ,("slidy" , StringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = SlidySlides }) + writeHtml4String o{ writerSlideVariant = SlidySlides }) ,("slideous" , StringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = SlideousSlides }) + writeHtml4String o{ writerSlideVariant = SlideousSlides }) ,("dzslides" , StringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = DZSlides - , writerHtml5 = True }) + writeHtml5String o{ writerSlideVariant = DZSlides }) ,("revealjs" , StringWriter $ \o -> - writeHtmlString o{ writerSlideVariant = RevealJsSlides - , writerHtml5 = True }) + writeHtml5String o{ writerSlideVariant = RevealJsSlides }) ,("docbook" , StringWriter writeDocbook) ,("docbook5" , StringWriter $ \o -> writeDocbook o{ writerDocbook5 = True }) @@ -342,6 +342,7 @@ getDefaultExtensions "html" = extensionsFromList [Ext_auto_identifiers, Ext_native_divs, Ext_native_spans] +getDefaultExtensions "html4" = getDefaultExtensions "html" getDefaultExtensions "html5" = getDefaultExtensions "html" getDefaultExtensions "epub" = extensionsFromList [Ext_raw_html, diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index cd525a3c1..6cb2d883a 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -168,7 +168,6 @@ data WriterOptions = WriterOptions , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites , writerDocbook5 :: Bool -- ^ Produce DocBook5 - , writerHtml5 :: Bool -- ^ Produce HTML5 , writerHtmlQTags :: Bool -- ^ Use @@ tags for quotes in HTML , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show , writerSlideLevel :: Maybe Int -- ^ Force header level of slides @@ -210,7 +209,6 @@ instance Default WriterOptions where , writerUserDataDir = Nothing , writerCiteMethod = Citeproc , writerDocbook5 = False - , writerHtml5 = False , writerHtmlQTags = False , writerBeamer = False , writerSlideLevel = Nothing diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index d15d27438..03dc917e6 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -60,6 +60,7 @@ getDefaultTemplate user writer = do "docx" -> return $ Right "" "fb2" -> return $ Right "" "odt" -> getDefaultTemplate user "opendocument" + "html" -> getDefaultTemplate user "html5" "markdown_strict" -> getDefaultTemplate user "markdown" "multimarkdown" -> getDefaultTemplate user "markdown" "markdown_github" -> getDefaultTemplate user "markdown" diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index c58e83f19..b83f6785d 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -31,7 +31,7 @@ CommonMark: -} module Text.Pandoc.Writers.CommonMark (writeCommonMark) where -import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Writers.HTML (writeHtml5String) import Text.Pandoc.Definition import Text.Pandoc.Shared (isTightList, linesToPara) import Text.Pandoc.Templates (renderTemplate') @@ -138,7 +138,7 @@ blockToNodes (DefinitionList items) ns = blockToNodes (BulletList items') ns dlToBullet (term, xs) = Para term : concat xs blockToNodes t@(Table _ _ _ _ _) ns = do - s <- writeHtmlString def $! Pandoc nullMeta [t] + s <- writeHtml5String def $! Pandoc nullMeta [t] return (node (HTML_BLOCK (T.pack $! s)) [] : ns) blockToNodes Null ns = return ns diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index d6c3ff533..bd95c170e 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -59,7 +59,7 @@ import Control.Monad (mplus, when, zipWithM) import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs , strContent, lookupAttr, Node(..), QName(..), parseXML , onlyElems, node, ppElement) -import Text.Pandoc.Writers.HTML ( writeHtml ) +import Text.Pandoc.Writers.HTML ( writeHtml4, writeHtml5 ) import Data.Char ( toLower, isDigit, isAlphaNum ) import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) @@ -361,13 +361,15 @@ pandocToEPUB opts doc@(Pandoc meta _) = do : writerVariables opts let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerSectionDivs = True - , writerHtml5 = epub3 , writerVariables = vars , writerHTMLMathMethod = if epub3 then MathML Nothing else writerHTMLMathMethod opts , writerWrapText = WrapAuto } + let writeHtml = if epub3 + then writeHtml5 + else writeHtml4 metadata <- getEPUBMetadata opts' meta -- cover page @@ -376,7 +378,7 @@ pandocToEPUB opts doc@(Pandoc meta _) = do Nothing -> return ([],[]) Just img -> do let coverImage = "media/" ++ takeFileName img - cpContent <- renderHtml <$> (lift $ writeHtml + cpContent <- renderHtml <$> (lift $ writeHtml opts'{ writerVariables = ("coverpage","true"):vars } (Pandoc meta [RawBlock (Format "html") $ "
\n\"cover\n
"])) imgContent <- lift $ P.readFileLazy img @@ -484,8 +486,7 @@ pandocToEPUB opts doc@(Pandoc meta _) = do Chapter mbnum $ walk fixInternalReferences bs) chapters' - let chapToEntry :: PandocMonad m => Int -> Chapter -> m Entry - chapToEntry num (Chapter mbnum bs) = + let chapToEntry num (Chapter mbnum bs) = (mkEntry (showChapter num) . renderHtml) <$> (writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum } diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index c6d7b7f6a..ee1f260b6 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -28,7 +28,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to HTML. -} -module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where +module Text.Pandoc.Writers.HTML ( + writeHtml4, writeHtml4String, + writeHtml5, writeHtml5String ) where import Text.Pandoc.Definition import Text.Pandoc.Walk import Data.Monoid ((<>)) @@ -80,12 +82,13 @@ data WriterState = WriterState , stHighlighting :: Bool -- ^ Syntax highlighting is used , stSecNum :: [Int] -- ^ Number of current section , stElement :: Bool -- ^ Processing an Element + , stHtml5 :: Bool -- ^ Use HTML5 } defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, stHighlighting = False, stSecNum = [], - stElement = False} + stElement = False, stHtml5 = False} -- Helpers to render HTML with the appropriate function. @@ -102,19 +105,35 @@ nl opts = if writerWrapText opts == WrapNone then mempty else preEscapedString "\n" --- | Convert Pandoc document to Html string. -writeHtmlString :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeHtmlString opts d = do - (body, context) <- evalStateT (pandocToHtml opts d) defaultWriterState +-- | Convert Pandoc document to Html 5 string. +writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeHtml5String = writeHtmlString' True + +-- | Convert Pandoc document to Html 5 structure. +writeHtml5 :: PandocMonad m => WriterOptions -> Pandoc -> m Html +writeHtml5 = writeHtml' True + +-- | Convert Pandoc document to Html 4 string. +writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m String +writeHtml4String = writeHtmlString' False + +-- | Convert Pandoc document to Html 4 structure. +writeHtml4 :: PandocMonad m => WriterOptions -> Pandoc -> m Html +writeHtml4 = writeHtml' False + +writeHtmlString' :: PandocMonad m => Bool -> WriterOptions -> Pandoc -> m String +writeHtmlString' html5 opts d = do + (body, context) <- evalStateT (pandocToHtml opts d) + defaultWriterState{ stHtml5 = html5 } return $ case writerTemplate opts of Nothing -> renderHtml body Just tpl -> renderTemplate' tpl $ defField "body" (renderHtml body) context --- | Convert Pandoc document to Html structure. -writeHtml :: PandocMonad m => WriterOptions -> Pandoc -> m Html -writeHtml opts d = do - (body, context) <- evalStateT (pandocToHtml opts d) defaultWriterState +writeHtml' :: PandocMonad m => Bool -> WriterOptions -> Pandoc -> m Html +writeHtml' html5 opts d = do + (body, context) <- evalStateT (pandocToHtml opts d) + defaultWriterState{ stHtml5 = html5 } return $ case writerTemplate opts of Nothing -> body Just tpl -> renderTemplate' tpl $ @@ -144,8 +163,8 @@ pandocToHtml opts (Pandoc meta blocks) = do blocks' <- liftM (mconcat . intersperse (nl opts)) $ mapM (elementToHtml slideLevel opts) sects st <- get - let notes = reverse (stNotes st) - let thebody = blocks' >> footnoteSection opts notes + notes <- footnoteSection opts (reverse (stNotes st)) + let thebody = blocks' >> notes let math = case writerHTMLMathMethod opts of LaTeXMathML (Just url) -> H.script ! A.src (toValue url) @@ -172,7 +191,7 @@ pandocToHtml opts (Pandoc meta blocks) = do (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) _ -> case lookup "mathml-script" (writerVariables opts) of - Just s | not (writerHtml5 opts) -> + Just s | not (stHtml5 st) -> H.script ! A.type_ "text/javascript" $ preEscapedString ("/**/\n") @@ -199,7 +218,7 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "slideous-url" ("slideous" :: String) $ defField "revealjs-url" ("reveal.js" :: String) $ defField "s5-url" ("s5/default" :: String) $ - defField "html5" (writerHtml5 opts) $ + defField "html5" (stHtml5 st) $ metadata return (thebody, context) @@ -277,6 +296,7 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen let slide = writerSlideVariant opts /= NoSlides && level <= slideLevel let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0) modify $ \st -> st{stSecNum = num'} -- update section number + html5 <- gets stHtml5 let titleSlide = slide && level < slideLevel header' <- if title' == [Str "\0"] -- marker for hrule then return mempty @@ -307,10 +327,10 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && - not (writerHtml5 opts) ] ++ + not html5 ] ++ ["level" ++ show level | slide || writerSectionDivs opts ] ++ classes - let secttag = if writerHtml5 opts + let secttag = if html5 then H5.section else H.div let attr = (id',classes',keyvals) @@ -327,19 +347,22 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen -- | Convert list of Note blocks to a footnote
. -- Assumes notes are sorted. -footnoteSection :: WriterOptions -> [Html] -> Html -footnoteSection opts notes = - if null notes - then mempty - else nl opts >> (container - $ nl opts >> hrtag >> nl opts >> - H.ol (mconcat notes >> nl opts) >> nl opts) - where container x = if writerHtml5 opts - then H5.section ! A.class_ "footnotes" $ x - else if writerSlideVariant opts /= NoSlides - then H.div ! A.class_ "footnotes slide" $ x - else H.div ! A.class_ "footnotes" $ x - hrtag = if writerHtml5 opts then H5.hr else H.hr +footnoteSection :: PandocMonad m + => WriterOptions -> [Html] -> StateT WriterState m Html +footnoteSection opts notes = do + html5 <- gets stHtml5 + let hrtag = if html5 then H5.hr else H.hr + let container x = if html5 + then H5.section ! A.class_ "footnotes" $ x + else if writerSlideVariant opts /= NoSlides + then H.div ! A.class_ "footnotes slide" $ x + else H.div ! A.class_ "footnotes" $ x + return $ + if null notes + then mempty + else nl opts >> (container + $ nl opts >> hrtag >> nl opts >> + H.ol (mconcat notes >> nl opts) >> nl opts) -- | Parse a mailto link; return Just (name, domain) or Nothing. parseMailto :: String -> Maybe (String, String) @@ -448,13 +471,14 @@ blockToHtml opts (Plain lst) = inlineListToHtml opts lst -- title beginning with fig: indicates that the image is a figure blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = do img <- inlineToHtml opts (Image attr txt (s,tit)) - let tocapt = if writerHtml5 opts + html5 <- gets stHtml5 + let tocapt = if html5 then H5.figcaption else H.p ! A.class_ "caption" capt <- if null txt then return mempty else tocapt `fmap` inlineListToHtml opts txt - return $ if writerHtml5 opts + return $ if html5 then H5.figure $ mconcat [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat @@ -475,12 +499,13 @@ blockToHtml opts (LineBlock lns) = htmlLines <- mconcat . intersperse lf <$> mapM (inlineListToHtml opts) lns return $ H.div ! A.style "white-space: pre-line;" $ htmlLines blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do + html5 <- gets stHtml5 let speakerNotes = "notes" `elem` classes -- we don't want incremental output inside speaker notes, see #1394 let opts' = if speakerNotes then opts{ writerIncremental = False } else opts contents <- blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts - let (divtag, classes') = if writerHtml5 opts && "section" `elem` classes + let (divtag, classes') = if html5 && "section" `elem` classes then (H5.section, filter (/= "section") classes) else (H.div, classes) return $ @@ -498,7 +523,9 @@ blockToHtml opts (RawBlock f str) allowsMathEnvironments (writerHTMLMathMethod opts) && isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str] | otherwise = return mempty -blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr +blockToHtml _ (HorizontalRule) = do + html5 <- gets stHtml5 + return $ if html5 then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do let tolhs = isEnabled Ext_literate_haskell opts && any (\c -> map toLower c == "haskell") classes && @@ -564,6 +591,7 @@ blockToHtml opts (BulletList lst) = do return $ unordList opts contents blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst + html5 <- gets stHtml5 let numstyle' = case numstyle of Example -> "decimal" _ -> camelCaseToHyphenated $ show numstyle @@ -574,7 +602,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do then [A.class_ "example"] else []) ++ (if numstyle /= DefaultStyle - then if writerHtml5 opts + then if html5 then [A.type_ $ case numstyle of Decimal -> "1" @@ -603,6 +631,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do else do cs <- inlineListToHtml opts capt return $ H.caption cs >> nl opts + html5 <- gets stHtml5 let percent w = show (truncate (100*w) :: Integer) ++ "%" let coltags = if all (== 0.0) widths then mempty @@ -610,7 +639,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do H.colgroup $ do nl opts mapM_ (\w -> do - if writerHtml5 opts + if html5 then H.col ! A.style (toValue $ "width: " ++ percent w) else H.col ! A.width (toValue $ percent w) @@ -666,8 +695,9 @@ tableItemToHtml :: PandocMonad m -> StateT WriterState m Html tableItemToHtml opts tag' align' item = do contents <- blockListToHtml opts item + html5 <- gets stHtml5 let alignStr = alignmentToString align' - let attribs = if writerHtml5 opts + let attribs = if html5 then A.style (toValue $ "text-align: " ++ alignStr ++ ";") else A.align (toValue alignStr) let tag'' = if null alignStr @@ -707,7 +737,8 @@ annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, -- | Convert Pandoc inline element to HTML. inlineToHtml :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Html -inlineToHtml opts inline = +inlineToHtml opts inline = do + html5 <- gets stHtml5 case inline of (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " @@ -715,7 +746,7 @@ inlineToHtml opts inline = WrapNone -> preEscapedString " " WrapAuto -> preEscapedString " " WrapPreserve -> preEscapedString "\n" - (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br) + (LineBreak) -> return $ (if html5 then H5.br else H.br) <> strToHtml "\n" (Span (id',classes,kvs) ils) -> inlineListToHtml opts ils >>= @@ -784,12 +815,12 @@ inlineToHtml opts inline = InlineMath -> H.span ! A.class_ mathClass $ m DisplayMath -> H.div ! A.class_ mathClass $ m WebTeX url -> do - let imtag = if writerHtml5 opts then H5.img else H.img + let imtag = if html5 then H5.img else H.img let m = imtag ! A.style "vertical-align:middle" ! A.src (toValue $ url ++ urlEncode str) ! A.alt (toValue str) ! A.title (toValue str) - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if html5 then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag @@ -817,7 +848,7 @@ inlineToHtml opts inline = PlainMath -> do x <- lift (texMathToInlines t str) >>= inlineListToHtml opts let m = H.span ! A.class_ mathClass $ x - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if html5 then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag @@ -847,7 +878,7 @@ inlineToHtml opts inline = [A.title $ toValue tit | not (null tit)] ++ [A.alt $ toValue alternate' | not (null txt)] ++ imgAttrsToHtml opts attr - let tag = if writerHtml5 opts then H5.img else H.img + let tag = if html5 then H5.img else H.img return $ foldl (!) tag attributes -- note: null title included, as in Markdown.pl (Image attr _ (s,tit)) -> do @@ -880,7 +911,7 @@ inlineToHtml opts inline = (Cite cits il)-> do contents <- inlineListToHtml opts il let citationIds = unwords $ map citationId cits let result = H.span ! A.class_ "citation" $ contents - return $ if writerHtml5 opts + return $ if html5 then result ! customAttribute "data-cites" (toValue citationIds) else result diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 8de09864a..e965528cc 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -47,7 +47,7 @@ import Text.Pandoc.Pretty import Control.Monad.Reader import Control.Monad.State import Control.Monad.Except (throwError) -import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Writers.HTML (writeHtml5String) import Text.Pandoc.Writers.Math (texMathToInlines) import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) import Network.URI (isURI) @@ -536,7 +536,7 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do rawHeaders rawRows | isEnabled Ext_raw_html opts -> fmap (id,) $ text <$> - (writeHtmlString def $ Pandoc nullMeta [t]) + (writeHtml5String def $ Pandoc nullMeta [t]) | otherwise -> return $ (id, text "[TABLE]") return $ nst $ tbl $$ blankline $$ caption'' $$ blankline blockToMarkdown' opts (BulletList items) = do @@ -1072,7 +1072,7 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . trim) <$> writeHtmlString def (Pandoc nullMeta [Plain [lnk]]) + (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) | otherwise = do plain <- asks envPlain linktext <- inlineListToMarkdown opts txt @@ -1111,7 +1111,7 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . trim) <$> writeHtmlString def (Pandoc nullMeta [Plain [img]]) + (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [img]]) | otherwise = do plain <- asks envPlain let txt = if null alternate || alternate == [Str source] diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 38c96589a..bc0cfc300 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Writers.HTML (writeHtml5String) import Text.Pandoc.Writers.Markdown (writeMarkdown) import Text.Pandoc.Pretty import Text.Pandoc.Compat.Time @@ -65,7 +65,7 @@ writeOPML opts (Pandoc meta blocks) = do writeHtmlInlines :: PandocMonad m => [Inline] -> m String writeHtmlInlines ils = - trim <$> (writeHtmlString def $ Pandoc nullMeta [Plain ils]) + trim <$> (writeHtml5String def $ Pandoc nullMeta [Plain ils]) -- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT showDateTimeRFC822 :: UTCTime -> String diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 21e00b033..a46ac2260 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -81,16 +81,17 @@ tests = [ testGroup "markdown" ] ] , testGroup "html" - [ testGroup "writer" (writerTests "html" ++ lhsWriterTests "html") + [ testGroup "writer" (writerTests "html4" ++ writerTests "html5" ++ + lhsWriterTests "html") , test "reader" ["-r", "html", "-w", "native", "-s"] "html-reader.html" "html-reader.native" ] , testGroup "s5" [ s5WriterTest "basic" ["-s"] "s5" , s5WriterTest "fancy" ["-s","-m","-i"] "s5" - , s5WriterTest "fragment" [] "html" + , s5WriterTest "fragment" [] "html4" , s5WriterTest "inserts" ["-s", "-H", "insert", - "-B", "insert", "-A", "insert", "-c", "main.css"] "html" + "-B", "insert", "-A", "insert", "-c", "main.css"] "html4" ] , testGroup "textile" [ testGroup "writer" $ writerTests "textile" diff --git a/tests/Tests/Writers/HTML.hs b/tests/Tests/Writers/HTML.hs index d99698c21..45de2b042 100644 --- a/tests/Tests/Writers/HTML.hs +++ b/tests/Tests/Writers/HTML.hs @@ -8,7 +8,7 @@ import Tests.Helpers import Text.Pandoc.Arbitrary() html :: (ToPandoc a) => a -> String -html = purely (writeHtmlString def{ writerWrapText = WrapNone }) . toPandoc +html = purely (writeHtml4String def{ writerWrapText = WrapNone }) . toPandoc {- "my test" =: X =?> Y diff --git a/tests/lhs-test.html b/tests/lhs-test.html index e4a5b3868..2c3b6b0f8 100644 --- a/tests/lhs-test.html +++ b/tests/lhs-test.html @@ -1,9 +1,9 @@ - - + + - - - + + + +

lhs test

diff --git a/tests/lhs-test.html+lhs b/tests/lhs-test.html+lhs index 41e9ca283..443b0642f 100644 --- a/tests/lhs-test.html+lhs +++ b/tests/lhs-test.html+lhs @@ -1,9 +1,9 @@ - - + + - - - + + + +

lhs test

diff --git a/tests/tables.html b/tests/tables.html deleted file mode 100644 index 5bb7a7de2..000000000 --- a/tests/tables.html +++ /dev/null @@ -1,204 +0,0 @@ -

Simple table with caption:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
-

Simple table without caption:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
RightLeftCenterDefault
12121212
123123123123
1111
-

Simple table indented two spaces:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
-

Multiline table with caption:

- - ------ - - - - - - - - - - - - - - - - - - - - - - -
Here’s the caption. It may span multiple lines.
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
-

Multiline table without caption:

- ------ - - - - - - - - - - - - - - - - - - - - - - -
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
-

Table without column headers:

- - - - - - - - - - - - - - - - - - - - - -
12121212
123123123123
1111
-

Multiline table without column headers:

- ------ - - - - - - - - - - - - - - -
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
diff --git a/tests/tables.html4 b/tests/tables.html4 new file mode 100644 index 000000000..5bb7a7de2 --- /dev/null +++ b/tests/tables.html4 @@ -0,0 +1,204 @@ +

Simple table with caption:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
+

Simple table without caption:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RightLeftCenterDefault
12121212
123123123123
1111
+

Simple table indented two spaces:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
+

Multiline table with caption:

+ + ++++++ + + + + + + + + + + + + + + + + + + + + + + +
Here’s the caption. It may span multiple lines.
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
+

Multiline table without caption:

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + +
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
+

Table without column headers:

+ + + + + + + + + + + + + + + + + + + + + +
12121212
123123123123
1111
+

Multiline table without column headers:

+ ++++++ + + + + + + + + + + + + + + +
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
diff --git a/tests/tables.html5 b/tests/tables.html5 new file mode 100644 index 000000000..17a82110f --- /dev/null +++ b/tests/tables.html5 @@ -0,0 +1,204 @@ +

Simple table with caption:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
+

Simple table without caption:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
RightLeftCenterDefault
12121212
123123123123
1111
+

Simple table indented two spaces:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
+

Multiline table with caption:

+ + ++++++ + + + + + + + + + + + + + + + + + + + + + + +
Here’s the caption. It may span multiple lines.
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
+

Multiline table without caption:

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + +
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
+

Table without column headers:

+ + + + + + + + + + + + + + + + + + + + + +
12121212
123123123123
1111
+

Multiline table without column headers:

+ ++++++ + + + + + + + + + + + + + + +
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here’s another one. Note the blank line between rows.
diff --git a/tests/writer.html b/tests/writer.html deleted file mode 100644 index 3b63f4e16..000000000 --- a/tests/writer.html +++ /dev/null @@ -1,546 +0,0 @@ - - - - - - - - - - Pandoc Test Suite - - - - -

This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.

-
-

Headers

- -

Level 3 with emphasis

-

Level 4

-
Level 5
-

Level 1

-

Level 2 with emphasis

-

Level 3

-

with no blank line

-

Level 2

-

with no blank line

-
-

Paragraphs

-

Here’s a regular paragraph.

-

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.

-

Here’s one with a bullet. * criminey.

-

There should be a hard line break
-here.

-
-

Block Quotes

-

E-mail style:

-
-

This is a block quote. It is pretty short.

-
-
-

Code in a block quote:

-
sub status {
-    print "working";
-}
-

A list:

-
    -
  1. item one
  2. -
  3. item two
  4. -
-

Nested block quotes:

-
-

nested

-
-
-

nested

-
-
-

This should not be a block quote: 2 > 1.

-

And a following paragraph.

-
-

Code Blocks

-

Code:

-
---- (should be four hyphens)
-
-sub status {
-    print "working";
-}
-
-this code block is indented by one tab
-

And:

-
    this code block is indented by two tabs
-
-These should not be escaped:  \$ \\ \> \[ \{
-
-

Lists

-

Unordered

-

Asterisks tight:

-
    -
  • asterisk 1
  • -
  • asterisk 2
  • -
  • asterisk 3
  • -
-

Asterisks loose:

-
    -
  • asterisk 1

  • -
  • asterisk 2

  • -
  • asterisk 3

  • -
-

Pluses tight:

-
    -
  • Plus 1
  • -
  • Plus 2
  • -
  • Plus 3
  • -
-

Pluses loose:

-
    -
  • Plus 1

  • -
  • Plus 2

  • -
  • Plus 3

  • -
-

Minuses tight:

-
    -
  • Minus 1
  • -
  • Minus 2
  • -
  • Minus 3
  • -
-

Minuses loose:

-
    -
  • Minus 1

  • -
  • Minus 2

  • -
  • Minus 3

  • -
-

Ordered

-

Tight:

-
    -
  1. First
  2. -
  3. Second
  4. -
  5. Third
  6. -
-

and:

-
    -
  1. One
  2. -
  3. Two
  4. -
  5. Three
  6. -
-

Loose using tabs:

-
    -
  1. First

  2. -
  3. Second

  4. -
  5. Third

  6. -
-

and using spaces:

-
    -
  1. One

  2. -
  3. Two

  4. -
  5. Three

  6. -
-

Multiple paragraphs:

-
    -
  1. Item 1, graf one.

    -

    Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.

  2. -
  3. Item 2.

  4. -
  5. Item 3.

  6. -
-

Nested

-
    -
  • Tab -
      -
    • Tab -
        -
      • Tab
      • -
    • -
  • -
-

Here’s another:

-
    -
  1. First
  2. -
  3. Second: -
      -
    • Fee
    • -
    • Fie
    • -
    • Foe
    • -
  4. -
  5. Third
  6. -
-

Same thing but with paragraphs:

-
    -
  1. First

  2. -
  3. Second:

    -
      -
    • Fee
    • -
    • Fie
    • -
    • Foe
    • -
  4. -
  5. Third

  6. -
-

Tabs and spaces

-
    -
  • this is a list item indented with tabs

  • -
  • this is a list item indented with spaces

    -
      -
    • this is an example list item indented with tabs

    • -
    • this is an example list item indented with spaces

    • -
  • -
-

Fancy list markers

-
    -
  1. begins with 2
  2. -
  3. and now 3

    -

    with a continuation

    -
      -
    1. sublist with roman numerals, starting with 4
    2. -
    3. more items -
        -
      1. a subsublist
      2. -
      3. a subsublist
      4. -
    4. -
  4. -
-

Nesting:

-
    -
  1. Upper Alpha -
      -
    1. Upper Roman. -
        -
      1. Decimal start with 6 -
          -
        1. Lower alpha with paren
        2. -
      2. -
    2. -
  2. -
-

Autonumbering:

-
    -
  1. Autonumber.
  2. -
  3. More. -
      -
    1. Nested.
    2. -
  4. -
-

Should not be a list item:

-

M.A. 2007

-

B. Williams

-
-

Definition Lists

-

Tight using spaces:

-
-
apple
-
red fruit -
-
orange
-
orange fruit -
-
banana
-
yellow fruit -
-
-

Tight using tabs:

-
-
apple
-
red fruit -
-
orange
-
orange fruit -
-
banana
-
yellow fruit -
-
-

Loose:

-
-
apple
-

red fruit

-
-
orange
-

orange fruit

-
-
banana
-

yellow fruit

-
-
-

Multiple blocks with italics:

-
-
apple
-

red fruit

-

contains seeds, crisp, pleasant to taste

-
-
orange
-

orange fruit

-
{ orange code block }
-
-

orange block quote

-
-
-
-

Multiple definitions, tight:

-
-
apple
-
red fruit -
-
computer -
-
orange
-
orange fruit -
-
bank -
-
-

Multiple definitions, loose:

-
-
apple
-

red fruit

-
-

computer

-
-
orange
-

orange fruit

-
-

bank

-
-
-

Blank line after term, indented marker, alternate markers:

-
-
apple
-

red fruit

-
-

computer

-
-
orange
-

orange fruit

-
    -
  1. sublist
  2. -
  3. sublist
  4. -
-
-
-

HTML Blocks

-

Simple block on one line:

-
-foo -
-

And nested without indentation:

-
-
-
-

foo

-
-
-
-bar -
-
-

Interpreted markdown in a table:

- - - - - -
-This is emphasized - -And this is strong -
- -

Here’s a simple block:

-
-

foo

-
-

This should be a code block, though:

-
<div>
-    foo
-</div>
-

As should this:

-
<div>foo</div>
-

Now, nested:

-
-
-
-foo -
-
-
-

This should just be an HTML comment:

- -

Multiline:

- - -

Code block:

-
<!-- Comment -->
-

Just plain comment, with trailing spaces on the line:

- -

Code:

-
<hr />
-

Hr’s:

-
-
-
-
-
-
-
-
-
-
-

Inline Markup

-

This is emphasized, and so is this.

-

This is strong, and so is this.

-

An emphasized link.

-

This is strong and em.

-

So is this word.

-

This is strong and em.

-

So is this word.

-

This is code: >, $, \, \$, <html>.

-

This is strikeout.

-

Superscripts: abcd ahello ahello there.

-

Subscripts: H2O, H23O, Hmany of themO.

-

These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d.

-
-

Smart quotes, ellipses, dashes

-

“Hello,” said the spider. “‘Shelob’ is my name.”

-

‘A’, ‘B’, and ‘C’ are letters.

-

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’

-

‘He said, “I want to go.”’ Were you alive in the 70’s?

-

Here is some quoted ‘code’ and a “quoted link”.

-

Some dashes: one—two — three—four — five.

-

Dashes between numbers: 5–7, 255–66, 1987–1999.

-

Ellipses…and…and….

-
-

LaTeX

-
    -
  • -
  • 2 + 2 = 4
  • -
  • x ∈ y
  • -
  • α ∧ ω
  • -
  • 223
  • -
  • p-Tree
  • -
  • Here’s some display math:
    $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$
  • -
  • Here’s one that has a line break in it: α + ω × x2.
  • -
-

These shouldn’t be math:

-
    -
  • To get the famous equation, write $e = mc^2$.
  • -
  • $22,000 is a lot of money. So is $34,000. (It worked if “lot” is emphasized.)
  • -
  • Shoes ($20) and socks ($5).
  • -
  • Escaped $: $73 this should be emphasized 23$.
  • -
-

Here’s a LaTeX table:

- -
-

Special Characters

-

Here is some unicode:

-
    -
  • I hat: Î
  • -
  • o umlaut: ö
  • -
  • section: §
  • -
  • set membership: ∈
  • -
  • copyright: ©
  • -
-

AT&T has an ampersand in their name.

-

AT&T is another way to write it.

-

This & that.

-

4 < 5.

-

6 > 5.

-

Backslash: \

-

Backtick: `

-

Asterisk: *

-

Underscore: _

-

Left brace: {

-

Right brace: }

-

Left bracket: [

-

Right bracket: ]

-

Left paren: (

-

Right paren: )

-

Greater-than: >

-

Hash: #

-

Period: .

-

Bang: !

-

Plus: +

-

Minus: -

-
-

Links

-

Explicit

-

Just a URL.

-

URL and title.

-

URL and title.

-

URL and title.

-

URL and title

-

URL and title

-

with_underscore

-

Email link

-

Empty.

-

Reference

-

Foo bar.

-

Foo bar.

-

Foo bar.

-

With embedded [brackets].

-

b by itself should be a link.

-

Indented once.

-

Indented twice.

-

Indented thrice.

-

This should [not][] be a link.

-
[not]: /url
-

Foo bar.

-

Foo biz.

-

With ampersands

-

Here’s a link with an ampersand in the URL.

-

Here’s a link with an amersand in the link text: AT&T.

-

Here’s an inline link.

-

Here’s an inline link in pointy braces.

- -

With an ampersand: http://example.com/?foo=1&bar=2

- -

An e-mail address: nobody@nowhere.net

-
-

Blockquoted: http://example.com/

-
-

Auto-links should not occur here: <http://example.com/>

-
or here: <http://example.com/>
-
-

Images

-

From “Voyage dans la Lune” by Georges Melies (1902):

-
-lalune -

lalune

-
-

Here is a movie movie icon.

-
-

Footnotes

-

Here is a footnote reference,1 and another.2 This should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.3

-
-

Notes can go in quotes.4

-
-
    -
  1. And in list items.5
  2. -
-

This paragraph should not be part of the note, as it is not indented.

-
-
-
    -
  1. Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.

  2. -
  3. Here’s the long note. This one contains multiple blocks.

    -

    Subsequent blocks are indented to show that they belong to the footnote (as with list items).

    -
      { <code> }
    -

    If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.

  4. -
  5. This is easier to type. Inline notes may contain links and ] verbatim characters, as well as [bracketed text].

  6. -
  7. In quote.

  8. -
  9. In list.

  10. -
-
- - diff --git a/tests/writer.html4 b/tests/writer.html4 new file mode 100644 index 000000000..3b63f4e16 --- /dev/null +++ b/tests/writer.html4 @@ -0,0 +1,546 @@ + + + + + + + + + + Pandoc Test Suite + + + + +

This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.

+
+

Headers

+ +

Level 3 with emphasis

+

Level 4

+
Level 5
+

Level 1

+

Level 2 with emphasis

+

Level 3

+

with no blank line

+

Level 2

+

with no blank line

+
+

Paragraphs

+

Here’s a regular paragraph.

+

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.

+

Here’s one with a bullet. * criminey.

+

There should be a hard line break
+here.

+
+

Block Quotes

+

E-mail style:

+
+

This is a block quote. It is pretty short.

+
+
+

Code in a block quote:

+
sub status {
+    print "working";
+}
+

A list:

+
    +
  1. item one
  2. +
  3. item two
  4. +
+

Nested block quotes:

+
+

nested

+
+
+

nested

+
+
+

This should not be a block quote: 2 > 1.

+

And a following paragraph.

+
+

Code Blocks

+

Code:

+
---- (should be four hyphens)
+
+sub status {
+    print "working";
+}
+
+this code block is indented by one tab
+

And:

+
    this code block is indented by two tabs
+
+These should not be escaped:  \$ \\ \> \[ \{
+
+

Lists

+

Unordered

+

Asterisks tight:

+
    +
  • asterisk 1
  • +
  • asterisk 2
  • +
  • asterisk 3
  • +
+

Asterisks loose:

+
    +
  • asterisk 1

  • +
  • asterisk 2

  • +
  • asterisk 3

  • +
+

Pluses tight:

+
    +
  • Plus 1
  • +
  • Plus 2
  • +
  • Plus 3
  • +
+

Pluses loose:

+
    +
  • Plus 1

  • +
  • Plus 2

  • +
  • Plus 3

  • +
+

Minuses tight:

+
    +
  • Minus 1
  • +
  • Minus 2
  • +
  • Minus 3
  • +
+

Minuses loose:

+
    +
  • Minus 1

  • +
  • Minus 2

  • +
  • Minus 3

  • +
+

Ordered

+

Tight:

+
    +
  1. First
  2. +
  3. Second
  4. +
  5. Third
  6. +
+

and:

+
    +
  1. One
  2. +
  3. Two
  4. +
  5. Three
  6. +
+

Loose using tabs:

+
    +
  1. First

  2. +
  3. Second

  4. +
  5. Third

  6. +
+

and using spaces:

+
    +
  1. One

  2. +
  3. Two

  4. +
  5. Three

  6. +
+

Multiple paragraphs:

+
    +
  1. Item 1, graf one.

    +

    Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.

  2. +
  3. Item 2.

  4. +
  5. Item 3.

  6. +
+

Nested

+
    +
  • Tab +
      +
    • Tab +
        +
      • Tab
      • +
    • +
  • +
+

Here’s another:

+
    +
  1. First
  2. +
  3. Second: +
      +
    • Fee
    • +
    • Fie
    • +
    • Foe
    • +
  4. +
  5. Third
  6. +
+

Same thing but with paragraphs:

+
    +
  1. First

  2. +
  3. Second:

    +
      +
    • Fee
    • +
    • Fie
    • +
    • Foe
    • +
  4. +
  5. Third

  6. +
+

Tabs and spaces

+
    +
  • this is a list item indented with tabs

  • +
  • this is a list item indented with spaces

    +
      +
    • this is an example list item indented with tabs

    • +
    • this is an example list item indented with spaces

    • +
  • +
+

Fancy list markers

+
    +
  1. begins with 2
  2. +
  3. and now 3

    +

    with a continuation

    +
      +
    1. sublist with roman numerals, starting with 4
    2. +
    3. more items +
        +
      1. a subsublist
      2. +
      3. a subsublist
      4. +
    4. +
  4. +
+

Nesting:

+
    +
  1. Upper Alpha +
      +
    1. Upper Roman. +
        +
      1. Decimal start with 6 +
          +
        1. Lower alpha with paren
        2. +
      2. +
    2. +
  2. +
+

Autonumbering:

+
    +
  1. Autonumber.
  2. +
  3. More. +
      +
    1. Nested.
    2. +
  4. +
+

Should not be a list item:

+

M.A. 2007

+

B. Williams

+
+

Definition Lists

+

Tight using spaces:

+
+
apple
+
red fruit +
+
orange
+
orange fruit +
+
banana
+
yellow fruit +
+
+

Tight using tabs:

+
+
apple
+
red fruit +
+
orange
+
orange fruit +
+
banana
+
yellow fruit +
+
+

Loose:

+
+
apple
+

red fruit

+
+
orange
+

orange fruit

+
+
banana
+

yellow fruit

+
+
+

Multiple blocks with italics:

+
+
apple
+

red fruit

+

contains seeds, crisp, pleasant to taste

+
+
orange
+

orange fruit

+
{ orange code block }
+
+

orange block quote

+
+
+
+

Multiple definitions, tight:

+
+
apple
+
red fruit +
+
computer +
+
orange
+
orange fruit +
+
bank +
+
+

Multiple definitions, loose:

+
+
apple
+

red fruit

+
+

computer

+
+
orange
+

orange fruit

+
+

bank

+
+
+

Blank line after term, indented marker, alternate markers:

+
+
apple
+

red fruit

+
+

computer

+
+
orange
+

orange fruit

+
    +
  1. sublist
  2. +
  3. sublist
  4. +
+
+
+

HTML Blocks

+

Simple block on one line:

+
+foo +
+

And nested without indentation:

+
+
+
+

foo

+
+
+
+bar +
+
+

Interpreted markdown in a table:

+ + + + + +
+This is emphasized + +And this is strong +
+ +

Here’s a simple block:

+
+

foo

+
+

This should be a code block, though:

+
<div>
+    foo
+</div>
+

As should this:

+
<div>foo</div>
+

Now, nested:

+
+
+
+foo +
+
+
+

This should just be an HTML comment:

+ +

Multiline:

+ + +

Code block:

+
<!-- Comment -->
+

Just plain comment, with trailing spaces on the line:

+ +

Code:

+
<hr />
+

Hr’s:

+
+
+
+
+
+
+
+
+
+
+

Inline Markup

+

This is emphasized, and so is this.

+

This is strong, and so is this.

+

An emphasized link.

+

This is strong and em.

+

So is this word.

+

This is strong and em.

+

So is this word.

+

This is code: >, $, \, \$, <html>.

+

This is strikeout.

+

Superscripts: abcd ahello ahello there.

+

Subscripts: H2O, H23O, Hmany of themO.

+

These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d.

+
+

Smart quotes, ellipses, dashes

+

“Hello,” said the spider. “‘Shelob’ is my name.”

+

‘A’, ‘B’, and ‘C’ are letters.

+

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’

+

‘He said, “I want to go.”’ Were you alive in the 70’s?

+

Here is some quoted ‘code’ and a “quoted link”.

+

Some dashes: one—two — three—four — five.

+

Dashes between numbers: 5–7, 255–66, 1987–1999.

+

Ellipses…and…and….

+
+

LaTeX

+
    +
  • +
  • 2 + 2 = 4
  • +
  • x ∈ y
  • +
  • α ∧ ω
  • +
  • 223
  • +
  • p-Tree
  • +
  • Here’s some display math:
    $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$
  • +
  • Here’s one that has a line break in it: α + ω × x2.
  • +
+

These shouldn’t be math:

+
    +
  • To get the famous equation, write $e = mc^2$.
  • +
  • $22,000 is a lot of money. So is $34,000. (It worked if “lot” is emphasized.)
  • +
  • Shoes ($20) and socks ($5).
  • +
  • Escaped $: $73 this should be emphasized 23$.
  • +
+

Here’s a LaTeX table:

+ +
+

Special Characters

+

Here is some unicode:

+
    +
  • I hat: Î
  • +
  • o umlaut: ö
  • +
  • section: §
  • +
  • set membership: ∈
  • +
  • copyright: ©
  • +
+

AT&T has an ampersand in their name.

+

AT&T is another way to write it.

+

This & that.

+

4 < 5.

+

6 > 5.

+

Backslash: \

+

Backtick: `

+

Asterisk: *

+

Underscore: _

+

Left brace: {

+

Right brace: }

+

Left bracket: [

+

Right bracket: ]

+

Left paren: (

+

Right paren: )

+

Greater-than: >

+

Hash: #

+

Period: .

+

Bang: !

+

Plus: +

+

Minus: -

+
+

Links

+

Explicit

+

Just a URL.

+

URL and title.

+

URL and title.

+

URL and title.

+

URL and title

+

URL and title

+

with_underscore

+

Email link

+

Empty.

+

Reference

+

Foo bar.

+

Foo bar.

+

Foo bar.

+

With embedded [brackets].

+

b by itself should be a link.

+

Indented once.

+

Indented twice.

+

Indented thrice.

+

This should [not][] be a link.

+
[not]: /url
+

Foo bar.

+

Foo biz.

+

With ampersands

+

Here’s a link with an ampersand in the URL.

+

Here’s a link with an amersand in the link text: AT&T.

+

Here’s an inline link.

+

Here’s an inline link in pointy braces.

+ +

With an ampersand: http://example.com/?foo=1&bar=2

+ +

An e-mail address: nobody@nowhere.net

+
+

Blockquoted: http://example.com/

+
+

Auto-links should not occur here: <http://example.com/>

+
or here: <http://example.com/>
+
+

Images

+

From “Voyage dans la Lune” by Georges Melies (1902):

+
+lalune +

lalune

+
+

Here is a movie movie icon.

+
+

Footnotes

+

Here is a footnote reference,1 and another.2 This should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.3

+
+

Notes can go in quotes.4

+
+
    +
  1. And in list items.5
  2. +
+

This paragraph should not be part of the note, as it is not indented.

+
+
+
    +
  1. Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.

  2. +
  3. Here’s the long note. This one contains multiple blocks.

    +

    Subsequent blocks are indented to show that they belong to the footnote (as with list items).

    +
      { <code> }
    +

    If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.

  4. +
  5. This is easier to type. Inline notes may contain links and ] verbatim characters, as well as [bracketed text].

  6. +
  7. In quote.

  8. +
  9. In list.

  10. +
+
+ + diff --git a/tests/writer.html5 b/tests/writer.html5 new file mode 100644 index 000000000..8e0dff764 --- /dev/null +++ b/tests/writer.html5 @@ -0,0 +1,548 @@ + + + + + + + + + + Pandoc Test Suite + + + + +
+

Pandoc Test Suite

+

John MacFarlane

+

Anonymous

+

July 17, 2006

+
+

This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.

+
+

Headers

+ +

Level 3 with emphasis

+

Level 4

+
Level 5
+

Level 1

+

Level 2 with emphasis

+

Level 3

+

with no blank line

+

Level 2

+

with no blank line

+
+

Paragraphs

+

Here’s a regular paragraph.

+

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.

+

Here’s one with a bullet. * criminey.

+

There should be a hard line break
+here.

+
+

Block Quotes

+

E-mail style:

+
+

This is a block quote. It is pretty short.

+
+
+

Code in a block quote:

+
sub status {
+    print "working";
+}
+

A list:

+
    +
  1. item one
  2. +
  3. item two
  4. +
+

Nested block quotes:

+
+

nested

+
+
+

nested

+
+
+

This should not be a block quote: 2 > 1.

+

And a following paragraph.

+
+

Code Blocks

+

Code:

+
---- (should be four hyphens)
+
+sub status {
+    print "working";
+}
+
+this code block is indented by one tab
+

And:

+
    this code block is indented by two tabs
+
+These should not be escaped:  \$ \\ \> \[ \{
+
+

Lists

+

Unordered

+

Asterisks tight:

+
    +
  • asterisk 1
  • +
  • asterisk 2
  • +
  • asterisk 3
  • +
+

Asterisks loose:

+
    +
  • asterisk 1

  • +
  • asterisk 2

  • +
  • asterisk 3

  • +
+

Pluses tight:

+
    +
  • Plus 1
  • +
  • Plus 2
  • +
  • Plus 3
  • +
+

Pluses loose:

+
    +
  • Plus 1

  • +
  • Plus 2

  • +
  • Plus 3

  • +
+

Minuses tight:

+
    +
  • Minus 1
  • +
  • Minus 2
  • +
  • Minus 3
  • +
+

Minuses loose:

+
    +
  • Minus 1

  • +
  • Minus 2

  • +
  • Minus 3

  • +
+

Ordered

+

Tight:

+
    +
  1. First
  2. +
  3. Second
  4. +
  5. Third
  6. +
+

and:

+
    +
  1. One
  2. +
  3. Two
  4. +
  5. Three
  6. +
+

Loose using tabs:

+
    +
  1. First

  2. +
  3. Second

  4. +
  5. Third

  6. +
+

and using spaces:

+
    +
  1. One

  2. +
  3. Two

  4. +
  5. Three

  6. +
+

Multiple paragraphs:

+
    +
  1. Item 1, graf one.

    +

    Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.

  2. +
  3. Item 2.

  4. +
  5. Item 3.

  6. +
+

Nested

+
    +
  • Tab +
      +
    • Tab +
        +
      • Tab
      • +
    • +
  • +
+

Here’s another:

+
    +
  1. First
  2. +
  3. Second: +
      +
    • Fee
    • +
    • Fie
    • +
    • Foe
    • +
  4. +
  5. Third
  6. +
+

Same thing but with paragraphs:

+
    +
  1. First

  2. +
  3. Second:

    +
      +
    • Fee
    • +
    • Fie
    • +
    • Foe
    • +
  4. +
  5. Third

  6. +
+

Tabs and spaces

+
    +
  • this is a list item indented with tabs

  • +
  • this is a list item indented with spaces

    +
      +
    • this is an example list item indented with tabs

    • +
    • this is an example list item indented with spaces

    • +
  • +
+

Fancy list markers

+
    +
  1. begins with 2
  2. +
  3. and now 3

    +

    with a continuation

    +
      +
    1. sublist with roman numerals, starting with 4
    2. +
    3. more items +
        +
      1. a subsublist
      2. +
      3. a subsublist
      4. +
    4. +
  4. +
+

Nesting:

+
    +
  1. Upper Alpha +
      +
    1. Upper Roman. +
        +
      1. Decimal start with 6 +
          +
        1. Lower alpha with paren
        2. +
      2. +
    2. +
  2. +
+

Autonumbering:

+
    +
  1. Autonumber.
  2. +
  3. More. +
      +
    1. Nested.
    2. +
  4. +
+

Should not be a list item:

+

M.A. 2007

+

B. Williams

+
+

Definition Lists

+

Tight using spaces:

+
+
apple
+
red fruit +
+
orange
+
orange fruit +
+
banana
+
yellow fruit +
+
+

Tight using tabs:

+
+
apple
+
red fruit +
+
orange
+
orange fruit +
+
banana
+
yellow fruit +
+
+

Loose:

+
+
apple
+

red fruit

+
+
orange
+

orange fruit

+
+
banana
+

yellow fruit

+
+
+

Multiple blocks with italics:

+
+
apple
+

red fruit

+

contains seeds, crisp, pleasant to taste

+
+
orange
+

orange fruit

+
{ orange code block }
+
+

orange block quote

+
+
+
+

Multiple definitions, tight:

+
+
apple
+
red fruit +
+
computer +
+
orange
+
orange fruit +
+
bank +
+
+

Multiple definitions, loose:

+
+
apple
+

red fruit

+
+

computer

+
+
orange
+

orange fruit

+
+

bank

+
+
+

Blank line after term, indented marker, alternate markers:

+
+
apple
+

red fruit

+
+

computer

+
+
orange
+

orange fruit

+
    +
  1. sublist
  2. +
  3. sublist
  4. +
+
+
+

HTML Blocks

+

Simple block on one line:

+
+foo +
+

And nested without indentation:

+
+
+
+

foo

+
+
+
+bar +
+
+

Interpreted markdown in a table:

+ + + + + +
+This is emphasized + +And this is strong +
+ +

Here’s a simple block:

+
+

foo

+
+

This should be a code block, though:

+
<div>
+    foo
+</div>
+

As should this:

+
<div>foo</div>
+

Now, nested:

+
+
+
+foo +
+
+
+

This should just be an HTML comment:

+ +

Multiline:

+ + +

Code block:

+
<!-- Comment -->
+

Just plain comment, with trailing spaces on the line:

+ +

Code:

+
<hr />
+

Hr’s:

+
+
+
+
+
+
+
+
+
+
+

Inline Markup

+

This is emphasized, and so is this.

+

This is strong, and so is this.

+

An emphasized link.

+

This is strong and em.

+

So is this word.

+

This is strong and em.

+

So is this word.

+

This is code: >, $, \, \$, <html>.

+

This is strikeout.

+

Superscripts: abcd ahello ahello there.

+

Subscripts: H2O, H23O, Hmany of themO.

+

These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d.

+
+

Smart quotes, ellipses, dashes

+

“Hello,” said the spider. “‘Shelob’ is my name.”

+

‘A’, ‘B’, and ‘C’ are letters.

+

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’

+

‘He said, “I want to go.”’ Were you alive in the 70’s?

+

Here is some quoted ‘code’ and a “quoted link”.

+

Some dashes: one—two — three—four — five.

+

Dashes between numbers: 5–7, 255–66, 1987–1999.

+

Ellipses…and…and….

+
+

LaTeX

+
    +
  • +
  • 2 + 2 = 4
  • +
  • x ∈ y
  • +
  • α ∧ ω
  • +
  • 223
  • +
  • p-Tree
  • +
  • Here’s some display math:
    $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$
  • +
  • Here’s one that has a line break in it: α + ω × x2.
  • +
+

These shouldn’t be math:

+
    +
  • To get the famous equation, write $e = mc^2$.
  • +
  • $22,000 is a lot of money. So is $34,000. (It worked if “lot” is emphasized.)
  • +
  • Shoes ($20) and socks ($5).
  • +
  • Escaped $: $73 this should be emphasized 23$.
  • +
+

Here’s a LaTeX table:

+ +
+

Special Characters

+

Here is some unicode:

+
    +
  • I hat: Î
  • +
  • o umlaut: ö
  • +
  • section: §
  • +
  • set membership: ∈
  • +
  • copyright: ©
  • +
+

AT&T has an ampersand in their name.

+

AT&T is another way to write it.

+

This & that.

+

4 < 5.

+

6 > 5.

+

Backslash: \

+

Backtick: `

+

Asterisk: *

+

Underscore: _

+

Left brace: {

+

Right brace: }

+

Left bracket: [

+

Right bracket: ]

+

Left paren: (

+

Right paren: )

+

Greater-than: >

+

Hash: #

+

Period: .

+

Bang: !

+

Plus: +

+

Minus: -

+
+

Links

+

Explicit

+

Just a URL.

+

URL and title.

+

URL and title.

+

URL and title.

+

URL and title

+

URL and title

+

with_underscore

+

Email link

+

Empty.

+

Reference

+

Foo bar.

+

Foo bar.

+

Foo bar.

+

With embedded [brackets].

+

b by itself should be a link.

+

Indented once.

+

Indented twice.

+

Indented thrice.

+

This should [not][] be a link.

+
[not]: /url
+

Foo bar.

+

Foo biz.

+

With ampersands

+

Here’s a link with an ampersand in the URL.

+

Here’s a link with an amersand in the link text: AT&T.

+

Here’s an inline link.

+

Here’s an inline link in pointy braces.

+ +

With an ampersand: http://example.com/?foo=1&bar=2

+ +

An e-mail address: nobody@nowhere.net

+
+

Blockquoted: http://example.com/

+
+

Auto-links should not occur here: <http://example.com/>

+
or here: <http://example.com/>
+
+

Images

+

From “Voyage dans la Lune” by Georges Melies (1902):

+
+lalune
lalune
+
+

Here is a movie movie icon.

+
+

Footnotes

+

Here is a footnote reference,1 and another.2 This should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.3

+
+

Notes can go in quotes.4

+
+
    +
  1. And in list items.5
  2. +
+

This paragraph should not be part of the note, as it is not indented.

+
+
+
    +
  1. Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.

  2. +
  3. Here’s the long note. This one contains multiple blocks.

    +

    Subsequent blocks are indented to show that they belong to the footnote (as with list items).

    +
      { <code> }
    +

    If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.

  4. +
  5. This is easier to type. Inline notes may contain links and ] verbatim characters, as well as [bracketed text].

  6. +
  7. In quote.

  8. +
  9. In list.

  10. +
+
+ + -- cgit v1.2.3