Demonstration of simple table syntax.
Right	Left	Center	Default
12	12	12	12
123	123	123	123
1	1	1	1

Right	Left	Center	Default
12	12	12	12
123	123	123	123
1	1	1	1

Demonstration of simple table syntax.
Right	Left	Center	Default
12	12	12	12
123	123	123	123
1	1	1	1

Here's the caption. It may span multiple lines.
Centered Header	Left Aligned	Right Aligned	Default aligned
First	row	12.0	Example of a row that spans multiple lines.
Second	row	5.0	Here's another one. Note the blank line between rows.

Centered Header	Left Aligned	Right Aligned	Default aligned
First	row	12.0	Example of a row that spans multiple lines.
Second	row	5.0	Here's another one. Note the blank line between rows.

-- cgit v1.2.3 From 0fd2176e29bd1118d314c6179455fb78bed35aea Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 17 Nov 2013 08:47:14 -0800 Subject: MediaWiki reader: Add automatic header identifiers. --- src/Text/Pandoc/Readers/MediaWiki.hs | 22 ++++++++++++-- tests/mediawiki-reader.native | 58 ++++++++++++++++++------------------ 2 files changed, 49 insertions(+), 31 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 136701bd0..1c074e3de 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} +-- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- Copyright (C) 2012 John MacFarlane @@ -51,6 +52,7 @@ import Data.List (intersperse, intercalate, isPrefixOf ) import Text.HTML.TagSoup import Data.Sequence (viewl, ViewL(..), (<|)) import qualified Data.Foldable as F +import qualified Data.Map as M import Data.Char (isDigit, isSpace) -- | Read mediawiki from an input string and return a Pandoc document. @@ -62,6 +64,8 @@ readMediaWiki opts s = , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 , mwCategoryLinks = [] + , mwHeaderMap = M.empty + , mwIdentifierList = [] } "source" (s ++ "\n") of Left err' -> error $ "\nError:\n" ++ show err' @@ -71,10 +75,23 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int , mwNextLinkNumber :: Int , mwCategoryLinks :: [Inlines] + , mwHeaderMap :: M.Map Inlines String + , mwIdentifierList :: [String] } type MWParser = Parser [Char] MWState +instance HasReaderOptions MWParser where + askReaderOption f = (f . 
mwOptions) `fmap` getState + +instance HasHeaderMap MWParser where + getHeaderMap = fmap mwHeaderMap getState + putHeaderMap hm = updateState $ \st -> st{ mwHeaderMap = hm } + +instance HasIdentifierList MWParser where + getIdentifierList = fmap mwIdentifierList getState + putIdentifierList l = updateState $ \st -> st{ mwIdentifierList = l } + -- -- auxiliary functions -- @@ -351,7 +368,8 @@ header = try $ do let lev = length eqs guard $ lev <= 6 contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=') - return $ B.header lev contents + attr <- registerHeader nullAttr contents + return $ B.headerWith attr lev contents bulletList :: MWParser Blocks bulletList = B.bulletList <$> diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index 0ab51a3aa..238413445 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -1,39 +1,39 @@ Pandoc (Meta {unMeta = fromList []}) -[Header 1 ("",[],[]) [Str "header"] -,Header 2 ("",[],[]) [Str "header",Space,Str "level",Space,Str "two"] -,Header 3 ("",[],[]) [Str "header",Space,Str "level",Space,Str "3"] -,Header 4 ("",[],[]) [Str "header",Space,Emph [Str "level"],Space,Str "four"] -,Header 5 ("",[],[]) [Str "header",Space,Str "level",Space,Str "5"] -,Header 6 ("",[],[]) [Str "header",Space,Str "level",Space,Str "6"] +[Header 1 ("header",[],[]) [Str "header"] +,Header 2 ("header-level-two",[],[]) [Str "header",Space,Str "level",Space,Str "two"] +,Header 3 ("header-level-3",[],[]) [Str "header",Space,Str "level",Space,Str "3"] +,Header 4 ("header-level-four",[],[]) [Str "header",Space,Emph [Str "level"],Space,Str "four"] +,Header 5 ("header-level-5",[],[]) [Str "header",Space,Str "level",Space,Str "5"] +,Header 6 ("header-level-6",[],[]) [Str "header",Space,Str "level",Space,Str "6"] ,Para [Str "=======",Space,Str "not",Space,Str "a",Space,Str "header",Space,Str "========"] ,Para [Code ("",[],[]) "==\160not\160a\160header\160=="] -,Header 2 ("",[],[]) [Str 
"emph",Space,Str "and",Space,Str "strong"] +,Header 2 ("emph-and-strong",[],[]) [Str "emph",Space,Str "and",Space,Str "strong"] ,Para [Emph [Str "emph"],Space,Strong [Str "strong"]] ,Para [Strong [Emph [Str "strong",Space,Str "and",Space,Str "emph"]]] ,Para [Strong [Emph [Str "emph",Space,Str "inside"],Space,Str "strong"]] ,Para [Strong [Str "strong",Space,Str "with",Space,Emph [Str "emph"]]] ,Para [Emph [Strong [Str "strong",Space,Str "inside"],Space,Str "emph"]] -,Header 2 ("",[],[]) [Str "horizontal",Space,Str "rule"] +,Header 2 ("horizontal-rule",[],[]) [Str "horizontal",Space,Str "rule"] ,Para [Str "top"] ,HorizontalRule ,Para [Str "bottom"] ,HorizontalRule -,Header 2 ("",[],[]) [Str "nowiki"] +,Header 2 ("nowiki",[],[]) [Str "nowiki"] ,Para [Str "''not",Space,Str "emph''"] -,Header 2 ("",[],[]) [Str "strikeout"] +,Header 2 ("strikeout",[],[]) [Str "strikeout"] ,Para [Strikeout [Str "This",Space,Str "is",Space,Emph [Str "struck",Space,Str "out"]]] -,Header 2 ("",[],[]) [Str "entities"] +,Header 2 ("entities",[],[]) [Str "entities"] ,Para [Str "hi",Space,Str "&",Space,Str "low"] ,Para [Str "hi",Space,Str "&",Space,Str "low"] ,Para [Str "G\246del"] ,Para [Str "\777\2730"] -,Header 2 ("",[],[]) [Str "comments"] +,Header 2 ("comments",[],[]) [Str "comments"] ,Para [Str "inline",Space,Str "comment"] ,Para [Str "between",Space,Str "blocks"] -,Header 2 ("",[],[]) [Str "linebreaks"] +,Header 2 ("linebreaks",[],[]) [Str "linebreaks"] ,Para [Str "hi",LineBreak,Str "there"] ,Para [Str "hi",LineBreak,Str "there"] -,Header 2 ("",[],[]) [Str ":",Space,Str "indents"] +,Header 2 ("indents",[],[]) [Str ":",Space,Str "indents"] ,Para [Str "hi"] ,DefinitionList [([], @@ -46,36 +46,36 @@ Pandoc (Meta {unMeta = fromList []}) [([], [[Plain [Str "there"]]])]]])] ,Para [Str "bud"] -,Header 2 ("",[],[]) [Str "p",Space,Str "tags"] +,Header 2 ("p-tags",[],[]) [Str "p",Space,Str "tags"] ,Para [Str "hi",Space,Str "there"] ,Para [Str "bud"] ,Para [Str "another"] -,Header 2 ("",[],[]) [Str 
"raw",Space,Str "html"] +,Header 2 ("raw-html",[],[]) [Str "raw",Space,Str "html"] ,Para [Str "hi",Space,RawInline (Format "html") "",Emph [Str "there"],RawInline (Format "html") "",Str "."] ,Para [RawInline (Format "html") "",Str "inserted",RawInline (Format "html") ""] ,RawBlock (Format "html") "

" ,Para [Str "hi",Space,Emph [Str "there"]] ,RawBlock (Format "html") "

" -,Header 2 ("",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"] +,Header 2 ("sup-sub-del",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"] ,Para [Str "H",Subscript [Str "2"],Str "O",Space,Str "base",Superscript [Emph [Str "exponent"]],Space,Strikeout [Str "hello"]] -,Header 2 ("",[],[]) [Str "inline",Space,Str "code"] +,Header 2 ("inline-code",[],[]) [Str "inline",Space,Str "code"] ,Para [Code ("",[],[]) "*\8594*",Space,Code ("",[],[]) "typed",Space,Code ("",["haskell"],[]) ">>="] -,Header 2 ("",[],[]) [Str "code",Space,Str "blocks"] +,Header 2 ("code-blocks",[],[]) [Str "code",Space,Str "blocks"] ,CodeBlock ("",[],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']" ,CodeBlock ("",["haskell"],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']" ,CodeBlock ("",["ruby","numberLines"],[("startFrom","100")]) "widgets.each do |w|\n print w.price\nend" -,Header 2 ("",[],[]) [Str "block",Space,Str "quotes"] +,Header 2 ("block-quotes",[],[]) [Str "block",Space,Str "quotes"] ,Para [Str "Regular",Space,Str "paragraph"] ,BlockQuote [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote."] ,Para [Str "With",Space,Str "two",Space,Str "paragraphs."]] ,Para [Str "Nother",Space,Str "paragraph."] -,Header 2 ("",[],[]) [Str "external",Space,Str "links"] +,Header 2 ("external-links",[],[]) [Str "external",Space,Str "links"] ,Para [Link [Emph [Str "Google"],Space,Str "search",Space,Str "engine"] ("http://google.com","")] ,Para [Link [Str "http://johnmacfarlane.net/pandoc/"] ("http://johnmacfarlane.net/pandoc/","")] ,Para [Link [Str "1"] ("http://google.com",""),Space,Link [Str "2"] ("http://yahoo.com","")] ,Para [Link [Str "email",Space,Str "me"] ("mailto:info@example.org","")] -,Header 2 ("",[],[]) [Str "internal",Space,Str "links"] +,Header 2 ("internal-links",[],[]) [Str "internal",Space,Str "links"] ,Para [Link [Str "Help"] ("Help","wikilink")] ,Para [Link [Str "the",Space,Str "help",Space,Str "page"] ("Help","wikilink")] ,Para [Link [Str 
"Helpers"] ("Help","wikilink")] @@ -83,12 +83,12 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Link [Str "Contents"] ("Help:Contents","wikilink")] ,Para [Link [Str "#My",Space,Str "anchor"] ("#My_anchor","wikilink")] ,Para [Link [Str "and",Space,Str "text"] ("Page#with_anchor","wikilink")] -,Header 2 ("",[],[]) [Str "images"] +,Header 2 ("images",[],[]) [Str "images"] ,Para [Image [Str "caption"] ("example.jpg","fig:caption")] ,Para [Image [Str "the",Space,Emph [Str "caption"],Space,Str "with",Space,Link [Str "external",Space,Str "link"] ("http://google.com","")] ("example.jpg","fig:the caption with external link")] ,Para [Image [Str "caption"] ("example.jpg","fig:caption")] ,Para [Image [Str "example.jpg"] ("example.jpg","fig:example.jpg")] -,Header 2 ("",[],[]) [Str "lists"] +,Header 2 ("lists",[],[]) [Str "lists"] ,BulletList [[Plain [Str "Start",Space,Str "each",Space,Str "line"]] ,[Plain [Str "with",Space,Str "an",Space,Str "asterisk",Space,Str "(*)."] @@ -161,10 +161,10 @@ Pandoc (Meta {unMeta = fromList []}) [[Plain [Str "Amsterdam"]] ,[Plain [Str "Rotterdam"]] ,[Plain [Str "The",Space,Str "Hague"]]] -,Header 2 ("",[],[]) [Str "math"] +,Header 2 ("math",[],[]) [Str "math"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."] ,Para [Str "With",Space,Str "spaces:",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."] -,Header 2 ("",[],[]) [Str "preformatted",Space,Str "blocks"] +,Header 2 ("preformatted-blocks",[],[]) [Str "preformatted",Space,Str "blocks"] ,Para [Code ("",[],[]) "Start\160each\160line\160with\160a\160space.",LineBreak,Code ("",[],[]) "Text\160is\160",Strong [Code ("",[],[]) "preformatted"],Code ("",[],[]) "\160and",LineBreak,Emph [Code ("",[],[]) "markups"],Code ("",[],[]) "\160",Strong [Emph [Code ("",[],[]) "can"]],Code ("",[],[]) "\160be\160done."] ,Para [Code ("",[],[]) "\160hell\160\160\160\160\160\160yeah"] ,Para [Code ("",[],[]) 
"Start\160with\160a\160space\160in\160the\160first\160column,",LineBreak,Code ("",[],[]) "(before\160the\160).",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "Then\160your\160block\160format\160will\160be",LineBreak,Code ("",[],[]) "\160\160\160\160maintained.",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "This\160is\160good\160for\160copying\160in\160code\160blocks:",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "def\160function():",LineBreak,Code ("",[],[]) "\160\160\160\160\"\"\"documentation\160string\"\"\"",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\160\160\160\160if\160True:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160True",LineBreak,Code ("",[],[]) "\160\160\160\160else:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160False"] @@ -174,12 +174,12 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "Don't",Space,Str "need"] ,Para [Code ("",[],[]) "a\160blank\160line"] ,Para [Str "around",Space,Str "a",Space,Str "preformatted",Space,Str "block."] -,Header 2 ("",[],[]) [Str "templates"] +,Header 2 ("templates",[],[]) [Str "templates"] ,RawBlock (Format "mediawiki") "{{Welcome}}" ,RawBlock (Format "mediawiki") "{{Foo:Bar}}" ,RawBlock (Format "mediawiki") "{{Thankyou|all your effort|Me}}" ,Para [Str "Written",Space,RawInline (Format "mediawiki") "{{{date}}}",Space,Str "by",Space,RawInline (Format "mediawiki") "{{{name}}}",Str "."] -,Header 2 ("",[],[]) [Str "tables"] +,Header 2 ("tables",[],[]) [Str "tables"] ,Table [] [AlignDefault,AlignDefault] [0.0,0.0] [[] ,[]] @@ -245,6 +245,6 @@ Pandoc (Meta {unMeta = fromList []}) [[]] [[[Para [Str "Orange"]]]] ,Para [Str "Paragraph",Space,Str "after",Space,Str "the",Space,Str "table."] -,Header 2 ("",[],[]) [Str "notes"] +,Header 2 ("notes",[],[]) [Str "notes"] ,Para [Str "My",Space,Str "note!",Note [Plain [Str "This."]]] ,Para [Str "URL",Space,Str "note.",Note [Plain [Link [Str "http://docs.python.org/library/functions.html#range"] 
("http://docs.python.org/library/functions.html#range","")]]]] -- cgit v1.2.3 From a3eba6ee848497f98f72aef5dad112c49bdd4fec Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 18 Nov 2013 20:28:27 -0800 Subject: LaTeX reader: Parse contents of curly quotes or matched `"` as quotes. --- src/Text/Pandoc/Readers/LaTeX.hs | 24 +++++++++++++++++------- tests/latex-reader.native | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 689b12c8e..75e29ebb9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -163,13 +163,23 @@ mathChars = concat <$> <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar) ) +quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines +quoted' f starter ender = do + startchs <- starter + try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs + double_quote :: LP Inlines -double_quote = (doubleQuoted . mconcat) <$> - (try $ string "``" *> manyTill inline (try $ string "''")) +double_quote = + ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") + <|> quoted' doubleQuoted (string "“") (void $ char '”') + <|> quoted' doubleQuoted (string "\"") (void $ char '"') + ) single_quote :: LP Inlines -single_quote = (singleQuoted . mconcat) <$> - (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter)) +single_quote = + ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) + <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) + ) inline :: LP Inlines inline = (mempty <$ comment) @@ -181,10 +191,10 @@ inline = (mempty <$ comment) ((char '-') *> option (str "–") (str "—" <$ char '-'))) <|> double_quote <|> single_quote - <|> (str "“" <$ try (string "``")) -- nb. {``} won't be caught by double_quote <|> (str "”" <$ try (string "''")) - <|> (str "‘" <$ char '`') -- nb. 
{`} won't be caught by single_quote + <|> (str "”" <$ char '”') <|> (str "’" <$ char '\'') + <|> (str "’" <$ char '’') <|> (str "\160" <$ char '~') <|> (mathDisplay $ string "$$" *> mathChars <* string "$$") <|> (mathInline $ char '$' *> mathChars <* char '$') @@ -755,7 +765,7 @@ inlineText :: LP Inlines inlineText = str <$> many1 inlineChar inlineChar :: LP Char -inlineChar = noneOf "\\$%^_&~#{}^'`-[] \t\n" +inlineChar = noneOf "\\$%^_&~#{}^'`\"‘’“”-[] \t\n" environment :: LP Blocks environment = do diff --git a/tests/latex-reader.native b/tests/latex-reader.native index 15b667b2f..fcc3153cf 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -302,7 +302,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "4",Space,Str "<",Space,Str "5."] ,Para [Str "6",Space,Str ">",Space,Str "5."] ,Para [Str "Backslash:",Space,Str "\\"] -,Para [Str "Backtick:",Space,Str "\8216"] +,Para [Str "Backtick:",Space,Str "`"] ,Para [Str "Asterisk:",Space,Str "*"] ,Para [Str "Underscore:",Space,Str "_"] ,Para [Str "Left",Space,Str "brace:",Space,Str "{"] -- cgit v1.2.3 From cf149fcf38d98b1bee79ecd9056fa0f46264e7ce Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 22 Nov 2013 19:41:08 -0800 Subject: Fixed bug with intraword emphasis. Closes #1066. --- src/Text/Pandoc/Readers/Markdown.hs | 3 ++- tests/Tests/Readers/Markdown.hs | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ea49d8c1d..33d1a9620 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1455,6 +1455,7 @@ enclosure c = do -- Parse inlines til you hit one c or a sequence of two cs. -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. +-- Otherwise, emit ccc then the results. 
three :: Char -> MarkdownParser (F Inlines) three c = do contents <- mconcat <$> many (notFollowedBy (char c) >> inline) @@ -1479,7 +1480,7 @@ one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (char c) >> inline) <|> try (string [c,c] >> notFollowedBy (char c) >> - two c prefix') ) + two c mempty) ) (char c >> return (B.emph <$> (prefix' <> contents))) <|> return (return (B.str [c]) <> (prefix' <> contents)) diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index ccca147ab..b04ff9a0d 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -136,6 +136,11 @@ tests = [ testGroup "inline code" "`*` {.haskell .special x=\"7\"}" =?> para (codeWith ("",["haskell","special"],[("x","7")]) "*") ] + , testGroup "emph and strong" + [ "two strongs in emph" =: + "***a**b **c**d*" =?> para (emph (strong (str "a") <> str "b" <> space + <> strong (str "c") <> str "d")) + ] , testGroup "raw LaTeX" [ "in URL" =: "\\begin\n" =?> para (text "\\begin") -- cgit v1.2.3 From 526762bf222dbab199f6ff90c925fe18535c698f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 22 Nov 2013 19:51:07 -0800 Subject: ConTeXt writer: Use setupcaption to separate style from content. Instead of adding 'nunumber' every time we place a figure... Closes #1067. 
--- data/templates | 2 +- src/Text/Pandoc/Writers/ConTeXt.hs | 2 +- tests/writer.context | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'tests') diff --git a/data/templates b/data/templates index 4bdebc73b..8cadd4f20 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 4bdebc73b0b2025cf01704e7e564088c34d8f86c +Subproject commit 8cadd4f2044c0c25842eeb5a2370a6e3384f4bd4 diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 0379f8b0a..179d9bc5b 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -130,7 +130,7 @@ blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure blockToConTeXt (Para [Image txt (src,'f':'i':'g':':':_)]) = do capt <- inlineListToConTeXt txt - return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <> + return $ blankline $$ "\\placefigure[here]" <> braces capt <> braces ("\\externalfigure" <> brackets (text src)) <> blankline blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst diff --git a/tests/writer.context b/tests/writer.context index 114d00b3c..fb95f5615 100644 --- a/tests/writer.context +++ b/tests/writer.context @@ -30,6 +30,8 @@ \setupitemize[autointro] % prevent orphan list intro \setupitemize[indentnext=no] +\setupcaption[figure][number=no] % don't number figures + \setupthinrules[width=15em] % width of horizontal rules \setupdelimitedtext @@ -842,7 +844,7 @@ or here: From \quotation{Voyage dans la Lune} by Georges Melies (1902): -\placefigure[here,nonumber]{lalune}{\externalfigure[lalune.jpg]} +\placefigure[here]{lalune}{\externalfigure[lalune.jpg]} Here is a movie {\externalfigure[movie.jpg]} icon. 
-- cgit v1.2.3 From 303e42a94f16e00ecb65fb9de2d282d050a626c1 Mon Sep 17 00:00:00 2001 From: Jaime Marquínez Ferrándiz Date: Sun, 24 Nov 2013 12:51:41 +0100 Subject: MediaWiki reader: Accept image links in more languages In some of the Wikipedia versions the local version of 'File' is used (for example 'Archivo' in Spanish) --- src/Text/Pandoc/Readers/MediaWiki.hs | 6 +++++- tests/mediawiki-reader.native | 1 + tests/mediawiki-reader.wiki | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 1c074e3de..7f99af528 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -549,10 +549,14 @@ endline = () <$ try (newline <* notFollowedBy' header <* notFollowedBy anyListStart) +imageIdentifiers :: [MWParser ()] +imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] + where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier"] + image :: MWParser Inlines image = try $ do sym "[[" - sym "File:" <|> sym "Image:" + choice imageIdentifiers fname <- many1 (noneOf "|]") _ <- many (try $ char '|' *> imageOption) caption <- (B.str fname <$ sym "]]") diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index 238413445..87e4043f7 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -88,6 +88,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Image [Str "the",Space,Emph [Str "caption"],Space,Str "with",Space,Link [Str "external",Space,Str "link"] ("http://google.com","")] ("example.jpg","fig:the caption with external link")] ,Para [Image [Str "caption"] ("example.jpg","fig:caption")] ,Para [Image [Str "example.jpg"] ("example.jpg","fig:example.jpg")] +,Para [Image [Str "example_es.jpg"] ("example_es.jpg","fig:example_es.jpg")] ,Header 2 ("lists",[],[]) [Str "lists"] ,BulletList [[Plain [Str "Start",Space,Str "each",Space,Str "line"]] diff --git 
a/tests/mediawiki-reader.wiki b/tests/mediawiki-reader.wiki index 26f4ef164..15f586bda 100644 --- a/tests/mediawiki-reader.wiki +++ b/tests/mediawiki-reader.wiki @@ -173,6 +173,8 @@ http://johnmacfarlane.net/pandoc/ [[File:example.jpg]] +[[Archivo:example_es.jpg]] + == lists == * Start each line -- cgit v1.2.3 From dde484f80985411d1e51038e9347b47ff90a1b7e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 30 Nov 2013 16:34:32 -0800 Subject: Fixed tests for new ConTeXt writer behavior. --- tests/tables.context | 14 +++++++------- tests/writer.context | 5 +++-- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'tests') diff --git a/tests/tables.context b/tests/tables.context index e113a8e6a..371e559e5 100644 --- a/tests/tables.context +++ b/tests/tables.context @@ -1,6 +1,6 @@ Simple table with caption: -\placetable[here]{Demonstration of simple table syntax.} +\placetable{Demonstration of simple table syntax.} \starttable[|r|l|c|l|] \HL \NC Right @@ -29,7 +29,7 @@ Simple table with caption: Simple table without caption: -\placetable[here,none]{} +\placetable[none]{} \starttable[|r|l|c|l|] \HL \NC Right @@ -58,7 +58,7 @@ Simple table without caption: Simple table indented two spaces: -\placetable[here]{Demonstration of simple table syntax.} +\placetable{Demonstration of simple table syntax.} \starttable[|r|l|c|l|] \HL \NC Right @@ -87,7 +87,7 @@ Simple table indented two spaces: Multiline table with caption: -\placetable[here]{Here's the caption. It may span multiple lines.} +\placetable{Here's the caption. 
It may span multiple lines.} \starttable[|cp(0.15\textwidth)|lp(0.14\textwidth)|rp(0.16\textwidth)|lp(0.34\textwidth)|] \HL \NC Centered Header @@ -111,7 +111,7 @@ Multiline table with caption: Multiline table without caption: -\placetable[here,none]{} +\placetable[none]{} \starttable[|cp(0.15\textwidth)|lp(0.14\textwidth)|rp(0.16\textwidth)|lp(0.34\textwidth)|] \HL \NC Centered Header @@ -135,7 +135,7 @@ Multiline table without caption: Table without column headers: -\placetable[here,none]{} +\placetable[none]{} \starttable[|r|l|c|r|] \HL \NC 12 @@ -158,7 +158,7 @@ Table without column headers: Multiline table without column headers: -\placetable[here,none]{} +\placetable[none]{} \starttable[|cp(0.15\textwidth)|lp(0.14\textwidth)|rp(0.16\textwidth)|lp(0.34\textwidth)|] \HL \NC First diff --git a/tests/writer.context b/tests/writer.context index fb95f5615..0b031fd76 100644 --- a/tests/writer.context +++ b/tests/writer.context @@ -30,7 +30,8 @@ \setupitemize[autointro] % prevent orphan list intro \setupitemize[indentnext=no] -\setupcaption[figure][number=no] % don't number figures +\setupfloat[figure][default={here,nonumber}] +\setupfloat[table][default={here,nonumber}] \setupthinrules[width=15em] % width of horizontal rules @@ -844,7 +845,7 @@ or here: From \quotation{Voyage dans la Lune} by Georges Melies (1902): -\placefigure[here]{lalune}{\externalfigure[lalune.jpg]} +\placefigure{lalune}{\externalfigure[lalune.jpg]} Here is a movie {\externalfigure[movie.jpg]} icon. -- cgit v1.2.3 From 7aa4d519686af1416eaf3b380f8584ab89569c41 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 30 Nov 2013 17:00:58 -0800 Subject: ODT writer: Add `draw:name` attribute to `draw:frame` elements. This is reported to be necessary to avoid an error from recent versions of Libre Office when files contain more than one image. Closes #1069. Thanks to wmanley for reporting and diagnosing the problem. 
--- src/Text/Pandoc/Writers/OpenDocument.hs | 10 ++++++++-- tests/writer.opendocument | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 565f5f869..4ddfd7166 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -64,6 +64,7 @@ data WriterState = , stInDefinition :: Bool , stTight :: Bool , stFirstPara :: Bool + , stImageId :: Int } defaultWriterState :: WriterState @@ -78,6 +79,7 @@ defaultWriterState = , stInDefinition = False , stTight = False , stFirstPara = False + , stImageId = 1 } when :: Bool -> Doc -> Doc @@ -380,7 +382,7 @@ inlineToOpenDocument o ils then return $ preformatted s else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l - | Image _ (s,t) <- ils = return $ mkImg s t + | Image _ (s,t) <- ils = mkImg s t | Note l <- ils = mkNote l | otherwise = return empty where @@ -389,7 +391,11 @@ inlineToOpenDocument o ils , ("xlink:href" , s ) , ("office:name", t ) ] . inSpanTags "Definition" - mkImg s t = inTags False "draw:frame" (attrsFromTitle t) $ + mkImg s t = do + id' <- gets stImageId + modify (\st -> st{ stImageId = id' + 1 }) + return $ inTags False "draw:frame" + (("draw:name", "img" ++ show id'):attrsFromTitle t) $ selfClosingTag "draw:image" [ ("xlink:href" , s ) , ("xlink:type" , "simple") , ("xlink:show" , "embed" ) diff --git a/tests/writer.opendocument b/tests/writer.opendocument index b3888e34d..81c793a62 100644 --- a/tests/writer.opendocument +++ b/tests/writer.opendocument @@ -1576,9 +1576,9 @@ link in pointy braces. Images From “Voyage dans la Lune” by Georges Melies (1902): - + Here is a movie - + icon. Footnotes -- cgit v1.2.3 From 7f09c1834da9f87e7715f5c9dc52f4b730da8f3f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 30 Nov 2013 17:59:28 -0800 Subject: Markdown writer: Fix rendering of tight sublists. E.g. 
- foo - bar - baz Previously a spurious blank line was included before the last item. Closes #1050. --- src/Text/Pandoc/Writers/Markdown.hs | 9 ++++++++- tests/Tests/Writers/Markdown.hs | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index eefcd547a..60d474263 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -555,7 +555,14 @@ bulletListItemToMarkdown opts items = do contents <- blockListToMarkdown opts items let sps = replicate (writerTabStop opts - 2) ' ' let start = text ('-' : ' ' : sps) - return $ hang (writerTabStop opts) start $ contents <> cr + -- remove trailing blank line if it is a tight list + let contents' = case reverse items of + (BulletList xs:_) | isTightList xs -> + chomp contents <> cr + (OrderedList _ xs:_) | isTightList xs -> + chomp contents <> cr + _ -> contents + return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: WriterOptions -- ^ options diff --git a/tests/Tests/Writers/Markdown.hs b/tests/Tests/Writers/Markdown.hs index 99b85dfb7..c2a8f5903 100644 --- a/tests/Tests/Writers/Markdown.hs +++ b/tests/Tests/Writers/Markdown.hs @@ -31,4 +31,8 @@ tests :: [Test] tests = [ "indented code after list" =: (orderedList [ para "one" <> para "two" ] <> codeBlock "test") =?> "1. 
one\n\n two\n\n\n\n test" + , "list with tight sublist" + =: bulletList [ plain "foo" <> bulletList [ plain "bar" ], + plain "baz" ] + =?> "- foo\n - bar\n- baz\n" ] -- cgit v1.2.3 From fdaeec0c48d742489ddf0ec0c0261ca9c53f989b Mon Sep 17 00:00:00 2001 From: Jose Luis Duran Date: Mon, 2 Dec 2013 09:55:58 +0000 Subject: Add booktabs package for LaTeX tables [ci skip] --- README | 2 +- data/templates | 2 +- src/Text/Pandoc/Writers/LaTeX.hs | 10 ++--- tests/tables.latex | 90 ++++++++++++++++++++-------------------- 4 files changed, 52 insertions(+), 52 deletions(-) (limited to 'tests') diff --git a/README b/README index 2a5ecc6ad..3ba1e364e 100644 --- a/README +++ b/README @@ -108,7 +108,7 @@ to PDF: Production of a PDF requires that a LaTeX engine be installed (see `--latex-engine`, below), and assumes that the following LaTeX packages are available: `amssymb`, `amsmath`, `ifxetex`, `ifluatex`, `listings` (if the -`--listings` option is used), `fancyvrb`, `longtable`, `url`, +`--listings` option is used), `fancyvrb`, `longtable`, `booktabs`, `url`, `graphicx`, `hyperref`, `ulem`, `babel` (if the `lang` variable is set), `fontspec` (if `xelatex` or `lualatex` is used as the LaTeX engine), `xltxtra` and `xunicode` (if `xelatex` is used). 
diff --git a/data/templates b/data/templates index 713a8f63d..f643a076d 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 713a8f63d5589ab9313869e47b03cf7f49e00e98 +Subproject commit f643a076d8c2b0b21391fd6aa1dedb2dd84c7e63 diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f3cbcf19f..a2e0b016f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -453,12 +453,12 @@ blockToLaTeX (Header level (id',classes,_) lst) = blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty - else ($$ "\\hline\\noalign{\\medskip}") `fmap` + else ($$ "\\midrule\\endhead") `fmap` (tableRowToLaTeX True aligns widths) heads captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\noalign{\\medskip}" + else text "\\addlinespace" $$ text "\\caption" <> braces captionText rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns @@ -466,10 +466,10 @@ blockToLaTeX (Table caption aligns widths heads rows) = do return $ "\\begin{longtable}[c]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end - $$ "\\hline\\noalign{\\medskip}" + $$ "\\toprule\\addlinespace" $$ headers $$ vcat rows' - $$ "\\hline" + $$ "\\bottomrule" $$ capt $$ "\\end{longtable}" @@ -506,7 +506,7 @@ tableRowToLaTeX header aligns widths cols = do (w * scaleFactor))) <> (halign a <> cr <> c <> cr) <> "\\end{minipage}" let cells = zipWith3 toCell widths aligns renderedCells - return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}" + return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace" listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . 
diff --git a/tests/tables.latex b/tests/tables.latex index c27e10461..1a87c4f71 100644 --- a/tests/tables.latex +++ b/tests/tables.latex @@ -1,59 +1,59 @@ Simple table with caption: \begin{longtable}[c]{@{}rlcl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace Right & Left & Center & Default -\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead 12 & 12 & 12 & 12 -\\\noalign{\medskip} +\\\addlinespace 123 & 123 & 123 & 123 -\\\noalign{\medskip} +\\\addlinespace 1 & 1 & 1 & 1 -\\\noalign{\medskip} -\hline -\noalign{\medskip} +\\\addlinespace +\bottomrule +\addlinespace \caption{Demonstration of simple table syntax.} \end{longtable} Simple table without caption: \begin{longtable}[c]{@{}rlcl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace Right & Left & Center & Default -\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead 12 & 12 & 12 & 12 -\\\noalign{\medskip} +\\\addlinespace 123 & 123 & 123 & 123 -\\\noalign{\medskip} +\\\addlinespace 1 & 1 & 1 & 1 -\\\noalign{\medskip} -\hline +\\\addlinespace +\bottomrule \end{longtable} Simple table indented two spaces: \begin{longtable}[c]{@{}rlcl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace Right & Left & Center & Default -\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead 12 & 12 & 12 & 12 -\\\noalign{\medskip} +\\\addlinespace 123 & 123 & 123 & 123 -\\\noalign{\medskip} +\\\addlinespace 1 & 1 & 1 & 1 -\\\noalign{\medskip} -\hline -\noalign{\medskip} +\\\addlinespace +\bottomrule +\addlinespace \caption{Demonstration of simple table syntax.} \end{longtable} Multiline table with caption: \begin{longtable}[c]{@{}clrl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace \begin{minipage}[b]{0.13\columnwidth}\centering Centered Header \end{minipage} & \begin{minipage}[b]{0.12\columnwidth}\raggedright @@ -63,8 +63,8 @@ Right Aligned \end{minipage} & \begin{minipage}[b]{0.30\columnwidth}\raggedright Default aligned \end{minipage} 
-\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead \begin{minipage}[t]{0.13\columnwidth}\centering First \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -74,7 +74,7 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} -\\\noalign{\medskip} +\\\addlinespace \begin{minipage}[t]{0.13\columnwidth}\centering Second \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -84,16 +84,16 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. \end{minipage} -\\\noalign{\medskip} -\hline -\noalign{\medskip} +\\\addlinespace +\bottomrule +\addlinespace \caption{Here's the caption. It may span multiple lines.} \end{longtable} Multiline table without caption: \begin{longtable}[c]{@{}clrl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace \begin{minipage}[b]{0.13\columnwidth}\centering Centered Header \end{minipage} & \begin{minipage}[b]{0.12\columnwidth}\raggedright @@ -103,8 +103,8 @@ Right Aligned \end{minipage} & \begin{minipage}[b]{0.30\columnwidth}\raggedright Default aligned \end{minipage} -\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead \begin{minipage}[t]{0.13\columnwidth}\centering First \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -114,7 +114,7 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} -\\\noalign{\medskip} +\\\addlinespace \begin{minipage}[t]{0.13\columnwidth}\centering Second \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -124,27 +124,27 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. 
\end{minipage} -\\\noalign{\medskip} -\hline +\\\addlinespace +\bottomrule \end{longtable} Table without column headers: \begin{longtable}[c]{@{}rlcr@{}} -\hline\noalign{\medskip} +\toprule\addlinespace 12 & 12 & 12 & 12 -\\\noalign{\medskip} +\\\addlinespace 123 & 123 & 123 & 123 -\\\noalign{\medskip} +\\\addlinespace 1 & 1 & 1 & 1 -\\\noalign{\medskip} -\hline +\\\addlinespace +\bottomrule \end{longtable} Multiline table without column headers: \begin{longtable}[c]{@{}clrl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace \begin{minipage}[t]{0.13\columnwidth}\centering First \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -154,7 +154,7 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} -\\\noalign{\medskip} +\\\addlinespace \begin{minipage}[t]{0.13\columnwidth}\centering Second \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -164,6 +164,6 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. \end{minipage} -\\\noalign{\medskip} -\hline +\\\addlinespace +\bottomrule \end{longtable} -- cgit v1.2.3 From bc2c0fd443cf67046b6690400fa7a3049cf164fa Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Dec 2013 17:33:11 -0800 Subject: Small change to HTML reader tests. "$" is now a special character. 
--- tests/html-reader.native | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tests') diff --git a/tests/html-reader.native b/tests/html-reader.native index 15937e594..794512426 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -207,18 +207,18 @@ Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Pandoc",Space,Str "T ,BulletList [[Plain [Str "\\cite[22",Str "-",Str "23]{smith",Str ".",Str "1899}"]] ,[Plain [Str "\\doublespacing"]] - ,[Plain [Str "$2+2=4$"]] - ,[Plain [Str "$x",Space,Str "\\in",Space,Str "y$"]] - ,[Plain [Str "$\\alpha",Space,Str "\\wedge",Space,Str "\\omega$"]] - ,[Plain [Str "$223$"]] - ,[Plain [Str "$p$",Str "-",Str "Tree"]] - ,[Plain [Str "$\\frac{d}{dx}f(x)=\\lim_{h\\to",Space,Str "0}\\frac{f(x+h)",Str "-",Str "f(x)}{h}$"]] - ,[Plain [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "that",Space,Str "has",Space,Str "a",Space,Str "line",Space,Str "break",Space,Str "in",Space,Str "it:",Space,Str "$\\alpha",Space,Str "+",Space,Str "\\omega",Space,Str "\\times",Space,Str "x^2$",Str "."]]] + ,[Plain [Str "$",Str "2+2=4",Str "$"]] + ,[Plain [Str "$",Str "x",Space,Str "\\in",Space,Str "y",Str "$"]] + ,[Plain [Str "$",Str "\\alpha",Space,Str "\\wedge",Space,Str "\\omega",Str "$"]] + ,[Plain [Str "$",Str "223",Str "$"]] + ,[Plain [Str "$",Str "p",Str "$",Str "-",Str "Tree"]] + ,[Plain [Str "$",Str "\\frac{d}{dx}f(x)=\\lim_{h\\to",Space,Str "0}\\frac{f(x+h)",Str "-",Str "f(x)}{h}",Str "$"]] + ,[Plain [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "that",Space,Str "has",Space,Str "a",Space,Str "line",Space,Str "break",Space,Str "in",Space,Str "it:",Space,Str "$",Str "\\alpha",Space,Str "+",Space,Str "\\omega",Space,Str "\\times",Space,Str "x^2",Str "$",Str "."]]] ,Para [Str "These",Space,Str "shouldn",Str "'",Str "t",Space,Str "be",Space,Str "math:"] ,BulletList [[Plain [Str "To",Space,Str "get",Space,Str "the",Space,Str "famous",Space,Str "equation,",Space,Str 
"write",Space,Code ("",[],[]) "$e = mc^2$",Str "."]] - ,[Plain [Str "$22,000",Space,Str "is",Space,Str "a",Space,Emph [Str "lot"],Space,Str "of",Space,Str "money",Str ".",Space,Str "So",Space,Str "is",Space,Str "$34,000",Str ".",Space,Str "(It",Space,Str "worked",Space,Str "if",Space,Str "\"",Str "lot",Str "\"",Space,Str "is",Space,Str "emphasized",Str ".",Str ")"]] - ,[Plain [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23$",Str "."]]] + ,[Plain [Str "$",Str "22,000",Space,Str "is",Space,Str "a",Space,Emph [Str "lot"],Space,Str "of",Space,Str "money",Str ".",Space,Str "So",Space,Str "is",Space,Str "$",Str "34,000",Str ".",Space,Str "(It",Space,Str "worked",Space,Str "if",Space,Str "\"",Str "lot",Str "\"",Space,Str "is",Space,Str "emphasized",Str ".",Str ")"]] + ,[Plain [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$",Str "73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23",Str "$",Str "."]]] ,Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] ,Para [Str "\\begin{tabular}{|l|l|}\\hline",Space,Str "Animal",Space,Str "&",Space,Str "Number",Space,Str "\\\\",Space,Str "\\hline",Space,Str "Dog",Space,Str "&",Space,Str "2",Space,Str "\\\\",Space,Str "Cat",Space,Str "&",Space,Str "1",Space,Str "\\\\",Space,Str "\\hline",Space,Str "\\end{tabular}"] ,HorizontalRule -- cgit v1.2.3 From f9662957704ebfe83d7764dd64151d37c476c0b0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 9 Dec 2013 20:31:47 -0800 Subject: Don't use tilde code blocks with braced attributes in gfm output. A consequence of this change is that the backtick form will be preferred in general if both are enabled. I think that is good, as it is much more widespread than the tilde form. Closes #1084. 
--- src/Text/Pandoc/Writers/Markdown.hs | 18 +++++++++--------- tests/lhs-test.markdown | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 60d474263..c0b189b75 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -381,13 +381,11 @@ blockToMarkdown opts (CodeBlock (_,classes,_) str) isEnabled Ext_literate_haskell opts = return $ prefixed "> " (text str) <> blankline blockToMarkdown opts (CodeBlock attribs str) = return $ - case attribs of - x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts -> - tildes <> " " <> attrs <> cr <> text str <> - cr <> tildes <> blankline - (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts -> - backticks <> " " <> text cls <> cr <> text str <> - cr <> backticks <> blankline + case attribs == nullAttr of + False | isEnabled Ext_backtick_code_blocks opts -> + backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline + | isEnabled Ext_fenced_code_blocks opts -> + tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline _ -> nest (writerTabStop opts) (text str) <> blankline where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of [] -> "~~~~" @@ -396,8 +394,10 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ | otherwise -> replicate (n+1) '~' backticks = text "```" attrs = if isEnabled Ext_fenced_code_attributes opts - then nowrap $ attrsToMarkdown attribs - else empty + then nowrap $ " " <> attrsToMarkdown attribs + else case attribs of + (_,[cls],_) -> " " <> text cls + _ -> empty blockToMarkdown opts (BlockQuote blocks) = do st <- get -- if we're writing literate haskell, put a space before the bird tracks diff --git a/tests/lhs-test.markdown b/tests/lhs-test.markdown index 47ec920d3..75a253bf4 100644 --- a/tests/lhs-test.markdown +++ b/tests/lhs-test.markdown @@ -4,11 +4,11 @@ lhs test `unsplit` is an arrow that takes a pair 
of values and combines them to return a single value: -~~~~ {.sourceCode .literate .haskell} +``` {.sourceCode .literate .haskell} unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d unsplit = arr . uncurry -- arr (\op (x,y) -> x `op` y) -~~~~ +``` `(***)` combines two arrows into a new arrow by running the two arrows on a pair of values (one arrow on the first item of the pair and one arrow on the -- cgit v1.2.3 From c35f5ba42df094cef5f69a191315385a0a1e12b0 Mon Sep 17 00:00:00 2001 From: Henry de Valence Date: Thu, 19 Dec 2013 20:28:53 -0500 Subject: HLint: Remove lambdas. --- src/Text/Pandoc/Writers/Shared.hs | 3 +-- tests/Tests/Walk.hs | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 9cb08803c..33091ea94 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -65,8 +65,7 @@ metaToJSON opts blockWriter inlineWriter (Meta metamap) renderedMap <- Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap - return $ M.foldWithKey (\key val obj -> defField key val obj) - baseContext renderedMap + return $ M.foldWithKey defField baseContext renderedMap | otherwise = return (Object H.empty) metaValueToJSON :: Monad m diff --git a/tests/Tests/Walk.hs b/tests/Tests/Walk.hs index f6aa1beae..34350e28a 100644 --- a/tests/Tests/Walk.hs +++ b/tests/Tests/Walk.hs @@ -21,11 +21,11 @@ tests = [ testGroup "Walk" p_walk :: (Typeable a, Walkable a Pandoc) => (a -> a) -> Pandoc -> Bool -p_walk f = (\(d :: Pandoc) -> everywhere (mkT f) d == walk f d) +p_walk f d = everywhere (mkT f) d == walk f d p_query :: (Eq a, Typeable a1, Monoid a, Walkable a1 Pandoc) => (a1 -> a) -> Pandoc -> Bool -p_query f = (\(d :: Pandoc) -> everything mappend (mempty `mkQ` f) d == query f d) +p_query f d = everything mappend (mempty `mkQ` f) d == query f d inlineTrans :: Inline -> Inline inlineTrans (Str xs) = Str $ map toUpper xs -- cgit v1.2.3 
From f6d151889c8fff303be8ee8a4f9be67a04de9210 Mon Sep 17 00:00:00 2001 From: Henry de Valence Date: Thu, 19 Dec 2013 20:43:25 -0500 Subject: HLint: redundant parens Remove parens enclosing a single element. --- pandoc.hs | 6 ++---- src/Text/Pandoc/Readers/Markdown.hs | 2 +- src/Text/Pandoc/Readers/Textile.hs | 2 +- src/Text/Pandoc/Writers/HTML.hs | 2 +- src/Text/Pandoc/Writers/MediaWiki.hs | 2 +- src/Text/Pandoc/Writers/Textile.hs | 2 +- tests/Tests/Readers/LaTeX.hs | 2 +- tests/Tests/Readers/Markdown.hs | 4 ++-- 8 files changed, 10 insertions(+), 12 deletions(-) (limited to 'tests') diff --git a/pandoc.hs b/pandoc.hs index ccd3e57fb..574c89771 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1034,12 +1034,10 @@ main = do variables' <- case mathMethod of LaTeXMathML Nothing -> do - s <- readDataFileUTF8 datadir - ("LaTeXMathML.js") + s <- readDataFileUTF8 datadir "LaTeXMathML.js" return $ ("mathml-script", s) : variables MathML Nothing -> do - s <- readDataFileUTF8 datadir - ("MathMLinHTML.js") + s <- readDataFileUTF8 datadir "MathMLinHTML.js" return $ ("mathml-script", s) : variables _ -> return variables diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index f483ab059..166c524ef 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -730,7 +730,7 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ()) listLine :: MarkdownParser String listLine = try $ do notFollowedBy' (do indentSpaces - many (spaceChar) + many spaceChar listStart) notFollowedBy' $ htmlTag (~== TagClose "div") chunks <- manyTill diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 23e07f621..93658cdea 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -594,7 +594,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> ([Inline] -> 
Inline) -- ^ Inline constructor -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly) -simpleInline border construct = surrounded border (inlineWithAttribute) >>= +simpleInline border construct = surrounded border inlineWithAttribute >>= return . construct . normalizeSpaces where inlineWithAttribute = (try $ optional attributes) >> inline diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 641652276..2c6435457 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -524,7 +524,7 @@ blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- if null term then return mempty - else liftM (H.dt) $ inlineListToHtml opts term + else liftM H.dt $ inlineListToHtml opts term defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . blockListToHtml opts) defs return $ mconcat $ nl opts : term' : nl opts : diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 61741a61e..83fefaa29 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -51,7 +51,7 @@ data WriterState = WriterState { writeMediaWiki :: WriterOptions -> Pandoc -> String writeMediaWiki opts document = evalState (pandocToMediaWiki opts document) - (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) + WriterState { stNotes = False, stListLevel = [], stUseTags = False } -- | Return MediaWiki representation of document. 
pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 7c102cc86..95aedf780 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -51,7 +51,7 @@ data WriterState = WriterState { writeTextile :: WriterOptions -> Pandoc -> String writeTextile opts document = evalState (pandocToTextile opts document) - (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) + WriterState { stNotes = [], stListLevel = [], stUseTags = False } -- | Return Textile representation of document. pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String diff --git a/tests/Tests/Readers/LaTeX.hs b/tests/Tests/Readers/LaTeX.hs index c1efd1b68..8ff23ebc1 100644 --- a/tests/Tests/Readers/LaTeX.hs +++ b/tests/Tests/Readers/LaTeX.hs @@ -21,7 +21,7 @@ tests = [ testGroup "basic" [ "simple" =: "word" =?> para "word" , "space" =: - "some text" =?> para ("some text") + "some text" =?> para "some text" , "emphasized" =: "\\emph{emphasized}" =?> para (emph "emphasized") ] diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index b04ff9a0d..492680a35 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -171,13 +171,13 @@ tests = [ testGroup "inline code" , testGroup "smart punctuation" [ test markdownSmart "quote before ellipses" ("'...hi'" - =?> para (singleQuoted ("…hi"))) + =?> para (singleQuoted "…hi")) , test markdownSmart "apostrophe before emph" ("D'oh! A l'*aide*!" =?> para ("D’oh! 
A l’" <> emph "aide" <> "!")) , test markdownSmart "apostrophe in French" ("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»" - =?> para ("À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»")) + =?> para "À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»") ] , testGroup "mixed emphasis and strong" [ "emph and strong emph alternating" =: -- cgit v1.2.3 From 8e79bbde1e11399d5fac1fa5ae4c83bb1d3b4fc7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 27 Dec 2013 13:36:28 -0800 Subject: Removed old MarkdownTest_1.0.3 directory (not currently used). Closes #1104. --- tests/MarkdownTest_1.0.3/MarkdownTest.pl | 176 ---- .../Tests/Amps and angle encoding.html | 17 - .../Tests/Amps and angle encoding.text | 21 - tests/MarkdownTest_1.0.3/Tests/Auto links.html | 18 - tests/MarkdownTest_1.0.3/Tests/Auto links.text | 13 - .../Tests/Backslash escapes.html | 118 --- .../Tests/Backslash escapes.text | 120 --- .../Tests/Blockquotes with code blocks.html | 15 - .../Tests/Blockquotes with code blocks.text | 11 - tests/MarkdownTest_1.0.3/Tests/Code Blocks.html | 18 - tests/MarkdownTest_1.0.3/Tests/Code Blocks.text | 14 - tests/MarkdownTest_1.0.3/Tests/Code Spans.html | 6 - tests/MarkdownTest_1.0.3/Tests/Code Spans.text | 6 - ...rd-wrapped paragraphs with list-like lines.html | 8 - ...rd-wrapped paragraphs with list-like lines.text | 8 - .../MarkdownTest_1.0.3/Tests/Horizontal rules.html | 71 -- .../MarkdownTest_1.0.3/Tests/Horizontal rules.text | 67 -- .../Tests/Inline HTML (Advanced).html | 15 - .../Tests/Inline HTML (Advanced).text | 15 - .../Tests/Inline HTML (Simple).html | 72 -- .../Tests/Inline HTML (Simple).text | 69 -- .../Tests/Inline HTML comments.html | 13 - .../Tests/Inline HTML comments.text | 13 - .../Tests/Links, inline style.html | 11 - .../Tests/Links, inline style.text | 12 - .../Tests/Links, reference style.html | 52 -- .../Tests/Links, reference style.text | 71 -- .../Tests/Links, shortcut 
references.html | 9 - .../Tests/Links, shortcut references.text | 20 - .../Tests/Literal quotes in titles.html | 3 - .../Tests/Literal quotes in titles.text | 7 - .../Tests/Markdown Documentation - Basics.html | 314 ------- .../Tests/Markdown Documentation - Basics.text | 306 ------- .../Tests/Markdown Documentation - Syntax.html | 942 --------------------- .../Tests/Markdown Documentation - Syntax.text | 888 ------------------- .../Tests/Nested blockquotes.html | 9 - .../Tests/Nested blockquotes.text | 5 - .../Tests/Ordered and unordered lists.html | 148 ---- .../Tests/Ordered and unordered lists.text | 131 --- .../Tests/Strong and em together.html | 7 - .../Tests/Strong and em together.text | 7 - tests/MarkdownTest_1.0.3/Tests/Tabs.html | 25 - tests/MarkdownTest_1.0.3/Tests/Tabs.text | 21 - tests/MarkdownTest_1.0.3/Tests/Tidyness.html | 8 - tests/MarkdownTest_1.0.3/Tests/Tidyness.text | 5 - 45 files changed, 3905 deletions(-) delete mode 100755 tests/MarkdownTest_1.0.3/MarkdownTest.pl delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Amps and angle encoding.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Amps and angle encoding.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Auto links.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Auto links.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Backslash escapes.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Backslash escapes.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Code Blocks.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Code Blocks.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Code Spans.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Code Spans.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.html delete mode 100644 
tests/MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Horizontal rules.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Horizontal rules.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Inline HTML (Simple).html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Inline HTML (Simple).text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Inline HTML comments.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Inline HTML comments.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Links, inline style.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Links, inline style.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Links, reference style.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Links, reference style.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Links, shortcut references.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Links, shortcut references.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Literal quotes in titles.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Literal quotes in titles.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Nested blockquotes.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Nested blockquotes.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Ordered and unordered lists.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Ordered and unordered lists.text delete mode 100644 
tests/MarkdownTest_1.0.3/Tests/Strong and em together.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Strong and em together.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Tabs.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Tabs.text delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Tidyness.html delete mode 100644 tests/MarkdownTest_1.0.3/Tests/Tidyness.text (limited to 'tests') diff --git a/tests/MarkdownTest_1.0.3/MarkdownTest.pl b/tests/MarkdownTest_1.0.3/MarkdownTest.pl deleted file mode 100755 index 55553d09c..000000000 --- a/tests/MarkdownTest_1.0.3/MarkdownTest.pl +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/perl - -# -# MarkdownTester -- Run tests for Markdown implementations -# -# Copyright (c) 2004-2005 John Gruber -# -# - -use strict; -use warnings; -use Getopt::Long; -use Benchmark; - -our $VERSION = '1.0.2'; -# Sat 24 Dec 2005 - -my $time_start = new Benchmark; -my $test_dir = "Tests"; -my $script = "./Markdown.pl"; -my $use_tidy = 0; -my ($flag_version); - -GetOptions ( - "script=s" => \$script, - "testdir=s" => \$test_dir, - "tidy" => \$use_tidy, - "version" => \$flag_version, - ); - -if($flag_version) { - my $progname = $0; - $progname =~ s{.*/}{}; - die "$progname version $VERSION\n"; -} - -unless (-d $test_dir) { die "'$test_dir' is not a directory.\n"; } -unless (-f $script) { die "$script does not exist.\n"; } -unless (-x $script) { die "$script is not executable.\n"; } - -my $tests_passed = 0; -my $tests_failed = 0; - -TEST: -foreach my $testfile (glob "$test_dir/*.text") { - my $testname = $testfile; - $testname =~ s{.*/(.+)\.text$}{$1}i; - print "$testname ... 
"; - - # Look for a corresponding .html file for each .text file: - my $resultfile = $testfile; - $resultfile =~ s{\.text$}{\.html}i; - unless (-f $resultfile) { - print "'$resultfile' does not exist.\n\n"; - next TEST; - } - - # open(TEST, $testfile) || die("Can't open testfile: $!"); - open(RESULT, $resultfile) || die("Can't open resultfile: $!"); - undef $/; - # my $t_input = ; - my $t_result = ; - - my $t_output = `'$script' '$testfile'`; - - # Normalize the output and expected result strings: - $t_result =~ s/\s+\z//; # trim trailing whitespace - $t_output =~ s/\s+\z//; # trim trailing whitespace - if ($use_tidy) { - # Escape the strings, pass them through to CLI tidy tool for tag-level equivalency - $t_result =~ s{'}{'\\''}g; # escape ' chars for shell - $t_output =~ s{'}{'\\''}g; - $t_result = `echo '$t_result' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`; - $t_output = `echo '$t_output' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`; - } - - if ($t_output eq $t_result) { - print "OK\n"; - $tests_passed++; - } - else { - print "FAILED\n\n"; -# This part added by JM to print diffs - open(OUT, '>tmp1') or die $!; - print OUT $t_output or die $!; - open(RES, '>tmp2') or die $!; - print RES $t_result or die $!; - print `diff tmp1 tmp2`; - close RES; - close OUT; - print "\n"; - `rm tmp?`; -# End of added part - $tests_failed++; - } -} - -print "\n\n"; -print "$tests_passed passed; $tests_failed failed.\n"; - -my $time_end = new Benchmark; -my $time_diff = timediff($time_end, $time_start); -print "Benchmark: ", timestr($time_diff), "\n"; - - -__END__ - -=pod - -=head1 NAME - -B - - -=head1 SYNOPSIS - -B [ B<--options> ] [ I ... ] - - -=head1 DESCRIPTION - - -=head1 OPTIONS - -Use "--" to end switch parsing. For example, to open a file named "-z", use: - - MarkdownTest.pl -- -z - -=over 4 - -=item B<--script> - -Specify the path to the Markdown script to test. Defaults to -"./Markdown.pl". 
Example: - - ./MarkdownTest.pl --script ./PHP-Markdown/php-markdown - -=item B<--testdir> - -Specify the path to a directory containing test data. Defaults to "Tests". - -=item B<--tidy> - -Flag to turn on using the command line 'tidy' tool to normalize HTML -output before comparing script output to the expected test result. -Assumes that the 'tidy' command is available in your PATH. Defaults to -off. - -=back - - - -=head1 BUGS - - - -=head1 VERSION HISTORY - -1.0 Mon 13 Dec 2004-2005 - -1.0.1 Mon 19 Sep 2005 - - + Better handling of case when foo.text exists, but foo.html doesn't. - It now prints a message and moves on, rather than dying. - - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2004-2005 John Gruber - -All rights reserved. - -This is free software; you may redistribute it and/or modify it under -the same terms as Perl itself. - -=cut diff --git a/tests/MarkdownTest_1.0.3/Tests/Amps and angle encoding.html b/tests/MarkdownTest_1.0.3/Tests/Amps and angle encoding.html deleted file mode 100644 index 9606860b6..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Amps and angle encoding.html +++ /dev/null @@ -1,17 +0,0 @@ -

AT&T has an ampersand in their name.

- -

AT&T is another way to write it.

- -

This & that.

- -

4 < 5.

- -

6 > 5.

- -

Here's a link with an ampersand in the URL.

- -

Here's a link with an amersand in the link text: AT&T.

- -

Here's an inline link.

- -

Here's an inline link.

diff --git a/tests/MarkdownTest_1.0.3/Tests/Amps and angle encoding.text b/tests/MarkdownTest_1.0.3/Tests/Amps and angle encoding.text deleted file mode 100644 index 0e9527f93..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Amps and angle encoding.text +++ /dev/null @@ -1,21 +0,0 @@ -AT&T has an ampersand in their name. - -AT&T is another way to write it. - -This & that. - -4 < 5. - -6 > 5. - -Here's a [link] [1] with an ampersand in the URL. - -Here's a link with an amersand in the link text: [AT&T] [2]. - -Here's an inline [link](/script?foo=1&bar=2). - -Here's an inline [link](). - - -[1]: http://example.com/?foo=1&bar=2 -[2]: http://att.com/ "AT&T" \ No newline at end of file diff --git a/tests/MarkdownTest_1.0.3/Tests/Auto links.html b/tests/MarkdownTest_1.0.3/Tests/Auto links.html deleted file mode 100644 index f8df9852c..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Auto links.html +++ /dev/null @@ -1,18 +0,0 @@ -

Link: http://example.com/.

- -

With an ampersand: http://example.com/?foo=1&bar=2

- -

In a list?
http://example.com/
It should.

- -

-
Blockquoted: http://example.com/
-

- -

Auto-links should not occur here: <http://example.com/>

- -

or here: <http://example.com/>
-

diff --git a/tests/MarkdownTest_1.0.3/Tests/Auto links.text b/tests/MarkdownTest_1.0.3/Tests/Auto links.text deleted file mode 100644 index abbc48869..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Auto links.text +++ /dev/null @@ -1,13 +0,0 @@ -Link: . - -With an ampersand: - -* In a list? -* -* It should. - -> Blockquoted: - -Auto-links should not occur here: `` - - or here: \ No newline at end of file diff --git a/tests/MarkdownTest_1.0.3/Tests/Backslash escapes.html b/tests/MarkdownTest_1.0.3/Tests/Backslash escapes.html deleted file mode 100644 index 29870dac5..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Backslash escapes.html +++ /dev/null @@ -1,118 +0,0 @@ -

These should all get escaped:

- -

Backslash: \

- -

Backtick: `

- -

Asterisk: *

- -

Underscore: _

- -

Left brace: {

- -

Right brace: }

- -

Left bracket: [

- -

Right bracket: ]

- -

Left paren: (

- -

Right paren: )

- -

Greater-than: >

- -

Hash: #

- -

Period: .

- -

Bang: !

- -

Plus: +

- -

Minus: -

- -

These should not, because they occur within a code block:

- -

Backslash: \\
-
-Backtick: \`
-
-Asterisk: \*
-
-Underscore: \_
-
-Left brace: \{
-
-Right brace: \}
-
-Left bracket: \[
-
-Right bracket: \]
-
-Left paren: \(
-
-Right paren: \)
-
-Greater-than: \>
-
-Hash: \#
-
-Period: \.
-
-Bang: \!
-
-Plus: \+
-
-Minus: \-
-

- -

Nor should these, which occur in code spans:

- -

Backslash: \\

- -

Backtick: \`

- -

Asterisk: \*

- -

Underscore: \_

- -

Left brace: \{

- -

Right brace: \}

- -

Left bracket: \[

- -

Right bracket: \]

- -

Left paren: \(

- -

Right paren: \)

- -

Greater-than: \>

- -

Hash: \#

- -

Period: \.

- -

Bang: \!

- -

Plus: \+

- -

Minus: \-

- - -

These should get escaped, even though they're matching pairs for -other Markdown constructs:

- -

*asterisks*

- -

_underscores_

- -

`backticks`

- -

This is a code span with a literal backslash-backtick sequence: \`

- -

This is a tag with unescaped backticks bar.

- -

This is a tag with backslashes bar.

diff --git a/tests/MarkdownTest_1.0.3/Tests/Backslash escapes.text b/tests/MarkdownTest_1.0.3/Tests/Backslash escapes.text deleted file mode 100644 index 5b014cb33..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Backslash escapes.text +++ /dev/null @@ -1,120 +0,0 @@ -These should all get escaped: - -Backslash: \\ - -Backtick: \` - -Asterisk: \* - -Underscore: \_ - -Left brace: \{ - -Right brace: \} - -Left bracket: \[ - -Right bracket: \] - -Left paren: $ - -Right paren: $ - -Greater-than: \> - -Hash: \# - -Period: \. - -Bang: \! - -Plus: \+ - -Minus: \- - - - -These should not, because they occur within a code block: - - Backslash: \\ - - Backtick: \` - - Asterisk: \* - - Underscore: \_ - - Left brace: \{ - - Right brace: \} - - Left bracket: \[ - - Right bracket: \] - - Left paren: $ - - Right paren: $ - - Greater-than: \> - - Hash: \# - - Period: \. - - Bang: \! - - Plus: \+ - - Minus: \- - - -Nor should these, which occur in code spans: - -Backslash: `\\` - -Backtick: `` \` `` - -Asterisk: `\*` - -Underscore: `\_` - -Left brace: `\{` - -Right brace: `\}` - -Left bracket: `\[` - -Right bracket: `\]` - -Left paren: `$` - -Right paren: `$` - -Greater-than: `\>` - -Hash: `\#` - -Period: `\.` - -Bang: `\!` - -Plus: `\+` - -Minus: `\-` - - -These should get escaped, even though they're matching pairs for -other Markdown constructs: - -\*asterisks\* - -\_underscores\_ - -\`backticks\` - -This is a code span with a literal backslash-backtick sequence: `` \` `` - -This is a tag with unescaped backticks bar. - -This is a tag with backslashes bar. diff --git a/tests/MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.html b/tests/MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.html deleted file mode 100644 index 990202a1b..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.html +++ /dev/null @@ -1,15 +0,0 @@ -

Example:

- -

sub status {
-    print "working";
-}
-

- -

Or:

- -

sub status {
-    return "working";
-}
-

diff --git a/tests/MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.text b/tests/MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.text deleted file mode 100644 index c31d17104..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.text +++ /dev/null @@ -1,11 +0,0 @@ -> Example: -> -> sub status { -> print "working"; -> } -> -> Or: -> -> sub status { -> return "working"; -> } diff --git a/tests/MarkdownTest_1.0.3/Tests/Code Blocks.html b/tests/MarkdownTest_1.0.3/Tests/Code Blocks.html deleted file mode 100644 index 32703f5cb..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Code Blocks.html +++ /dev/null @@ -1,18 +0,0 @@ -

code block on the first line
-

- -

Regular text.

- -

code block indented by spaces
-

- -

Regular text.

- -

the lines in this block  
-all contain trailing spaces  
-

- -

Regular Text.

- -

code block on the last line
-

diff --git a/tests/MarkdownTest_1.0.3/Tests/Code Blocks.text b/tests/MarkdownTest_1.0.3/Tests/Code Blocks.text deleted file mode 100644 index b54b09285..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Code Blocks.text +++ /dev/null @@ -1,14 +0,0 @@ - code block on the first line - -Regular text. - - code block indented by spaces - -Regular text. - - the lines in this block - all contain trailing spaces - -Regular Text. - - code block on the last line \ No newline at end of file diff --git a/tests/MarkdownTest_1.0.3/Tests/Code Spans.html b/tests/MarkdownTest_1.0.3/Tests/Code Spans.html deleted file mode 100644 index 4b8afbb70..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Code Spans.html +++ /dev/null @@ -1,6 +0,0 @@ -

<test a=" content of attribute ">

- -

Fix for backticks within HTML tag: like this

- -

Here's how you put `backticks` in a code span.

- diff --git a/tests/MarkdownTest_1.0.3/Tests/Code Spans.text b/tests/MarkdownTest_1.0.3/Tests/Code Spans.text deleted file mode 100644 index 750a1973d..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Code Spans.text +++ /dev/null @@ -1,6 +0,0 @@ -`` - -Fix for backticks within HTML tag: like this - -Here's how you put `` `backticks` `` in a code span. - diff --git a/tests/MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.html b/tests/MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.html deleted file mode 100644 index e21ac79a2..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.html +++ /dev/null @@ -1,8 +0,0 @@ -

In Markdown 1.0.0 and earlier. Version -8. This line turns into a list item. -Because a hard-wrapped line in the -middle of a paragraph looked like a -list item.

- -

Here's one with a bullet. -* criminey.

diff --git a/tests/MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.text b/tests/MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.text deleted file mode 100644 index f8a5b27bf..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.text +++ /dev/null @@ -1,8 +0,0 @@ -In Markdown 1.0.0 and earlier. Version -8. This line turns into a list item. -Because a hard-wrapped line in the -middle of a paragraph looked like a -list item. - -Here's one with a bullet. -* criminey. diff --git a/tests/MarkdownTest_1.0.3/Tests/Horizontal rules.html b/tests/MarkdownTest_1.0.3/Tests/Horizontal rules.html deleted file mode 100644 index 2dc2ab656..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Horizontal rules.html +++ /dev/null @@ -1,71 +0,0 @@ -

Dashes:

- -

---
-

- -

- - -
-

- -

Asterisks:

- -

***
-

- -

* * *
-

- -

Underscores:

- -

___
-

- -

_ _ _
-

diff --git a/tests/MarkdownTest_1.0.3/Tests/Horizontal rules.text b/tests/MarkdownTest_1.0.3/Tests/Horizontal rules.text deleted file mode 100644 index 1594bda27..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Horizontal rules.text +++ /dev/null @@ -1,67 +0,0 @@ -Dashes: - ---- - - --- - - --- - - --- - - --- - -- - - - - - - - - - - - - - - - - - - - - - - - - -Asterisks: - -*** - - *** - - *** - - *** - - *** - -* * * - - * * * - - * * * - - * * * - - * * * - - -Underscores: - -___ - - ___ - - ___ - - ___ - - ___ - -_ _ _ - - _ _ _ - - _ _ _ - - _ _ _ - - _ _ _ diff --git a/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).html b/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).html deleted file mode 100644 index 3af9cafb1..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).html +++ /dev/null @@ -1,15 +0,0 @@ -

Simple block on one line:

- -

foo

- -

And nested without indentation:

- -

-foo -

bar

diff --git a/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).text b/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).text deleted file mode 100644 index 86b7206d2..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).text +++ /dev/null @@ -1,15 +0,0 @@ -Simple block on one line: - -

foo

- -And nested without indentation: - -

-foo -

bar

diff --git a/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Simple).html b/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Simple).html deleted file mode 100644 index 6bf78f8fc..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Simple).html +++ /dev/null @@ -1,72 +0,0 @@ -

Here's a simple block:

- -

- foo -

- -

This should be a code block, though:

- -

<div>
-    foo
-</div>
-

- -

As should this:

- -

<div>foo</div>
-

- -

Now, nested:

- -

- foo -

- -

This should just be an HTML comment:

- - - -

Multiline:

- - - -

Code block:

- -

<!-- Comment -->
-

- -

Just plain comment, with trailing spaces on the line:

- - - -

Code:

- -

<hr />
-

- -

Hr's:

- -

diff --git a/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Simple).text b/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Simple).text deleted file mode 100644 index 14aa2dc27..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Inline HTML (Simple).text +++ /dev/null @@ -1,69 +0,0 @@ -Here's a simple block: - -

- foo -

- -This should be a code block, though: - -

- foo -

- -As should this: - -

foo

- -Now, nested: - -

- foo -

- -This should just be an HTML comment: - - - -Multiline: - - - -Code block: - - - -Just plain comment, with trailing spaces on the line: - - - -Code: - -

- -Hr's: - -

- -

- diff --git a/tests/MarkdownTest_1.0.3/Tests/Inline HTML comments.html b/tests/MarkdownTest_1.0.3/Tests/Inline HTML comments.html deleted file mode 100644 index 3f167a161..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Inline HTML comments.html +++ /dev/null @@ -1,13 +0,0 @@ -

Paragraph one.

- - - - - -

Paragraph two.

- - - -

The end.

diff --git a/tests/MarkdownTest_1.0.3/Tests/Inline HTML comments.text b/tests/MarkdownTest_1.0.3/Tests/Inline HTML comments.text deleted file mode 100644 index 41d830d03..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Inline HTML comments.text +++ /dev/null @@ -1,13 +0,0 @@ -Paragraph one. - - - - - -Paragraph two. - - - -The end. diff --git a/tests/MarkdownTest_1.0.3/Tests/Links, inline style.html b/tests/MarkdownTest_1.0.3/Tests/Links, inline style.html deleted file mode 100644 index f36607ddd..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Links, inline style.html +++ /dev/null @@ -1,11 +0,0 @@ -

Just a URL.

- -

- -

- -

- -

- -

diff --git a/tests/MarkdownTest_1.0.3/Tests/Links, inline style.text b/tests/MarkdownTest_1.0.3/Tests/Links, inline style.text deleted file mode 100644 index 09017a90c..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Links, inline style.text +++ /dev/null @@ -1,12 +0,0 @@ -Just a [URL](/url/). - -[URL and title](/url/ "title"). - -[URL and title](/url/ "title preceded by two spaces"). - -[URL and title](/url/ "title preceded by a tab"). - -[URL and title](/url/ "title has spaces afterward" ). - - -[Empty](). diff --git a/tests/MarkdownTest_1.0.3/Tests/Links, reference style.html b/tests/MarkdownTest_1.0.3/Tests/Links, reference style.html deleted file mode 100644 index 8e70c32f4..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Links, reference style.html +++ /dev/null @@ -1,52 +0,0 @@ -

Foo bar.

- -

Foo bar.

- -

Foo bar.

- -

With embedded [brackets].

- -

Indented once.

- -

Indented twice.

- -

Indented thrice.

- -

Indented [four][] times.

- -

[four]: /url
-

- -

this should work

- -

So should this.

- -

And this.

- -

And this.

- -

And this.

- -

But not [that] [].

- -

Nor [that][].

- -

Nor [that].

- -

[Something in brackets like this should work]

- -

[Same with this.]

- -

In this case, this points to something else.

- -

Backslashing should suppress [this] and [this].

- -

Here's one where the link -breaks across lines.

- -

Here's another where the link -breaks across lines, but with a line-ending space.

diff --git a/tests/MarkdownTest_1.0.3/Tests/Links, reference style.text b/tests/MarkdownTest_1.0.3/Tests/Links, reference style.text deleted file mode 100644 index 341ec88e3..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Links, reference style.text +++ /dev/null @@ -1,71 +0,0 @@ -Foo [bar] [1]. - -Foo [bar][1]. - -Foo [bar] -[1]. - -[1]: /url/ "Title" - - -With [embedded [brackets]] [b]. - - -Indented [once][]. - -Indented [twice][]. - -Indented [thrice][]. - -Indented [four][] times. - - [once]: /url - - [twice]: /url - - [thrice]: /url - - [four]: /url - - -[b]: /url/ - -* * * - -[this] [this] should work - -So should [this][this]. - -And [this] []. - -And [this][]. - -And [this]. - -But not [that] []. - -Nor [that][]. - -Nor [that]. - -[Something in brackets like [this][] should work] - -[Same with [this].] - -In this case, [this](/somethingelse/) points to something else. - -Backslashing should suppress \[this] and [this\]. - -[this]: foo - - -* * * - -Here's one where the [link -breaks] across lines. - -Here's another where the [link -breaks] across lines, but with a line-ending space. - - -[link breaks]: /url/ diff --git a/tests/MarkdownTest_1.0.3/Tests/Links, shortcut references.html b/tests/MarkdownTest_1.0.3/Tests/Links, shortcut references.html deleted file mode 100644 index bf81e939f..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Links, shortcut references.html +++ /dev/null @@ -1,9 +0,0 @@ -

This is the simple case.

- -

This one has a line -break.

- -

This one has a line -break with a line-ending space.

- -

this and the other

diff --git a/tests/MarkdownTest_1.0.3/Tests/Links, shortcut references.text b/tests/MarkdownTest_1.0.3/Tests/Links, shortcut references.text deleted file mode 100644 index 8c44c98fe..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Links, shortcut references.text +++ /dev/null @@ -1,20 +0,0 @@ -This is the [simple case]. - -[simple case]: /simple - - - -This one has a [line -break]. - -This one has a [line -break] with a line-ending space. - -[line break]: /foo - - -[this] [that] and the [other] - -[this]: /this -[that]: /that -[other]: /other diff --git a/tests/MarkdownTest_1.0.3/Tests/Literal quotes in titles.html b/tests/MarkdownTest_1.0.3/Tests/Literal quotes in titles.html deleted file mode 100644 index 611c1ac61..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Literal quotes in titles.html +++ /dev/null @@ -1,3 +0,0 @@ -

Foo bar.

- -

Foo bar.

diff --git a/tests/MarkdownTest_1.0.3/Tests/Literal quotes in titles.text b/tests/MarkdownTest_1.0.3/Tests/Literal quotes in titles.text deleted file mode 100644 index 29d0e4235..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Literal quotes in titles.text +++ /dev/null @@ -1,7 +0,0 @@ -Foo [bar][]. - -Foo [bar](/url/ "Title with "quotes" inside"). - - - [bar]: /url/ "Title with "quotes" inside" - diff --git a/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.html b/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.html deleted file mode 100644 index d5bdbb29a..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.html +++ /dev/null @@ -1,314 +0,0 @@ -

Markdown: Basics

- -

Getting the Gist of Markdown's Formatting Syntax

- -

This page offers a brief overview of what it's like to use Markdown. -The syntax page provides complete, detailed documentation for -every feature, but Markdown should be very easy to pick up simply by -looking at a few examples of it in action. The examples on this page -are written in a before/after style, showing example syntax and the -HTML output produced by Markdown.

- -

It's also helpful to simply try Markdown out; the Dingus is a -web application that allows you type your own Markdown-formatted text -and translate it to XHTML.

- -

Note: This document is itself written using Markdown; you -can see the source for it by adding '.text' to the URL.

- -

Paragraphs, Headers, Blockquotes

- -

A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

- -

Markdown offers two styles of headers: Setext and atx. -Setext-style headers for <h1> and <h2> are created by -"underlining" with equal signs (=) and hyphens (-), respectively. -To create an atx-style header, you put 1-6 hash marks (#) at the -beginning of the line -- the number of hashes equals the resulting -HTML header level.

- -

Blockquotes are indicated using email-style '>' angle brackets.

- -

Markdown:

- -

A First Level Header
-====================
-
-A Second Level Header
----------------------
-
-Now is the time for all good men to come to
-the aid of their country. This is just a
-regular paragraph.
-
-The quick brown fox jumped over the lazy
-dog's back.
-
-### Header 3
-
-> This is a blockquote.
-> 
-> This is the second paragraph in the blockquote.
->
-> ## This is an H2 in a blockquote
-

- -

Output:

- -

<h1>A First Level Header</h1>
-
-<h2>A Second Level Header</h2>
-
-<p>Now is the time for all good men to come to
-the aid of their country. This is just a
-regular paragraph.</p>
-
-<p>The quick brown fox jumped over the lazy
-dog's back.</p>
-
-<h3>Header 3</h3>
-
-<blockquote>
-    <p>This is a blockquote.</p>
-
-    <p>This is the second paragraph in the blockquote.</p>
-
-    <h2>This is an H2 in a blockquote</h2>
-</blockquote>
-

- -

Phrase Emphasis

- -

Markdown uses asterisks and underscores to indicate spans of emphasis.

- -

Markdown:

- -

Some of these words *are emphasized*.
-Some of these words _are emphasized also_.
-
-Use two asterisks for **strong emphasis**.
-Or, if you prefer, __use two underscores instead__.
-

- -

Output:

- -

<p>Some of these words <em>are emphasized</em>.
-Some of these words <em>are emphasized also</em>.</p>
-
-<p>Use two asterisks for <strong>strong emphasis</strong>.
-Or, if you prefer, <strong>use two underscores instead</strong>.</p>
-

- -

Lists

- -

Unordered (bulleted) lists use asterisks, pluses, and hyphens (*, -+, and -) as list markers. These three markers are -interchangable; this:

- -

*   Candy.
-*   Gum.
-*   Booze.
-

- -

this:

- -

+   Candy.
-+   Gum.
-+   Booze.
-

- -

and this:

- -

-   Candy.
--   Gum.
--   Booze.
-

- -

all produce the same output:

- -

<ul>
-<li>Candy.</li>
-<li>Gum.</li>
-<li>Booze.</li>
-</ul>
-

- -

Ordered (numbered) lists use regular numbers, followed by periods, as -list markers:

- -

1.  Red
-2.  Green
-3.  Blue
-

- -

Output:

- -

<ol>
-<li>Red</li>
-<li>Green</li>
-<li>Blue</li>
-</ol>
-

- -

If you put blank lines between items, you'll get <p> tags for the -list item text. You can create multi-paragraph list items by indenting -the paragraphs by 4 spaces or 1 tab:

- -

*   A list item.
-
-    With multiple paragraphs.
-
-*   Another item in the list.
-

- -

Output:

- -

<ul>
-<li><p>A list item.</p>
-<p>With multiple paragraphs.</p></li>
-<li><p>Another item in the list.</p></li>
-</ul>
-

- -

Links

- -

Markdown supports two styles for creating links: inline and -reference. With both styles, you use square brackets to delimit the -text you want to turn into a link.

- -

Inline-style links use parentheses immediately after the link text. -For example:

- -

This is an [example link](http://example.com/).
-

- -

Output:

- -

<p>This is an <a href="http://example.com/">
-example link</a>.</p>
-

- -

Optionally, you may include a title attribute in the parentheses:

- -

This is an [example link](http://example.com/ "With a Title").
-

- -

Output:

- -

<p>This is an <a href="http://example.com/" title="With a Title">
-example link</a>.</p>
-

- -

Reference-style links allow you to refer to your links by names, which -you define elsewhere in your document:

- -

I get 10 times more traffic from [Google][1] than from
-[Yahoo][2] or [MSN][3].
-
-[1]: http://google.com/        "Google"
-[2]: http://search.yahoo.com/  "Yahoo Search"
-[3]: http://search.msn.com/    "MSN Search"
-

- -

Output:

- -

<p>I get 10 times more traffic from <a href="http://google.com/"
-title="Google">Google</a> than from <a href="http://search.yahoo.com/"
-title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/"
-title="MSN Search">MSN</a>.</p>
-

- -

The title attribute is optional. Link names may contain letters, -numbers and spaces, but are not case sensitive:

- -

I start my morning with a cup of coffee and
-[The New York Times][NY Times].
-
-[ny times]: http://www.nytimes.com/
-

- -

Output:

- -

<p>I start my morning with a cup of coffee and
-<a href="http://www.nytimes.com/">The New York Times</a>.</p>
-

- -

Images

- -

Image syntax is very much like link syntax.

- -

Inline (titles are optional):

- -

![alt text](/path/to/img.jpg "Title")
-

- -

Reference-style:

- -

![alt text][id]
-
-[id]: /path/to/img.jpg "Title"
-

- -

Both of the above examples produce the same output:

- -

<img src="/path/to/img.jpg" alt="alt text" title="Title" />
-

- -

Code

- -

In a regular paragraph, you can create code span by wrapping text in -backtick quotes. Any ampersands (&) and angle brackets (< or ->) will automatically be translated into HTML entities. This makes -it easy to use Markdown to write about HTML example code:

- -

I strongly recommend against using any `<blink>` tags.
-
-I wish SmartyPants used named entities like `&mdash;`
-instead of decimal-encoded entites like `&#8212;`.
-

- -

Output:

- -

<p>I strongly recommend against using any
-<code>&lt;blink&gt;</code> tags.</p>
-
-<p>I wish SmartyPants used named entities like
-<code>&amp;mdash;</code> instead of decimal-encoded
-entites like <code>&amp;#8212;</code>.</p>
-

- -

To specify an entire block of pre-formatted code, indent every line of -the block by 4 spaces or 1 tab. Just like with code spans, &, <, -and > characters will be escaped automatically.

- -

Markdown:

- -

If you want your page to validate under XHTML 1.0 Strict,
-you've got to put paragraph tags in your blockquotes:
-
-    <blockquote>
-        <p>For example.</p>
-    </blockquote>
-

- -

Output:

- -

<p>If you want your page to validate under XHTML 1.0 Strict,
-you've got to put paragraph tags in your blockquotes:</p>
-
-<pre><code>&lt;blockquote&gt;
-    &lt;p&gt;For example.&lt;/p&gt;
-&lt;/blockquote&gt;
-</code></pre>
-

diff --git a/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.text b/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.text deleted file mode 100644 index 486055ca7..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.text +++ /dev/null @@ -1,306 +0,0 @@ -Markdown: Basics -================ - -

- - -Getting the Gist of Markdown's Formatting Syntax ------------------------------------------------- - -This page offers a brief overview of what it's like to use Markdown. -The [syntax page] [s] provides complete, detailed documentation for -every feature, but Markdown should be very easy to pick up simply by -looking at a few examples of it in action. The examples on this page -are written in a before/after style, showing example syntax and the -HTML output produced by Markdown. - -It's also helpful to simply try Markdown out; the [Dingus] [d] is a -web application that allows you type your own Markdown-formatted text -and translate it to XHTML. - -**Note:** This document is itself written using Markdown; you -can [see the source for it by adding '.text' to the URL] [src]. - - [s]: /projects/markdown/syntax "Markdown Syntax" - [d]: /projects/markdown/dingus "Markdown Dingus" - [src]: /projects/markdown/basics.text - - -## Paragraphs, Headers, Blockquotes ## - -A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs. - -Markdown offers two styles of headers: *Setext* and *atx*. -Setext-style headers for `

` and `

` are created by -"underlining" with equal signs (`=`) and hyphens (`-`), respectively. -To create an atx-style header, you put 1-6 hash marks (`#`) at the -beginning of the line -- the number of hashes equals the resulting -HTML header level. - -Blockquotes are indicated using email-style '`>`' angle brackets. - -Markdown: - - A First Level Header - ==================== - - A Second Level Header - --------------------- - - Now is the time for all good men to come to - the aid of their country. This is just a - regular paragraph. - - The quick brown fox jumped over the lazy - dog's back. - - ### Header 3 - - > This is a blockquote. - > - > This is the second paragraph in the blockquote. - > - > ## This is an H2 in a blockquote - - -Output: - -

A First Level Header

- -

A Second Level Header

- -

Now is the time for all good men to come to - the aid of their country. This is just a - regular paragraph.

- -

The quick brown fox jumped over the lazy - dog's back.

- -

Header 3

- -

-
This is a blockquote.
- -
This is the second paragraph in the blockquote.
- -
This is an H2 in a blockquote
-

- - - -### Phrase Emphasis ### - -Markdown uses asterisks and underscores to indicate spans of emphasis. - -Markdown: - - Some of these words *are emphasized*. - Some of these words _are emphasized also_. - - Use two asterisks for **strong emphasis**. - Or, if you prefer, __use two underscores instead__. - -Output: - -

Some of these words are emphasized. - Some of these words are emphasized also.

- -

Use two asterisks for strong emphasis. - Or, if you prefer, use two underscores instead.

- - - -## Lists ## - -Unordered (bulleted) lists use asterisks, pluses, and hyphens (`*`, -`+`, and `-`) as list markers. These three markers are -interchangable; this: - - * Candy. - * Gum. - * Booze. - -this: - - + Candy. - + Gum. - + Booze. - -and this: - - - Candy. - - Gum. - - Booze. - -all produce the same output: - -

Candy.
Gum.
Booze.

- -Ordered (numbered) lists use regular numbers, followed by periods, as -list markers: - - 1. Red - 2. Green - 3. Blue - -Output: - -

Red
Green
Blue

- -If you put blank lines between items, you'll get `

` tags for the -list item text. You can create multi-paragraph list items by indenting -the paragraphs by 4 spaces or 1 tab: - - * A list item. - - With multiple paragraphs. - - * Another item in the list. - -Output: - -

A list item.
-
With multiple paragraphs.
Another item in the list.

- - - -### Links ### - -Markdown supports two styles for creating links: *inline* and -*reference*. With both styles, you use square brackets to delimit the -text you want to turn into a link. - -Inline-style links use parentheses immediately after the link text. -For example: - - This is an [example link](http://example.com/). - -Output: - -

This is an - example link.

- -Optionally, you may include a title attribute in the parentheses: - - This is an [example link](http://example.com/ "With a Title"). - -Output: - -

This is an - example link.

- -Reference-style links allow you to refer to your links by names, which -you define elsewhere in your document: - - I get 10 times more traffic from [Google][1] than from - [Yahoo][2] or [MSN][3]. - - [1]: http://google.com/ "Google" - [2]: http://search.yahoo.com/ "Yahoo Search" - [3]: http://search.msn.com/ "MSN Search" - -Output: - -

I get 10 times more traffic from Google than from Yahoo or MSN.

- -The title attribute is optional. Link names may contain letters, -numbers and spaces, but are *not* case sensitive: - - I start my morning with a cup of coffee and - [The New York Times][NY Times]. - - [ny times]: http://www.nytimes.com/ - -Output: - -

I start my morning with a cup of coffee and - The New York Times.

- - -### Images ### - -Image syntax is very much like link syntax. - -Inline (titles are optional): - - ![alt text](/path/to/img.jpg "Title") - -Reference-style: - - ![alt text][id] - - [id]: /path/to/img.jpg "Title" - -Both of the above examples produce the same output: - -

- - - -### Code ### - -In a regular paragraph, you can create code span by wrapping text in -backtick quotes. Any ampersands (`&`) and angle brackets (`<` or -`>`) will automatically be translated into HTML entities. This makes -it easy to use Markdown to write about HTML example code: - - I strongly recommend against using any `` tags. - - I wish SmartyPants used named entities like `—` - instead of decimal-encoded entites like `—`. - -Output: - -

I strongly recommend against using any - <blink> tags.

- -

I wish SmartyPants used named entities like - — instead of decimal-encoded - entites like —.

- - -To specify an entire block of pre-formatted code, indent every line of -the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`, -and `>` characters will be escaped automatically. - -Markdown: - - If you want your page to validate under XHTML 1.0 Strict, - you've got to put paragraph tags in your blockquotes: - -

-
For example.
-

- -Output: - -

If you want your page to validate under XHTML 1.0 Strict, - you've got to put paragraph tags in your blockquotes:

- -

<blockquote>
-        <p>For example.</p>
-    </blockquote>
-

diff --git a/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.html b/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.html deleted file mode 100644 index 5c01306cc..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.html +++ /dev/null @@ -1,942 +0,0 @@ -

Markdown: Syntax

- -

Overview -
Block Elements -
- Paragraphs and Line Breaks
- Headers
- Blockquotes
- Lists
- Code Blocks
- Horizontal Rules
Span Elements -
- Links
- Emphasis
- Code
- Images
Miscellaneous -
- Backslash Escapes
- Automatic Links

- -

Note: This document is itself written using Markdown; you -can see the source for it by adding '.text' to the URL.

- -

Overview

- -

Philosophy

- -

Markdown is intended to be as easy-to-read and easy-to-write as is feasible.

- -

Readability, however, is emphasized above all else. A Markdown-formatted -document should be publishable as-is, as plain text, without looking -like it's been marked up with tags or formatting instructions. While -Markdown's syntax has been influenced by several existing text-to-HTML -filters -- including Setext, atx, Textile, reStructuredText, -Grutatext, and EtText -- the single biggest source of -inspiration for Markdown's syntax is the format of plain text email.

- -

To this end, Markdown's syntax is comprised entirely of punctuation -characters, which punctuation characters have been carefully chosen so -as to look like what they mean. E.g., asterisks around a word actually -look like *emphasis*. Markdown lists look like, well, lists. Even -blockquotes look like quoted passages of text, assuming you've ever -used email.

- -

Inline HTML

- -

Markdown's syntax is intended for one purpose: to be used as a -format for writing for the web.

- -

Markdown is not a replacement for HTML, or even close to it. Its -syntax is very small, corresponding only to a very small subset of -HTML tags. The idea is not to create a syntax that makes it easier -to insert HTML tags. In my opinion, HTML tags are already easy to -insert. The idea for Markdown is to make it easy to read, write, and -edit prose. HTML is a publishing format; Markdown is a writing -format. Thus, Markdown's formatting syntax only addresses issues that -can be conveyed in plain text.

- -

For any markup that is not covered by Markdown's syntax, you simply -use HTML itself. There's no need to preface it or delimit it to -indicate that you're switching from Markdown to HTML; you just use -the tags.

- -

The only restrictions are that block-level HTML elements -- e.g. <div>, -<table>, <pre>, <p>, etc. -- must be separated from surrounding -content by blank lines, and the start and end tags of the block should -not be indented with tabs or spaces. Markdown is smart enough not -to add extra (unwanted) <p> tags around HTML block-level tags.

- -

For example, to add an HTML table to a Markdown article:

- -

This is a regular paragraph.
-
-<table>
-    <tr>
-        <td>Foo</td>
-    </tr>
-</table>
-
-This is another regular paragraph.
-

- -

Note that Markdown formatting syntax is not processed within block-level -HTML tags. E.g., you can't use Markdown-style *emphasis* inside an -HTML block.

- -

Span-level HTML tags -- e.g. <span>, <cite>, or <del> -- can be -used anywhere in a Markdown paragraph, list item, or header. If you -want, you can even use HTML tags instead of Markdown formatting; e.g. if -you'd prefer to use HTML <a> or <img> tags instead of Markdown's -link or image syntax, go right ahead.

- -

Unlike block-level HTML tags, Markdown syntax is processed within -span-level tags.

- -

Automatic Escaping for Special Characters

- -

In HTML, there are two characters that demand special treatment: < -and &. Left angle brackets are used to start tags; ampersands are -used to denote HTML entities. If you want to use them as literal -characters, you must escape them as entities, e.g. <, and -&.

- -

Ampersands in particular are bedeviling for web writers. If you want to -write about 'AT&T', you need to write 'AT&T'. You even need to -escape ampersands within URLs. Thus, if you want to link to:

- -

http://images.google.com/images?num=30&q=larry+bird
-

- -

you need to encode the URL as:

- -

http://images.google.com/images?num=30&amp;q=larry+bird
-

- -

in your anchor tag href attribute. Needless to say, this is easy to -forget, and is probably the single most common source of HTML validation -errors in otherwise well-marked-up web sites.

- -

Markdown allows you to use these characters naturally, taking care of -all the necessary escaping for you. If you use an ampersand as part of -an HTML entity, it remains unchanged; otherwise it will be translated -into &.

- -

So, if you want to include a copyright symbol in your article, you can write:

- -

&copy;
-

- -

and Markdown will leave it alone. But if you write:

- -

AT&T
-

- -

Markdown will translate it to:

- -

AT&amp;T
-

- -

Similarly, because Markdown supports inline HTML, if you use -angle brackets as delimiters for HTML tags, Markdown will treat them as -such. But if you write:

- -

4 < 5
-

- -

Markdown will translate it to:

- -

4 &lt; 5
-

- -

However, inside Markdown code spans and blocks, angle brackets and -ampersands are always encoded automatically. This makes it easy to use -Markdown to write about HTML code. (As opposed to raw HTML, which is a -terrible format for writing about HTML syntax, because every single < -and & in your example code needs to be escaped.)

- -

Block Elements

- -

Paragraphs and Line Breaks

- -

A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

- -

The implication of the "one or more consecutive lines of text" rule is -that Markdown supports "hard-wrapped" text paragraphs. This differs -significantly from most other text-to-HTML formatters (including Movable -Type's "Convert Line Breaks" option) which translate every line break -character in a paragraph into a <br /> tag.

- -

When you do want to insert a <br /> break tag using Markdown, you -end a line with two or more spaces, then type return.

- -

Yes, this takes a tad more effort to create a <br />, but a simplistic -"every line break is a <br />" rule wouldn't work for Markdown. -Markdown's email-style blockquoting and multi-paragraph list items -work best -- and look better -- when you format them with hard breaks.

- - - -

Markdown supports two styles of headers, Setext and atx.

- -

Setext-style headers are "underlined" using equal signs (for first-level -headers) and dashes (for second-level headers). For example:

- -

This is an H1
-=============
-
-This is an H2
--------------
-

- -

Any number of underlining ='s or -'s will work.

- -

Atx-style headers use 1-6 hash characters at the start of the line, -corresponding to header levels 1-6. For example:

- -

# This is an H1
-
-## This is an H2
-
-###### This is an H6
-

- -

Optionally, you may "close" atx-style headers. This is purely -cosmetic -- you can use this if you think it looks better. The -closing hashes don't even need to match the number of hashes -used to open the header. (The number of opening hashes -determines the header level.) :

- -

# This is an H1 #
-
-## This is an H2 ##
-
-### This is an H3 ######
-

- -

Blockquotes

- -

Markdown uses email-style > characters for blockquoting. If you're -familiar with quoting passages of text in an email message, then you -know how to create a blockquote in Markdown. It looks best if you hard -wrap the text and put a > before every line:

- -

> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
-> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
-> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
-> 
-> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
-> id sem consectetuer libero luctus adipiscing.
-

- -

Markdown allows you to be lazy and only put the > before the first -line of a hard-wrapped paragraph:

- -

> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
-consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
-Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
-
-> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
-id sem consectetuer libero luctus adipiscing.
-

- -

Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by -adding additional levels of >:

- -

> This is the first level of quoting.
->
-> > This is nested blockquote.
->
-> Back to the first level.
-

- -

Blockquotes can contain other Markdown elements, including headers, lists, -and code blocks:

- -

> ## This is a header.
-> 
-> 1.   This is the first list item.
-> 2.   This is the second list item.
-> 
-> Here's some example code:
-> 
->     return shell_exec("echo $input | $markdown_script");
-

- -

Any decent text editor should make email-style quoting easy. For -example, with BBEdit, you can make a selection and choose Increase -Quote Level from the Text menu.

- -

Lists

- -

Markdown supports ordered (numbered) and unordered (bulleted) lists.

- -

Unordered lists use asterisks, pluses, and hyphens -- interchangeably --- as list markers:

- -

*   Red
-*   Green
-*   Blue
-

- -

is equivalent to:

- -

+   Red
-+   Green
-+   Blue
-

- -

and:

- -

-   Red
--   Green
--   Blue
-

- -

Ordered lists use numbers followed by periods:

- -

1.  Bird
-2.  McHale
-3.  Parish
-

- -

It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is:

- -

<ol>
-<li>Bird</li>
-<li>McHale</li>
-<li>Parish</li>
-</ol>
-

- -

If you instead wrote the list in Markdown like this:

- -

1.  Bird
-1.  McHale
-1.  Parish
-

- -

or even:

- -

3. Bird
-1. McHale
-8. Parish
-

- -

you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to.

- -

If you do use lazy list numbering, however, you should still start the -list with the number 1. At some point in the future, Markdown may support -starting ordered lists at an arbitrary number.

- -

List markers typically start at the left margin, but may be indented by -up to three spaces. List markers must be followed by one or more spaces -or a tab.

- -

To make lists look nice, you can wrap items with hanging indents:

- -

*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
-    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
-    viverra nec, fringilla in, laoreet vitae, risus.
-*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
-    Suspendisse id sem consectetuer libero luctus adipiscing.
-

- -

But if you want to be lazy, you don't have to:

- -

*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
-Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
-viverra nec, fringilla in, laoreet vitae, risus.
-*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
-Suspendisse id sem consectetuer libero luctus adipiscing.
-

- -

If list items are separated by blank lines, Markdown will wrap the -items in <p> tags in the HTML output. For example, this input:

- -

*   Bird
-*   Magic
-

- -

will turn into:

- -

<ul>
-<li>Bird</li>
-<li>Magic</li>
-</ul>
-

- -

But this:

- -

*   Bird
-
-*   Magic
-

- -

will turn into:

- -

<ul>
-<li><p>Bird</p></li>
-<li><p>Magic</p></li>
-</ul>
-

- -

List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be indented by either 4 spaces -or one tab:

- -

1.  This is a list item with two paragraphs. Lorem ipsum dolor
-    sit amet, consectetuer adipiscing elit. Aliquam hendrerit
-    mi posuere lectus.
-
-    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
-    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
-    sit amet velit.
-
-2.  Suspendisse id sem consectetuer libero luctus adipiscing.
-

- -

It looks nice if you indent every line of the subsequent -paragraphs, but here again, Markdown will allow you to be -lazy:

- -

*   This is a list item with two paragraphs.
-
-    This is the second paragraph in the list item. You're
-only required to indent the first line. Lorem ipsum dolor
-sit amet, consectetuer adipiscing elit.
-
-*   Another item in the same list.
-

- -

To put a blockquote within a list item, the blockquote's > -delimiters need to be indented:

- -

*   A list item with a blockquote:
-
-    > This is a blockquote
-    > inside a list item.
-

- -

To put a code block within a list item, the code block needs -to be indented twice -- 8 spaces or two tabs:

- -

*   A list item with a code block:
-
-        <code goes here>
-

- -

It's worth noting that it's possible to trigger an ordered list by -accident, by writing something like this:

- -

1986. What a great season.
-

- -

In other words, a number-period-space sequence at the beginning of a -line. To avoid this, you can backslash-escape the period:

- -

1986\. What a great season.
-

- -

Code Blocks

- -

Pre-formatted code blocks are used for writing about programming or -markup source code. Rather than forming normal paragraphs, the lines -of a code block are interpreted literally. Markdown wraps a code block -in both <pre> and <code> tags.

- -

To produce a code block in Markdown, simply indent every line of the -block by at least 4 spaces or 1 tab. For example, given this input:

- -

This is a normal paragraph:
-
-    This is a code block.
-

- -

Markdown will generate:

- -

<p>This is a normal paragraph:</p>
-
-<pre><code>This is a code block.
-</code></pre>
-

- -

One level of indentation -- 4 spaces or 1 tab -- is removed from each -line of the code block. For example, this:

- -

Here is an example of AppleScript:
-
-    tell application "Foo"
-        beep
-    end tell
-

- -

will turn into:

- -

<p>Here is an example of AppleScript:</p>
-
-<pre><code>tell application "Foo"
-    beep
-end tell
-</code></pre>
-

- -

A code block continues until it reaches a line that is not indented -(or the end of the article).

- -

Within a code block, ampersands (&) and angle brackets (< and >) -are automatically converted into HTML entities. This makes it very -easy to include example HTML source code using Markdown -- just paste -it and indent it, and Markdown will handle the hassle of encoding the -ampersands and angle brackets. For example, this:

- -

    <div class="footer">
-        &copy; 2004 Foo Corporation
-    </div>
-

- -

will turn into:

- -

<pre><code>&lt;div class="footer"&gt;
-    &amp;copy; 2004 Foo Corporation
-&lt;/div&gt;
-</code></pre>
-

- -

Regular Markdown syntax is not processed within code blocks. E.g., -asterisks are just literal asterisks within a code block. This means -it's also easy to use Markdown to write about Markdown's own syntax.

- -

Horizontal Rules

- -

You can produce a horizontal rule tag (<hr />) by placing three or -more hyphens, asterisks, or underscores on a line by themselves. If you -wish, you may use spaces between the hyphens or asterisks. Each of the -following lines will produce a horizontal rule:

- -

* * *
-
-***
-
-*****
-
-- - -
-
----------------------------------------
-
-_ _ _
-

- -

Span Elements

- -

Links

- -

Markdown supports two styles of links: inline and reference.

- -

In both styles, the link text is delimited by [square brackets].

- -

To create an inline link, use a set of regular parentheses immediately -after the link text's closing square bracket. Inside the parentheses, -put the URL where you want the link to point, along with an optional -title for the link, surrounded in quotes. For example:

- -

This is [an example](http://example.com/ "Title") inline link.
-
-[This link](http://example.net/) has no title attribute.
-

- -

Will produce:

- -

<p>This is <a href="http://example.com/" title="Title">
-an example</a> inline link.</p>
-
-<p><a href="http://example.net/">This link</a> has no
-title attribute.</p>
-

- -

If you're referring to a local resource on the same server, you can -use relative paths:

- -

See my [About](/about/) page for details.
-

- -

Reference-style links use a second set of square brackets, inside -which you place a label of your choosing to identify the link:

- -

This is [an example][id] reference-style link.
-

- -

You can optionally use a space to separate the sets of brackets:

- -

This is [an example] [id] reference-style link.
-

- -

Then, anywhere in the document, you define your link label like this, -on a line by itself:

- -

[id]: http://example.com/  "Optional Title Here"
-

- -

That is:

- -

Square brackets containing the link identifier (optionally -indented from the left margin using up to three spaces);
followed by a colon;
followed by one or more spaces (or tabs);
followed by the URL for the link;
optionally followed by a title attribute for the link, enclosed -in double or single quotes.

- -

The link URL may, optionally, be surrounded by angle brackets:

- -

[id]: <http://example.com/>  "Optional Title Here"
-

- -

You can put the title attribute on the next line and use extra spaces -or tabs for padding, which tends to look better with longer URLs:

- -

[id]: http://example.com/longish/path/to/resource/here
-    "Optional Title Here"
-

- -

Link definitions are only used for creating links during Markdown -processing, and are stripped from your document in the HTML output.

- -

Link definition names may consist of letters, numbers, spaces, and punctuation -- but they are not case sensitive. E.g. these two links:

- -

[link text][a]
-[link text][A]
-

- -

are equivalent.

- -

The implicit link name shortcut allows you to omit the name of the -link, in which case the link text itself is used as the name. -Just use an empty set of square brackets -- e.g., to link the word -"Google" to the google.com web site, you could simply write:

- -

[Google][]
-

- -

And then define the link:

- -

[Google]: http://google.com/
-

- -

Because link names may contain spaces, this shortcut even works for -multiple words in the link text:

- -

Visit [Daring Fireball][] for more information.
-

- -

And then define the link:

- -

[Daring Fireball]: http://daringfireball.net/
-

- -

Link definitions can be placed anywhere in your Markdown document. I -tend to put them immediately after each paragraph in which they're -used, but if you want, you can put them all at the end of your -document, sort of like footnotes.

- -

Here's an example of reference links in action:

- -

I get 10 times more traffic from [Google] [1] than from
-[Yahoo] [2] or [MSN] [3].
-
-  [1]: http://google.com/        "Google"
-  [2]: http://search.yahoo.com/  "Yahoo Search"
-  [3]: http://search.msn.com/    "MSN Search"
-

- -

Using the implicit link name shortcut, you could instead write:

- -

I get 10 times more traffic from [Google][] than from
-[Yahoo][] or [MSN][].
-
-  [google]: http://google.com/        "Google"
-  [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
-  [msn]:    http://search.msn.com/    "MSN Search"
-

- -

Both of the above examples will produce the following HTML output:

- -

<p>I get 10 times more traffic from <a href="http://google.com/"
-title="Google">Google</a> than from
-<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
-or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
-

- -

For comparison, here is the same paragraph written using -Markdown's inline link style:

- -

I get 10 times more traffic from [Google](http://google.com/ "Google")
-than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
-[MSN](http://search.msn.com/ "MSN Search").
-

- -

The point of reference-style links is not that they're easier to -write. The point is that with reference-style links, your document -source is vastly more readable. Compare the above examples: using -reference-style links, the paragraph itself is only 81 characters -long; with inline-style links, it's 176 characters; and as raw HTML, -it's 234 characters. In the raw HTML, there's more markup than there -is text.

- -

With Markdown's reference-style links, a source document much more -closely resembles the final output, as rendered in a browser. By -allowing you to move the markup-related metadata out of the paragraph, -you can add links without interrupting the narrative flow of your -prose.

- -

Emphasis

- -

Markdown treats asterisks (*) and underscores (_) as indicators of -emphasis. Text wrapped with one * or _ will be wrapped with an -HTML <em> tag; double *'s or _'s will be wrapped with an HTML -<strong> tag. E.g., this input:

- -

*single asterisks*
-
-_single underscores_
-
-**double asterisks**
-
-__double underscores__
-

- -

will produce:

- -

<em>single asterisks</em>
-
-<em>single underscores</em>
-
-<strong>double asterisks</strong>
-
-<strong>double underscores</strong>
-

- -

You can use whichever style you prefer; the lone restriction is that -the same character must be used to open and close an emphasis span.

- -

Emphasis can be used in the middle of a word:

- -

un*fucking*believable
-

- -

But if you surround an * or _ with spaces, it'll be treated as a -literal asterisk or underscore.

- -

To produce a literal asterisk or underscore at a position where it -would otherwise be used as an emphasis delimiter, you can backslash -escape it:

- -

\*this text is surrounded by literal asterisks\*
-

- -

Code

- -

To indicate a span of code, wrap it with backtick quotes (`). -Unlike a pre-formatted code block, a code span indicates code within a -normal paragraph. For example:

- -

Use the `printf()` function.
-

- -

will produce:

- -

<p>Use the <code>printf()</code> function.</p>
-

- -

To include a literal backtick character within a code span, you can use -multiple backticks as the opening and closing delimiters:

- -

``There is a literal backtick (`) here.``
-

- -

which will produce this:

- -

<p><code>There is a literal backtick (`) here.</code></p>
-

- -

The backtick delimiters surrounding a code span may include spaces -- -one after the opening, one before the closing. This allows you to place -literal backtick characters at the beginning or end of a code span:

- -

A single backtick in a code span: `` ` ``
-
-A backtick-delimited string in a code span: `` `foo` ``
-

- -

will produce:

- -

<p>A single backtick in a code span: <code>`</code></p>
-
-<p>A backtick-delimited string in a code span: <code>`foo`</code></p>
-

- -

With a code span, ampersands and angle brackets are encoded as HTML -entities automatically, which makes it easy to include example HTML -tags. Markdown will turn this:

- -

Please don't use any `<blink>` tags.
-

- -

into:

- -

<p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
-

- -

You can write this:

- -

`&#8212;` is the decimal-encoded equivalent of `&mdash;`.
-

- -

to produce:

- -

<p><code>&amp;#8212;</code> is the decimal-encoded
-equivalent of <code>&amp;mdash;</code>.</p>
-

- -

Images

- -

Admittedly, it's fairly difficult to devise a "natural" syntax for -placing images into a plain text document format.

- -

Markdown uses an image syntax that is intended to resemble the syntax -for links, allowing for two styles: inline and reference.

- -

Inline image syntax looks like this:

- -

![Alt text](/path/to/img.jpg)
-
-![Alt text](/path/to/img.jpg "Optional title")
-

- -

That is:

- -

An exclamation mark: !;
followed by a set of square brackets, containing the alt -attribute text for the image;
followed by a set of parentheses, containing the URL or path to -the image, and an optional title attribute enclosed in double -or single quotes.

- -

Reference-style image syntax looks like this:

- -

![Alt text][id]
-

- -

Where "id" is the name of a defined image reference. Image references -are defined using syntax identical to link references:

- -

[id]: url/to/image  "Optional title attribute"
-

- -

As of this writing, Markdown has no syntax for specifying the -dimensions of an image; if this is important to you, you can simply -use regular HTML <img> tags.

- -

Miscellaneous

- -

Automatic Links

- -

Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:

- -

<http://example.com/>
-

- -

Markdown will turn this into:

- -

<a href="http://example.com/">http://example.com/</a>
-

- -

Automatic links for email addresses work similarly, except that -Markdown will also perform a bit of randomized decimal and hex -entity-encoding to help obscure your address from address-harvesting -spambots. For example, Markdown will turn this:

- -

<address@example.com>
-

- -

into something like this:

- -

<a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
-&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
-&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
-&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
-

- -

which will render in a browser as a clickable link to "address@example.com".

- -

(This sort of entity-encoding trick will indeed fool many, if not -most, address-harvesting bots, but it definitely won't fool all of -them. It's better than nothing, but an address published in this way -will probably eventually start receiving spam.)

- -

Backslash Escapes

- -

Markdown allows you to use backslash escapes to generate literal -characters which would otherwise have special meaning in Markdown's -formatting syntax. For example, if you wanted to surround a word with -literal asterisks (instead of an HTML <em> tag), you can add backslashes -before the asterisks, like this:

- -

\*literal asterisks\*
-

- -

Markdown provides backslash escapes for the following characters:

- -

\   backslash
-`   backtick
-*   asterisk
-_   underscore
-{}  curly braces
-[]  square brackets
-()  parentheses
-#   hash mark
-+   plus sign
--   minus sign (hyphen)
-.   dot
-!   exclamation mark
-

diff --git a/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.text b/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.text deleted file mode 100644 index 57360a16c..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.text +++ /dev/null @@ -1,888 +0,0 @@ -Markdown: Syntax -================ - -

- - -* [Overview](#overview) - * [Philosophy](#philosophy) - * [Inline HTML](#html) - * [Automatic Escaping for Special Characters](#autoescape) -* [Block Elements](#block) - * [Paragraphs and Line Breaks](#p) - * [Headers](#header) - * [Blockquotes](#blockquote) - * [Lists](#list) - * [Code Blocks](#precode) - * [Horizontal Rules](#hr) -* [Span Elements](#span) - * [Links](#link) - * [Emphasis](#em) - * [Code](#code) - * [Images](#img) -* [Miscellaneous](#misc) - * [Backslash Escapes](#backslash) - * [Automatic Links](#autolink) - - -**Note:** This document is itself written using Markdown; you -can [see the source for it by adding '.text' to the URL][src]. - - [src]: /projects/markdown/syntax.text - -* * * - -

Overview

- -

Philosophy

- -Markdown is intended to be as easy-to-read and easy-to-write as is feasible. - -Readability, however, is emphasized above all else. A Markdown-formatted -document should be publishable as-is, as plain text, without looking -like it's been marked up with tags or formatting instructions. While -Markdown's syntax has been influenced by several existing text-to-HTML -filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4], -[Grutatext] [5], and [EtText] [6] -- the single biggest source of -inspiration for Markdown's syntax is the format of plain text email. - - [1]: http://docutils.sourceforge.net/mirror/setext.html - [2]: http://www.aaronsw.com/2002/atx/ - [3]: http://textism.com/tools/textile/ - [4]: http://docutils.sourceforge.net/rst.html - [5]: http://www.triptico.com/software/grutatxt.html - [6]: http://ettext.taint.org/doc/ - -To this end, Markdown's syntax is comprised entirely of punctuation -characters, which punctuation characters have been carefully chosen so -as to look like what they mean. E.g., asterisks around a word actually -look like \*emphasis\*. Markdown lists look like, well, lists. Even -blockquotes look like quoted passages of text, assuming you've ever -used email. - - - -

Inline HTML

- -Markdown's syntax is intended for one purpose: to be used as a -format for *writing* for the web. - -Markdown is not a replacement for HTML, or even close to it. Its -syntax is very small, corresponding only to a very small subset of -HTML tags. The idea is *not* to create a syntax that makes it easier -to insert HTML tags. In my opinion, HTML tags are already easy to -insert. The idea for Markdown is to make it easy to read, write, and -edit prose. HTML is a *publishing* format; Markdown is a *writing* -format. Thus, Markdown's formatting syntax only addresses issues that -can be conveyed in plain text. - -For any markup that is not covered by Markdown's syntax, you simply -use HTML itself. There's no need to preface it or delimit it to -indicate that you're switching from Markdown to HTML; you just use -the tags. - -The only restrictions are that block-level HTML elements -- e.g. `

`, -``, `

`, `

`, etc. -- must be separated from surrounding -content by blank lines, and the start and end tags of the block should -not be indented with tabs or spaces. Markdown is smart enough not -to add extra (unwanted) `

` tags around HTML block-level tags. - -For example, to add an HTML table to a Markdown article: - - This is a regular paragraph. - -

- - - -

Foo

- - This is another regular paragraph. - -Note that Markdown formatting syntax is not processed within block-level -HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an -HTML block. - -Span-level HTML tags -- e.g. ``, ``, or `` -- can be -used anywhere in a Markdown paragraph, list item, or header. If you -want, you can even use HTML tags instead of Markdown formatting; e.g. if -you'd prefer to use HTML `` or `` tags instead of Markdown's -link or image syntax, go right ahead. - -Unlike block-level HTML tags, Markdown syntax *is* processed within -span-level tags. - - -
Automatic Escaping for Special Characters
- -In HTML, there are two characters that demand special treatment: `<` -and `&`. Left angle brackets are used to start tags; ampersands are -used to denote HTML entities. If you want to use them as literal -characters, you must escape them as entities, e.g. `<`, and -`&`. - -Ampersands in particular are bedeviling for web writers. If you want to -write about 'AT&T', you need to write '`AT&T`'. You even need to -escape ampersands within URLs. Thus, if you want to link to: - - http://images.google.com/images?num=30&q=larry+bird - -you need to encode the URL as: - - http://images.google.com/images?num=30&q=larry+bird - -in your anchor tag `href` attribute. Needless to say, this is easy to -forget, and is probably the single most common source of HTML validation -errors in otherwise well-marked-up web sites. - -Markdown allows you to use these characters naturally, taking care of -all the necessary escaping for you. If you use an ampersand as part of -an HTML entity, it remains unchanged; otherwise it will be translated -into `&`. - -So, if you want to include a copyright symbol in your article, you can write: - - © - -and Markdown will leave it alone. But if you write: - - AT&T - -Markdown will translate it to: - - AT&T - -Similarly, because Markdown supports [inline HTML](#html), if you use -angle brackets as delimiters for HTML tags, Markdown will treat them as -such. But if you write: - - 4 < 5 - -Markdown will translate it to: - - 4 < 5 - -However, inside Markdown code spans and blocks, angle brackets and -ampersands are *always* encoded automatically. This makes it easy to use -Markdown to write about HTML code. (As opposed to raw HTML, which is a -terrible format for writing about HTML syntax, because every single `<` -and `&` in your example code needs to be escaped.) - - -* * * - - -
Block Elements
- - -
Paragraphs and Line Breaks
- -A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs. - -The implication of the "one or more consecutive lines of text" rule is -that Markdown supports "hard-wrapped" text paragraphs. This differs -significantly from most other text-to-HTML formatters (including Movable -Type's "Convert Line Breaks" option) which translate every line break -character in a paragraph into a `
` tag. - -When you *do* want to insert a `
` break tag using Markdown, you -end a line with two or more spaces, then type return. - -Yes, this takes a tad more effort to create a `
`, but a simplistic -"every line break is a `
`" rule wouldn't work for Markdown. -Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l] -work best -- and look better -- when you format them with hard breaks. - - [bq]: #blockquote - [l]: #list - - - -
Headers
- -Markdown supports two styles of headers, [Setext] [1] and [atx] [2]. - -Setext-style headers are "underlined" using equal signs (for first-level -headers) and dashes (for second-level headers). For example: - - This is an H1 - ============= - - This is an H2 - ------------- - -Any number of underlining `=`'s or `-`'s will work. - -Atx-style headers use 1-6 hash characters at the start of the line, -corresponding to header levels 1-6. For example: - - # This is an H1 - - ## This is an H2 - - ###### This is an H6 - -Optionally, you may "close" atx-style headers. This is purely -cosmetic -- you can use this if you think it looks better. The -closing hashes don't even need to match the number of hashes -used to open the header. (The number of opening hashes -determines the header level.) : - - # This is an H1 # - - ## This is an H2 ## - - ### This is an H3 ###### - - -
Blockquotes
- -Markdown uses email-style `>` characters for blockquoting. If you're -familiar with quoting passages of text in an email message, then you -know how to create a blockquote in Markdown. It looks best if you hard -wrap the text and put a `>` before every line: - - > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, - > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. - > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. - > - > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse - > id sem consectetuer libero luctus adipiscing. - -Markdown allows you to be lazy and only put the `>` before the first -line of a hard-wrapped paragraph: - - > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, - consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. - Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. - - > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse - id sem consectetuer libero luctus adipiscing. - -Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by -adding additional levels of `>`: - - > This is the first level of quoting. - > - > > This is nested blockquote. - > - > Back to the first level. - -Blockquotes can contain other Markdown elements, including headers, lists, -and code blocks: - - > ## This is a header. - > - > 1. This is the first list item. - > 2. This is the second list item. - > - > Here's some example code: - > - > return shell_exec("echo $input | $markdown_script"); - -Any decent text editor should make email-style quoting easy. For -example, with BBEdit, you can make a selection and choose Increase -Quote Level from the Text menu. - - -
Lists
- -Markdown supports ordered (numbered) and unordered (bulleted) lists. - -Unordered lists use asterisks, pluses, and hyphens -- interchangably --- as list markers: - - * Red - * Green - * Blue - -is equivalent to: - - + Red - + Green - + Blue - -and: - - - Red - - Green - - Blue - -Ordered lists use numbers followed by periods: - - 1. Bird - 2. McHale - 3. Parish - -It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is: - -
-
Bird
-
McHale
-
Parish
-
- -If you instead wrote the list in Markdown like this: - - 1. Bird - 1. McHale - 1. Parish - -or even: - - 3. Bird - 1. McHale - 8. Parish - -you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to. - -If you do use lazy list numbering, however, you should still start the -list with the number 1. At some point in the future, Markdown may support -starting ordered lists at an arbitrary number. - -List markers typically start at the left margin, but may be indented by -up to three spaces. List markers must be followed by one or more spaces -or a tab. - -To make lists look nice, you can wrap items with hanging indents: - - * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. - Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, - viverra nec, fringilla in, laoreet vitae, risus. - * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. - Suspendisse id sem consectetuer libero luctus adipiscing. - -But if you want to be lazy, you don't have to: - - * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. - Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, - viverra nec, fringilla in, laoreet vitae, risus. - * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. - Suspendisse id sem consectetuer libero luctus adipiscing. - -If list items are separated by blank lines, Markdown will wrap the -items in `
` tags in the HTML output. For example, this input: - - * Bird - * Magic - -will turn into: - -
-
Bird
-
Magic
-
- -But this: - - * Bird - - * Magic - -will turn into: - -
-
Bird
-
Magic
-
- -List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be indented by either 4 spaces -or one tab: - - 1. This is a list item with two paragraphs. Lorem ipsum dolor - sit amet, consectetuer adipiscing elit. Aliquam hendrerit - mi posuere lectus. - - Vestibulum enim wisi, viverra nec, fringilla in, laoreet - vitae, risus. Donec sit amet nisl. Aliquam semper ipsum - sit amet velit. - - 2. Suspendisse id sem consectetuer libero luctus adipiscing. - -It looks nice if you indent every line of the subsequent -paragraphs, but here again, Markdown will allow you to be -lazy: - - * This is a list item with two paragraphs. - - This is the second paragraph in the list item. You're - only required to indent the first line. Lorem ipsum dolor - sit amet, consectetuer adipiscing elit. - - * Another item in the same list. - -To put a blockquote within a list item, the blockquote's `>` -delimiters need to be indented: - - * A list item with a blockquote: - - > This is a blockquote - > inside a list item. - -To put a code block within a list item, the code block needs -to be indented *twice* -- 8 spaces or two tabs: - - * A list item with a code block: - - - - -It's worth noting that it's possible to trigger an ordered list by -accident, by writing something like this: - - 1986. What a great season. - -In other words, a *number-period-space* sequence at the beginning of a -line. To avoid this, you can backslash-escape the period: - - 1986\. What a great season. - - - -Code Blocks - -Pre-formatted code blocks are used for writing about programming or -markup source code. Rather than forming normal paragraphs, the lines -of a code block are interpreted literally. Markdown wraps a code block -in both `<pre>` and `<code>` tags. - -To produce a code block in Markdown, simply indent every line of the -block by at least 4 spaces or 1 tab. For example, given this input: - - This is a normal paragraph: - - This is a code block. 
- -Markdown will generate: - - This is a normal paragraph: - - This is a code block. - - -One level of indentation -- 4 spaces or 1 tab -- is removed from each -line of the code block. For example, this: - - Here is an example of AppleScript: - - tell application "Foo" - beep - end tell - -will turn into: - - Here is an example of AppleScript: - - tell application "Foo" - beep - end tell - - -A code block continues until it reaches a line that is not indented -(or the end of the article). - -Within a code block, ampersands (`&`) and angle brackets (`<` and `>`) -are automatically converted into HTML entities. This makes it very -easy to include example HTML source code using Markdown -- just paste -it and indent it, and Markdown will handle the hassle of encoding the -ampersands and angle brackets. For example, this: - - - © 2004 Foo Corporation - - -will turn into: - - <div class="footer"> - © 2004 Foo Corporation - </div> - - -Regular Markdown syntax is not processed within code blocks. E.g., -asterisks are just literal asterisks within a code block. This means -it's also easy to use Markdown to write about Markdown's own syntax. - - - -Horizontal Rules - -You can produce a horizontal rule tag (`<hr />`) by placing three or -more hyphens, asterisks, or underscores on a line by themselves. If you -wish, you may use spaces between the hyphens or asterisks. Each of the -following lines will produce a horizontal rule: - - * * * - - *** - - ***** - - - - - - - --------------------------------------- - - _ _ _ - - -* * * - -Span Elements - -Links - -Markdown supports two styles of links: *inline* and *reference*. - -In both styles, the link text is delimited by [square brackets]. - -To create an inline link, use a set of regular parentheses immediately -after the link text's closing square bracket. Inside the parentheses, -put the URL where you want the link to point, along with an *optional* -title for the link, surrounded in quotes. 
For example: - - This is [an example](http://example.com/ "Title") inline link. - - [This link](http://example.net/) has no title attribute. - -Will produce: - - This is - an example inline link. - - This link has no - title attribute. - -If you're referring to a local resource on the same server, you can -use relative paths: - - See my [About](/about/) page for details. - -Reference-style links use a second set of square brackets, inside -which you place a label of your choosing to identify the link: - - This is [an example][id] reference-style link. - -You can optionally use a space to separate the sets of brackets: - - This is [an example] [id] reference-style link. - -Then, anywhere in the document, you define your link label like this, -on a line by itself: - - [id]: http://example.com/ "Optional Title Here" - -That is: - -* Square brackets containing the link identifier (optionally - indented from the left margin using up to three spaces); -* followed by a colon; -* followed by one or more spaces (or tabs); -* followed by the URL for the link; -* optionally followed by a title attribute for the link, enclosed - in double or single quotes. - -The link URL may, optionally, be surrounded by angle brackets: - - [id]: <http://example.com/> "Optional Title Here" - -You can put the title attribute on the next line and use extra spaces -or tabs for padding, which tends to look better with longer URLs: - - [id]: http://example.com/longish/path/to/resource/here - "Optional Title Here" - -Link definitions are only used for creating links during Markdown -processing, and are stripped from your document in the HTML output. - -Link definition names may consist of letters, numbers, spaces, and punctuation -- but they are *not* case sensitive. E.g. these two links: - - [link text][a] - [link text][A] - -are equivalent. - -The *implicit link name* shortcut allows you to omit the name of the -link, in which case the link text itself is used as the name. 
-Just use an empty set of square brackets -- e.g., to link the word -"Google" to the google.com web site, you could simply write: - - [Google][] - -And then define the link: - - [Google]: http://google.com/ - -Because link names may contain spaces, this shortcut even works for -multiple words in the link text: - - Visit [Daring Fireball][] for more information. - -And then define the link: - - [Daring Fireball]: http://daringfireball.net/ - -Link definitions can be placed anywhere in your Markdown document. I -tend to put them immediately after each paragraph in which they're -used, but if you want, you can put them all at the end of your -document, sort of like footnotes. - -Here's an example of reference links in action: - - I get 10 times more traffic from [Google] [1] than from - [Yahoo] [2] or [MSN] [3]. - - [1]: http://google.com/ "Google" - [2]: http://search.yahoo.com/ "Yahoo Search" - [3]: http://search.msn.com/ "MSN Search" - -Using the implicit link name shortcut, you could instead write: - - I get 10 times more traffic from [Google][] than from - [Yahoo][] or [MSN][]. - - [google]: http://google.com/ "Google" - [yahoo]: http://search.yahoo.com/ "Yahoo Search" - [msn]: http://search.msn.com/ "MSN Search" - -Both of the above examples will produce the following HTML output: - - I get 10 times more traffic from Google than from - Yahoo - or MSN. - -For comparison, here is the same paragraph written using -Markdown's inline link style: - - I get 10 times more traffic from [Google](http://google.com/ "Google") - than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or - [MSN](http://search.msn.com/ "MSN Search"). - -The point of reference-style links is not that they're easier to -write. The point is that with reference-style links, your document -source is vastly more readable. 
Compare the above examples: using -reference-style links, the paragraph itself is only 81 characters -long; with inline-style links, it's 176 characters; and as raw HTML, -it's 234 characters. In the raw HTML, there's more markup than there -is text. - -With Markdown's reference-style links, a source document much more -closely resembles the final output, as rendered in a browser. By -allowing you to move the markup-related metadata out of the paragraph, -you can add links without interrupting the narrative flow of your -prose. - - -Emphasis - -Markdown treats asterisks (`*`) and underscores (`_`) as indicators of -emphasis. Text wrapped with one `*` or `_` will be wrapped with an -HTML `` tag; double `*`'s or `_`'s will be wrapped with an HTML -`` tag. E.g., this input: - - *single asterisks* - - _single underscores_ - - **double asterisks** - - __double underscores__ - -will produce: - - single asterisks - - single underscores - - double asterisks - - double underscores - -You can use whichever style you prefer; the lone restriction is that -the same character must be used to open and close an emphasis span. - -Emphasis can be used in the middle of a word: - - un*fucking*believable - -But if you surround an `*` or `_` with spaces, it'll be treated as a -literal asterisk or underscore. - -To produce a literal asterisk or underscore at a position where it -would otherwise be used as an emphasis delimiter, you can backslash -escape it: - - \*this text is surrounded by literal asterisks\* - - - -Code - -To indicate a span of code, wrap it with backtick quotes (`` ` ``). -Unlike a pre-formatted code block, a code span indicates code within a -normal paragraph. For example: - - Use the `printf()` function. - -will produce: - - Use the printf() function. 
- -To include a literal backtick character within a code span, you can use -multiple backticks as the opening and closing delimiters: - - ``There is a literal backtick (`) here.`` - -which will produce this: - - There is a literal backtick (`) here. - -The backtick delimiters surrounding a code span may include spaces -- -one after the opening, one before the closing. This allows you to place -literal backtick characters at the beginning or end of a code span: - - A single backtick in a code span: `` ` `` - - A backtick-delimited string in a code span: `` `foo` `` - -will produce: - - A single backtick in a code span: ` - - A backtick-delimited string in a code span: `foo` - -With a code span, ampersands and angle brackets are encoded as HTML -entities automatically, which makes it easy to include example HTML -tags. Markdown will turn this: - - Please don't use any `` tags. - -into: - - Please don't use any <blink> tags. - -You can write this: - - `—` is the decimal-encoded equivalent of `—`. - -to produce: - - — is the decimal-encoded - equivalent of —. - - - -Images - -Admittedly, it's fairly difficult to devise a "natural" syntax for -placing images into a plain text document format. - -Markdown uses an image syntax that is intended to resemble the syntax -for links, allowing for two styles: *inline* and *reference*. - -Inline image syntax looks like this: - - ![Alt text](/path/to/img.jpg) - - ![Alt text](/path/to/img.jpg "Optional title") - -That is: - -* An exclamation mark: `!`; -* followed by a set of square brackets, containing the `alt` - attribute text for the image; -* followed by a set of parentheses, containing the URL or path to - the image, and an optional `title` attribute enclosed in double - or single quotes. - -Reference-style image syntax looks like this: - - ![Alt text][id] - -Where "id" is the name of a defined image reference. 
Image references -are defined using syntax identical to link references: - - [id]: url/to/image "Optional title attribute" - -As of this writing, Markdown has no syntax for specifying the -dimensions of an image; if this is important to you, you can simply -use regular HTML `<img>` tags. - - -* * * - - -Miscellaneous - -Automatic Links - -Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this: - - <http://example.com/> - -Markdown will turn this into: - - http://example.com/ - -Automatic links for email addresses work similarly, except that -Markdown will also perform a bit of randomized decimal and hex -entity-encoding to help obscure your address from address-harvesting -spambots. For example, Markdown will turn this: - - <address@example.com> - -into something like this: - - address@exa - mple.com - -which will render in a browser as a clickable link to "address@example.com". - -(This sort of entity-encoding trick will indeed fool many, if not -most, address-harvesting bots, but it definitely won't fool all of -them. It's better than nothing, but an address published in this way -will probably eventually start receiving spam.) - - - -Backslash Escapes - -Markdown allows you to use backslash escapes to generate literal -characters which would otherwise have special meaning in Markdown's -formatting syntax. For example, if you wanted to surround a word with -literal asterisks (instead of an HTML `<em>` tag), you can use backslashes -before the asterisks, like this: - - \*literal asterisks\* - -Markdown provides backslash escapes for the following characters: - - \ backslash - ` backtick - * asterisk - _ underscore - {} curly braces - [] square brackets - () parentheses - # hash mark - + plus sign - - minus sign (hyphen) - . dot - ! 
exclamation mark - diff --git a/tests/MarkdownTest_1.0.3/Tests/Nested blockquotes.html b/tests/MarkdownTest_1.0.3/Tests/Nested blockquotes.html deleted file mode 100644 index d8ec7f8e0..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Nested blockquotes.html +++ /dev/null @@ -1,9 +0,0 @@ - - foo - - - bar - - - foo - diff --git a/tests/MarkdownTest_1.0.3/Tests/Nested blockquotes.text b/tests/MarkdownTest_1.0.3/Tests/Nested blockquotes.text deleted file mode 100644 index ed3c624ff..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Nested blockquotes.text +++ /dev/null @@ -1,5 +0,0 @@ -> foo -> -> > bar -> -> foo diff --git a/tests/MarkdownTest_1.0.3/Tests/Ordered and unordered lists.html b/tests/MarkdownTest_1.0.3/Tests/Ordered and unordered lists.html deleted file mode 100644 index ba71eab39..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Ordered and unordered lists.html +++ /dev/null @@ -1,148 +0,0 @@ -Unordered - -Asterisks tight: - - -asterisk 1 -asterisk 2 -asterisk 3 - - -Asterisks loose: - - -asterisk 1 -asterisk 2 -asterisk 3 - - - - -Pluses tight: - - -Plus 1 -Plus 2 -Plus 3 - - -Pluses loose: - - -Plus 1 -Plus 2 -Plus 3 - - - - -Minuses tight: - - -Minus 1 -Minus 2 -Minus 3 - - -Minuses loose: - - -Minus 1 -Minus 2 -Minus 3 - - -Ordered - -Tight: - - -First -Second -Third - - -and: - - -One -Two -Three - - -Loose using tabs: - - -First -Second -Third - - -and using spaces: - - -One -Two -Three - - -Multiple paragraphs: - - -Item 1, graf one. - -Item 2. graf two. The quick brown fox jumped over the lazy dog's -back. -Item 2. -Item 3. 
- - -Nested - - -Tab - -Tab - -Tab - - - - -Here's another: - - -First -Second: - -Fee -Fie -Foe - -Third - - -Same thing but with paragraphs: - - -First -Second: - - -Fee -Fie -Foe - -Third - - - -This was an error in Markdown 1.0.1: - - -this - -sub - -that - diff --git a/tests/MarkdownTest_1.0.3/Tests/Ordered and unordered lists.text b/tests/MarkdownTest_1.0.3/Tests/Ordered and unordered lists.text deleted file mode 100644 index 7f3b49777..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Ordered and unordered lists.text +++ /dev/null @@ -1,131 +0,0 @@ -## Unordered - -Asterisks tight: - -* asterisk 1 -* asterisk 2 -* asterisk 3 - - -Asterisks loose: - -* asterisk 1 - -* asterisk 2 - -* asterisk 3 - -* * * - -Pluses tight: - -+ Plus 1 -+ Plus 2 -+ Plus 3 - - -Pluses loose: - -+ Plus 1 - -+ Plus 2 - -+ Plus 3 - -* * * - - -Minuses tight: - -- Minus 1 -- Minus 2 -- Minus 3 - - -Minuses loose: - -- Minus 1 - -- Minus 2 - -- Minus 3 - - -## Ordered - -Tight: - -1. First -2. Second -3. Third - -and: - -1. One -2. Two -3. Three - - -Loose using tabs: - -1. First - -2. Second - -3. Third - -and using spaces: - -1. One - -2. Two - -3. Three - -Multiple paragraphs: - -1. Item 1, graf one. - - Item 2. graf two. The quick brown fox jumped over the lazy dog's - back. - -2. Item 2. - -3. Item 3. - - - -## Nested - -* Tab - * Tab - * Tab - -Here's another: - -1. First -2. Second: - * Fee - * Fie - * Foe -3. Third - -Same thing but with paragraphs: - -1. First - -2. Second: - * Fee - * Fie - * Foe - -3. Third - - -This was an error in Markdown 1.0.1: - -* this - - * sub - - that diff --git a/tests/MarkdownTest_1.0.3/Tests/Strong and em together.html b/tests/MarkdownTest_1.0.3/Tests/Strong and em together.html deleted file mode 100644 index 71ec78c70..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Strong and em together.html +++ /dev/null @@ -1,7 +0,0 @@ -This is strong and em. - -So is this word. - -This is strong and em. - -So is this word. 
diff --git a/tests/MarkdownTest_1.0.3/Tests/Strong and em together.text b/tests/MarkdownTest_1.0.3/Tests/Strong and em together.text deleted file mode 100644 index 95ee690db..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Strong and em together.text +++ /dev/null @@ -1,7 +0,0 @@ -***This is strong and em.*** - -So is ***this*** word. - -___This is strong and em.___ - -So is ___this___ word. diff --git a/tests/MarkdownTest_1.0.3/Tests/Tabs.html b/tests/MarkdownTest_1.0.3/Tests/Tabs.html deleted file mode 100644 index 3301ba803..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Tabs.html +++ /dev/null @@ -1,25 +0,0 @@ - -this is a list item -indented with tabs -this is a list item -indented with spaces - - -Code: - -this code block is indented by one tab - - -And: - - this code block is indented by two tabs - - -And: - -+ this is an example list item - indented with tabs - -+ this is an example list item - indented with spaces - diff --git a/tests/MarkdownTest_1.0.3/Tests/Tabs.text b/tests/MarkdownTest_1.0.3/Tests/Tabs.text deleted file mode 100644 index 589d1136e..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Tabs.text +++ /dev/null @@ -1,21 +0,0 @@ -+ this is a list item - indented with tabs - -+ this is a list item - indented with spaces - -Code: - - this code block is indented by one tab - -And: - - this code block is indented by two tabs - -And: - - + this is an example list item - indented with tabs - - + this is an example list item - indented with spaces diff --git a/tests/MarkdownTest_1.0.3/Tests/Tidyness.html b/tests/MarkdownTest_1.0.3/Tests/Tidyness.html deleted file mode 100644 index f2a8ce70f..000000000 --- a/tests/MarkdownTest_1.0.3/Tests/Tidyness.html +++ /dev/null @@ -1,8 +0,0 @@ - -A list within a blockquote: - -asterisk 1 -asterisk 2 -asterisk 3 - - diff --git a/tests/MarkdownTest_1.0.3/Tests/Tidyness.text b/tests/MarkdownTest_1.0.3/Tests/Tidyness.text deleted file mode 100644 index 5f18b8da2..000000000 --- 
a/tests/MarkdownTest_1.0.3/Tests/Tidyness.text +++ /dev/null @@ -1,5 +0,0 @@ -> A list within a blockquote: -> -> * asterisk 1 -> * asterisk 2 -> * asterisk 3 -- cgit v1.2.3 From f3ee82373b4ad8e955db12d3c2c2159a2bea53a0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 1 Jan 2014 09:22:37 -0800 Subject: HTML reader: Parse name/content pairs from meta tags as metadata. Closes #1106. --- src/Text/Pandoc/Readers/HTML.hs | 11 ++++++++++- tests/html-reader.native | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4b44a3a21..506fe7770 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -76,9 +76,18 @@ pBody :: TagParser [Block] pBody = pInTags "body" block pHead :: TagParser [Block] -pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . 
normalizeSpaces setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t)) + pMetaTag = do + mt <- pSatisfy (~== TagOpen "meta" []) + let name = fromAttrib "name" mt + if null name + then return [] + else do + let content = fromAttrib "content" mt + updateState $ B.setMeta name (B.text content) + return [] block :: TagParser [Block] block = choice diff --git a/tests/html-reader.native b/tests/html-reader.native index 794512426..e80905729 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Str ".",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] -- cgit v1.2.3 From 3cad665afb4e8838d3fb26cc445b548197f01563 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Jan 2014 23:35:30 -0800 Subject: Updated tests for latest texmath. 
--- tests/markdown-reader-more.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index ca588571f..27f09dada 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -40,7 +40,7 @@ ,OrderedList (3,Example,TwoParens) [[Plain [Str "Third",Space,Str "example."]]] ,Header 2 ("macros",[],[]) [Str "Macros"] -,Para [Math InlineMath "\\langle x,y \\rangle"] +,Para [Math InlineMath "{\\langle x,y \\rangle}"] ,Header 2 ("case-insensitive-references",[],[]) [Str "Case-insensitive",Space,Str "references"] ,Para [Link [Str "Fum"] ("/fum","")] ,Para [Link [Str "FUM"] ("/fum","")] -- cgit v1.2.3 From 9f3b2f6f5d06a4cf3142ffc74c8de4c1cc2bd928 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 22 Jan 2014 22:07:13 -0800 Subject: Fixed mediawiki ordered list parsing. Closes #1122. --- src/Text/Pandoc/Readers/MediaWiki.hs | 25 +++++++++++++++---------- tests/mediawiki-reader.wiki | 4 ++-- 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 8d8ea0199..794890eb6 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -149,9 +149,16 @@ inlinesInTags tag = try $ do blocksInTags :: String -> MWParser Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) + let closer = if tag == "li" + then htmlTag (~== TagClose "li") + <|> lookAhead ( + htmlTag (~== TagOpen "li" []) + <|> htmlTag (~== TagClose "ol") + <|> htmlTag (~== TagClose "ul")) + else htmlTag (~== TagClose tag) if '/' `elem` raw -- self-closing tag then return mempty - else mconcat <$> manyTill block (htmlTag (~== TagClose tag)) + else mconcat <$> manyTill block closer charsInTags :: String -> MWParser [Char] charsInTags tag = try $ do @@ -381,15 +388,13 @@ bulletList = B.bulletList <$> orderedList :: MWParser 
Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) - <|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *> - many (listItem '#' <|> li) <* - optional (htmlTag (~== TagClose "ul")))) - <|> do (tag,_) <- htmlTag (~== TagOpen "ol" []) - spaces - items <- many (listItem '#' <|> li) - optional (htmlTag (~== TagClose "ol")) - let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag - return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items + <|> try + (do (tag,_) <- htmlTag (~== TagOpen "ol" []) + spaces + items <- many (listItem '#' <|> li) + optional (htmlTag (~== TagClose "ol")) + let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag + return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) definitionList :: MWParser Blocks definitionList = B.definitionList <$> many1 defListItem diff --git a/tests/mediawiki-reader.wiki b/tests/mediawiki-reader.wiki index 26f4ef164..c0c22bec6 100644 --- a/tests/mediawiki-reader.wiki +++ b/tests/mediawiki-reader.wiki @@ -232,11 +232,11 @@ ends the list. list item A2 - + #abc #def #ghi - + Amsterdam -- cgit v1.2.3 From fe246ce01c4c523b7391d58d910af09bf3bac6e6 Mon Sep 17 00:00:00 2001 From: Merijn Verstraaten Date: Sat, 15 Feb 2014 17:51:33 +0100 Subject: Enhanced Pandoc's support for rST roles. rST parser now supports: - All built-in rST roles - New role definition - Role inheritance Issues/TODO: - Silently ignores illegal fields on roles - Silently drops class annotations for roles - Only supports :format: fields with a single format for :raw: roles, requires a change to Text.Pandoc.Definition.Format to support multiple formats. - Allows direct use of :raw: role, rST only allows indirect (i.e., inherited use of :raw:). 
--- src/Text/Pandoc/Parsing.hs | 2 + src/Text/Pandoc/Readers/RST.hs | 91 +++++++++++++++++++++++++++++++++++++----- tests/rst-reader.native | 10 +++++ tests/rst-reader.rst | 24 +++++++++++ 4 files changed, 117 insertions(+), 10 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 2f21e1253..0713f4a96 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -853,6 +853,7 @@ data ParserState = ParserState stateHasChapters :: Bool, -- ^ True if \chapter encountered stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role + stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles stateWarnings :: [String] -- ^ Warnings generated by the parser } @@ -915,6 +916,7 @@ defaultParserState = stateHasChapters = False, stateMacros = [], stateRstDefaultRole = "title-reference", + stateRstCustomRoles = M.empty, stateWarnings = []} getOption :: (ReaderOptions -> a) -> Parser s ParserState a diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index c12a1493a..a46a3a6c6 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -36,12 +36,13 @@ import Text.Pandoc.Builder (setMeta, fromList) import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Options -import Control.Monad ( when, liftM, guard, mzero ) +import Control.Monad ( when, liftM, guard, mzero, mplus ) import Data.List ( findIndex, intersperse, intercalate, transpose, sort, deleteFirstsBy, isSuffixOf ) +import Data.Maybe (fromMaybe) import qualified Data.Map as M import Text.Printf ( printf ) -import Control.Applicative ((<$>), (<$), (<*), (*>)) +import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>)) import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B import Data.Monoid (mconcat, mempty) 
@@ -530,7 +531,7 @@ directive' = do let body' = body ++ "\n\n" case label of "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) - "role" -> return mempty + "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields "container" -> parseFromString parseBlocks body' "replace" -> B.para <$> -- consumed by substKey parseFromString (trimInlines . mconcat <$> many inline) @@ -591,7 +592,38 @@ directive' = do Nothing -> B.image src "" alt _ -> return mempty --- Can contain haracter codes as decimal numbers or +-- TODO: +-- - Silently ignores illegal fields +-- - Silently drops classes +-- - Only supports :format: fields with a single format for :raw: roles, +-- change Text.Pandoc.Definition.Format to fix +addNewRole :: String -> [(String, String)] -> RSTParser Blocks +addNewRole roleString fields = do + (role, parentRole) <- parseFromString inheritedRole roleString + customRoles <- stateRstCustomRoles <$> getState + baseRole <- case M.lookup parentRole customRoles of + Just (base, _, _) -> return base + Nothing -> return parentRole + + let fmt = if baseRole == "raw" then lookup "format" fields else Nothing + annotate = maybe id addLanguage $ + if baseRole == "code" + then lookup "language" fields + else Nothing + + updateState $ \s -> s { + stateRstCustomRoles = + M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles + } + + return $ B.singleton Null + where + addLanguage lang (ident, classes, keyValues) = + (ident, "sourceCode" : lang : classes, keyValues) + inheritedRole = + (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')') + +-- Can contain character codes as decimal numbers or -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u -- or as XML-style hexadecimal character entities, e.g. ᨫ -- or text, which is used as-is. Comments start with .. @@ -930,17 +962,56 @@ strong = B.strong . trimInlines . 
mconcat <$> -- Note, this doesn't precisely implement the complex rule in -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules -- but it should be good enough for most purposes +-- +-- TODO: +-- - Classes are silently discarded in addNewRole +-- - Lacks sensible implementation for title-reference (which is the default) +-- - Allows direct use of the :raw: role, rST only allows inherited use. interpretedRole :: RSTParser Inlines interpretedRole = try $ do (role, contents) <- roleBefore <|> roleAfter - case role of - "sup" -> return $ B.superscript $ B.str contents - "sub" -> return $ B.subscript $ B.str contents - "math" -> return $ B.math contents - _ -> return $ B.str contents --unknown + renderRole contents Nothing role nullAttr + +renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines +renderRole contents fmt role attr = case role of + "sup" -> return $ B.superscript $ B.str contents + "superscript" -> return $ B.superscript $ B.str contents + "sub" -> return $ B.subscript $ B.str contents + "subscript" -> return $ B.subscript $ B.str contents + "emphasis" -> return $ B.emph $ B.str contents + "strong" -> return $ B.strong $ B.str contents + "rfc-reference" -> return $ rfcLink contents + "RFC" -> return $ rfcLink contents + "pep-reference" -> return $ pepLink contents + "PEP" -> return $ pepLink contents + "literal" -> return $ B.str contents + "math" -> return $ B.math contents + "title-reference" -> titleRef contents + "title" -> titleRef contents + "t" -> titleRef contents + "code" -> return $ B.codeWith attr contents + "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents + custom -> do + customRole <- stateRstCustomRoles <$> getState + case M.lookup custom customRole of + Just (_, newFmt, inherit) -> let + fmtStr = fmt `mplus` newFmt + (newRole, newAttr) = inherit attr + in renderRole contents fmtStr newRole newAttr + Nothing -> return $ B.str contents -- Undefined role + where + 
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour + rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) + where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" + pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) + where padNo = replicate (4 - length pepNo) '0' ++ pepNo + pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + +roleNameEndingIn :: RSTParser Char -> RSTParser String +roleNameEndingIn end = many1Till (letter <|> char '-') end roleMarker :: RSTParser String -roleMarker = char ':' *> many1Till (letter <|> char '-') (char ':') +roleMarker = char ':' *> roleNameEndingIn (char ':') roleBefore :: RSTParser (String,String) roleBefore = try $ do diff --git a/tests/rst-reader.native b/tests/rst-reader.native index 497810f39..fd48bc60c 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -319,5 +319,15 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."] ,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."] ,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."] +,Null +,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "inline HTML",Str "."] +,Null +,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["sourceCode","haskell"],[]) "fmap id [1,2..10]",Str "."] +,Null +,Null +,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["sourceCode","python"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."] +,Null +,Null +,Para [Str "Different",Space,Str "indirect",Space,Str "C",Space,Code ("",["sourceCode","c"],[]) "int x = 15;",Str "."] ,Header 2 
("literal-symbols",[],[]) [Str "Literal",Space,Str "symbols"] ,Para [Str "2*2",Space,Str "=",Space,Str "4*1"]] diff --git a/tests/rst-reader.rst b/tests/rst-reader.rst index 748bfe0a5..930bf2ed2 100644 --- a/tests/rst-reader.rst +++ b/tests/rst-reader.rst @@ -599,6 +599,30 @@ Reset default-role to the default default. And now `some-invalid-string-3231231` is nonsense. +.. role:: html(raw) + :format: html + +And now with :html:`inline HTML`. + +.. role:: haskell(code) + :language: haskell + +And some inline haskell :haskell:`fmap id [1,2..10]`. + +.. role:: indirect(code) + +.. role:: python(indirect) + :language: python + +Indirect python role :python:`[x*x for x in [1,2,3,4,5]]`. + +.. role:: different-indirect(code) + :language: c + +.. role:: c(different-indirect) + +Different indirect C :c:`int x = 15;`. + Literal symbols --------------- -- cgit v1.2.3 From b677ce066347db32645a973d859d741309038c4e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 20 Feb 2014 09:43:02 -0800 Subject: Revised tests for new latex template. 
--- tests/lhs-test.latex | 2 +- tests/lhs-test.latex+lhs | 2 +- tests/writer.latex | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tests') diff --git a/tests/lhs-test.latex b/tests/lhs-test.latex index 51c62f98a..78f072600 100644 --- a/tests/lhs-test.latex +++ b/tests/lhs-test.latex @@ -1,5 +1,4 @@ \documentclass[]{article} -\usepackage[T1]{fontenc} \usepackage{lmodern} \usepackage{amssymb,amsmath} \usepackage{ifxetex,ifluatex} @@ -7,6 +6,7 @@ % use upquote if available, for straight quotes in verbatim environments \IfFileExists{upquote.sty}{\usepackage{upquote}}{} \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} \else % if luatex or xelatex \ifxetex diff --git a/tests/lhs-test.latex+lhs b/tests/lhs-test.latex+lhs index 606d49a12..50a0e15e1 100644 --- a/tests/lhs-test.latex+lhs +++ b/tests/lhs-test.latex+lhs @@ -1,5 +1,4 @@ \documentclass[]{article} -\usepackage[T1]{fontenc} \usepackage{lmodern} \usepackage{amssymb,amsmath} \usepackage{ifxetex,ifluatex} @@ -7,6 +6,7 @@ % use upquote if available, for straight quotes in verbatim environments \IfFileExists{upquote.sty}{\usepackage{upquote}}{} \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} \else % if luatex or xelatex \ifxetex diff --git a/tests/writer.latex b/tests/writer.latex index 9431f43b1..4cb989fba 100644 --- a/tests/writer.latex +++ b/tests/writer.latex @@ -1,5 +1,4 @@ \documentclass[]{article} -\usepackage[T1]{fontenc} \usepackage{lmodern} \usepackage{amssymb,amsmath} \usepackage{ifxetex,ifluatex} @@ -7,6 +6,7 @@ % use upquote if available, for straight quotes in verbatim environments \IfFileExists{upquote.sty}{\usepackage{upquote}}{} \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} \else % if luatex or xelatex \ifxetex -- cgit v1.2.3 From 80511f1b34d082742d78d9745469eb8c63592a9c Mon Sep 17 00:00:00 2001 From: mb21 Date: 
Sun, 1 Dec 2013 21:11:39 +0100 Subject: InDesign ICML Writer --- pandoc.cabal | 1 + src/Text/Pandoc.hs | 3 + src/Text/Pandoc/Writers/ICML.hs | 525 +++++++ tests/Tests/Old.hs | 2 +- tests/tables.icml | 748 ++++++++++ tests/writer.icml | 3023 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 4301 insertions(+), 1 deletion(-) create mode 100644 src/Text/Pandoc/Writers/ICML.hs create mode 100644 tests/tables.icml create mode 100644 tests/writer.icml (limited to 'tests') diff --git a/pandoc.cabal b/pandoc.cabal index a4b8ac61b..e279a2cc9 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -270,6 +270,7 @@ Library Text.Pandoc.Writers.Docbook, Text.Pandoc.Writers.OPML, Text.Pandoc.Writers.HTML, + Text.Pandoc.Writers.ICML, Text.Pandoc.Writers.LaTeX, Text.Pandoc.Writers.ConTeXt, Text.Pandoc.Writers.OpenDocument, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 703bb876a..2c90fd09b 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -85,6 +85,7 @@ module Text.Pandoc , writeTexinfo , writeHtml , writeHtmlString + , writeICML , writeDocbook , writeOPML , writeOpenDocument @@ -133,6 +134,7 @@ import Text.Pandoc.Writers.ODT import Text.Pandoc.Writers.Docx import Text.Pandoc.Writers.EPUB import Text.Pandoc.Writers.FB2 +import Text.Pandoc.Writers.ICML import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OPML import Text.Pandoc.Writers.OpenDocument @@ -226,6 +228,7 @@ writers = [ ,("html" , PureStringWriter writeHtmlString) ,("html5" , PureStringWriter $ \o -> writeHtmlString o{ writerHtml5 = True }) + ,("icml" , PureStringWriter writeICML) ,("s5" , PureStringWriter $ \o -> writeHtmlString o{ writerSlideVariant = S5Slides , writerTableOfContents = False }) diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs new file mode 100644 index 000000000..19d486b25 --- /dev/null +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -0,0 +1,525 @@ +{-# LANGUAGE OverloadedStrings #-} + +{- | + Module : Text.Pandoc.Writers.ICML + Copyright : Copyright 
(C) 2013 github.com/mb21 + License : GNU GPL, version 2 or above + + Stability : alpha + +Conversion of 'Pandoc' documents to Adobe InCopy ICML, a stand-alone XML format +which is a subset of the zipped IDML format for which the documentation is +available here: http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/sdk/cs6/idml/idml-specification.pdf +InCopy is the companion word-processor to Adobe InDesign and ICML documents can be integrated +into InDesign with File -> Place. +-} +module Text.Pandoc.Writers.ICML (writeICML) where +import Text.Pandoc.Definition +import Text.Pandoc.XML +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Shared (splitBy) +import Text.Pandoc.Options +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Pretty +import Data.List (isPrefixOf, isInfixOf, stripPrefix) +import Data.Text as Text (breakOnAll, pack) +import Data.Monoid (mappend) +import Control.Monad.State +import qualified Data.Set as Set + +type Style = [String] +type Hyperlink = [(Int, String)] + +data WriterState = WriterState{ + blockStyles :: Set.Set String + , inlineStyles :: Set.Set String + , links :: Hyperlink + , listDepth :: Int + , maxListDepth :: Int + } + +type WS a = State WriterState a + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ + blockStyles = Set.empty + , inlineStyles = Set.empty + , links = [] + , listDepth = 1 + , maxListDepth = 0 + } + +-- inline names (appear in InDesign's character styles pane) +emphName :: String +strongName :: String +strikeoutName :: String +superscriptName :: String +subscriptName :: String +smallCapsName :: String +codeName :: String +linkName :: String +emphName = "Italic" +strongName = "Bold" +strikeoutName = "Strikeout" +superscriptName = "Superscript" +subscriptName = "Subscript" +smallCapsName = "SmallCaps" +codeName = "Code" +linkName = "Link" + +-- block element names (appear in InDesign's paragraph styles pane) +paragraphName :: String +codeBlockName :: 
String +rawBlockName :: String +blockQuoteName :: String +orderedListName :: String +bulletListName :: String +defListTermName :: String +defListDefName :: String +headerName :: String +tableName :: String +tableHeaderName :: String +tableCaptionName :: String +alignLeftName :: String +alignRightName :: String +alignCenterName :: String +firstListItemName :: String +beginsWithName :: String +lowerRomanName :: String +upperRomanName :: String +lowerAlphaName :: String +upperAlphaName :: String +subListParName :: String +footnoteName :: String +paragraphName = "Paragraph" +codeBlockName = "CodeBlock" +rawBlockName = "Rawblock" +blockQuoteName = "Blockquote" +orderedListName = "NumList" +bulletListName = "BulList" +defListTermName = "DefListTerm" +defListDefName = "DefListDef" +headerName = "Header" +tableName = "TablePar" +tableHeaderName = "TableHeader" +tableCaptionName = "TableCaption" +alignLeftName = "LeftAlign" +alignRightName = "RightAlign" +alignCenterName = "CenterAlign" +firstListItemName = "first" +beginsWithName = "beginsWith-" +lowerRomanName = "lowerRoman" +upperRomanName = "upperRoman" +lowerAlphaName = "lowerAlpha" +upperAlphaName = "upperAlpha" +subListParName = "subParagraph" +footnoteName = "Footnote" + + +-- | Convert Pandoc document to string in ICML format. 
+writeICML :: WriterOptions -> Pandoc -> String +writeICML opts (Pandoc meta blocks) = + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + render' = render colwidth + renderMeta f s = Just $ render' $ fst $ runState (f opts [] s) defaultWriterState + Just metadata = metaToJSON opts + (renderMeta blocksToICML) + (renderMeta inlinesToICML) + meta + (doc, st) = runState (blocksToICML opts [] blocks) defaultWriterState + main = render' doc + context = defField "body" main + $ defField "charStyles" (render' $ charStylesToDoc st) + $ defField "parStyles" (render' $ parStylesToDoc st) + $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) + $ metadata + in if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +-- | Auxiliary function for parStylesToDoc and charStylesToDoc. +contains :: String -> (String, (String, String)) -> [(String, String)] +contains s rule = + if isInfixOf (fst rule) s + then [snd rule] + else [] + +-- | The monospaced font to use as default. +monospacedFont :: Doc +monospacedFont = inTags False "AppliedFont" [("type", "string")] $ text "Courier New" + +-- | How much to indent blockquotes etc. +defaultIndent :: Int +defaultIndent = 20 + +-- | How much to indent numbered lists before the number. +defaultListIndent :: Int +defaultListIndent = 10 + +-- other constants +lineSeparator :: String +lineSeparator = " " + +-- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles. 
+parStylesToDoc :: WriterState -> Doc +parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st + where + makeStyle s = + let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str) + attrs = concat $ map (contains s) $ [ + (defListTermName, ("BulletsAndNumberingListType", "BulletList")) + , (defListTermName, ("FontStyle", "Bold")) + , (tableHeaderName, ("FontStyle", "Bold")) + , (alignLeftName, ("Justification", "LeftAlign")) + , (alignRightName, ("Justification", "RightAlign")) + , (alignCenterName, ("Justification", "CenterAlign")) + , (headerName++"1", ("PointSize", "36")) + , (headerName++"2", ("PointSize", "30")) + , (headerName++"3", ("PointSize", "24")) + , (headerName++"4", ("PointSize", "18")) + , (headerName++"5", ("PointSize", "14")) + ] + -- what is the most nested list type, if any? + (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s + where + findList [] = (False, False) + findList (x:xs) | x == bulletListName = (True, False) + | x == orderedListName = (False, True) + | otherwise = findList xs + nBuls = countSubStrs bulletListName s + nOrds = countSubStrs orderedListName s + attrs' = numbering ++ listType ++ indent ++ attrs + where + numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)] + | otherwise = [] + listType | isOrderedList && (not $ isInfixOf subListParName s) + = [("BulletsAndNumberingListType", "NumberedList")] + | isBulletList && (not $ isInfixOf subListParName s) + = [("BulletsAndNumberingListType", "BulletList")] + | otherwise = [] + indent = [("LeftIndent", show indt)] + where + nBlockQuotes = countSubStrs blockQuoteName s + nDefLists = countSubStrs defListDefName s + indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists) + props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm) + where + font = if isInfixOf codeBlockName s + then monospacedFont + else empty + basedOn = inTags 
False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font + tabList = if isBulletList + then inTags True "TabList" [("type","list")] $ inTags True "ListItem" [("type","record")] + $ vcat [ + inTags False "Alignment" [("type","enumeration")] $ text "LeftAlign" + , inTags False "AlignmentCharacter" [("type","string")] $ text "." + , selfClosingTag "Leader" [("type","string")] + , inTags False "Position" [("type","unit")] $ text + $ show $ defaultListIndent * (nBuls + nOrds) + ] + else empty + makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name) + numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..." + | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..." + | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..." + | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..." + | otherwise = empty + in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props + +-- | Convert a WriterState with its inline styles to the ICML listing of Character Styles. 
+charStylesToDoc :: WriterState -> Doc +charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st + where + makeStyle s = + let attrs = concat $ map (contains s) [ + (strikeoutName, ("StrikeThru", "true")) + , (superscriptName, ("Position", "Superscript")) + , (subscriptName, ("Position", "Subscript")) + , (smallCapsName, ("Capitalization", "SmallCaps")) + ] + attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs + | isInfixOf strongName s = ("FontStyle", "Bold") : attrs + | isInfixOf emphName s = ("FontStyle", "Italic") : attrs + | otherwise = attrs + props = inTags True "Properties" [] $ + inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font + where + font = + if isInfixOf codeName s + then monospacedFont + else empty + in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props + +-- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks. +hyperlinksToDoc :: Hyperlink -> Doc +hyperlinksToDoc [] = empty +hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs + where + hyp (ident, url) = hdest $$ hlink + where + hdest = selfClosingTag "HyperlinkURLDestination" + [("Self", "HyperlinkURLDestination/"++url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] + hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), + ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] + $ inTags True "Properties" [] + $ inTags False "BorderColor" [("type","enumeration")] (text "Black") + $$ (inTags False "Destination" [("type","object")] + $ text $ "HyperlinkURLDestination/"++(escapeStringForXML url)) + + +-- | Convert a list of Pandoc blocks to ICML. +blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc +blocksToICML opts style lst = vcat `fmap` mapM (blockToICML opts style) lst + +-- | Convert a Pandoc block element to ICML. 
+blockToICML :: WriterOptions -> Style -> Block -> WS Doc +blockToICML opts style (Plain lst) = parStyle opts style lst +blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst +blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] +blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str] +blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks +blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst +blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst +blockToICML opts style (DefinitionList lst) = vcat `fmap` mapM (definitionListItemToICML opts style) lst +blockToICML opts style (Header lvl _ lst) = + let stl = (headerName ++ show lvl):style + in parStyle opts stl lst +blockToICML _ _ HorizontalRule = return empty -- we could insert a page break instead +blockToICML opts style (Table caption aligns widths headers rows) = + let style' = tableName : style + noHeader = all null headers + nrHeaders = if noHeader + then "0" + else "1" + nrRows = length rows + nrCols = if null rows + then 0 + else length $ head rows + rowsToICML [] _ = return empty + rowsToICML (col:rest) rowNr = + liftM2 ($$) (colsToICML col rowNr (0::Int)) $ rowsToICML rest (rowNr+1) + colsToICML [] _ _ = return empty + colsToICML (cell:rest) rowNr colNr = do + let stl = if rowNr == 0 && not noHeader + then tableHeaderName:style' + else style' + alig = aligns !! 
colNr + stl' | alig == AlignLeft = alignLeftName : stl + | alig == AlignRight = alignRightName : stl + | alig == AlignCenter = alignCenterName : stl + | otherwise = stl + c <- blocksToICML opts stl' cell + let cl = return $ inTags True "Cell" + [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c + liftM2 ($$) cl $ colsToICML rest rowNr (colNr+1) + in do + let tabl = if noHeader + then rows + else headers:rows + cells <- rowsToICML tabl (0::Int) + let colWidths w = if w > 0 + then [("SingleColumnWidth",show $ 500 * w)] + else [] + let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup) + let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths + let tableDoc = return $ inTags True "Table" [ + ("AppliedTableStyle","TableStyle/Table") + , ("HeaderRowCount", nrHeaders) + , ("BodyRowCount", show nrRows) + , ("ColumnCount", show nrCols) + ] (colDescs $$ cells) + liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption +blockToICML opts style (Div _ lst) = blocksToICML opts style lst +blockToICML _ _ Null = return empty + +-- | Convert a list of lists of blocks to ICML list items. +listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc +listItemsToICML _ _ _ _ [] = return empty +listItemsToICML opts listType style attribs (first:rest) = do + st <- get + put st{ listDepth = 1 + listDepth st} + let stl = listType:style + let f = listItemToICML opts stl True attribs first + let r = map (listItemToICML opts stl False attribs) rest + docs <- sequence $ f:r + s <- get + let maxD = max (maxListDepth s) (listDepth s) + put s{ listDepth = 1, maxListDepth = maxD } + return $ vcat docs + +-- | Convert a list of blocks to ICML list items. 
+listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc +listItemToICML opts style isFirst attribs item = + let makeNumbStart (Just (beginsWith, numbStl, _)) = + let doN DefaultStyle = [] + doN LowerRoman = [lowerRomanName] + doN UpperRoman = [upperRomanName] + doN LowerAlpha = [lowerAlphaName] + doN UpperAlpha = [upperAlphaName] + doN _ = [] + bw = if beginsWith > 1 + then [beginsWithName ++ show beginsWith] + else [] + in doN numbStl ++ bw + makeNumbStart Nothing = [] + stl = if isFirst + then firstListItemName:style + else style + stl' = makeNumbStart attribs ++ stl + in if length item > 1 + then do + let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst + insertTab block = blockToICML opts style block + f <- blockToICML opts stl' $ head item + r <- fmap vcat $ mapM insertTab $ tail item + return $ f $$ r + else blocksToICML opts stl' item + +definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc +definitionListItemToICML opts style (term,defs) = do + term' <- parStyle opts (defListTermName:style) term + defs' <- vcat `fmap` mapM (blocksToICML opts (defListDefName:style)) defs + return $ term' $$ defs' + + +-- | Convert a list of inline elements to ICML. +inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc +inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst) + +-- | Convert an inline element to ICML. 
+inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc +inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str +inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst +inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst +inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst +inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst +inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst +inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst +inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"] +inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"] +inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst] +inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str +inlineToICML _ style Space = charStyle style space +inlineToICML _ style LineBreak = charStyle style $ text lineSeparator +inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math +inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str +inlineToICML opts style (Link lst (url, title)) = do + content <- inlinesToICML opts (linkName:style) lst + state $ \st -> + let ident = if null $ links st + then 1::Int + else 1 + (fst $ head $ links st) + newst = st{ links = (ident, url):(links st) } + cont = inTags True "HyperlinkTextSource" + [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content + in (cont, newst) +inlineToICML opts style (Image alt target) = imageICML opts style alt target +inlineToICML opts style (Note lst) = footnoteToICML opts style lst +inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst + +-- | 
Convert a list of block elements to an ICML footnote. +footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc +footnoteToICML opts style lst = + let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls + insertTab block = blockToICML opts (footnoteName:style) block + in do + contents <- mapM insertTab lst + let number = inTags True "ParagraphStyleRange" [] $ + inTags True "CharacterStyleRange" [] $ inTagsSimple "Content" "" + return $ inTags True "CharacterStyleRange" + [("AppliedCharacterStyle","$ID/NormalCharacterStyle"), ("Position","Superscript")] + $ inTags True "Footnote" [] $ number $$ vcat contents + +-- | Auxiliary function to merge Space elements into the adjacent Strs. +mergeSpaces :: [Inline] -> [Inline] +mergeSpaces ((Str s):(Space:((Str s'):xs))) = mergeSpaces $ Str(s++" "++s') : xs +mergeSpaces (Space:((Str s):xs)) = mergeSpaces $ Str (" "++s) : xs +mergeSpaces ((Str s):(Space:xs)) = mergeSpaces $ Str (s++" ") : xs +mergeSpaces (x:xs) = x : (mergeSpaces xs) +mergeSpaces [] = [] + +-- | Wrap a list of inline elements in an ICML Paragraph Style +parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc +parStyle opts style lst = + let slipIn x y = if null y + then x + else x ++ " > " ++ y + stlStr = foldr slipIn [] $ reverse style + stl = if null stlStr + then "" + else "ParagraphStyle/" ++ stlStr + attrs = ("AppliedParagraphStyle", stl) + attrs' = if firstListItemName `elem` style + then let ats = attrs : [("NumberingContinue", "false")] + begins = filter (isPrefixOf beginsWithName) style + in if null begins + then ats + else let i = maybe "" id $ stripPrefix beginsWithName $ head begins + in ("NumberingStartAt", i) : ats + else [attrs] + in do + content <- inlinesToICML opts [] lst + let cont = inTags True "ParagraphStyleRange" attrs' + $ mappend content $ selfClosingTag "Br" [] + state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st }) + +-- | Wrap a Doc in an ICML Character Style. 
+charStyle :: Style -> Doc -> WS Doc +charStyle style content = + let (stlStr, attrs) = styleToStrAttr style + doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content + in do + state $ \st -> + let styles = if null stlStr + then st + else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } + in (doc, styles) + +-- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute. +styleToStrAttr :: Style -> (String, [(String, String)]) +styleToStrAttr style = + let stlStr = unwords $ Set.toAscList $ Set.fromList style + stl = if null style + then "$ID/NormalCharacterStyle" + else "CharacterStyle/" ++ stlStr + attrs = [("AppliedCharacterStyle", stl)] + in (stlStr, attrs) + +-- | Assemble an ICML Image. +imageICML :: WriterOptions -> Style -> [Inline] -> Target -> WS Doc +imageICML _ style _ (linkURI, _) = + let imgWidth = 300::Int --TODO: set width, height dynamically as in Docx.hs + imgHeight = 200::Int + scaleFact = show (1::Double) --TODO: set scaling factor so image is scaled exactly to imgWidth x imgHeight + hw = show $ imgWidth `div` 2 + hh = show $ imgHeight `div` 2 + qw = show $ imgWidth `div` 4 + qh = show $ imgHeight `div` 4 + (stlStr, attrs) = styleToStrAttr style + props = inTags True "Properties" [] $ inTags True "PathGeometry" [] + $ inTags True "GeometryPathType" [("PathOpen","false")] + $ inTags True "PathPointArray" [] + $ vcat [ + selfClosingTag "PathPointType" [("Anchor", "-"++qw++" -"++qh), + ("LeftDirection", "-"++qw++" -"++qh), ("RightDirection", "-"++qw++" -"++qh)] + , selfClosingTag "PathPointType" [("Anchor", "-"++qw++" "++qh), + ("LeftDirection", "-"++qw++" "++qh), ("RightDirection", "-"++qw++" "++qh)] + , selfClosingTag "PathPointType" [("Anchor", qw++" "++qh), + ("LeftDirection", qw++" "++qh), ("RightDirection", qw++" "++qh)] + , selfClosingTag "PathPointType" [("Anchor", qw++" -"++qh), + ("LeftDirection", qw++" -"++qh), ("RightDirection", qw++" -"++qh)] + ] + 
image = inTags True "Image" + [("Self","ue6"), ("ItemTransform", scaleFact++" 0 0 "++scaleFact++" -"++qw++" -"++qh)] + $ vcat [ + inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" + $$ selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0"), ("Right", hw), ("Bottom", hh)] + , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", linkURI)] + ] + doc = inTags True "CharacterStyleRange" attrs + $ inTags True "Rectangle" [("Self","uec"), ("ItemTransform", "1 0 0 1 "++qw++" -"++qh)] + $ (props $$ image) + in do + state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } ) diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index a16784889..424e1b7c5 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -135,7 +135,7 @@ tests = [ testGroup "markdown" "haddock-reader.haddock" "haddock-reader.native" ] , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f) - [ "opendocument" , "context" , "texinfo" + [ "opendocument" , "context" , "texinfo", "icml" , "man" , "plain" , "rtf", "org", "asciidoc" ] ] diff --git a/tests/tables.icml b/tests/tables.icml new file mode 100644 index 000000000..eb73af670 --- /dev/null +++ b/tests/tables.icml @@ -0,0 +1,748 @@ + + + Simple table with caption: + + + + + + + + + + + Right + + + + + + + Left + + + + + + + Center + + + + + + + Default + + + + + + + 12 + + + + + + + 12 + + + + + + + 12 + + + + + + + 12 + + + + + + + 123 + + + + + + + 123 + + + + + + + 123 + + + + + + + 123 + + + + + + + 1 + + + + + + + 1 + + + + + + + 1 + + + + + + + 1 + + + + + + + Demonstration of simple table syntax. 
+ + + + + Simple table without caption: + + + + + + + + + + + Right + + + + + + + Left + + + + + + + Center + + + + + + + Default + + + + + + + 12 + + + + + + + 12 + + + + + + + 12 + + + + + + + 12 + + + + + + + 123 + + + + + + + 123 + + + + + + + 123 + + + + + + + 123 + + + + + + + 1 + + + + + + + 1 + + + + + + + 1 + + + + + + + 1 + + + + + + + + + + Simple table indented two spaces: + + + + + + + + + + + Right + + + + + + + Left + + + + + + + Center + + + + + + + Default + + + + + + + 12 + + + + + + + 12 + + + + + + + 12 + + + + + + + 12 + + + + + + + 123 + + + + + + + 123 + + + + + + + 123 + + + + + + + 123 + + + + + + + 1 + + + + + + + 1 + + + + + + + 1 + + + + + + + 1 + + + + + + + Demonstration of simple table syntax. + + + + + Multiline table with caption: + + + + + + + + + + + Centered Header + + + + + + + Left Aligned + + + + + + + Right Aligned + + + + + + + Default aligned + + + + + + + First + + + + + + + row + + + + + + + 12.0 + + + + + + + Example of a row that spans multiple lines. + + + + + + + Second + + + + + + + row + + + + + + + 5.0 + + + + + + + Here's another one. Note the blank line between rows. + + + + + + + Here's the caption. It may span multiple lines. + + + + + Multiline table without caption: + + + + + + + + + + + Centered Header + + + + + + + Left Aligned + + + + + + + Right Aligned + + + + + + + Default aligned + + + + + + + First + + + + + + + row + + + + + + + 12.0 + + + + + + + Example of a row that spans multiple lines. + + + + + + + Second + + + + + + + row + + + + + + + 5.0 + + + + + + + Here's another one. Note the blank line between rows. 
+ + + + + + + + + + Table without column headers: + + + + + + + + + + + 12 + + + + + + + 12 + + + + + + + 12 + + + + + + + 12 + + + + + + + 123 + + + + + + + 123 + + + + + + + 123 + + + + + + + 123 + + + + + + + 1 + + + + + + + 1 + + + + + + + 1 + + + + + + + 1 + + + + + + + + + + Multiline table without column headers: + + + + + + + + + + + First + + + + + + + row + + + + + + + 12.0 + + + + + + + Example of a row that spans multiple lines. + + + + + + + Second + + + + + + + row + + + + + + + 5.0 + + + + + + + Here's another one. Note the blank line between rows. + + + + + + + diff --git a/tests/writer.icml b/tests/writer.icml new file mode 100644 index 000000000..ef6ddcf64 --- /dev/null +++ b/tests/writer.icml @@ -0,0 +1,3023 @@ + + + + + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + Courier New + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + + + + + $ID/NormalCharacterStyle + + + + + + + + + LeftAlign + . + + 10 + + + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + Courier New + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . + + 10 + + + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . + + 30 + + + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . + + 20 + + + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . + + 20 + + + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . + + 20 + + + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . + + 10 + + + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . + + 10 + + + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . 
+ + 10 + + + + + + + $ID/NormalParagraphStyle + Courier New + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + Courier New + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + Courier New + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . + + 20 + + + + + + + $ID/NormalParagraphStyle + + + LeftAlign + . + + 20 + + + + + + + $ID/NormalParagraphStyle + a, b, c, d... + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + A, B, C, D... + + + + + $ID/NormalParagraphStyle + A, B, C, D... + + + + + $ID/NormalParagraphStyle + i, ii, iii, iv... + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + i, ii, iii, iv... + + + + + $ID/NormalParagraphStyle + I, II, III, IV... + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + A, B, C, D... + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + $ID/NormalParagraphStyle + + + + + + + + + + + + + + + + This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite. + + + + + Headers + + + + + Level 2 with an + + + + embedded link + + + + + + Level 3 with + + + emphasis + + + + + Level 4 + + + + + Level 5 + + + + + Level 1 + + + + + Level 2 with + + + emphasis + + + + + Level 3 + + + + + with no blank line + + + + + Level 2 + + + + + with no blank line + + + + + Paragraphs + + + + + Here’s a regular paragraph. + + + + + In Markdown 1.0.0 and earlier. Version 8. 
This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item. + + + + + Here’s one with a bullet. * criminey. + + + + + There should be a hard line break + + +   + + + here. + + + + + Block Quotes + + + + + E-mail style: + + + + + This is a block quote. It is pretty short. + + + + + Code in a block quote: + + + + + sub status { + print "working"; +} + + + + + A list: + + + + + item one + + + + + item two + + + + + Nested block quotes: + + + + + nested + + + + + nested + + + + + This should not be a block quote: 2 > 1. + + + + + And a following paragraph. + + + + + Code Blocks + + + + + Code: + + + + + ---- (should be four hyphens) + +sub status { + print "working"; +} + +this code block is indented by one tab + + + + + And: + + + + + this code block is indented by two tabs + +These should not be escaped: \$ \\ \> \[ \{ + + + + + Lists + + + + + Unordered + + + + + Asterisks tight: + + + + + asterisk 1 + + + + + asterisk 2 + + + + + asterisk 3 + + + + + Asterisks loose: + + + + + asterisk 1 + + + + + asterisk 2 + + + + + asterisk 3 + + + + + Pluses tight: + + + + + Plus 1 + + + + + Plus 2 + + + + + Plus 3 + + + + + Pluses loose: + + + + + Plus 1 + + + + + Plus 2 + + + + + Plus 3 + + + + + Minuses tight: + + + + + Minus 1 + + + + + Minus 2 + + + + + Minus 3 + + + + + Minuses loose: + + + + + Minus 1 + + + + + Minus 2 + + + + + Minus 3 + + + + + Ordered + + + + + Tight: + + + + + First + + + + + Second + + + + + Third + + + + + and: + + + + + One + + + + + Two + + + + + Three + + + + + Loose using tabs: + + + + + First + + + + + Second + + + + + Third + + + + + and using spaces: + + + + + One + + + + + Two + + + + + Three + + + + + Multiple paragraphs: + + + + + Item 1, graf one. + + + + + + + + Item 1. graf two. The quick brown fox jumped over the lazy dog’s back. + + + + + Item 2. + + + + + Item 3. 
+ + + + + Nested + + + + + Tab + + + + + Tab + + + + + Tab + + + + + Here’s another: + + + + + First + + + + + Second: + + + + + Fee + + + + + Fie + + + + + Foe + + + + + Third + + + + + Same thing but with paragraphs: + + + + + First + + + + + Second: + + + + + Fee + + + + + Fie + + + + + Foe + + + + + Third + + + + + Tabs and spaces + + + + + this is a list item indented with tabs + + + + + this is a list item indented with spaces + + + + + this is an example list item indented with tabs + + + + + this is an example list item indented with spaces + + + + + Fancy list markers + + + + + begins with 2 + + + + + and now 3 + + + + + + + + with a continuation + + + + + sublist with roman numerals, starting with 4 + + + + + more items + + + + + a subsublist + + + + + a subsublist + + + + + Nesting: + + + + + Upper Alpha + + + + + Upper Roman. + + + + + Decimal start with 6 + + + + + Lower alpha with paren + + + + + Autonumbering: + + + + + Autonumber. + + + + + More. + + + + + Nested. + + + + + Should not be a list item: + + + + + M.A. 2007 + + + + + B. 
Williams + + + + + Definition Lists + + + + + Tight using spaces: + + + + + apple + + + + + red fruit + + + + + orange + + + + + orange fruit + + + + + banana + + + + + yellow fruit + + + + + Tight using tabs: + + + + + apple + + + + + red fruit + + + + + orange + + + + + orange fruit + + + + + banana + + + + + yellow fruit + + + + + Loose: + + + + + apple + + + + + red fruit + + + + + orange + + + + + orange fruit + + + + + banana + + + + + yellow fruit + + + + + Multiple blocks with italics: + + + + + apple + + + + + red fruit + + + + + contains seeds, crisp, pleasant to taste + + + + + orange + + + + + orange fruit + + + + + { orange code block } + + + + + orange block quote + + + + + Multiple definitions, tight: + + + + + apple + + + + + red fruit + + + + + computer + + + + + orange + + + + + orange fruit + + + + + bank + + + + + Multiple definitions, loose: + + + + + apple + + + + + red fruit + + + + + computer + + + + + orange + + + + + orange fruit + + + + + bank + + + + + Blank line after term, indented marker, alternate markers: + + + + + apple + + + + + red fruit + + + + + computer + + + + + orange + + + + + orange fruit + + + + + sublist + + + + + sublist + + + + + HTML Blocks + + + + + Simple block on one line: + + + + + foo + + + + + And nested without indentation: + + + + + foo + + + + + bar + + + + + Interpreted markdown in a table: + + + + + <table> +<tr> +<td> + + + + + This is + + + emphasized + + + + + </td> +<td> + + + + + And this is + + + strong + + + + + </td> +</tr> +</table> + +<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script> + + + + + + Here’s a simple block: + + + + + foo + + + + + This should be a code block, though: + + + + + <div> + foo +</div> + + + + + As should this: + + + + + <div>foo</div> + + + + + Now, nested: + + + + + foo + + + + + This should just be an HTML comment: + + + + +  + + + + + + Multiline: + + + + +  + + + + + + + + Code block: + + + + +  + + + + + Just plain 
comment, with trailing spaces on the line: + + + + +  + + + + + + Code: + + + + + <hr /> + + + + + Hr’s: + + + + + <hr> + +<hr /> + +<hr /> + +<hr> + +<hr /> + +<hr /> + +<hr class="foo" id="bar" /> + +<hr class="foo" id="bar" /> + +<hr class="foo" id="bar"> + + + + + + Inline Markup + + + + + This is + + + emphasized + + + , and so + + + is this + + + . + + + + + This is + + + strong + + + , and so + + + is this + + + . + + + + + An + + + + emphasized link + + + + . + + + + + This is strong and em. + + + + + So is + + + this + + + word. + + + + + This is strong and em. + + + + + So is + + + this + + + word. + + + + + This is code: + + + > + + + , + + + $ + + + , + + + \ + + + , + + + \$ + + + , + + + <html> + + + . + + + + + This is + + + strikeout + + + . + + + + + Superscripts: a + + + bc + + + d a + + + hello + + + a + + + hello there + + + . + + + + + Subscripts: H + + + 2 + + + O, H + + + 23 + + + O, H + + + many of them + + + O. + + + + + These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d. + + + + + Smart quotes, ellipses, dashes + + + + + “ + + + Hello, + + + ” + + + said the spider. + + + “ + + + ‘ + + + Shelob + + + ’ + + + is my name. + + + ” + + + + + ‘ + + + A + + + ’ + + + , + + + ‘ + + + B + + + ’ + + + , and + + + ‘ + + + C + + + ’ + + + are letters. + + + + + ‘ + + + Oak, + + + ’ + + + + + + ‘ + + + elm, + + + ’ + + + and + + + ‘ + + + beech + + + ’ + + + are names of trees. So is + + + ‘ + + + pine. + + + ’ + + + + + ‘ + + + He said, + + + “ + + + I want to go. + + + ” + + + ’ + + + Were you alive in the 70’s? + + + + + Here is some quoted + + + ‘ + + + code + + + ’ + + + and a + + + “ + + + + quoted link + + + + ” + + + . + + + + + Some dashes: one—two — three—four — five. + + + + + Dashes between numbers: 5–7, 255–66, 1987–1999. + + + + + Ellipses…and…and…. 
+ + + + + LaTeX + + + + + \cite[22-23]{smith.1899} + + + + + 2+2=4 + + + + + x \in y + + + + + \alpha \wedge \omega + + + + + 223 + + + + + p + + + -Tree + + + + + Here’s some display math: + + + \frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h} + + + + + Here’s one that has a line break in it: + + + \alpha + \omega \times x^2 + + + . + + + + + These shouldn’t be math: + + + + + To get the famous equation, write + + + $e = mc^2$ + + + . + + + + + $22,000 is a + + + lot + + + of money. So is $34,000. (It worked if + + + “ + + + lot + + + ” + + + is emphasized.) + + + + + Shoes ($20) and socks ($5). + + + + + Escaped + + + $ + + + : $73 + + + this should be emphasized + + + 23$. + + + + + Here’s a LaTeX table: + + + + + \begin{tabular}{|l|l|}\hline +Animal & Number \\ \hline +Dog & 2 \\ +Cat & 1 \\ \hline +\end{tabular} + + + + + Special Characters + + + + + Here is some unicode: + + + + + I hat: Î + + + + + o umlaut: ö + + + + + section: § + + + + + set membership: ∈ + + + + + copyright: © + + + + + AT&T has an ampersand in their name. + + + + + AT&T is another way to write it. + + + + + This & that. + + + + + 4 < 5. + + + + + 6 > 5. + + + + + Backslash: \ + + + + + Backtick: ` + + + + + Asterisk: * + + + + + Underscore: _ + + + + + Left brace: { + + + + + Right brace: } + + + + + Left bracket: [ + + + + + Right bracket: ] + + + + + Left paren: ( + + + + + Right paren: ) + + + + + Greater-than: > + + + + + Hash: # + + + + + Period: . + + + + + Bang: ! + + + + + Plus: + + + + + + Minus: - + + + + + Links + + + + + Explicit + + + + + Just a + + + + URL + + + + . + + + + + + URL and title + + + + . + + + + + + URL and title + + + + . + + + + + + URL and title + + + + . + + + + + + URL and title + + + + + + + URL and title + + + + + + + with_underscore + + + + + + + Email link + + + + + + + Empty + + + + . + + + + + Reference + + + + + Foo + + + + bar + + + + . + + + + + Foo + + + + bar + + + + . + + + + + Foo + + + + bar + + + + . 
+ + + + + With + + + + embedded [brackets] + + + + . + + + + + + b + + + + by itself should be a link. + + + + + Indented + + + + once + + + + . + + + + + Indented + + + + twice + + + + . + + + + + Indented + + + + thrice + + + + . + + + + + This should [not][] be a link. + + + + + [not]: /url + + + + + Foo + + + + bar + + + + . + + + + + Foo + + + + biz + + + + . + + + + + With ampersands + + + + + Here’s a + + + + link with an ampersand in the URL + + + + . + + + + + Here’s a link with an amersand in the link text: + + + + AT&T + + + + . + + + + + Here’s an + + + + inline link + + + + . + + + + + Here’s an + + + + inline link in pointy braces + + + + . + + + + + Autolinks + + + + + With an ampersand: + + + + http://example.com/?foo=1&bar=2 + + + + + + In a list? + + + + + + http://example.com/ + + + + + + It should. + + + + + An e-mail address: + + + + nobody@nowhere.net + + + + + + Blockquoted: + + + + http://example.com/ + + + + + + Auto-links should not occur here: + + + <http://example.com/> + + + + + or here: <http://example.com/> + + + + + Images + + + + + From + + + “ + + + Voyage dans la Lune + + + ” + + + by Georges Melies (1902): + + + + + + + + + + + + + + + + + + + + + $ID/Embedded + + + + + + + + + + + Here is a movie + + + + + + + + + + + + + + + + + + + $ID/Embedded + + + + + + + + + icon. + + + + + Footnotes + + + + + Here is a footnote reference, + + + + + + + + + + + + + + Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document. + + + + + + and another. + + + + + + + + + + + + + + Here’s the long note. This one contains multiple blocks. + + + + + + + + Subsequent blocks are indented to show that they belong to the footnote (as with list items). + + + + + { <code> } + + + + + + + + If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. 
+ + + + + + This should + + + not + + + be a footnote reference, because it contains a space.[^my note] Here is an inline note. + + + + + + + + + + + + + + This is + + + easier + + + to type. Inline notes may contain + + + + links + + + + and + + + ] + + + verbatim characters, as well as [bracketed text]. + + + + + + + + Notes can go in quotes. + + + + + + + + + + + + + + In quote. + + + + + + + + And in list items. + + + + + + + + + + + + + + In list. + + + + + + + + This paragraph should not be part of the note, as it is not indented. + + + + + + + + Black + HyperlinkURLDestination/http://google.com + + + + + + Black + HyperlinkURLDestination/http://example.com/ + + + + + + Black + HyperlinkURLDestination/mailto:nobody@nowhere.net + + + + + + Black + HyperlinkURLDestination/http://example.com/ + + + + + + Black + HyperlinkURLDestination/http://example.com/?foo=1&bar=2 + + + + + + Black + HyperlinkURLDestination//script?foo=1&bar=2 + + + + + + Black + HyperlinkURLDestination//script?foo=1&bar=2 + + + + + + Black + HyperlinkURLDestination/http://att.com/ + + + + + + Black + HyperlinkURLDestination/http://example.com/?foo=1&bar=2 + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url + + + + + + Black + HyperlinkURLDestination//url + + + + + + Black + HyperlinkURLDestination//url + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination/ + + + + + + Black + HyperlinkURLDestination/mailto:nobody@nowhere.net + + + + + + Black + HyperlinkURLDestination//url/with_underscore + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + 
HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination//url/ + + + + + + Black + HyperlinkURLDestination/http://example.com/?foo=1&bar=2 + + + + + + Black + HyperlinkURLDestination//url + + + + + + Black + HyperlinkURLDestination//url + + + -- cgit v1.2.3 From 24b2ac43b0a8596f7baea10579c95ee75b6e584f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 4 Mar 2014 00:33:25 +0100 Subject: Add a simple Emacs Org-mode reader The basic structure of org-mode documents is recognized; however, org-mode features like todo markers, tags etc. are not supported yet. --- pandoc.cabal | 2 + pandoc.hs | 1 + src/Text/Pandoc.hs | 3 + src/Text/Pandoc/Readers/Org.hs | 552 +++++++++++++++++++++++++++++++++++++++++ tests/Tests/Readers/Org.hs | 533 +++++++++++++++++++++++++++++++++++++++ tests/test-pandoc.hs | 2 + 6 files changed, 1093 insertions(+) create mode 100644 src/Text/Pandoc/Readers/Org.hs create mode 100644 tests/Tests/Readers/Org.hs (limited to 'tests') diff --git a/pandoc.cabal b/pandoc.cabal index bbf963672..ccd23e551 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -263,6 +263,7 @@ Library Text.Pandoc.Readers.Markdown, Text.Pandoc.Readers.MediaWiki, Text.Pandoc.Readers.RST, + Text.Pandoc.Readers.Org, Text.Pandoc.Readers.DocBook, Text.Pandoc.Readers.OPML, Text.Pandoc.Readers.TeXMath, @@ -381,6 +382,7 @@ Test-Suite test-pandoc Tests.Walk Tests.Readers.LaTeX Tests.Readers.Markdown + Tests.Readers.Org Tests.Readers.RST Tests.Writers.Native Tests.Writers.ConTeXt diff --git a/pandoc.hs b/pandoc.hs index 677101746..709b5a777 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -834,6 +834,7 @@ defaultReaderName fallback (x:xs) = ".latex" -> "latex" ".ltx" -> "latex" ".rst" -> "rst" + ".org" -> "org" ".lhs" -> "markdown+lhs" ".db" -> "docbook" ".opml" -> "opml" diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 3ae81db00..e511ed861 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -65,6 +65,7 @@ 
module Text.Pandoc , readMarkdown , readMediaWiki , readRST + , readOrg , readLaTeX , readHtml , readTextile @@ -115,6 +116,7 @@ import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST +import Text.Pandoc.Readers.Org import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.OPML import Text.Pandoc.Readers.LaTeX @@ -201,6 +203,7 @@ readers = [ ("native" , \_ s -> return $ readNative s) ,("mediawiki" , \o s -> return $ readMediaWiki o s) ,("docbook" , \o s -> return $ readDocBook o s) ,("opml" , \o s -> return $ readOPML o s) + ,("org" , \o s -> return $ readOrg o s) ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs ,("html" , \o s -> return $ readHtml o s) ,("latex" , \o s -> return $ readLaTeX o s) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs new file mode 100644 index 000000000..5dc250f04 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org.hs @@ -0,0 +1,552 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2014 Albert Krewinkel + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org + Copyright : Copyright (C) 2014 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel + +Conversion of Org-Mode to 'Pandoc' document. 
+-} +module Text.Pandoc.Readers.Org ( readOrg ) where + +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (orderedListMarker) +import Text.Pandoc.Shared (compactify') + +import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Monad (guard, mzero) +import Data.Char (toLower) +import Data.List (foldl') +import Data.Maybe (listToMaybe, fromMaybe) +import Data.Monoid (mconcat, mempty, mappend) + +-- | Parse org-mode string and return a Pandoc document. +readOrg :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n") + +type OrgParser = Parser [Char] ParserState + +parseOrg:: OrgParser Pandoc +parseOrg = do + blocks' <- B.toList <$> parseBlocks + st <- getState + let meta = stateMeta st + return $ Pandoc meta $ filter (/= Null) blocks' + +-- +-- parsing blocks +-- + +parseBlocks :: OrgParser Blocks +parseBlocks = mconcat <$> manyTill block eof + +block :: OrgParser Blocks +block = choice [ mempty <$ blanklines + , orgBlock + , example + , drawer + , specialLine + , header + , hline + , list + , table + , paraOrPlain + ] "block" + +-- +-- Org Blocks (#+BEGIN_... / #+END_...) +-- + +orgBlock :: OrgParser Blocks +orgBlock = try $ do + (indent, blockType, args) <- blockHeader + blockStr <- rawBlockContent indent blockType + let classArgs = [ translateLang . 
fromMaybe [] $ listToMaybe args ] + case blockType of + "comment" -> return mempty + "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr + _ -> B.divWith ("", [blockType], []) + <$> (parseFromString parseBlocks blockStr) + +blockHeader :: OrgParser (Int, String, [String]) +blockHeader = (,,) <$> blockIndent + <*> blockType + <*> (skipSpaces *> blockArgs) + where blockIndent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) + blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline + +rawBlockContent :: Int -> String -> OrgParser String +rawBlockContent indent blockType = + unlines . map commaEscaped <$> manyTill indentedLine blockEnder + where + indentedLine = try $ choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +-- indent by specified number of spaces (or equiv. tabs) +indentWith :: Int -> OrgParser String +indentWith num = do + tabStop <- getOption readerTabStop + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')) + , try (char '\t' >> count (num - tabStop) (char ' ')) ] + +translateLang :: String -> String +translateLang "sh" = "bash" +translateLang cs = cs + +commaEscaped :: String -> String +commaEscaped (',':cs@('*':_)) = cs +commaEscaped (',':cs@('#':'+':_)) = cs +commaEscaped cs = cs + +example :: OrgParser Blocks +example = try $ + B.codeBlockWith ("", ["example"], []) . 
unlines <$> many1 exampleLine + +exampleLine :: OrgParser String +exampleLine = try $ string ": " *> anyLine + +-- Drawers for properties or a logbook +drawer :: OrgParser Blocks +drawer = try $ do + drawerStart + manyTill drawerLine (try drawerEnd) + return mempty + +drawerStart :: OrgParser String +drawerStart = try $ + skipSpaces *> drawerName <* skipSpaces <* newline + where drawerName = try $ char ':' *> validDrawerName <* char ':' + validDrawerName = stringAnyCase "PROPERTIES" + <|> stringAnyCase "LOGBOOK" + +drawerLine :: OrgParser String +drawerLine = try $ anyLine + +drawerEnd :: OrgParser String +drawerEnd = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + + +-- Comments, Options and Metadata +specialLine :: OrgParser Blocks +specialLine = try $ metaLine <|> commentLine + +metaLine :: OrgParser Blocks +metaLine = try $ metaLineStart *> declarationLine + +commentLine :: OrgParser Blocks +commentLine = try $ commentLineStart *> anyLine *> pure mempty + +-- The order, in which blocks are tried, makes sure that we're not looking at +-- the beginning of a block, so we don't need to check for it +metaLineStart :: OrgParser String +metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" + +commentLineStart :: OrgParser String +commentLineStart = try $ mappend <$> many spaceChar <*> string "# " + +declarationLine :: OrgParser Blocks +declarationLine = try $ do + meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta + updateState $ \st -> st { stateMeta = stateMeta st <> meta' } + return mempty + +metaValue :: OrgParser MetaValue +metaValue = MetaInlines . B.toList . 
trimInlines <$> restOfLine + +metaKey :: OrgParser [Char] +metaKey = map toLower <$> many1 (noneOf ": \n\r") + <* char ':' + <* skipSpaces + +-- | Headers +header :: OrgParser Blocks +header = try $ + B.header <$> headerStart + <*> (trimInlines <$> restOfLine) + +headerStart :: OrgParser Int +headerStart = try $ + (length <$> many1 (char '*')) <* many1 (char ' ') + +-- Horizontal Line (five dashes or more) +hline :: OrgParser Blocks +hline = try $ do + skipSpaces + string "-----" + many (char '-') + skipSpaces + newline + return B.horizontalRule + +-- +-- Tables +-- + +data OrgTableRow = OrgContentRow [Blocks] + | OrgAlignRow [Alignment] + | OrgHlineRow + deriving (Eq, Show) + +type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]]) + +table :: OrgParser Blocks +table = try $ do + lookAhead tableStart + (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows + return $ B.table "" (zip aligns widths) heads lns + +tableStart :: OrgParser Char +tableStart = try $ skipSpaces *> char '|' + +tableRows :: OrgParser [OrgTableRow] +tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) + +tableContentRow :: OrgParser OrgTableRow +tableContentRow = try $ + OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) + +tableContentCell :: OrgParser Blocks +tableContentCell = try $ + B.plain . trimInlines . 
mconcat <$> many1Till inline (try endOfCell) + +endOfCell :: OrgParser Char +-- endOfCell = char '|' <|> newline +endOfCell = try $ char '|' <|> lookAhead newline + +tableAlignRow :: OrgParser OrgTableRow +tableAlignRow = try $ + OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) + +tableAlignCell :: OrgParser Alignment +tableAlignCell = + choice [ try $ emptyCell *> return (AlignDefault) + , try $ skipSpaces + *> char '<' + *> tableAlignFromChar + <* many digit + <* char '>' + <* emptyCell + ] "alignment info" + where emptyCell = try $ skipSpaces *> endOfCell + +tableAlignFromChar :: OrgParser Alignment +tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] + +tableHline :: OrgParser OrgTableRow +tableHline = try $ + OrgHlineRow <$ (tableStart *> char '-' *> anyLine) + +tableContent :: [OrgTableRow] + -> OrgTableContent +tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty) + +normalizeTable :: OrgTableContent + -> OrgTableContent +normalizeTable (cols, aligns, widths, heads, lns) = + let aligns' = fillColumns aligns AlignDefault + widths' = fillColumns widths 0.0 + heads' = if heads == mempty + then heads + else fillColumns heads (B.plain mempty) + lns' = map (flip fillColumns (B.plain mempty)) lns + fillColumns base padding = take cols $ base ++ repeat padding + in (cols, aligns', widths', heads', lns') + + +-- One or more horizontal rules after the first content line mark the previous +-- line as a header. All other horizontal lines are discarded. +rowToContent :: OrgTableRow + -> OrgTableContent + -> OrgTableContent +rowToContent OrgHlineRow = maybeBodyToHeader +rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs +rowToContent (OrgAlignRow as) = setLongestRow as . 
setAligns as + +setLongestRow :: [a] + -> OrgTableContent + -> OrgTableContent +setLongestRow r (cols, aligns, widths, heads, lns) = + (max cols (length r), aligns, widths, heads, lns) + +maybeBodyToHeader :: OrgTableContent + -> OrgTableContent +maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, []) +maybeBodyToHeader content = content + +appendToBody :: [Blocks] + -> OrgTableContent + -> OrgTableContent +appendToBody r (cols, aligns, widths, heads, lns) = + (cols, aligns, widths, heads, lns ++ [r]) + +setAligns :: [Alignment] + -> OrgTableContent + -> OrgTableContent +setAligns aligns (cols, _, widths, heads, lns) = + (cols, aligns, widths, heads, lns) + +-- Paragraphs or Plain text +paraOrPlain :: OrgParser Blocks +paraOrPlain = try $ + trimInlines . mconcat + <$> many1 inline + <**> option B.plain + (try $ newline *> pure B.para) + +restOfLine :: OrgParser Inlines +restOfLine = mconcat <$> manyTill inline newline + + +-- +-- list blocks +-- + +list :: OrgParser Blocks +list = choice [ bulletList, orderedList ] "list" + +bulletList :: OrgParser Blocks +bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) + +orderedList :: OrgParser Blocks +orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) + +genericListStart :: OrgParser String + -> OrgParser Int +genericListStart listMarker = try $ + (+) <$> (length <$> many spaceChar) + <*> (length <$> listMarker <* many1 spaceChar) + +-- parses bullet list start and returns its length (excl. 
following whitespace) +bulletListStart :: OrgParser Int +bulletListStart = genericListStart bulletListMarker + where bulletListMarker = pure <$> oneOf "*-+" + +orderedListStart :: OrgParser Int +orderedListStart = genericListStart orderedListMarker + -- Ordered list markers allowed in org-mode + where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + +listItem :: OrgParser Int + -> OrgParser Blocks +listItem start = try $ do + (markerLength, first) <- try (start >>= rawListItem) + rest <- many (listContinuation markerLength) + parseFromString parseBlocks $ concat (first:rest) + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem :: Int + -> OrgParser (Int, String) +rawListItem markerLength = try $ do + firstLine <- anyLine + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + +-- continuation of a list item - indented and separated by blankline or endline. +-- Note: nested lists are parsed as continuations. 
+listContinuation :: Int + -> OrgParser String +listContinuation markerLength = try $ + mappend <$> many blankline + <*> (concat <$> many1 (listLine markerLength)) + +-- parse a line of a list item +listLine :: Int + -> OrgParser String +listLine markerLength = try $ + indentWith markerLength *> anyLine + <**> pure (++ "\n") + + +-- +-- inline +-- + +inline :: OrgParser Inlines +inline = choice inlineParsers "inline" + where inlineParsers = [ whitespace + , link + , str + , endline + , emph + , strong + , strikeout + , underline + , code + , verbatim + , subscript + , superscript + , symbol + ] + +-- treat these as potentially non-text when parsing inline: +specialChars :: [Char] +specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" + + +whitespace :: OrgParser Inlines +whitespace = B.space <$ skipMany1 spaceChar "whitespace" + +str :: OrgParser Inlines +str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") + <* updateLastStrPos + +-- an endline character that can be treated as a space, not a structural break +endline :: OrgParser Inlines +endline = try $ do + newline + notFollowedBy blankline + notFollowedBy' exampleLine + notFollowedBy' hline + notFollowedBy' tableStart + notFollowedBy' drawerStart + notFollowedBy' headerStart + notFollowedBy' metaLineStart + notFollowedBy' commentLineStart + notFollowedBy' bulletListStart + notFollowedBy' orderedListStart + return B.space + +link :: OrgParser Inlines +link = explicitLink <|> selfLink "link" + +explicitLink :: OrgParser Inlines +explicitLink = try $ do + char '[' + src <- enclosedRaw (char '[') (char ']') + title <- enclosedInlines (char '[') (char ']') + char ']' + return $ B.link src "" title + +selfLink :: OrgParser Inlines +selfLink = try $ do + src <- enclosedRaw (string "[[") (string "]]") + return $ B.link src "" (B.str src) + +emph :: OrgParser Inlines +emph = B.emph <$> inlinesEnclosedBy '/' + +strong :: OrgParser Inlines +strong = B.strong <$> inlinesEnclosedBy '*' + +strikeout :: OrgParser Inlines +strikeout 
= B.strikeout <$> inlinesEnclosedBy '+' + +-- There is no underline, so we use strong instead. +underline :: OrgParser Inlines +underline = B.strong <$> inlinesEnclosedBy '_' + +code :: OrgParser Inlines +code = B.code <$> rawEnclosedBy '=' + +verbatim :: OrgParser Inlines +verbatim = B.rawInline "" <$> rawEnclosedBy '~' + +subscript :: OrgParser Inlines +subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) + +superscript :: OrgParser Inlines +superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces) + +maybeGroupedByBraces :: OrgParser Inlines +maybeGroupedByBraces = try $ + choice [ try $ enclosedInlines (char '{') (char '}') + , B.str . (:"") <$> anyChar + ] + +symbol :: OrgParser Inlines +symbol = B.str . (: "") <$> oneOf specialChars + +enclosedInlines :: OrgParser a + -> OrgParser b + -> OrgParser Inlines +enclosedInlines start end = try $ + trimInlines . mconcat <$> enclosed start end inline + +-- FIXME: This is a hack +inlinesEnclosedBy :: Char + -> OrgParser Inlines +inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) + (atEnd $ char c) + +enclosedRaw :: OrgParser a + -> OrgParser b + -> OrgParser String +enclosedRaw start end = try $ + start *> (onSingleLine <|> spanningTwoLines) + where onSingleLine = try $ many1Till (noneOf "\n\r") end + spanningTwoLines = try $ + anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine + +rawEnclosedBy :: Char + -> OrgParser String +rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) + +-- succeeds only if we're not right after a str (ie. in middle of word) +atStart :: OrgParser a -> OrgParser a +atStart p = do + pos <- getPosition + st <- getState + guard $ stateLastStrPos st /= Just pos + p + +-- | succeeds only if we're at the end of a word +atEnd :: OrgParser a -> OrgParser a +atEnd p = try $ p <* lookingAtEndOfWord + where lookingAtEndOfWord = lookAhead . 
oneOf $ postWordChars + +postWordChars :: [Char] +postWordChars = "\t\n\r !\"'),-.:?}" + +-- FIXME: These functions are hacks and should be replaced +endsOnThisOrNextLine :: Char + -> OrgParser () +endsOnThisOrNextLine c = do + inp <- getInput + let doOtherwise = \rest -> endsOnThisLine rest c (const mzero) + endsOnThisLine inp c doOtherwise + +endsOnThisLine :: [Char] + -> Char + -> ([Char] -> OrgParser ()) + -> OrgParser () +endsOnThisLine input c doOnOtherLines = do + case break (`elem` c:"\n") input of + (_,'\n':rest) -> doOnOtherLines rest + (_,_:rest@(n:_)) -> if n `elem` postWordChars + then return () + else endsOnThisLine rest c doOnOtherLines + _ -> mzero + diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs new file mode 100644 index 000000000..8c5982302 --- /dev/null +++ b/tests/Tests/Readers/Org.hs @@ -0,0 +1,533 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Readers.Org (tests) where + +import Text.Pandoc.Definition +import Test.Framework +import Tests.Helpers +import Tests.Arbitrary() +import Text.Pandoc.Builder +import Text.Pandoc +import Data.List (intersperse) +import Data.Monoid (mempty, mconcat) + +org :: String -> Pandoc +org = readOrg def + +infix 4 =: +(=:) :: ToString c + => String -> (String, c) -> Test +(=:) = test org + +spcSep :: [Inlines] -> Inlines +spcSep = mconcat . intersperse space + +simpleTable' :: Int + -> [Blocks] + -> [[Blocks]] + -> Blocks +simpleTable' n = table "" (take n $ repeat (AlignDefault, 0.0)) + +tests :: [Test] +tests = + [ testGroup "Inlines" $ + [ "Plain String" =: + "Hello, World" =?> + para (spcSep [ "Hello,", "World" ]) + + , "Emphasis" =: + "/Planet Punk/" =?> + para (emph . spcSep $ ["Planet", "Punk"]) + + , "Strong" =: + "*Cider*" =?> + para (strong "Cider") + + , "Strikeout" =: + "+Kill Bill+" =?> + para (strikeout . 
spcSep $ [ "Kill", "Bill" ]) + + , "Code" =: + "=Robot.rock()=" =?> + para (code "Robot.rock()") + + , "Verbatim" =: + "~word for word~" =?> + para (rawInline "" "word for word") + + , "Symbol" =: + "A * symbol" =?> + para (str "A" <> space <> str "*" <> space <> "symbol") + + , "Superscript single char" =: + "2^n" =?> + para (str "2" <> superscript "n") + + , "Superscript multi char" =: + "2^{n-1}" =?> + para (str "2" <> superscript "n-1") + + , "Subscript single char" =: + "a_n" =?> + para (str "a" <> subscript "n") + + , "Subscript multi char" =: + "a_{n+1}" =?> + para (str "a" <> subscript "n+1") + + , "Markup-chars not occuring on word break are symbols" =: + unlines [ "this+that+ +so+on" + , "seven*eight* nine*" + , "+not+funny+" + ] =?> + para (spcSep [ "this+that+", "+so+on" + , "seven*eight*", "nine*" + , strikeout "not+funny" + ]) + + , "Markup may not span more than two lines" =: + unlines [ "/this *is", "not*", "emph/" ] =?> + para (spcSep [ "/this" + , (strong ("is" <> space <> "not")) + , "emph/" ]) + + , "Explicit link" =: + "[[http://zeitlens.com/][pseudo-random nonsense]]" =?> + (para $ link "http://zeitlens.com/" "" + ("pseudo-random" <> space <> "nonsense")) + + , "Self-link" =: + "[[http://zeitlens.com/]]" =?> + (para $ link "http://zeitlens.com/" "" "http://zeitlens.com/") + ] + + , testGroup "Meta Information" $ + [ "Comment" =: + "# Nothing to see here" =?> + (mempty::Blocks) + + , "Not a comment" =: + "#-tag" =?> + para "#-tag" + + , "Comment surrounded by Text" =: + unlines [ "Before" + , "# Comment" + , "After" + ] =?> + mconcat [ para "Before" + , para "After" + ] + + , "Title" =: + "#+TITLE: Hello, World" =?> + let titleInline = toList $ "Hello," <> space <> "World" + meta = setMeta "title" (MetaInlines titleInline) $ nullMeta + in Pandoc meta mempty + + , "Author" =: + "#+author: Albert /Emacs-Fanboy/ Krewinkel" =?> + let author = toList . 
spcSep $ [ "Albert", emph "Emacs-Fanboy", "Krewinkel" ] + meta = setMeta "author" (MetaInlines author) $ nullMeta + in Pandoc meta mempty + + , "Date" =: + "#+Date: Feb. *28*, 2014" =?> + let date = toList . spcSep $ [ "Feb.", (strong "28") <> ",", "2014" ] + meta = setMeta "date" (MetaInlines date) $ nullMeta + in Pandoc meta mempty + + , "Description" =: + "#+DESCRIPTION: Explanatory text" =?> + let description = toList . spcSep $ [ "Explanatory", "text" ] + meta = setMeta "description" (MetaInlines description) $ nullMeta + in Pandoc meta mempty + + , "Properties drawer" =: + unlines [ " :PROPERTIES:" + , " :setting: foo" + , " :END:" + ] =?> + (mempty::Blocks) + + , "Logbook drawer" =: + unlines [ " :LogBook:" + , " - State \"DONE\" from \"TODO\" [2014-03-03 Mon 11:00]" + , " :END:" + ] =?> + (mempty::Blocks) + + , "Drawer surrounded by text" =: + unlines [ "Before" + , ":PROPERTIES:" + , ":END:" + , "After" + ] =?> + para "Before" <> para "After" + + , "Drawer start is the only text in first line of a drawer" =: + unlines [ " :LOGBOOK: foo" + , " :END:" + ] =?> + para (spcSep [ ":LOGBOOK:", "foo", ":END:" ]) + + , "Drawers with unknown names are just text" =: + unlines [ ":FOO:" + , ":END:" + ] =?> + para (":FOO:" <> space <> ":END:") + ] + + , testGroup "Basic Blocks" $ + [ "Paragraph" =: + "Paragraph\n" =?> + para "Paragraph" + + , "First Level Header" =: + "* Headline\n" =?> + header 1 "Headline" + + , "Third Level Header" =: + "*** Third Level Headline\n" =?> + header 3 ("Third" <> space <> + "Level" <> space <> + "Headline") + + , "Compact Headers with Paragraph" =: + unlines [ "* First Level" + , "** Second Level" + , " Text" + ] =?> + mconcat [ header 1 ("First" <> space <> "Level") + , header 2 ("Second" <> space <> "Level") + , para "Text" + ] + + , "Separated Headers with Paragraph" =: + unlines [ "* First Level" + , "" + , "** Second Level" + , "" + , " Text" + ] =?> + mconcat [ header 1 ("First" <> space <> "Level") + , header 2 ("Second" <> space 
<> "Level") + , para "Text" + ] + + , "Headers not preceded by a blank line" =: + unlines [ "** eat dinner" + , "Spaghetti and meatballs tonight." + , "** walk dog" + ] =?> + mconcat [ header 2 ("eat" <> space <> "dinner") + , para $ spcSep [ "Spaghetti", "and", "meatballs", "tonight." ] + , header 2 ("walk" <> space <> "dog") + ] + + , "Paragraph starting with an asterisk" =: + "*five" =?> + para "*five" + + , "Paragraph containing asterisk at beginning of line" =: + unlines [ "lucky" + , "*star" + ] =?> + para ("lucky" <> space <> "*star") + + , "Example block" =: + unlines [ ": echo hello" + , ": echo dear tester" + ] =?> + codeBlockWith ("", ["example"], []) "echo hello\necho dear tester\n" + + , "Example block surrounded by text" =: + unlines [ "Greetings" + , ": echo hello" + , ": echo dear tester" + , "Bye" + ] =?> + mconcat [ para "Greetings" + , codeBlockWith ("", ["example"], []) + "echo hello\necho dear tester\n" + , para "Bye" + ] + + , "Horizontal Rule" =: + unlines [ "before" + , "-----" + , "after" + ] =?> + mconcat [ para "before" + , horizontalRule + , para "after" + ] + + , "Not a Horizontal Rule" =: + "----- five dashes" =?> + (para $ spcSep [ "-----", "five", "dashes" ]) + + , "Comment Block" =: + unlines [ "#+BEGIN_COMMENT" + , "stuff" + , "bla" + , "#+END_COMMENT"] =?> + (mempty::Blocks) + + , "Source Block in Text" =: + unlines [ "Low German greeting" + , " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] + , codeBlockWith attr' code' + ] + + , "Source Block" =: + unlines [ " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in codeBlockWith 
attr' code' + ] + + , testGroup "Lists" $ + [ "Simple Bullet Lists" =: + ("- Item1\n" ++ + "- Item2\n") =?> + bulletList [ plain "Item1" + , plain "Item2" + ] + + , "Indented Bullet Lists" =: + (" - Item1\n" ++ + " - Item2\n") =?> + bulletList [ plain "Item1" + , plain "Item2" + ] + + , "Multi-line Bullet Lists" =: + ("- *Fat\n" ++ + " Tony*\n" ++ + "- /Sideshow\n" ++ + " Bob/") =?> + bulletList [ plain $ strong ("Fat" <> space <> "Tony") + , plain $ emph ("Sideshow" <> space <> "Bob") + ] + + , "Nested Bullet Lists" =: + ("- Discovery\n" ++ + " + One More Time\n" ++ + " + Harder, Better, Faster, Stronger\n" ++ + "- Homework\n" ++ + " + Around the World\n"++ + "- Human After All\n" ++ + " + Technologic\n" ++ + " + Robot Rock\n") =?> + bulletList [ mconcat + [ para "Discovery" + , bulletList [ plain ("One" <> space <> + "More" <> space <> + "Time") + , plain ("Harder," <> space <> + "Better," <> space <> + "Faster," <> space <> + "Stronger") + ] + ] + , mconcat + [ para "Homework" + , bulletList [ plain ("Around" <> space <> + "the" <> space <> + "World") + ] + ] + , mconcat + [ para ("Human" <> space <> "After" <> space <> "All") + , bulletList [ plain "Technologic" + , plain ("Robot" <> space <> "Rock") + ] + ] + ] + + , "Simple Ordered List" =: + ("1. Item1\n" ++ + "2. Item2\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ plain "Item1" + , plain "Item2" + ] + in orderedListWith listStyle listStructure + + , "Simple Ordered List with Parens" =: + ("1) Item1\n" ++ + "2) Item2\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ plain "Item1" + , plain "Item2" + ] + in orderedListWith listStyle listStructure + + , "Indented Ordered List" =: + (" 1. Item1\n" ++ + " 2. Item2\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ plain "Item1" + , plain "Item2" + ] + in orderedListWith listStyle listStructure + + , "Nested Ordered Lists" =: + ("1. One\n" ++ + " 1. One-One\n" ++ + " 2. 
One-Two\n" ++ + "2. Two\n" ++ + " 1. Two-One\n"++ + " 2. Two-Two\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ mconcat + [ para "One" + , orderedList [ plain "One-One" + , plain "One-Two" + ] + ] + , mconcat + [ para "Two" + , orderedList [ plain "Two-One" + , plain "Two-Two" + ] + ] + ] + in orderedListWith listStyle listStructure + + , "Ordered List in Bullet List" =: + ("- Emacs\n" ++ + " 1. Org\n") =?> + bulletList [ (para "Emacs") <> + (orderedList [ plain "Org"]) + ] + + , "Bullet List in Ordered List" =: + ("1. GNU\n" ++ + " - Freedom\n") =?> + orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + ] + + , testGroup "Tables" + [ "Single cell table" =: + "|Test|" =?> + simpleTable' 1 mempty [[plain "Test"]] + + , "Multi cell table" =: + "| One | Two |" =?> + simpleTable' 2 mempty [ [ plain "One", plain "Two" ] ] + + , "Multi line table" =: + unlines [ "| One |" + , "| Two |" + , "| Three |" + ] =?> + simpleTable' 1 mempty + [ [ plain "One" ] + , [ plain "Two" ] + , [ plain "Three" ] + ] + + , "Empty table" =: + "||" =?> + simpleTable' 1 mempty mempty + + , "Glider Table" =: + unlines [ "| 1 | 0 | 0 |" + , "| 0 | 1 | 1 |" + , "| 1 | 1 | 0 |" + ] =?> + simpleTable' 3 mempty + [ [ plain "1", plain "0", plain "0" ] + , [ plain "0", plain "1", plain "1" ] + , [ plain "1", plain "1", plain "0" ] + ] + + , "Table between Paragraphs" =: + unlines [ "Before" + , "| One | Two |" + , "After" + ] =?> + mconcat [ para "Before" + , simpleTable' 2 mempty [ [ plain "One", plain "Two" ] ] + , para "After" + ] + + , "Table with Header" =: + unlines [ "| Species | Status |" + , "|--------------+--------------|" + , "| cervisiae | domesticated |" + , "| paradoxus | wild |" + ] =?> + simpleTable [ plain "Species", plain "Status" ] + [ [ plain "cervisiae", plain "domesticated" ] + , [ plain "paradoxus", plain "wild" ] + ] + + , "Table with final hline" =: + unlines [ "| cervisiae | domesticated |" + , "| paradoxus | wild |" + , 
"|--------------+--------------|" + ] =?> + simpleTable' 2 mempty + [ [ plain "cervisiae", plain "domesticated" ] + , [ plain "paradoxus", plain "wild" ] + ] + + , "Table in a box" =: + unlines [ "|---------|---------|" + , "| static | Haskell |" + , "| dynamic | Lisp |" + , "|---------+---------|" + ] =?> + simpleTable' 2 mempty + [ [ plain "static", plain "Haskell" ] + , [ plain "dynamic", plain "Lisp" ] + ] + + , "Table with alignment row" =: + unlines [ "| Numbers | Text | More |" + , "| | | |" + , "| 1 | One | foo |" + , "| 2 | Two | bar |" + ] =?> + table "" (zip [AlignCenter, AlignRight, AlignDefault] [0, 0, 0]) + [] + [ [ plain "Numbers", plain "Text", plain "More" ] + , [ plain "1" , plain "One" , plain "foo" ] + , [ plain "2" , plain "Two" , plain "bar" ] + ] + + , "Pipe within text doesn't start a table" =: + "Ceci n'est pas une | pipe " =?> + para (spcSep [ "Ceci", "n'est", "pas", "une", "|", "pipe" ]) + + , "Missing pipe at end of row" =: + "|incomplete-but-valid" =?> + simpleTable' 1 mempty [ [ plain "incomplete-but-valid" ] ] + + , "Table with differing row lengths" =: + unlines [ "| Numbers | Text " + , "|-" + , "| | |" + , "| 1 | One | foo |" + , "| 2" + ] =?> + table "" (zip [AlignCenter, AlignRight, AlignDefault] [0, 0, 0]) + [ plain "Numbers", plain "Text" , plain mempty ] + [ [ plain "1" , plain "One" , plain "foo" ] + , [ plain "2" , plain mempty , plain mempty ] + ] + ] + ] diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index ae521541a..74f8e5044 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -7,6 +7,7 @@ import GHC.IO.Encoding import qualified Tests.Old import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown +import qualified Tests.Readers.Org import qualified Tests.Readers.RST import qualified Tests.Writers.ConTeXt import qualified Tests.Writers.LaTeX @@ -31,6 +32,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests , testGroup 
"Markdown" Tests.Readers.Markdown.tests + , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests ] ] -- cgit v1.2.3 From f3c9d3788530e450d1bb23a4fd829bc5a6eed266 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 5 Mar 2014 13:01:23 -0800 Subject: HTML writer: Add colgroup around col tags. Also affects EPUB writer. Closes #877. --- src/Text/Pandoc/Writers/HTML.hs | 15 ++++++++++----- tests/tables.html | 6 ++++++ 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 3ac2a836f..e0385af25 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -533,11 +533,16 @@ blockToHtml opts (Table capt aligns widths headers rows') = do let percent w = show (truncate (100*w) :: Integer) ++ "%" let coltags = if all (== 0.0) widths then mempty - else mconcat $ map (\w -> - if writerHtml5 opts - then H.col ! A.style (toValue $ "width: " ++ percent w) - else H.col ! A.width (toValue $ percent w) >> nl opts) - widths + else do + H.colgroup $ do + nl opts + mapM_ (\w -> do + if writerHtml5 opts + then H.col ! A.style (toValue $ "width: " ++ + percent w) + else H.col ! A.width (toValue $ percent w) + nl opts) widths + nl opts head' <- if all null headers then return mempty else do diff --git a/tests/tables.html b/tests/tables.html index b72aa784e..a9b2b247d 100644 --- a/tests/tables.html +++ b/tests/tables.html @@ -96,10 +96,12 @@ Multiline table with caption: ++ @@ -125,10 +127,12 @@ Here's the caption. It may span multiple lines. Centered Header Multiline table without caption: ++ @@ -177,10 +181,12 @@ Centered Header Multiline table without column headers: ++ -- cgit v1.2.3 From d7fbc40dff9771181f26d7d9cb3129c9884a5f01 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 24 Mar 2014 15:07:19 -0700 Subject: RTF writer: Fixed tables cells containing paragraphs. This moves \intbl after \pard. 
--- src/Text/Pandoc/Writers/RTF.hs | 2 +- tests/tables.rtf | 184 ++++++++++++++++++++--------------------- 2 files changed, 93 insertions(+), 93 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index fb935fa6a..3e0bd9976 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -259,7 +259,7 @@ tableRowToRTF header indent aligns sizes' cols = tableItemToRTF :: Int -> Alignment -> [Block] -> String tableItemToRTF indent alignment item = let contents = concatMap (blockToRTF indent alignment) item - in "{\\intbl " ++ contents ++ "\\cell}\n" + in "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. diff --git a/tests/tables.rtf b/tests/tables.rtf index 011724967..e1fe4aab1 100644 --- a/tests/tables.rtf +++ b/tests/tables.rtf @@ -4,13 +4,13 @@ \clbrdrb\brdrs\cellx2160\clbrdrb\brdrs\cellx4320\clbrdrb\brdrs\cellx6480\clbrdrb\brdrs\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Center\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Center\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default\par} \cell} } \intbl\row} @@ -19,13 +19,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} } \intbl\row} @@ 
-34,13 +34,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} } \intbl\row} @@ -49,13 +49,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} } \intbl\row} @@ -66,13 +66,13 @@ \clbrdrb\brdrs\cellx2160\clbrdrb\brdrs\cellx4320\clbrdrb\brdrs\cellx6480\clbrdrb\brdrs\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Center\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Center\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default\par} \cell} } \intbl\row} @@ -81,13 +81,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql 
\f0 \sa0 \li0 \fi0 12\par} \cell} } \intbl\row} @@ -96,13 +96,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} } \intbl\row} @@ -111,13 +111,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} } \intbl\row} @@ -128,13 +128,13 @@ \clbrdrb\brdrs\cellx2160\clbrdrb\brdrs\cellx4320\clbrdrb\brdrs\cellx6480\clbrdrb\brdrs\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Center\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Center\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default\par} \cell} } \intbl\row} @@ -143,13 +143,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl 
{\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} } \intbl\row} @@ -158,13 +158,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} } \intbl\row} @@ -173,13 +173,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} } \intbl\row} @@ -190,13 +190,13 @@ \clbrdrb\brdrs\cellx1296\clbrdrb\brdrs\cellx2484\clbrdrb\brdrs\cellx3888\clbrdrb\brdrs\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Centered Header\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Centered Header\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left Aligned\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left Aligned\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right Aligned\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right Aligned\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default aligned\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default aligned\par} \cell} } \intbl\row} @@ -205,13 +205,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 First\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 First\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 
\fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} \cell} } \intbl\row} @@ -220,13 +220,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Second\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Second\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 5.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 5.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} \cell} } \intbl\row} @@ -237,13 +237,13 @@ \clbrdrb\brdrs\cellx1296\clbrdrb\brdrs\cellx2484\clbrdrb\brdrs\cellx3888\clbrdrb\brdrs\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Centered Header\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Centered Header\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left Aligned\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left Aligned\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right Aligned\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right Aligned\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default aligned\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default aligned\par} \cell} } \intbl\row} @@ -252,13 +252,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 First\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 First\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 
\fi0 Example of a row that spans multiple lines.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} \cell} } \intbl\row} @@ -267,13 +267,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Second\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Second\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 5.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 5.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} \cell} } \intbl\row} @@ -284,13 +284,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} } \intbl\row} @@ -299,13 +299,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} } \intbl\row} @@ -314,13 +314,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} 
+{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} } \intbl\row} @@ -331,13 +331,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 First\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 First\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} \cell} } \intbl\row} @@ -346,13 +346,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Second\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Second\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 5.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 5.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} \cell} } \intbl\row} -- cgit v1.2.3 From 90269cb213ae6b2db31cc11199d1ce8d378bdac8 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Thu, 27 Mar 2014 19:58:14 +0000 Subject: Tests updated to reflect changes to readers. Previously normalisation was handled by the `normalizeSpaces` function. The behavior of the builder monoid is slightly different and melds together more items such as consecutive strings and spaces adjacent to line breaks. The tests have been changed to reflect this.
All relevant tests passed when the string melding line of the builder monoid was commented out. --- tests/html-reader.html | 5 ++ tests/html-reader.native | 152 ++++++++++++++++++++++--------------------- tests/opml-reader.native | 2 +- tests/textile-reader.native | 91 +++++++++++++------------- tests/textile-reader.textile | 6 +- 5 files changed, 132 insertions(+), 124 deletions(-) (limited to 'tests') diff --git a/tests/html-reader.html b/tests/html-reader.html index 69bb9ba8a..b7e5c0d2f 100644 --- a/tests/html-reader.html +++ b/tests/html-reader.html @@ -426,5 +426,10 @@ An e-mail address: nobody [at] nowhere.net { <code> } If you want, you can use a caret at the beginning of every line, as with blockquotes, but all that you need is a caret at the beginning of the first line of the block and any preceding blank lines. +text Leading space +Trailing space text +text Leading spaces +Trailing spaces text + diff --git a/tests/html-reader.native b/tests/html-reader.native index e80905729..a6a3ab177 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -1,5 +1,5 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) -[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Str ".",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] ,Header 2 
("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embedded",Space,Str "link"] ("/url","")] @@ -14,15 +14,15 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Paragraphs"] -,Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."] -,Para [Str "In",Space,Str "Markdown",Space,Str "1",Str ".",Str "0",Str ".",Str "0",Space,Str "and",Space,Str "earlier",Str ".",Space,Str "Version",Space,Str "8",Str ".",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",Space,Str "list",Space,Str "item",Str ".",Space,Str "Because",Space,Str "a",Space,Str "hard",Str "-",Str "wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",Space,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item",Str "."] -,Para [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str ".",Space,Str "*",Space,Str "criminey",Str "."] -,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "hard",Space,Str "line",Space,Str "break",LineBreak,Space,Str "here",Str "."] +,Para [Str "Here's",Space,Str "a",Space,Str "regular",Space,Str "paragraph."] +,Para [Str "In",Space,Str "Markdown",Space,Str "1.0.0",Space,Str "and",Space,Str "earlier.",Space,Str "Version",Space,Str "8.",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str "Because",Space,Str "a",Space,Str "hard-wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",Space,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item."] +,Para [Str "Here's",Space,Str "one",Space,Str "with",Space,Str 
"a",Space,Str "bullet.",Space,Str "*",Space,Str "criminey."] +,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "hard",Space,Str "line",Space,Str "break",LineBreak,Space,Str "here."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Block",Space,Str "Quotes"] -,Para [Str "E",Str "-",Str "mail",Space,Str "style:"] +,Para [Str "E-mail",Space,Str "style:"] ,BlockQuote - [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote",Str ".",Space,Str "It",Space,Str "is",Space,Str "pretty",Space,Str "short",Str "."]] + [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "It",Space,Str "is",Space,Str "pretty",Space,Str "short."]] ,BlockQuote [Para [Str "Code",Space,Str "in",Space,Str "a",Space,Str "block",Space,Str "quote:"] ,CodeBlock ("",[],[]) "sub status {\n print \"working\";\n}" @@ -35,8 +35,8 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl [Para [Str "nested"]] ,BlockQuote [Para [Str "nested"]]] -,Para [Str "This",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "a",Space,Str "block",Space,Str "quote:",Space,Str "2",Space,Str ">",Space,Str "1",Str "."] -,Para [Str "Box",Str "-",Str "style:"] +,Para [Str "This",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "a",Space,Str "block",Space,Str "quote:",Space,Str "2",Space,Str ">",Space,Str "1."] +,Para [Str "Box-style:"] ,BlockQuote [Para [Str "Example:"] ,CodeBlock ("",[],[]) "sub status {\n print \"working\";\n}"] @@ -44,12 +44,12 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl [OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "do",Space,Str "laundry"]] ,[Plain [Str "take",Space,Str "out",Space,Str "the",Space,Str "trash"]]]] -,Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "nested",Space,Str "one:"] +,Para [Str "Here's",Space,Str "a",Space,Str "nested",Space,Str "one:"] ,BlockQuote [Para [Str "Joe",Space,Str "said:"] 
,BlockQuote - [Para [Str "Don",Str "'",Str "t",Space,Str "quote",Space,Str "me",Str "."]]] -,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph",Str "."] + [Para [Str "Don't",Space,Str "quote",Space,Str "me."]]] +,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Code",Space,Str "Blocks"] ,Para [Str "Code:"] @@ -112,10 +112,10 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,[Para [Str "Three"]]] ,Para [Str "Multiple",Space,Str "paragraphs:"] ,OrderedList (1,DefaultStyle,DefaultDelim) - [[Para [Str "Item",Space,Str "1,",Space,Str "graf",Space,Str "one",Str "."] - ,Para [Str "Item",Space,Str "1",Str ".",Space,Str "graf",Space,Str "two",Str ".",Space,Str "The",Space,Str "quick",Space,Str "brown",Space,Str "fox",Space,Str "jumped",Space,Str "over",Space,Str "the",Space,Str "lazy",Space,Str "dog",Str "'",Str "s",Space,Str "back",Str "."]] - ,[Para [Str "Item",Space,Str "2",Str "."]] - ,[Para [Str "Item",Space,Str "3",Str "."]]] + [[Para [Str "Item",Space,Str "1,",Space,Str "graf",Space,Str "one."] + ,Para [Str "Item",Space,Str "1.",Space,Str "graf",Space,Str "two.",Space,Str "The",Space,Str "quick",Space,Str "brown",Space,Str "fox",Space,Str "jumped",Space,Str "over",Space,Str "the",Space,Str "lazy",Space,Str "dog's",Space,Str "back."]] + ,[Para [Str "Item",Space,Str "2."]] + ,[Para [Str "Item",Space,Str "3."]]] ,Header 2 ("",[],[]) [Str "Nested"] ,BulletList [[Plain [Str "Tab"] @@ -123,7 +123,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl [[Plain [Str "Tab"] ,BulletList [[Plain [Str "Tab"]]]]]]] -,Para [Str "Here",Str "'",Str "s",Space,Str "another:"] +,Para [Str "Here's",Space,Str "another:"] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "First"]] ,[Plain [Str "Second:"] @@ -163,63 +163,63 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,OrderedList 
(1,UpperAlpha,DefaultDelim) [[Plain [Str "Upper",Space,Str "Alpha"] ,OrderedList (1,UpperRoman,DefaultDelim) - [[Plain [Str "Upper",Space,Str "Roman",Str "."] + [[Plain [Str "Upper",Space,Str "Roman."] ,OrderedList (6,Decimal,DefaultDelim) [[Plain [Str "Decimal",Space,Str "start",Space,Str "with",Space,Str "6"] ,OrderedList (3,LowerAlpha,DefaultDelim) [[Plain [Str "Lower",Space,Str "alpha",Space,Str "with",Space,Str "paren"]]]]]]]]] ,Para [Str "Autonumbering:"] ,OrderedList (1,DefaultStyle,DefaultDelim) - [[Plain [Str "Autonumber",Str "."]] - ,[Plain [Str "More",Str "."] + [[Plain [Str "Autonumber."]] + ,[Plain [Str "More."] ,OrderedList (1,DefaultStyle,DefaultDelim) - [[Plain [Str "Nested",Str "."]]]]] + [[Plain [Str "Nested."]]]]] ,HorizontalRule ,Header 2 ("",[],[]) [Str "Definition"] ,DefinitionList [([Str "Violin"], - [[Plain [Str "Stringed",Space,Str "musical",Space,Str "instrument",Str "."]] - ,[Plain [Str "Torture",Space,Str "device",Str "."]]]) + [[Plain [Str "Stringed",Space,Str "musical",Space,Str "instrument."]] + ,[Plain [Str "Torture",Space,Str "device."]]]) ,([Str "Cello",LineBreak,Str "Violoncello"], - [[Plain [Str "Low",Str "-",Str "voiced",Space,Str "stringed",Space,Str "instrument",Str "."]]])] + [[Plain [Str "Low-voiced",Space,Str "stringed",Space,Str "instrument."]]])] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] ,Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str "."] ,Para [Str "An",Space,Emph [Link [Str "emphasized",Space,Str "link"] ("/url","")],Str "."] -,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em",Str "."]]] -,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Str "."] -,Para [Strong [Emph [Str 
"This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em",Str "."]]] -,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Str "."] +,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]] +,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."] +,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]] +,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."] ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",Space,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",Space,Code ("",[],[]) "",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Smart",Space,Str "quotes,",Space,Str "ellipses,",Space,Str "dashes"] -,Para [Str "\"",Str "Hello,",Str "\"",Space,Str "said",Space,Str "the",Space,Str "spider",Str ".",Space,Str "\"",Str "'",Str "Shelob",Str "'",Space,Str "is",Space,Str "my",Space,Str "name",Str ".",Str "\""] -,Para [Str "'",Str "A",Str "'",Str ",",Space,Str "'",Str "B",Str "'",Str ",",Space,Str "and",Space,Str "'",Str "C",Str "'",Space,Str "are",Space,Str "letters",Str "."] -,Para [Str "'",Str "Oak,",Str "'",Space,Str "'",Str "elm,",Str "'",Space,Str "and",Space,Str "'",Str "beech",Str "'",Space,Str "are",Space,Str "names",Space,Str "of",Space,Str "trees",Str ".",Space,Str "So",Space,Str "is",Space,Str "'",Str "pine",Str ".",Str "'"] -,Para [Str "'",Str "He",Space,Str "said,",Space,Str "\"",Str "I",Space,Str "want",Space,Str "to",Space,Str "go",Str ".",Str "\"",Str "'",Space,Str "Were",Space,Str "you",Space,Str "alive",Space,Str "in",Space,Str "the",Space,Str "70",Str "'",Str "s?"] -,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "quoted",Space,Str "'",Code ("",[],[]) "code",Str "'",Space,Str "and",Space,Str "a",Space,Str "\"",Link [Str "quoted",Space,Str "link"] 
("http://example.com/?foo=1&bar=2",""),Str "\"",Str "."] -,Para [Str "Some",Space,Str "dashes:",Space,Str "one",Str "-",Str "-",Str "-",Str "two",Space,Str "-",Str "-",Str "-",Space,Str "three",Str "-",Str "-",Str "four",Space,Str "-",Str "-",Space,Str "five",Str "."] -,Para [Str "Dashes",Space,Str "between",Space,Str "numbers:",Space,Str "5",Str "-",Str "7,",Space,Str "255",Str "-",Str "66,",Space,Str "1987",Str "-",Str "1999",Str "."] -,Para [Str "Ellipses",Str ".",Str ".",Str ".",Str "and",Str ".",Space,Str ".",Space,Str ".",Str "and",Space,Str ".",Space,Str ".",Space,Str ".",Space,Str "."] +,Para [Str "\"Hello,\"",Space,Str "said",Space,Str "the",Space,Str "spider.",Space,Str "\"'Shelob'",Space,Str "is",Space,Str "my",Space,Str "name.\""] +,Para [Str "'A',",Space,Str "'B',",Space,Str "and",Space,Str "'C'",Space,Str "are",Space,Str "letters."] +,Para [Str "'Oak,'",Space,Str "'elm,'",Space,Str "and",Space,Str "'beech'",Space,Str "are",Space,Str "names",Space,Str "of",Space,Str "trees.",Space,Str "So",Space,Str "is",Space,Str "'pine.'"] +,Para [Str "'He",Space,Str "said,",Space,Str "\"I",Space,Str "want",Space,Str "to",Space,Str "go.\"'",Space,Str "Were",Space,Str "you",Space,Str "alive",Space,Str "in",Space,Str "the",Space,Str "70's?"] +,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "quoted",Space,Str "'",Code ("",[],[]) "code",Str "'",Space,Str "and",Space,Str "a",Space,Str "\"",Link [Str "quoted",Space,Str "link"] ("http://example.com/?foo=1&bar=2",""),Str "\"."] +,Para [Str "Some",Space,Str "dashes:",Space,Str "one---two",Space,Str "---",Space,Str "three--four",Space,Str "--",Space,Str "five."] +,Para [Str "Dashes",Space,Str "between",Space,Str "numbers:",Space,Str "5-7,",Space,Str "255-66,",Space,Str "1987-1999."] +,Para [Str "Ellipses...and.",Space,Str ".",Space,Str ".and",Space,Str ".",Space,Str ".",Space,Str ".",Space,Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "LaTeX"] ,BulletList - [[Plain [Str "\\cite[22",Str "-",Str "23]{smith",Str 
".",Str "1899}"]] + [[Plain [Str "\\cite[22-23]{smith.1899}"]] ,[Plain [Str "\\doublespacing"]] - ,[Plain [Str "$",Str "2+2=4",Str "$"]] - ,[Plain [Str "$",Str "x",Space,Str "\\in",Space,Str "y",Str "$"]] - ,[Plain [Str "$",Str "\\alpha",Space,Str "\\wedge",Space,Str "\\omega",Str "$"]] - ,[Plain [Str "$",Str "223",Str "$"]] - ,[Plain [Str "$",Str "p",Str "$",Str "-",Str "Tree"]] - ,[Plain [Str "$",Str "\\frac{d}{dx}f(x)=\\lim_{h\\to",Space,Str "0}\\frac{f(x+h)",Str "-",Str "f(x)}{h}",Str "$"]] - ,[Plain [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "that",Space,Str "has",Space,Str "a",Space,Str "line",Space,Str "break",Space,Str "in",Space,Str "it:",Space,Str "$",Str "\\alpha",Space,Str "+",Space,Str "\\omega",Space,Str "\\times",Space,Str "x^2",Str "$",Str "."]]] -,Para [Str "These",Space,Str "shouldn",Str "'",Str "t",Space,Str "be",Space,Str "math:"] + ,[Plain [Str "$2+2=4$"]] + ,[Plain [Str "$x",Space,Str "\\in",Space,Str "y$"]] + ,[Plain [Str "$\\alpha",Space,Str "\\wedge",Space,Str "\\omega$"]] + ,[Plain [Str "$223$"]] + ,[Plain [Str "$p$-Tree"]] + ,[Plain [Str "$\\frac{d}{dx}f(x)=\\lim_{h\\to",Space,Str "0}\\frac{f(x+h)-f(x)}{h}$"]] + ,[Plain [Str "Here's",Space,Str "one",Space,Str "that",Space,Str "has",Space,Str "a",Space,Str "line",Space,Str "break",Space,Str "in",Space,Str "it:",Space,Str "$\\alpha",Space,Str "+",Space,Str "\\omega",Space,Str "\\times",Space,Str "x^2$."]]] +,Para [Str "These",Space,Str "shouldn't",Space,Str "be",Space,Str "math:"] ,BulletList [[Plain [Str "To",Space,Str "get",Space,Str "the",Space,Str "famous",Space,Str "equation,",Space,Str "write",Space,Code ("",[],[]) "$e = mc^2$",Str "."]] - ,[Plain [Str "$",Str "22,000",Space,Str "is",Space,Str "a",Space,Emph [Str "lot"],Space,Str "of",Space,Str "money",Str ".",Space,Str "So",Space,Str "is",Space,Str "$",Str "34,000",Str ".",Space,Str "(It",Space,Str "worked",Space,Str "if",Space,Str "\"",Str "lot",Str "\"",Space,Str "is",Space,Str "emphasized",Str ".",Str ")"]] - ,[Plain 
[Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$",Str "73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23",Str "$",Str "."]]] -,Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] + ,[Plain [Str "$22,000",Space,Str "is",Space,Str "a",Space,Emph [Str "lot"],Space,Str "of",Space,Str "money.",Space,Str "So",Space,Str "is",Space,Str "$34,000.",Space,Str "(It",Space,Str "worked",Space,Str "if",Space,Str "\"lot\"",Space,Str "is",Space,Str "emphasized.)"]] + ,[Plain [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23$."]]] +,Para [Str "Here's",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] ,Para [Str "\\begin{tabular}{|l|l|}\\hline",Space,Str "Animal",Space,Str "&",Space,Str "Number",Space,Str "\\\\",Space,Str "\\hline",Space,Str "Dog",Space,Str "&",Space,Str "2",Space,Str "\\\\",Space,Str "Cat",Space,Str "&",Space,Str "1",Space,Str "\\\\",Space,Str "\\hline",Space,Str "\\end{tabular}"] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Special",Space,Str "Characters"] @@ -230,11 +230,11 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,[Plain [Str "section:",Space,Str "\167"]] ,[Plain [Str "set",Space,Str "membership:",Space,Str "\8712"]] ,[Plain [Str "copyright:",Space,Str "\169"]]] -,Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."] -,Para [Str "AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."] -,Para [Str "This",Space,Str "&",Space,Str "that",Str "."] -,Para [Str "4",Space,Str "<",Space,Str "5",Str "."] -,Para [Str "6",Space,Str ">",Space,Str "5",Str "."] +,Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name."] +,Para [Str 
"AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it."] +,Para [Str "This",Space,Str "&",Space,Str "that."] +,Para [Str "4",Space,Str "<",Space,Str "5."] +,Para [Str "6",Space,Str ">",Space,Str "5."] ,Para [Str "Backslash:",Space,Str "\\"] ,Para [Str "Backtick:",Space,Str "`"] ,Para [Str "Asterisk:",Space,Str "*"] @@ -245,7 +245,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Para [Str "Right",Space,Str "bracket:",Space,Str "]"] ,Para [Str "Left",Space,Str "paren:",Space,Str "("] ,Para [Str "Right",Space,Str "paren:",Space,Str ")"] -,Para [Str "Greater",Str "-",Str "than:",Space,Str ">"] +,Para [Str "Greater-than:",Space,Str ">"] ,Para [Str "Hash:",Space,Str "#"] ,Para [Str "Period:",Space,Str "."] ,Para [Str "Bang:",Space,Str "!"] @@ -260,47 +260,51 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/","title preceded by a tab"),Str "."] ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/","title with \"quotes\" in it")] ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/","title with single quotes")] -,Para [Str "Email",Space,Str "link",Space,Str "(nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net)"] +,Para [Str "Email",Space,Str "link",Space,Str "(nobody",Space,Str "[at]",Space,Str "nowhere.net)"] ,Para [Link [Str "Empty"] ("",""),Str "."] ,Header 2 ("",[],[]) [Str "Reference"] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] ,Para [Str "With",Space,Link [Str "embedded",Space,Str "[brackets]"] ("/url/",""),Str "."] -,Para [Link [Str "b"] ("/url/",""),Space,Str "by",Space,Str "itself",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "link",Str "."] +,Para [Link [Str "b"] ("/url/",""),Space,Str 
"by",Space,Str "itself",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "link."] ,Para [Str "Indented",Space,Link [Str "once"] ("/url",""),Str "."] ,Para [Str "Indented",Space,Link [Str "twice"] ("/url",""),Str "."] ,Para [Str "Indented",Space,Link [Str "thrice"] ("/url",""),Str "."] -,Para [Str "This",Space,Str "should",Space,Str "[not]",Space,Str "be",Space,Str "a",Space,Str "link",Str "."] +,Para [Str "This",Space,Str "should",Space,Str "[not]",Space,Str "be",Space,Str "a",Space,Str "link."] ,CodeBlock ("",[],[]) "[not]: /url" ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/","Title with \"quotes\" inside"),Str "."] ,Para [Str "Foo",Space,Link [Str "biz"] ("/url/","Title with \"quote\" inside"),Str "."] ,Header 2 ("",[],[]) [Str "With",Space,Str "ampersands"] -,Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] ("http://example.com/?foo=1&bar=2",""),Str "."] -,Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] ("http://att.com/","AT&T"),Str "."] -,Para [Str "Here",Str "'",Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."] -,Para [Str "Here",Str "'",Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."] +,Para [Str "Here's",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] ("http://example.com/?foo=1&bar=2",""),Str "."] +,Para [Str "Here's",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] ("http://att.com/","AT&T"),Str "."] 
+,Para [Str "Here's",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."] +,Para [Str "Here's",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."] ,Header 2 ("",[],[]) [Str "Autolinks"] -,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link [Str "http://example",Str ".",Str "com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")] +,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")] ,BulletList [[Plain [Str "In",Space,Str "a",Space,Str "list?"]] - ,[Plain [Link [Str "http://example",Str ".",Str "com/"] ("http://example.com/","")]] - ,[Plain [Str "It",Space,Str "should",Str "."]]] -,Para [Str "An",Space,Str "e",Str "-",Str "mail",Space,Str "address:",Space,Str "nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net"] + ,[Plain [Link [Str "http://example.com/"] ("http://example.com/","")]] + ,[Plain [Str "It",Space,Str "should."]]] +,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Str "nobody",Space,Str "[at]",Space,Str "nowhere.net"] ,BlockQuote - [Para [Str "Blockquoted:",Space,Link [Str "http://example",Str ".",Str "com/"] ("http://example.com/","")]] -,Para [Str "Auto",Str "-",Str "links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) ""] + [Para [Str "Blockquoted:",Space,Link [Str "http://example.com/"] ("http://example.com/","")]] +,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) ""] ,CodeBlock ("",[],[]) "or here: " ,HorizontalRule ,Header 1 ("",[],[]) [Str "Images"] -,Para [Str "From",Space,Str "\"",Str "Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune",Str "\"",Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902):"] +,Para [Str "From",Space,Str 
"\"Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune\"",Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902):"] ,Para [Image [Str "lalune"] ("lalune.jpg","Voyage dans la Lune")] -,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [Str "movie"] ("movie.jpg",""),Space,Str "icon",Str "."] +,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [Str "movie"] ("movie.jpg",""),Space,Str "icon."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Footnotes"] -,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "footnote",Space,Str "reference",Link [Str "(1)"] ("#note_1",""),Str ",",Space,Str "and",Space,Str "another",Link [Str "(longnote)"] ("#note_longnote",""),Str ".",Space,Str "This",Space,Str "should",Space,Emph [Str "not"],Space,Str "be",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Space,Str "because",Space,Str "it",Space,Str "contains",Space,Str "a",Space,Str "space^(my",Space,Str "note)",Str "."] -,Para [Link [Str "(1)"] ("#ref_1",""),Space,Str "Here",Space,Str "is",Space,Str "the",Space,Str "footnote",Str ".",Space,Str "It",Space,Str "can",Space,Str "go",Space,Str "anywhere",Space,Str "in",Space,Str "the",Space,Str "document,",Space,Str "not",Space,Str "just",Space,Str "at",Space,Str "the",Space,Str "end",Str "."] -,Para [Link [Str "(longnote)"] ("#ref_longnote",""),Space,Str "Here",Str "'",Str "s",Space,Str "the",Space,Str "other",Space,Str "note",Str ".",Space,Str "This",Space,Str "one",Space,Str "contains",Space,Str "multiple",Space,Str "blocks",Str "."] -,Para [Str "Caret",Space,Str "characters",Space,Str "are",Space,Str "used",Space,Str "to",Space,Str "indicate",Space,Str "that",Space,Str "the",Space,Str "blocks",Space,Str "all",Space,Str "belong",Space,Str "to",Space,Str "a",Space,Str "single",Space,Str "footnote",Space,Str "(as",Space,Str "with",Space,Str "block",Space,Str "quotes)",Str "."] +,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "footnote",Space,Str 
"reference",Link [Str "(1)"] ("#note_1",""),Str ",",Space,Str "and",Space,Str "another",Link [Str "(longnote)"] ("#note_longnote",""),Str ".",Space,Str "This",Space,Str "should",Space,Emph [Str "not"],Space,Str "be",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Space,Str "because",Space,Str "it",Space,Str "contains",Space,Str "a",Space,Str "space^(my",Space,Str "note)."] +,Para [Link [Str "(1)"] ("#ref_1",""),Space,Str "Here",Space,Str "is",Space,Str "the",Space,Str "footnote.",Space,Str "It",Space,Str "can",Space,Str "go",Space,Str "anywhere",Space,Str "in",Space,Str "the",Space,Str "document,",Space,Str "not",Space,Str "just",Space,Str "at",Space,Str "the",Space,Str "end."] +,Para [Link [Str "(longnote)"] ("#ref_longnote",""),Space,Str "Here's",Space,Str "the",Space,Str "other",Space,Str "note.",Space,Str "This",Space,Str "one",Space,Str "contains",Space,Str "multiple",Space,Str "blocks."] +,Para [Str "Caret",Space,Str "characters",Space,Str "are",Space,Str "used",Space,Str "to",Space,Str "indicate",Space,Str "that",Space,Str "the",Space,Str "blocks",Space,Str "all",Space,Str "belong",Space,Str "to",Space,Str "a",Space,Str "single",Space,Str "footnote",Space,Str "(as",Space,Str "with",Space,Str "block",Space,Str "quotes)."] ,CodeBlock ("",[],[]) " { }" -,Para [Str "If",Space,Str "you",Space,Str "want,",Space,Str "you",Space,Str "can",Space,Str "use",Space,Str "a",Space,Str "caret",Space,Str "at",Space,Str "the",Space,Str "beginning",Space,Str "of",Space,Str "every",Space,Str "line,",Space,Str "as",Space,Str "with",Space,Str "blockquotes,",Space,Str "but",Space,Str "all",Space,Str "that",Space,Str "you",Space,Str "need",Space,Str "is",Space,Str "a",Space,Str "caret",Space,Str "at",Space,Str "the",Space,Str "beginning",Space,Str "of",Space,Str "the",Space,Str "first",Space,Str "line",Space,Str "of",Space,Str "the",Space,Str "block",Space,Str "and",Space,Str "any",Space,Str "preceding",Space,Str "blank",Space,Str "lines",Str "."]] +,Para [Str 
"If",Space,Str "you",Space,Str "want,",Space,Str "you",Space,Str "can",Space,Str "use",Space,Str "a",Space,Str "caret",Space,Str "at",Space,Str "the",Space,Str "beginning",Space,Str "of",Space,Str "every",Space,Str "line,",Space,Str "as",Space,Str "with",Space,Str "blockquotes,",Space,Str "but",Space,Str "all",Space,Str "that",Space,Str "you",Space,Str "need",Space,Str "is",Space,Str "a",Space,Str "caret",Space,Str "at",Space,Str "the",Space,Str "beginning",Space,Str "of",Space,Str "the",Space,Str "first",Space,Str "line",Space,Str "of",Space,Str "the",Space,Str "block",Space,Str "and",Space,Str "any",Space,Str "preceding",Space,Str "blank",Space,Str "lines."] +,Para [Str "text",Space,Emph [Str "Leading",Space,Str "space"]] +,Para [Emph [Str "Trailing",Space,Str "space"],Space,Str "text"] +,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]] +,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"]] diff --git a/tests/opml-reader.native b/tests/opml-reader.native index e71857680..14aff2c03 100644 --- a/tests/opml-reader.native +++ b/tests/opml-reader.native @@ -19,7 +19,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Dave",Spa ,Header 3 ("",[],[]) [Str "North",Space,Str "Dakota"] ,Header 3 ("",[],[]) [Str "Oklahoma"] ,Header 3 ("",[],[]) [Str "South",Space,Str "Dakota"] -,Header 2 ("",[],[]) [Str "Mid",Str "-",Str "Atlantic"] +,Header 2 ("",[],[]) [Str "Mid-Atlantic"] ,Header 3 ("",[],[]) [Str "Delaware"] ,Header 3 ("",[],[]) [Str "Maryland"] ,Header 3 ("",[],[]) [Str "New",Space,Str "Jersey"] diff --git a/tests/textile-reader.native b/tests/textile-reader.native index ebfbc07fd..0c2b13e72 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -1,5 +1,5 @@ Pandoc (Meta {unMeta = fromList []}) -[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str 
"of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "\8217",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader.",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("headers",[],[]) [Str "Headers"] ,Header 2 ("level-2-with-an-embeded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")] @@ -8,42 +8,42 @@ Pandoc (Meta {unMeta = fromList []}) ,Header 5 ("level-5",[],[]) [Str "Level",Space,Str "5"] ,Header 6 ("level-6",[],[]) [Str "Level",Space,Str "6"] ,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"] -,Para [Str "Here",Str "\8217",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."] -,Para [Str "Line",Space,Str "breaks",Space,Str "are",Space,Str "preserved",Space,Str "in",Space,Str "textile",Str ",",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "not",Space,Str "wrap",Space,Str "your",Space,Str "very",LineBreak,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor",Space,Str "and",Space,Str "have",Space,Str "it",Space,Str "rendered",LineBreak,Str "with",Space,Str "no",Space,Str "break",Str "."] -,Para [Str "Here",Str "\8217",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."] +,Para [Str "Here\8217s",Space,Str "a",Space,Str "regular",Space,Str "paragraph."] +,Para [Str "Line",Space,Str "breaks",Space,Str "are",Space,Str "preserved",Space,Str "in",Space,Str "textile,",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "not",Space,Str 
"wrap",Space,Str "your",Space,Str "very",LineBreak,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor",Space,Str "and",Space,Str "have",Space,Str "it",Space,Str "rendered",LineBreak,Str "with",Space,Str "no",Space,Str "break."] +,Para [Str "Here\8217s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet."] ,BulletList - [[Plain [Str "criminey",Str "."]]] + [[Plain [Str "criminey."]]] ,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "paragraph",Space,Str "break",Space,Str "between",Space,Str "here"] -,Para [Str "and",Space,Str "here",Str "."] -,Para [Str "pandoc",Space,Str "converts",Space,Str "textile",Str "."] +,Para [Str "and",Space,Str "here."] +,Para [Str "pandoc",Space,Str "converts",Space,Str "textile."] ,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"] ,BlockQuote - [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "famous",Space,Str "quote",Space,Str "from",Space,Str "somebody",Str ".",Space,Str "He",Space,Str "had",Space,Str "a",Space,Str "lot",Space,Str "of",Space,Str "things",Space,Str "to",LineBreak,Str "say",Str ",",Space,Str "so",Space,Str "the",Space,Str "text",Space,Str "is",Space,Str "really",Space,Str "really",Space,Str "long",Space,Str "and",Space,Str "spans",Space,Str "on",Space,Str "multiple",Space,Str "lines",Str "."]] -,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph",Str "."] + [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "famous",Space,Str "quote",Space,Str "from",Space,Str "somebody.",Space,Str "He",Space,Str "had",Space,Str "a",Space,Str "lot",Space,Str "of",Space,Str "things",Space,Str "to",LineBreak,Str "say,",Space,Str "so",Space,Str "the",Space,Str "text",Space,Str "is",Space,Str "really",Space,Str "really",Space,Str "long",Space,Str "and",Space,Str "spans",Space,Str "on",Space,Str "multiple",Space,Str "lines."]] +,Para [Str "And",Space,Str "a",Space,Str 
"following",Space,Str "paragraph."] ,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"] -,Para [Str "Code",Str ":"] +,Para [Str "Code:"] ,CodeBlock ("",[],[]) " ---- (should be four hyphens)\n\n sub status {\n print \"working\";\n }\n\n this code block is indented by one tab" -,Para [Str "And",Str ":"] +,Para [Str "And:"] ,CodeBlock ("",[],[]) " this code block is indented by two tabs\n\n These should not be escaped: \\$ \\\\ \\> \\[ \\{" ,CodeBlock ("",[],[]) "Code block with .bc\n continued\n @",Str ",",Space,Code ("",[],[]) "@",Str "."] +,Para [Str "Inline",Space,Str "code:",Space,Code ("",[],[]) "",Str ",",Space,Code ("",[],[]) "@",Str "."] ,Header 1 ("notextile",[],[]) [Str "Notextile"] ,Para [Str "A",Space,Str "block",Space,Str "of",Space,Str "text",Space,Str "can",Space,Str "be",Space,Str "protected",Space,Str "with",Space,Str "notextile",Space,Str ":"] ,Para [Str "\nNo *bold* and\n* no bullet\n"] -,Para [Str "and",Space,Str "inlines",Space,Str "can",Space,Str "be",Space,Str "protected",Space,Str "with",Space,Str "double *equals (=)* markup",Str "."] +,Para [Str "and",Space,Str "inlines",Space,Str "can",Space,Str "be",Space,Str "protected",Space,Str "with",Space,Str "double *equals (=)* markup."] ,Header 1 ("lists",[],[]) [Str "Lists"] ,Header 2 ("unordered",[],[]) [Str "Unordered"] -,Para [Str "Asterisks",Space,Str "tight",Str ":"] +,Para [Str "Asterisks",Space,Str "tight:"] ,BulletList [[Plain [Str "asterisk",Space,Str "1"]] ,[Plain [Str "asterisk",Space,Str "2"]] ,[Plain [Str "asterisk",Space,Str "3"]]] -,Para [Str "With",Space,Str "line",Space,Str "breaks",Str ":"] +,Para [Str "With",Space,Str "line",Space,Str "breaks:"] ,BulletList [[Plain [Str "asterisk",Space,Str "1",LineBreak,Str "newline"]] ,[Plain [Str "asterisk",Space,Str "2"]]] ,Header 2 ("ordered",[],[]) [Str "Ordered"] -,Para [Str "Tight",Str ":"] +,Para [Str "Tight:"] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "First"]] ,[Plain [Str "Second"]] @@ -52,42 +52,42 @@ 
Pandoc (Meta {unMeta = fromList []}) ,BulletList [[Plain [Str "ui",Space,Str "1"] ,BulletList - [[Plain [Str "ui",Space,Str "1",Str ".",Str "1"] + [[Plain [Str "ui",Space,Str "1.1"] ,OrderedList (1,DefaultStyle,DefaultDelim) - [[Plain [Str "oi",Space,Str "1",Str ".",Str "1",Str ".",Str "1"]] - ,[Plain [Str "oi",Space,Str "1",Str ".",Str "1",Str ".",Str "2"]]]] - ,[Plain [Str "ui",Space,Str "1",Str ".",Str "2"]]]] + [[Plain [Str "oi",Space,Str "1.1.1"]] + ,[Plain [Str "oi",Space,Str "1.1.2"]]]] + ,[Plain [Str "ui",Space,Str "1.2"]]]] ,[Plain [Str "ui",Space,Str "2"] ,OrderedList (1,DefaultStyle,DefaultDelim) - [[Plain [Str "oi",Space,Str "2",Str ".",Str "1"] + [[Plain [Str "oi",Space,Str "2.1"] ,BulletList - [[Plain [Str "ui",Space,Str "2",Str ".",Str "1",Str ".",Str "1"]] - ,[Plain [Str "ui",Space,Str "2",Str ".",Str "1",Str ".",Str "2"]]]]]]] + [[Plain [Str "ui",Space,Str "2.1.1"]] + ,[Plain [Str "ui",Space,Str "2.1.2"]]]]]]] ,Header 2 ("definition-list",[],[]) [Str "Definition",Space,Str "List"] ,DefinitionList [([Str "coffee"], [[Plain [Str "Hot",Space,Str "and",Space,Str "black"]]]) ,([Str "tea"], - [[Plain [Str "Also",Space,Str "hot",Str ",",Space,Str "but",Space,Str "a",Space,Str "little",Space,Str "less",Space,Str "black"]]]) + [[Plain [Str "Also",Space,Str "hot,",Space,Str "but",Space,Str "a",Space,Str "little",Space,Str "less",Space,Str "black"]]]) ,([Str "milk"], - [[Para [Str "Nourishing",Space,Str "beverage",Space,Str "for",Space,Str "baby",Space,Str "cows",Str "."] - ,Para [Str "Cold",Space,Str "drink",Space,Str "that",Space,Str "goes",Space,Str "great",Space,Str "with",Space,Str "cookies",Str "."]]]) + [[Para [Str "Nourishing",Space,Str "beverage",Space,Str "for",Space,Str "baby",Space,Str "cows."] + ,Para [Str "Cold",Space,Str "drink",Space,Str "that",Space,Str "goes",Space,Str "great",Space,Str "with",Space,Str "cookies."]]]) ,([Str "beer"], [[Plain [Str "fresh",Space,Str "and",Space,Str "bitter"]]])] ,Header 1 ("inline-markup",[],[]) [Str 
"Inline",Space,Str "Markup"] -,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str ".",LineBreak,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str ".",LineBreak,Str "Hyphenated-words-are-ok",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str "strange_underscore_notation",Str ".",LineBreak,Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."] -,Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em",Str "."]],LineBreak,Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str ".",LineBreak,Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]] -,Para [Str "Superscripts",Str ":",Space,Str "a",Superscript [Str "bc"],Str "d",Space,Str "a",Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str ".",LineBreak,Str "Subscripts",Str ":",Space,Subscript [Str "here"],Space,Str "H",Subscript [Str "2"],Str "O",Str ",",Space,Str "H",Subscript [Str "23"],Str "O",Str ",",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O",Str "."] -,Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,Str "\8212",Space,Str "automatic",Space,Str "dashes",Str "."] -,Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Str "\8230",Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."] -,Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Quoted DoubleQuote [Str "I",Str "\8217",Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you"],Space,Str "for",Space,Str 
"example",Str "."] +,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str ".",LineBreak,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str ".",LineBreak,Str "Hyphenated-words-are-ok,",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str "strange_underscore_notation.",LineBreak,Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."] +,Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]],LineBreak,Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str ".",LineBreak,Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]] +,Para [Str "Superscripts:",Space,Str "a",Superscript [Str "bc"],Str "d",Space,Str "a",Space,Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str ".",LineBreak,Str "Subscripts:",Space,Subscript [Str "here"],Space,Str "H",Space,Subscript [Str "2"],Str "O,",Space,Str "H",Space,Subscript [Str "23"],Str "O,",Space,Str "H",Space,Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O."] +,Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,Str "\8212",Space,Str "automatic",Space,Str "dashes."] +,Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Str "\8230",Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more."] +,Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Quoted DoubleQuote [Str "I\8217d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you"],Space,Str "for",Space,Str "example."] ,Header 1 ("links",[],[]) [Str "Links"] ,Header 2 ("explicit",[],[]) 
[Str "Explicit"] ,Para [Str "Just",Space,Str "a",Space,Link [Str "url"] ("http://www.url.com","")] ,Para [Link [Str "Email",Space,Str "link"] ("mailto:nobody@nowhere.net","")] ,Para [Str "Automatic",Space,Str "linking",Space,Str "to",Space,Link [Str "http://www.example.com"] ("http://www.example.com",""),Str "."] -,Para [Link [Str "Example"] ("http://www.example.com/",""),Str ":",Space,Str "Example",Space,Str "of",Space,Str "a",Space,Str "link",Space,Str "followed",Space,Str "by",Space,Str "a",Space,Str "colon",Str "."] -,Para [Str "A",Space,Str "link",Link [Str "with",Space,Str "brackets"] ("http://www.example.com",""),Str "and",Space,Str "no",Space,Str "spaces",Str "."] +,Para [Link [Str "Example"] ("http://www.example.com/",""),Str ":",Space,Str "Example",Space,Str "of",Space,Str "a",Space,Str "link",Space,Str "followed",Space,Str "by",Space,Str "a",Space,Str "colon."] +,Para [Str "A",Space,Str "link",Link [Str "with",Space,Str "brackets"] ("http://www.example.com",""),Str "and",Space,Str "no",Space,Str "spaces."] ,Header 1 ("tables",[],[]) [Str "Tables"] ,Para [Str "Textile",Space,Str "allows",Space,Str "tables",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "headers",Space,Str ":"] ,Header 2 ("without-headers",[],[]) [Str "Without",Space,Str "headers"] @@ -121,11 +121,11 @@ Pandoc (Meta {unMeta = fromList []}) ,[Plain [Str "45"]] ,[Plain [Str "f"]]]] ,Header 1 ("images",[],[]) [Str "Images"] -,Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax",Str ",",Space,Str "like",Space,LineBreak,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] +,Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax,",Space,Str "like",LineBreak,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str 
"here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] ,Header 1 ("attributes",[],[]) [Str "Attributes"] -,Header 2 ("ident",["bar","foo"],[("style","color:red"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers",Str "."] +,Header 2 ("ident",["bar","foo"],[("style","color:red"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers."] ,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str " all kind"] -,Para [Str "and",Space,Str "paragraph",Space,Str "attributes",Str ",",Space,Str "and",Space,Str "table",Space,Str "attributes",Str "."] +,Para [Str "and",Space,Str "paragraph",Space,Str "attributes,",Space,Str "and",Space,Str "table",Space,Str "attributes."] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [] [[[Plain [Str "name"]] @@ -137,7 +137,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Header 1 ("entities",[],[]) [Str "Entities"] ,Para [Str "*",LineBreak,Str "&"] ,Header 1 ("raw-html",[],[]) [Str "Raw",Space,Str "HTML"] -,Para [Str "However",Str ",",Space,RawInline (Format "html") "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline (Format "html") "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] +,Para [Str "However,",Space,RawInline (Format "html") "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline (Format "html") "",Space,Str "are",Space,Str "accepted,",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] ,RawBlock (Format "html") "" ,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] ,RawBlock (Format "html") "" @@ -145,9 +145,9 @@ Pandoc (Meta {unMeta = fromList []}) ,RawBlock (Format "html") "" ,Para 
[Str "inlined"] ,RawBlock (Format "html") "" -,Para [Str "as",Space,Str "well",Str "."] +,Para [Str "as",Space,Str "well."] ,BulletList - [[Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "\8217",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"]] + [[Plain [Str "this",Space,Str "",Space,Str "won\8217t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str ""]] ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") ""]]] ,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ,Header 1 ("raw-latex",[],[]) [Str "Raw",Space,Str "LaTeX"] @@ -156,12 +156,11 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "and",Space,Str "for",Space,RawInline (Format "latex") "\\emph{inlines}",Str "."] ,Header 1 ("acronyms-and-marks",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"] ,Para [Str "PBS (Public Broadcasting System)"] -,Para [Str "Hi",Str "\8482"] +,Para [Str "Hi\8482"] ,Para [Str "Hi",Space,Str "\8482"] -,Para [Str "\174",Space,Str "Hi",Str "\174"] -,Para [Str "Hi",Str "\169",Str "2008",Space,Str "\169",Space,Str "2008"] +,Para [Str "\174",Space,Str "Hi\174"] +,Para [Str "Hi\169\&2008",Space,Str "\169",Space,Str "2008"] ,Header 1 ("footnotes",[],[]) [Str "Footnotes"] -,Para [Str "A",Space,Str "note",Str ".",Note [Para [Str "The",Space,Str "note",LineBreak,Str "is",Space,Str "here",Str "!"]],Space,Str "Another",Space,Str "note",Note [Para [Str "Other",Space,Str "note",Str "."]],Str "."] +,Para [Str "A",Space,Str "note.",Note [Para [Str "The",Space,Str "note",LineBreak,Str "is",Space,Str "here!"]],Space,Str "Another",Space,Str "note",Note [Para [Str "Other",Space,Str "note."]],Str "."] ,Header 1 ("comment-blocks",[],[]) [Str "Comment",Space,Str "blocks"] -,Null -,Para 
[Str "not",Space,Str "a",Space,Str "comment",Str "."]] +,Para [Str "not",Space,Str "a",Space,Str "comment."]] diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile index e31052b6a..73c36b0d1 100644 --- a/tests/textile-reader.textile +++ b/tests/textile-reader.textile @@ -139,8 +139,8 @@ _*This is strong and em.*_ So is *_this_* word and __**that one**__. -This is strikeout and *strong*- -Superscripts: a[^bc^]d a^*hello*^ a[^hello there^]. -Subscripts: ~here~ H[~2~]O, H[~23~]O, H[~many of them~]O. +Superscripts: a[^bc^]d a ^*hello*^ a[^hello there^]. +Subscripts: ~here~ H[ ~2~]O, H[ ~23~]O, H[ ~many of them~]O. Dashes : How cool -- automatic dashes. @@ -187,7 +187,7 @@ h2. With headers h1. Images -Textile inline image syntax, like +Textile inline image syntax, like here !this_is_an_image.png(this is the alt text)! and here !this_is_an_image.png!. -- cgit v1.2.3 From 976f4f2d8fd8b5b984da52dc63d665038c4d023f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 1 Apr 2014 10:15:21 -0700 Subject: Added HTML reader tests for empty strong, emph. --- tests/html-reader.html | 1 + tests/html-reader.native | 1 + 2 files changed, 2 insertions(+) (limited to 'tests') diff --git a/tests/html-reader.html b/tests/html-reader.html index b7e5c0d2f..1e104b00f 100644 --- a/tests/html-reader.html +++ b/tests/html-reader.html @@ -302,6 +302,7 @@ These should not be escaped: \$ \\ \> \[ \{ Inline Markup This is emphasized, and so is this. This is strong, and so is this. +Empty and . An emphasized link. This is strong and em. So is this word. 
diff --git a/tests/html-reader.native b/tests/html-reader.native index a6a3ab177..8fbecf34f 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -186,6 +186,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] ,Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str "."] +,Para [Str "Empty",Space,Strong [],Space,Str "and",Space,Emph [],Str "."] ,Para [Str "An",Space,Emph [Link [Str "emphasized",Space,Str "link"] ("/url","")],Str "."] ,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]] ,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."] -- cgit v1.2.3 From 4ee92dce0ce624db2d02c60ae2856a70cfeb6c42 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 1 Apr 2014 10:36:23 -0700 Subject: MediaWiki reader: Fixed bug in certain nested lists. The bug: If a level 2 list was followed by a level 1 list, the first item of the level 1 list would be lost. Closes #1213. 
--- src/Text/Pandoc/Readers/MediaWiki.hs | 3 ++- tests/mediawiki-reader.native | 4 ++++ tests/mediawiki-reader.wiki | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index f70b44aad..9bbabd44b 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -438,7 +438,8 @@ listItem c = try $ do skipMany spaceChar first <- concat <$> manyTill listChunk newline rest <- many - (try $ string extras *> (concat <$> manyTill listChunk newline)) + (try $ string extras *> lookAhead listStartChar *> + (concat <$> manyTill listChunk newline)) contents <- parseFromString (many1 $ listItem' c) (unlines (first : rest)) case c of diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index 87e4043f7..2e97e9484 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -102,6 +102,10 @@ Pandoc (Meta {unMeta = fromList []}) [[BulletList [[Plain [Str "But",Space,Str "jumping",Space,Str "levels",Space,Str "creates",Space,Str "empty",Space,Str "space."]]]]]]] ,Para [Str "Any",Space,Str "other",Space,Str "start",Space,Str "ends",Space,Str "the",Space,Str "list."] +,BulletList + [[BulletList + [[Plain [Str "two"]]]] + ,[Plain [Str "one"]]] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "Start",Space,Str "each",Space,Str "line"]] ,[Plain [Str "with",Space,Str "a",Space,Str "number",Space,Str "sign",Space,Str "(#)."] diff --git a/tests/mediawiki-reader.wiki b/tests/mediawiki-reader.wiki index 641f98eb9..6a6bc226d 100644 --- a/tests/mediawiki-reader.wiki +++ b/tests/mediawiki-reader.wiki @@ -185,6 +185,9 @@ http://johnmacfarlane.net/pandoc/ *** But jumping levels creates empty space. Any other start ends the list. +** two +* one + # Start each line # with a number sign (#). 
## More number signs gives deeper -- cgit v1.2.3 From 25763a8a35075171b00bfb829dacc25176af990a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 4 Apr 2014 21:41:44 -0700 Subject: DocBook reader/writer tests: use "authorgroup". --- tests/docbook-reader.docbook | 18 ++++++++++-------- tests/writer.docbook | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) (limited to 'tests') diff --git a/tests/docbook-reader.docbook b/tests/docbook-reader.docbook index 6173fa50e..c76925917 100644 --- a/tests/docbook-reader.docbook +++ b/tests/docbook-reader.docbook @@ -4,14 +4,16 @@ Pandoc Test Suite - - John - MacFarlane - - - - Anonymous - + + + John + MacFarlane + + + + Anonymous + + July 17, 2006 diff --git a/tests/writer.docbook b/tests/writer.docbook index 1d4da4842..d4b3c7efd 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -4,14 +4,16 @@ Pandoc Test Suite - - John - MacFarlane - - - - Anonymous - + + + John + MacFarlane + + + + Anonymous + + July 17, 2006 -- cgit v1.2.3 From fd98532784e43ad73072f37a31af5ff40fdc1c56 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 5 Apr 2014 09:37:46 +0200 Subject: Org reader: Fix parsing of nested inlines Text such as /*this*/ was not correctly parsed as a strong, emphasised word. This was due to the end-of-word recognition being too strict as it did not accept markup chars as part of a word. The fix involves an additional parser state field, listing the markup chars which might be parsed as part of a word. 
--- src/Text/Pandoc/Readers/Org.hs | 27 ++++++++++++++++++++------- tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0ae4d231c..ad66caab9 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -535,8 +535,15 @@ enclosedInlines start end = try $ -- FIXME: This is a hack inlinesEnclosedBy :: Char -> OrgParser Inlines -inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) - (atEnd $ char c) +inlinesEnclosedBy c = try $ do + updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } + res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) + (atEnd $ char c) + updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st } + return res + where shift xs + | null xs = [] + | otherwise = tail xs enclosedRaw :: OrgParser a -> OrgParser b @@ -561,11 +568,16 @@ atStart p = do -- | succeeds only if we're at the end of a word atEnd :: OrgParser a -> OrgParser a -atEnd p = try $ p <* lookingAtEndOfWord - where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars +atEnd p = try $ do + p <* lookingAtEndOfWord + where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars -postWordChars :: [Char] -postWordChars = "\t\n\r !\"'),-.:?}" +postWordChars :: OrgParser [Char] +postWordChars = do + st <- getState + return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . 
orgInlineCharStack $ st) + where safeSecond (_:x2:_) = [x2] + safeSecond _ = [] -- FIXME: These functions are hacks and should be replaced endsOnThisOrNextLine :: Char @@ -580,9 +592,10 @@ endsOnThisLine :: [Char] -> ([Char] -> OrgParser ()) -> OrgParser () endsOnThisLine input c doOnOtherLines = do + postWordChars' <- postWordChars case break (`elem` c:"\n") input of (_,'\n':rest) -> doOnOtherLines rest - (_,_:rest@(n:_)) -> if n `elem` postWordChars + (_,_:rest@(n:_)) -> if n `elem` postWordChars' then return () else endsOnThisLine rest c doOnOtherLines _ -> mzero diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 8c5982302..9091d9c74 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -42,6 +42,10 @@ tests = "*Cider*" =?> para (strong "Cider") + , "Strong Emphasis" =: + "/*strength*/" =?> + para (emph . strong $ "strength") + , "Strikeout" =: "+Kill Bill+" =?> para (strikeout . spcSep $ [ "Kill", "Bill" ]) -- cgit v1.2.3 From 652c781e375f3678a0ec821663240d4958f324de Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 5 Apr 2014 16:10:52 +0200 Subject: Org reader: Support inline images --- src/Text/Pandoc/Readers/Org.hs | 34 ++++++++++++++++++++++++---------- tests/Tests/Readers/Org.hs | 12 ++++++++++-- 2 files changed, 34 insertions(+), 12 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 62088a04d..8b1b4fa23 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,7 +39,7 @@ import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<** import Control.Monad (guard, mzero) import Data.Char (toLower) import Data.Default -import Data.List (foldl') +import Data.List (foldl', isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (mconcat, mempty, mappend) @@ -484,20 +484,26 @@ endline = try $ do return B.space link :: OrgParser Inlines -link = explicitLink <|> selfLink "link" 
+link = explicitOrImageLink <|> selflinkOrImage "link" -explicitLink :: OrgParser Inlines -explicitLink = try $ do +explicitOrImageLink :: OrgParser Inlines +explicitOrImageLink = try $ do char '[' - src <- enclosedRaw (char '[') (char ']') - title <- enclosedInlines (char '[') (char ']') + src <- enclosedRaw (char '[') (char ']') + title <- enclosedRaw (char '[') (char ']') + title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n") char ']' - return $ B.link src "" title + return $ if (isImage src) && (isImage title) + then B.link src "" (B.image title "" "") + else B.link src "" title' + where butLast = reverse . tail . reverse -selfLink :: OrgParser Inlines -selfLink = try $ do +selflinkOrImage :: OrgParser Inlines +selflinkOrImage = try $ do src <- enclosedRaw (string "[[") (string "]]") - return $ B.link src "" (B.str src) + return $ if isImage src + then B.image src "" "" + else B.link src "" (B.str src) emph :: OrgParser Inlines emph = B.emph <$> inlinesEnclosedBy '/' @@ -606,3 +612,11 @@ endsOnThisLine input c doOnOtherLines = do then return () else endsOnThisLine rest c doOnOtherLines _ -> mzero + +isImage filename = + any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && + any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename + where + imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] + protocols = [ "file", "http", "https" ] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 9091d9c74..1088d6611 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -94,14 +94,22 @@ tests = , (strong ("is" <> space <> "not")) , "emph/" ]) + , "Image" =: + "[[./sunset.jpg]]" =?> + (para $ image "./sunset.jpg" "" "") + , "Explicit link" =: - "[[http://zeitlens.com/][pseudo-random nonsense]]" =?> + "[[http://zeitlens.com/][pseudo-random /nonsense/]]" =?> (para $ link "http://zeitlens.com/" "" - ("pseudo-random" <> space <> "nonsense")) + ("pseudo-random" <> space <> 
emph "nonsense")) , "Self-link" =: "[[http://zeitlens.com/]]" =?> (para $ link "http://zeitlens.com/" "" "http://zeitlens.com/") + + , "Image link" =: + "[[sunset.png][dusk.svg]]" =?> + (para $ link "sunset.png" "" (image "dusk.svg" "" "")) ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 060a76a38e1f3586bc92787bb2c25c2dc04e380e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 5 Apr 2014 20:41:38 -0700 Subject: Textile reader: Improved treatment of HTML spans (%). Closes #1115. --- src/Text/Pandoc/Readers/Textile.hs | 6 +----- tests/textile-reader.native | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index b42c5e75d..f19d68e64 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -376,7 +376,6 @@ inlineParsers = [ inlineMarkup , endline , code , escapedInline - , htmlSpan , rawHtmlInline , rawLaTeXInline' , note @@ -400,6 +399,7 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout , simpleInline (char '^') B.superscript , simpleInline (char '~') B.subscript + , simpleInline (char '%') id ] -- | Trademark, registered, copyright @@ -476,10 +476,6 @@ str = do updateLastStrPos return $ B.str fullStr --- | Textile allows HTML span infos, we discard them -htmlSpan :: Parser [Char] ParserState Inlines -htmlSpan = try $ B.str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) - -- | Some number of space chars whitespace :: Parser [Char] ParserState Inlines whitespace = many1 spaceChar >> return B.space "whitespace" diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 0c2b13e72..88fc0bb47 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -124,7 +124,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str 
"syntax,",Space,Str "like",LineBreak,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] ,Header 1 ("attributes",[],[]) [Str "Attributes"] ,Header 2 ("ident",["bar","foo"],[("style","color:red"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers."] -,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str " all kind"] +,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str "all",Space,Str "kind"] ,Para [Str "and",Space,Str "paragraph",Space,Str "attributes,",Space,Str "and",Space,Str "table",Space,Str "attributes."] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [] -- cgit v1.2.3 From 24f438aa5f230464d510fae034c94644c0e181ca Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 5 Apr 2014 21:02:12 -0700 Subject: Textile reader: Better support for attributes. Instead of being ignored, attributes are now parsed and included in Span inlines. The output will be a bit different from stock textile: e.g. for `*(foo)hi*`, we'll get `hi` instead of `hi`. But at least the data is not lost. 
--- src/Text/Pandoc/Readers/Textile.hs | 21 ++++++++++++--------- tests/textile-reader.native | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index f19d68e64..81994e6bd 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -604,15 +604,18 @@ ungroupedSimpleInline border construct = try $ do guard $ (stateQuoteContext st /= NoQuote) || (sourceColumn pos == 1) || isWhitespace - body <- surrounded border inlineWithAttribute - lookAhead (notFollowedBy alphaNum) - let result = construct $ mconcat body - return $ if isWhitespace then B.space <> result - else result - where - inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n") - >> (withQuoteContext InSingleQuote inline) - + border *> notFollowedBy (oneOf " \t\n\r") + attr <- attributes + body <- trimInlines . mconcat <$> + withQuoteContext InSingleQuote + (manyTill inline (try border <* notFollowedBy alphaNum)) + let result = construct $ + if attr == nullAttr + then body + else B.spanWith attr body + return $ if isWhitespace + then B.space <> result + else result groupedSimpleInline :: Parser [Char] ParserState t -> (Inlines -> Inlines) diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 88fc0bb47..a17bd8de1 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -124,7 +124,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax,",Space,Str "like",LineBreak,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] ,Header 1 ("attributes",[],[]) [Str "Attributes"] ,Header 2 ("ident",["bar","foo"],[("style","color:red"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str 
"attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers."] -,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str "all",Space,Str "kind"] +,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Span ("",["foo"],[]) [Str "inline",Space,Str "attributes"]],Space,Str "of",Space,Span ("",[],[("style","color:red")]) [Str "all",Space,Str "kind"]] ,Para [Str "and",Space,Str "paragraph",Space,Str "attributes,",Space,Str "and",Space,Str "table",Space,Str "attributes."] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [] -- cgit v1.2.3 From 480b33b7100048ef3fad51754ae76c21daa8b86f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 6 Apr 2014 14:49:57 +0200 Subject: Org reader: Add support for definition lists --- src/Text/Pandoc/Readers/Org.hs | 17 ++++++++++++++++- tests/Tests/Readers/Org.hs | 26 +++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 4 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 6652925aa..20bca3e28 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -383,7 +383,10 @@ restOfLine = mconcat <$> manyTill inline newline -- list :: OrgParser Blocks -list = choice [ bulletList, orderedList ] "list" +list = choice [ definitionList, bulletList, orderedList ] "list" + +definitionList :: OrgParser Blocks +definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart) bulletList :: OrgParser Blocks bulletList = B.bulletList . 
compactify' <$> many1 (listItem bulletListStart) @@ -407,6 +410,18 @@ orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") +definitionListItem :: OrgParser Int + -> OrgParser (Inlines, [Blocks]) +definitionListItem parseMarkerGetLength = try $ do + markerLength <- parseMarkerGetLength + term <- manyTill (noneOf "\n\r") (try $ string "::") + first <- anyLineNewline + cont <- concat <$> many (listContinuation markerLength) + term' <- parseFromString inline term + contents' <- parseFromString parseBlocks $ first ++ cont + return (term', [contents']) + + -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser Blocks diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 1088d6611..eb9f4d741 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -43,8 +43,8 @@ tests = para (strong "Cider") , "Strong Emphasis" =: - "/*strength*/" =?> - para (emph . strong $ "strength") + "/*strength*/" =?> + para (emph . strong $ "strength") , "Strikeout" =: "+Kill Bill+" =?> @@ -428,7 +428,27 @@ tests = , "Bullet List in Ordered List" =: ("1. 
GNU\n" ++ " - Freedom\n") =?> - orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + + , "Definition List" =: + unlines [ "- PLL :: phase-locked loop" + , "- TTL ::" + , " transistor-transistor logic" + , "- PSK::phase-shift keying" + , "" + , " a digital modulation scheme" + ] =?> + definitionList [ ("PLL", [ plain $ "phase-locked" <> space <> "loop" ]) + , ("TTL", [ plain $ "transistor-transistor" <> space <> + "logic" ]) + , ("PSK", [ mconcat + [ para $ "phase-shift" <> space <> "keying" + , plain $ spcSep [ "a", "digital" + , "modulation", "scheme" ] + ] + ] + ) + ] ] , testGroup "Tables" -- cgit v1.2.3 From c47bd8404fda0a782719848ef190b56eb0fdb9dc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 7 Apr 2014 11:00:30 +0200 Subject: Org reader: Support inline math (like $E=mc^2$) Closes #1223. --- src/Text/Pandoc/Readers/Org.hs | 22 ++++++++++++++++------ tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 20bca3e28..2bb6ee122 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -459,6 +459,7 @@ inline = choice inlineParsers "inline" , strikeout , underline , code + , math , verbatim , subscript , superscript @@ -530,10 +531,13 @@ underline = B.strong <$> inlinesEnclosedBy '_' code :: OrgParser Inlines code = B.code <$> rawEnclosedBy '=' -verbatim :: OrgParser Inlines +math :: OrgParser Inlines +math = B.math <$> rawEnclosedBy '$' + +verbatim :: OrgParser Inlines verbatim = B.rawInline "" <$> rawEnclosedBy '~' -subscript :: OrgParser Inlines +subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) superscript :: OrgParser Inlines @@ -580,18 +584,24 @@ rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) -- succeeds only if we're not right after a str 
(ie. in middle of word) atStart :: OrgParser a -> OrgParser a atStart p = do - pos <- getPosition - st <- getState - guard $ orgLastStrPos st /= Just pos + guard =<< not <$> isRightAfterString p -- | succeeds only if we're at the end of a word atEnd :: OrgParser a -> OrgParser a atEnd p = try $ do - p <* lookingAtEndOfWord + p <* lookingAtEndOfWord where lookingAtEndOfWord = eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars +isRightAfterString :: OrgParser Bool +isRightAfterString = do + pos <- getPosition + st <- getState + -- the position `Nothing` isn't after a String, either, hence the double + -- negation + return $ not $ orgLastStrPos st /= Just pos + postWordChars :: OrgParser [Char] postWordChars = do st <- getState diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index eb9f4d741..77b9d9327 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -54,6 +54,10 @@ tests = "=Robot.rock()=" =?> para (code "Robot.rock()") + , "Math" =: + "$E=mc^2$" =?> + para (math "E=mc^2") + , "Verbatim" =: "~word for word~" =?> para (rawInline "" "word for word") -- cgit v1.2.3 From 030020236c85c736892a6f8e0dcefca1681e5ce0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 8 Apr 2014 22:39:25 +0200 Subject: Org reader: Precise rules for the recognition of markup The inline parsers have been rewritten using the org source code as a reference. This fixes a couple of bugs related to erroneous markup recognition. 
--- src/Text/Pandoc/Readers/Org.hs | 380 ++++++++++++++++++++++++++++------------- tests/Tests/Readers/Org.hs | 25 ++- 2 files changed, 283 insertions(+), 122 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2bb6ee122..392b17bbc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -32,11 +32,12 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..)) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos) +import qualified Text.Pandoc.Parsing as P +import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos) import Text.Pandoc.Shared (compactify') import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) -import Control.Monad (guard, mzero) +import Control.Monad (guard, when) import Data.Char (toLower) import Data.Default import Data.List (foldl', isPrefixOf, isSuffixOf) @@ -47,49 +48,100 @@ import Data.Monoid (mconcat, mempty, mappend) readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n") +readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n") type OrgParser = Parser [Char] OrgParserState +parseOrg:: OrgParser Pandoc +parseOrg = do + blocks' <- B.toList <$> parseBlocks + st <- getState + let meta = orgStateMeta st + return $ Pandoc meta $ filter (/= Null) blocks' + +-- +-- Parser State for Org +-- + -- | Org-mode parser state data OrgParserState = OrgParserState - { orgOptions :: ReaderOptions - , orgInlineCharStack :: [Char] - , orgLastStrPos :: Maybe SourcePos - , orgMeta :: Meta + { orgStateOptions :: ReaderOptions + , orgStateEmphasisCharStack :: [Char] + , orgStateEmphasisNewlines :: Maybe Int + , orgStateLastForbiddenCharPos 
:: Maybe SourcePos + , orgStateLastPreCharPos :: Maybe SourcePos + , orgStateLastStrPos :: Maybe SourcePos + , orgStateMeta :: Meta } deriving (Show) instance HasReaderOptions OrgParserState where - extractReaderOptions = orgOptions + extractReaderOptions = orgStateOptions instance HasMeta OrgParserState where setMeta field val st = - st{ orgMeta = setMeta field val $ orgMeta st } + st{ orgStateMeta = setMeta field val $ orgStateMeta st } deleteMeta field st = - st{ orgMeta = deleteMeta field $ orgMeta st } + st{ orgStateMeta = deleteMeta field $ orgStateMeta st } instance Default OrgParserState where def = defaultOrgParserState defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState - { orgOptions = def - , orgInlineCharStack = [] - , orgLastStrPos = Nothing - , orgMeta = nullMeta + { orgStateOptions = def + , orgStateEmphasisCharStack = [] + , orgStateEmphasisNewlines = Nothing + , orgStateLastForbiddenCharPos = Nothing + , orgStateLastPreCharPos = Nothing + , orgStateLastStrPos = Nothing + , orgStateMeta = nullMeta } updateLastStrPos :: OrgParser () updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ orgLastStrPos = Just p } + updateState $ \s -> s{ orgStateLastStrPos = Just p } +updateLastForbiddenCharPos :: OrgParser () +updateLastForbiddenCharPos = getPosition >>= \p -> + updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} -parseOrg:: OrgParser Pandoc -parseOrg = do - blocks' <- B.toList <$> parseBlocks +updateLastPreCharPos :: OrgParser () +updateLastPreCharPos = getPosition >>= \p -> + updateState $ \s -> s{ orgStateLastPreCharPos = Just p} + +pushToInlineCharStack :: Char -> OrgParser () +pushToInlineCharStack c = updateState $ \st -> + st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) } + +popInlineCharStack :: OrgParser () +popInlineCharStack = updateState $ \st -> + st { orgStateEmphasisCharStack = drop 1 . 
orgStateEmphasisCharStack $ st } + +surroundingEmphasisChar :: OrgParser [Char] +surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState + +startEmphasisNewlinesCounting :: Int -> OrgParser () +startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> + s { orgStateEmphasisNewlines = Just maxNewlines } + +decEmphasisNewlinesCount :: OrgParser () +decEmphasisNewlinesCount = updateState $ \s -> + s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } + +newlinesCountWithinLimits :: OrgParser Bool +newlinesCountWithinLimits = do st <- getState - let meta = orgMeta st - return $ Pandoc meta $ filter (/= Null) blocks' + return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True + +resetEmphasisNewlines :: OrgParser () +resetEmphasisNewlines = updateState $ \s -> + s{ orgStateEmphasisNewlines = Nothing } + +newline :: OrgParser Char +newline = + P.newline + <* updateLastPreCharPos + <* updateLastForbiddenCharPos -- -- parsing blocks @@ -218,7 +270,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser Blocks declarationLine = try $ do meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta - updateState $ \st -> st { orgMeta = orgMeta st <> meta' } + updateState $ \st -> st { orgStateMeta = orgStateMeta st <> meta' } return mempty metaValue :: OrgParser MetaValue @@ -449,22 +501,24 @@ anyLineNewline = (++ "\n") <$> anyLine -- inline :: OrgParser Inlines -inline = choice inlineParsers "inline" - where inlineParsers = [ whitespace - , link - , str - , endline - , emph - , strong - , strikeout - , underline - , code - , math - , verbatim - , subscript - , superscript - , symbol - ] +inline = + choice [ whitespace + , link + , str + , endline + , emph + , strong + , strikeout + , underline + , code + , math + , verbatim + , subscript + , superscript + , symbol + ] <* (guard =<< newlinesCountWithinLimits) + "inline" + -- treat these as potentially non-text when 
parsing inline: specialChars :: [Char] @@ -472,7 +526,10 @@ specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" whitespace :: OrgParser Inlines -whitespace = B.space <$ skipMany1 spaceChar "whitespace" +whitespace = B.space <$ skipMany1 spaceChar + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + "whitespace" str :: OrgParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") @@ -492,6 +549,9 @@ endline = try $ do notFollowedBy' commentLineStart notFollowedBy' bulletListStart notFollowedBy' orderedListStart + decEmphasisNewlinesCount + guard =<< newlinesCountWithinLimits + updateLastPreCharPos return B.space link :: OrgParser Inlines @@ -500,42 +560,54 @@ link = explicitOrImageLink <|> selflinkOrImage "link" explicitOrImageLink :: OrgParser Inlines explicitOrImageLink = try $ do char '[' - src <- enclosedRaw (char '[') (char ']') + src <- linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if (isImage src) && (isImage title) + return $ if (isImageFilename src) && (isImageFilename title) then B.link src "" (B.image title "" "") else B.link src "" title' selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do - src <- enclosedRaw (string "[[") (string "]]") - return $ if isImage src + src <- (char '[') *> linkTarget <* char ']' + return $ if isImageFilename src then B.image src "" "" else B.link src "" (B.str src) +linkTarget :: OrgParser String +linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") + +isImageFilename :: String -> Bool +isImageFilename filename = + any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && + any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename + where + imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] + protocols = [ "file", "http", "https" ] + emph :: OrgParser Inlines -emph = B.emph <$> inlinesEnclosedBy '/' +emph = B.emph <$> emphasisBetween '/' strong :: OrgParser Inlines -strong = 
B.strong <$> inlinesEnclosedBy '*' +strong = B.strong <$> emphasisBetween '*' strikeout :: OrgParser Inlines -strikeout = B.strikeout <$> inlinesEnclosedBy '+' +strikeout = B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. underline :: OrgParser Inlines -underline = B.strong <$> inlinesEnclosedBy '_' +underline = B.strong <$> emphasisBetween '_' code :: OrgParser Inlines -code = B.code <$> rawEnclosedBy '=' - -math :: OrgParser Inlines -math = B.math <$> rawEnclosedBy '$' +code = B.code <$> verbatimBetween '=' verbatim :: OrgParser Inlines -verbatim = B.rawInline "" <$> rawEnclosedBy '~' +verbatim = B.rawInline "" <$> verbatimBetween '~' + +math :: OrgParser Inlines +math = B.math <$> mathStringBetween '$' subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) @@ -550,7 +622,72 @@ maybeGroupedByBraces = try $ ] symbol :: OrgParser Inlines -symbol = B.str . (: "") <$> oneOf specialChars +symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) + where updatePositions c + | c `elem` emphasisPreChars = c <$ updateLastPreCharPos + | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos + | otherwise = return c + +emphasisBetween :: Char + -> OrgParser Inlines +emphasisBetween c = try $ do + startEmphasisNewlinesCounting emphasisAllowedNewlines + res <- enclosedInlines (emphasisStart c) (emphasisEnd c) + isTopLevelEmphasis <- null . 
orgStateEmphasisCharStack <$> getState + when isTopLevelEmphasis + resetEmphasisNewlines + return res + +verbatimBetween :: Char + -> OrgParser String +verbatimBetween c = try $ + emphasisStart c *> + many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c) + +-- | Parses a raw string delimited by @c@ using Org's math rules +mathStringBetween :: Char + -> OrgParser String +mathStringBetween c = try $ do + mathStart c + body <- many1TillNOrLessNewlines mathAllowedNewlines + (noneOf (c:"\n\r")) + (lookAhead $ mathEnd c) + final <- mathEnd c + return $ body ++ [final] + +-- | Parses the start (opening character) of emphasis +emphasisStart :: Char -> OrgParser Char +emphasisStart c = try $ do + guard =<< afterEmphasisPreChar + guard =<< notAfterString + char c + lookAhead (noneOf emphasisForbiddenBorderChars) + pushToInlineCharStack c + return c + +-- | Parses the closing character of emphasis +emphasisEnd :: Char -> OrgParser Char +emphasisEnd c = try $ do + guard =<< notAfterForbiddenBorderChar + char c + eof <|> lookAhead (surroundingEmphasisChar >>= \x -> + oneOf (x ++ emphasisPostChars)) + *> return () + updateLastStrPos + popInlineCharStack + return c + +mathStart :: Char -> OrgParser Char +mathStart c = try $ do + char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) + +mathEnd :: Char -> OrgParser Char +mathEnd c = try $ do + res <- noneOf (c:mathForbiddenBorderChars) + char c + eof <|> (lookAhead $ oneOf mathPostChars *> pure ()) + return res + enclosedInlines :: OrgParser a -> OrgParser b @@ -558,16 +695,6 @@ enclosedInlines :: OrgParser a enclosedInlines start end = try $ trimInlines . mconcat <$> enclosed start end inline --- FIXME: This is a hack -inlinesEnclosedBy :: Char - -> OrgParser Inlines -inlinesEnclosedBy c = try $ do - updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } - res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) - (atEnd $ char c) - updateState $ \st -> st { orgInlineCharStack = drop 1 . 
orgInlineCharStack $ st } - return res - enclosedRaw :: OrgParser a -> OrgParser b -> OrgParser String @@ -577,63 +704,76 @@ enclosedRaw start end = try $ spanningTwoLines = try $ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine -rawEnclosedBy :: Char - -> OrgParser String -rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) - --- succeeds only if we're not right after a str (ie. in middle of word) -atStart :: OrgParser a -> OrgParser a -atStart p = do - guard =<< not <$> isRightAfterString - p - --- | succeeds only if we're at the end of a word -atEnd :: OrgParser a -> OrgParser a -atEnd p = try $ do - p <* lookingAtEndOfWord - where lookingAtEndOfWord = - eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars - -isRightAfterString :: OrgParser Bool -isRightAfterString = do +-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume +-- newlines. +many1TillNOrLessNewlines :: Int + -> OrgParser Char + -> OrgParser a + -> OrgParser String +many1TillNOrLessNewlines n p end = try $ + nMoreLines (Just n) mempty >>= oneOrMore + where + nMoreLines Nothing cs = return cs + nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine + nMoreLines k cs = try $ (final k cs <|> rest k cs) + >>= uncurry nMoreLines + final _ cs = (\x -> (Nothing, cs ++ x)) <$> (try finalLine) + rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline) + finalLine = try $ manyTill p end + minus1 k = k - 1 + oneOrMore cs = guard (not $ null cs) *> return cs + +-- Org allows customization of the way it reads emphasis. We use the defaults +-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` +-- for details). 
+ +-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too) +emphasisPreChars :: [Char] +emphasisPreChars = "\t \"'({" + +-- | Chars allowed at after emphasis +emphasisPostChars :: [Char] +emphasisPostChars = "\t\n !\"'),-.:;?\\}" + +-- | Chars not allowed at the (inner) border of emphasis +emphasisForbiddenBorderChars :: [Char] +emphasisForbiddenBorderChars = "\t\n\r \"'," + +-- | The maximum number of newlines within +emphasisAllowedNewlines :: Int +emphasisAllowedNewlines = 1 + +-- LaTeX-style math: see `org-latex-regexps` for details + +-- | Chars allowed after an inline ($...$) math statement +mathPostChars :: [Char] +mathPostChars = "\t\n \"',-.:;?" + +-- | Chars not allowed at the (inner) border of math +mathForbiddenBorderChars :: [Char] +mathForbiddenBorderChars = "\t\n\r ,;.$" + +-- | Maximum number of newlines in an inline math statement +mathAllowedNewlines :: Int +mathAllowedNewlines = 2 + +-- | Whether we are right behind a char allowed before emphasis +afterEmphasisPreChar :: OrgParser Bool +afterEmphasisPreChar = do pos <- getPosition - st <- getState - -- the position `Nothing` isn't after a String, either, hence the double - -- negation - return $ not $ orgLastStrPos st /= Just pos + lastPrePos <- orgStateLastPreCharPos <$> getState + return $ lastPrePos == Nothing || lastPrePos == Just pos -postWordChars :: OrgParser [Char] -postWordChars = do - st <- getState - return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . 
orgInlineCharStack $ st) - --- FIXME: These functions are hacks and should be replaced -endsOnThisOrNextLine :: Char - -> OrgParser () -endsOnThisOrNextLine c = do - inp <- getInput - let doOtherwise = \rest -> endsOnThisLine rest c (const mzero) - endsOnThisLine inp c doOtherwise - -endsOnThisLine :: [Char] - -> Char - -> ([Char] -> OrgParser ()) - -> OrgParser () -endsOnThisLine input c doOnOtherLines = do - postWordChars' <- postWordChars - case break (`elem` c:"\n") input of - (_,'\n':rest) -> doOnOtherLines rest - (_,_:[]) -> return () - (_,_:rest@(n:_)) -> if n `elem` postWordChars' - then return () - else endsOnThisLine rest c doOnOtherLines - _ -> mzero - -isImage :: String -> Bool -isImage filename = - any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - any (\x -> (x++":") `isPrefixOf` filename) protocols || - ':' `notElem` filename - where - imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] - protocols = [ "file", "http", "https" ] +-- | Whether we are right after the end of a string +notAfterString :: OrgParser Bool +notAfterString = do + pos <- getPosition + lastStrPos <- orgStateLastStrPos <$> getState + return $ lastStrPos /= Just pos + +-- | Whether the parser is right after a forbidden border char +notAfterForbiddenBorderChar :: OrgParser Bool +notAfterForbiddenBorderChar = do + pos <- getPosition + lastFBCPos <- orgStateLastForbiddenCharPos <$> getState + return $ lastFBCPos /= Just pos diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 77b9d9327..efd8fe977 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -86,16 +86,37 @@ tests = unlines [ "this+that+ +so+on" , "seven*eight* nine*" , "+not+funny+" + , "this == self" ] =?> para (spcSep [ "this+that+", "+so+on" , "seven*eight*", "nine*" , strikeout "not+funny" + , "this" <> space <> "==" <> space <> "self" ]) + , "Adherence to Org's rules for markup borders" =: + "/t/& a/ / ./r/ (*l*) /e/! /b/." 
=?> + para (spcSep [ emph $ "t/&" <> space <> "a" + , "/" + , "./r/" + , "(" <> (strong "l") <> ")" + , (emph "e") <> "!" + , (emph "b") <> "." + ]) + + , "Inline math must stay within three lines" =: + unlines [ "$a", "b", "c$", "$d", "e", "f", "g$" ] =?> + para ((math "a\nb\nc") <> space <> + spcSep [ "$d", "e", "f", "g$" ]) + , "Markup may not span more than two lines" =: - unlines [ "/this *is", "not*", "emph/" ] =?> + unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?> para (spcSep [ "/this" - , (strong ("is" <> space <> "not")) + , (strong (spcSep + [ "is" + , (strikeout ("totally" <> space <> "nice")) + , "not" + ])) , "emph/" ]) , "Image" =: -- cgit v1.2.3 From 1715d7cee0b9388ac77b8b2a31fcbb00ead80adf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 10 Apr 2014 15:11:03 +0200 Subject: Org reader: Support more inline/display math variants Support all of the following variants as valid ways to define inline or display math inlines: - `\[..\]` (display) - `$$..$$` (display) - `\(..\)` (inline) - `$..$` (inline) This closes #1223. Again. 
--- src/Text/Pandoc/Readers/Org.hs | 28 ++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 8 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 392b17bbc..1d0400d96 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -512,6 +512,7 @@ inline = , underline , code , math + , displayMath , verbatim , subscript , superscript @@ -607,7 +608,15 @@ verbatim :: OrgParser Inlines verbatim = B.rawInline "" <$> verbatimBetween '~' math :: OrgParser Inlines -math = B.math <$> mathStringBetween '$' +math = B.math <$> choice [ math1CharBetween '$' + , mathStringBetween '$' + , rawMathBetween "\\(" "\\)" + ] + +displayMath :: OrgParser Inlines +displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" + , rawMathBetween "$$" "$$" + ] subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) @@ -655,6 +664,21 @@ mathStringBetween c = try $ do final <- mathEnd c return $ body ++ [final] +-- | Parse a single character between @c@ using math rules +math1CharBetween :: Char + -> OrgParser String +math1CharBetween c = try $ do + char c + res <- noneOf $ c:mathForbiddenBorderChars + char c + eof <|> lookAhead (oneOf mathPostChars) *> return () + return [res] + +rawMathBetween :: String + -> String + -> OrgParser String +rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) + -- | Parses the start (opening character) of emphasis emphasisStart :: Char -> OrgParser Char emphasisStart c = try $ do @@ -747,7 +771,7 @@ emphasisAllowedNewlines = 1 -- | Chars allowed after an inline ($...$) math statement mathPostChars :: [Char] -mathPostChars = "\t\n \"',-.:;?" +mathPostChars = "\t\n \"'),-.:;?" 
-- | Chars not allowed at the (inner) border of math mathForbiddenBorderChars :: [Char] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index efd8fe977..9e9482e45 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -54,14 +54,26 @@ tests = "=Robot.rock()=" =?> para (code "Robot.rock()") - , "Math" =: - "$E=mc^2$" =?> - para (math "E=mc^2") - , "Verbatim" =: "~word for word~" =?> para (rawInline "" "word for word") + , "Math $..$" =: + "$E=mc^2$" =?> + para (math "E=mc^2") + + , "Math $$..$$" =: + "$$E=mc^2$$" =?> + para (displayMath "E=mc^2") + + , "Math \\[..\\]" =: + "\\[E=ℎν\\]" =?> + para (displayMath "E=ℎν") + + , "Math \$..\$" =: + "\$σ_x σ_p ≥ \\frac{ℏ}{2}\$" =?> + para (math "σ_x σ_p ≥ \\frac{ℏ}{2}") + , "Symbol" =: "A * symbol" =?> para (str "A" <> space <> str "*" <> space <> "symbol") @@ -86,14 +98,19 @@ tests = unlines [ "this+that+ +so+on" , "seven*eight* nine*" , "+not+funny+" - , "this == self" ] =?> para (spcSep [ "this+that+", "+so+on" , "seven*eight*", "nine*" , strikeout "not+funny" - , "this" <> space <> "==" <> space <> "self" ]) + , "No empty markup" =: + -- FIXME: __ is erroneously parsed as subscript "_" + -- "// ** __ ++ == ~~ $$" =?> + -- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ]) + "// ** ++ == ~~ $$" =?> + para (spcSep [ "//", "**", "++", "==", "~~", "$$" ]) + , "Adherence to Org's rules for markup borders" =: "/t/& a/ / ./r/ (*l*) /e/! /b/." =?> para (spcSep [ emph $ "t/&" <> space <> "a" @@ -109,6 +126,13 @@ tests = para ((math "a\nb\nc") <> space <> spcSep [ "$d", "e", "f", "g$" ]) + , "Single-character math" =: + "$a$ $b$! $c$?" =?> + para (spcSep [ math "a" + , "$b$!" + , (math "c") <> "?" 
+ ]) + , "Markup may not span more than two lines" =: unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?> para (spcSep [ "/this" -- cgit v1.2.3 From 6f19be7d40f583ee4e10fa2b0f20bd4f1fa80c43 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 11 Apr 2014 11:05:42 +0200 Subject: Org reader: Fix parsing of sub-/superscript expressions This fixes the org-reader's handling of sub- and superscript expressions. Simple expressions (like `2^+10`), expressions in parentheses (`a_(n+1)`) and nested sexp (like `a_(nested()parens)`) are now read correctly. --- src/Text/Pandoc/Readers/Org.hs | 47 +++++++++++++++++++++++++++++++++--------- tests/Tests/Readers/Org.hs | 39 +++++++++++++++++++++++++++-------- 2 files changed, 67 insertions(+), 19 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 29611e8cc..ceac69367 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -622,17 +622,11 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" , rawMathBetween "$$" "$$" ] -subscript :: OrgParser Inlines -subscript = B.subscript <$> try (char '_' *> maybeGroupedByBraces) +subscript :: OrgParser Inlines +subscript = B.subscript <$> try (char '_' *> subOrSuperExpr) -superscript :: OrgParser Inlines -superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces) - -maybeGroupedByBraces :: OrgParser Inlines -maybeGroupedByBraces = try $ - choice [ try $ enclosedInlines (char '{') (char '}') - , B.str . (:"") <$> anyChar - ] +superscript :: OrgParser Inlines +superscript = B.superscript <$> try (char '^' *> subOrSuperExpr) symbol :: OrgParser Inlines symbol = B.str . 
(: "") <$> (oneOf specialChars >>= updatePositions) @@ -805,3 +799,36 @@ notAfterForbiddenBorderChar = do pos <- getPosition lastFBCPos <- orgStateLastForbiddenCharPos <$> getState return $ lastFBCPos /= Just pos + +-- | Read a sub- or superscript expression +subOrSuperExpr :: OrgParser Inlines +subOrSuperExpr = try $ do + choice [ balancedSexp '{' '}' + , balancedSexp '(' ')' >>= return . enclosing ('(', ')') + , simpleSubOrSuperString + ] >>= parseFromString (mconcat <$> many inline) + +-- | Read a balanced sexp +balancedSexp :: Char + -> Char + -> OrgParser String +balancedSexp l r = try $ do + char l + res <- concat <$> many ( many1 (noneOf ([l, r] ++ "\n\r")) + <|> try (string [l, r]) + <|> enclosing (l, r) <$> balancedSexp l r + ) + char r + return res + +simpleSubOrSuperString :: OrgParser String +simpleSubOrSuperString = try $ + choice [ string "*" + , mappend <$> option [] ((:[]) <$> oneOf "+-") + <*> many1 alphaNum + ] + +enclosing :: (a, a) + -> [a] + -> [a] +enclosing (left, right) s = left : s ++ [right] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 9e9482e45..49130f0ab 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -78,15 +78,15 @@ tests = "A * symbol" =?> para (str "A" <> space <> str "*" <> space <> "symbol") - , "Superscript single char" =: - "2^n" =?> - para (str "2" <> superscript "n") + , "Superscript simple expression" =: + "2^-λ" =?> + para (str "2" <> superscript "-λ") , "Superscript multi char" =: "2^{n-1}" =?> para (str "2" <> superscript "n-1") - , "Subscript single char" =: + , "Subscript simple expression" =: "a_n" =?> para (str "a" <> subscript "n") @@ -105,11 +105,8 @@ tests = ]) , "No empty markup" =: - -- FIXME: __ is erroneously parsed as subscript "_" - -- "// ** __ ++ == ~~ $$" =?> - -- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ]) - "// ** ++ == ~~ $$" =?> - para (spcSep [ "//", "**", "++", "==", "~~", "$$" ]) + "// ** __ ++ == ~~ $$" =?> + para (spcSep [ 
"//", "**", "__", "++", "==", "~~", "$$" ]) , "Adherence to Org's rules for markup borders" =: "/t/& a/ / ./r/ (*l*) /e/! /b/." =?> @@ -143,6 +140,30 @@ tests = ])) , "emph/" ]) + , "Sub- and superscript expressions" =: + unlines [ "a_(a(b)(c)d)" + , "e^(f(g)h)" + , "i_(jk)l)" + , "m^()n" + , "o_{p{q{}r}}" + , "s^{t{u}v}" + , "w_{xy}z}" + , "1^{}2" + , "3_{{}}" + , "4^(a(*b(c*)d))" + ] =?> + para (spcSep [ "a" <> subscript "(a(b)(c)d)" + , "e" <> superscript "(f(g)h)" + , "i" <> (subscript "(jk)") <> "l)" + , "m" <> (superscript "()") <> "n" + , "o" <> subscript "p{q{}r}" + , "s" <> superscript "t{u}v" + , "w" <> (subscript "xy") <> "z}" + , "1" <> (superscript "") <> "2" + , "3" <> subscript "{}" + , "4" <> superscript ("(a(" <> strong "b(c" <> ")d))") + ]) + , "Image" =: "[[./sunset.jpg]]" =?> (para $ image "./sunset.jpg" "" "") -- cgit v1.2.3 From ae4280fba528efe68c5955cb3ca0779e6910f43b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 12 Apr 2014 00:17:46 +0200 Subject: Org reader: Add support for figures Support for figures (images with name and caption) is added. 
--- src/Text/Pandoc/Readers/Org.hs | 57 ++++++++++++++++++++++++++++++++++++------ tests/Tests/Readers/Org.hs | 21 ++++++++++++++++ 2 files changed, 70 insertions(+), 8 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ceac69367..8f0ce61e0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,6 +37,7 @@ import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateL import Text.Pandoc.Shared (compactify') import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Arrow ((***)) import Control.Monad (guard, when) import Data.Char (toLower) import Data.Default @@ -158,6 +159,7 @@ block = choice [ mempty <$ blanklines , orgBlock , example , drawer + , figure , specialLine , header , hline @@ -252,6 +254,43 @@ drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline +-- +-- Figures +-- + +-- Figures (Image on a line by itself, preceded by name and/or caption) +figure :: OrgParser Blocks +figure = try $ do + (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty) + <$> nameAndOrCaption + src <- skipSpaces *> selfTarget <* skipSpaces <* newline + guard (isImageFilename src) + return . 
B.para $ B.image src tit cap + where withFigPrefix cs = if "fig:" `isPrefixOf` cs + then cs + else "fig:" ++ cs + +nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines) +nameAndOrCaption = try $ nameFirst <|> captionFirst + where + nameFirst = try $ do + n <- name + c <- optionMaybe caption + return (Just n, c) + captionFirst = try $ do + c <- caption + n <- optionMaybe name + return (n, Just c) + +caption :: OrgParser Inlines +caption = try $ annotation "CAPTION" *> inlinesTillNewline + +name :: OrgParser String +name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline + +annotation :: String -> OrgParser String +annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':' + -- Comments, Options and Metadata specialLine :: OrgParser Blocks specialLine = try $ metaLine <|> commentLine @@ -277,7 +316,7 @@ declarationLine = try $ do return mempty metaValue :: OrgParser MetaValue -metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine +metaValue = MetaInlines . B.toList <$> inlinesTillNewline metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") @@ -288,7 +327,7 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") header :: OrgParser Blocks header = try $ B.header <$> headerStart - <*> (trimInlines <$> restOfLine) + <*> inlinesTillNewline headerStart :: OrgParser Int headerStart = try $ @@ -424,13 +463,10 @@ setAligns aligns t = t{ orgTableAlignments = aligns } -- Paragraphs or Plain text paraOrPlain :: OrgParser Blocks paraOrPlain = try $ - trimInlines . mconcat - <$> many1 inline - <**> option B.plain - (try $ newline *> pure B.para) + parseInlines <**> option B.plain (try $ newline *> pure B.para) -restOfLine :: OrgParser Inlines -restOfLine = mconcat <$> manyTill inline newline +inlinesTillNewline :: OrgParser Inlines +inlinesTillNewline = trimInlines . 
mconcat <$> manyTill inline newline -- @@ -523,6 +559,8 @@ inline = ] <* (guard =<< newlinesCountWithinLimits) "inline" +parseInlines :: OrgParser Inlines +parseInlines = trimInlines . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] @@ -580,6 +618,9 @@ selflinkOrImage = try $ do then B.image src "" "" else B.link src "" (B.str src) +selfTarget :: OrgParser String +selfTarget = try $ char '[' *> linkTarget <* char ']' + linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 49130f0ab..99dadc0ac 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -377,6 +377,27 @@ tests = code' = "main = putStrLn greeting\n" ++ " where greeting = \"moin\"\n" in codeBlockWith attr' code' + + , "Figure" =: + unlines [ "#+caption: A very courageous man." + , "#+name: goodguy" + , "[[edward.jpg]]" + ] =?> + para (image "edward.jpg" "fig:goodguy" "A very courageous man.") + + , "Unnamed figure" =: + unlines [ "#+caption: A great whistleblower." + , "[[snowden.png]]" + ] =?> + para (image "snowden.png" "" "A great whistleblower.") + + , "Figure with `fig:` prefix in name" =: + unlines [ "#+caption: Used as a metapher in evolutionary biology." + , "#+name: fig:redqueen" + , "[[the-red-queen.jpg]]" + ] =?> + para (image "the-red-queen.jpg" "fig:redqueen" + "Used as a metapher in evolutionary biology.") ] , testGroup "Lists" $ -- cgit v1.2.3 From 36066699c31ca50566ca2492a5c112ecbe690a63 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 12 Apr 2014 00:22:49 +0200 Subject: Org writer: Fix output for linebreaks Hard linebreaks in Org mode are represented by the string "\\" as the last characters in a line. Adds this feature to the Org-mode writer. 
--- src/Text/Pandoc/Writers/Org.hs | 2 +- tests/writer.org | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index d318c5f6a..58a5729e7 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -271,7 +271,7 @@ inlineToOrg (Math t str) = do else "$$" <> text str <> "$$" inlineToOrg (RawInline f str) | f == "tex" || f == "latex" = return $ text str inlineToOrg (RawInline _ _) = return empty -inlineToOrg (LineBreak) = return cr -- there's no line break in Org +inlineToOrg (LineBreak) = return (text "\\\\" <> cr) inlineToOrg Space = return space inlineToOrg (Link txt (src, _)) = do case txt of diff --git a/tests/writer.org b/tests/writer.org index 85016f352..524d49305 100644 --- a/tests/writer.org +++ b/tests/writer.org @@ -42,7 +42,7 @@ item. Here's one with a bullet. * criminey. -There should be a hard line break +There should be a hard line break\\ here. -------------- -- cgit v1.2.3 From 82d4160bdcc149df020d1f95f4a7d893a9ecb42a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 12 Apr 2014 11:07:38 +0200 Subject: Org reader: Read linebreaks Linebreaks are marked by the string `\\` at the end of a line. 
--- src/Text/Pandoc/Readers/Org.hs | 4 ++++ tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ceac69367..19dd03c6b 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -506,6 +506,7 @@ anyLineNewline = (++ "\n") <$> anyLine inline :: OrgParser Inlines inline = choice [ whitespace + , linebreak , link , str , endline @@ -535,6 +536,9 @@ whitespace = B.space <$ skipMany1 spaceChar <* updateLastForbiddenCharPos "whitespace" +linebreak :: OrgParser Inlines +linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline + str :: OrgParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 49130f0ab..567cc4c41 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -94,6 +94,10 @@ tests = "a_{n+1}" =?> para (str "a" <> subscript "n+1") + , "Linebreak" =: + "line \\\\ \nbreak" =?> + para ("line" <> linebreak <> "break") + , "Markup-chars not occuring on word break are symbols" =: unlines [ "this+that+ +so+on" , "seven*eight* nine*" -- cgit v1.2.3 From 464d7a8e49b81a0c883b8c4a9bfd5d26cdfb38ba Mon Sep 17 00:00:00 2001 From: Neil Mayhew Date: Thu, 6 Mar 2014 07:31:49 -0700 Subject: Improve handling of hard line breaks in Docbook writer * Use a <literallayout> for the entire paragraph, not just for the newline character * Don't let LineBreaks inside footnotes influence the enclosing paragraph --- src/Text/Pandoc/Writers/Docbook.hs | 19 ++++++++++++++++--- tests/writer.docbook | 6 ++---- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 02d875be3..2d6ce3020 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -32,12 +32,14 @@ module 
Text.Pandoc.Writers.Docbook ( writeDocbook) where import Text.Pandoc.Definition import Text.Pandoc.XML import Text.Pandoc.Shared +import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Readers.TeXMath import Data.List ( isPrefixOf, intercalate, isSuffixOf ) import Data.Char ( toLower ) +import Data.Monoid ( Any(..) ) import Text.Pandoc.Highlighting ( languages, languagesByExtension ) import Text.Pandoc.Pretty import qualified Text.Pandoc.Builder as B @@ -165,8 +167,9 @@ blockToDocbook opts (Para [Image txt (src,'f':'i':'g':':':_)]) = (inTagsIndented "imageobject" (selfClosingTag "imagedata" [("fileref",src)])) $$ inTagsSimple "textobject" (inTagsSimple "phrase" alt)) -blockToDocbook opts (Para lst) = - inTagsIndented "para" $ inlinesToDocbook opts lst +blockToDocbook opts (Para lst) + | hasLineBreaks lst = flush $ nowrap $ inTagsSimple "literallayout" $ inlinesToDocbook opts lst + | otherwise = inTagsIndented "para" $ inlinesToDocbook opts lst blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" $ blocksToDocbook opts blocks blockToDocbook _ (CodeBlock (_,classes,_) str) = @@ -226,6 +229,16 @@ blockToDocbook opts (Table caption aligns widths headers rows) = (inTags True "tgroup" [("cols", show (length headers))] $ coltags $$ head' $$ body') +hasLineBreaks :: [Inline] -> Bool +hasLineBreaks = getAny . query isLineBreak . 
walk removeNote + where + removeNote :: Inline -> Inline + removeNote (Note _) = Str "" + removeNote x = x + isLineBreak :: Inline -> Any + isLineBreak LineBreak = Any True + isLineBreak _ = Any False + alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of AlignLeft -> "left" @@ -293,7 +306,7 @@ inlineToDocbook opts (Math t str) fixNS = everywhere (mkT fixNS') inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x | otherwise = empty -inlineToDocbook _ LineBreak = flush $ inTagsSimple "literallayout" (text "\n") +inlineToDocbook _ LineBreak = text "\n" inlineToDocbook _ Space = space inlineToDocbook opts (Link txt (src, _)) = if isPrefixOf "mailto:" src diff --git a/tests/writer.docbook b/tests/writer.docbook index d4b3c7efd..9cb9a5359 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -68,10 +68,8 @@ Here’s one with a bullet. * criminey. - - There should be a hard line break -here. - +There should be a hard line break +here. Block Quotes -- cgit v1.2.3 From f22ce4ff283ac48a50d999ee5fad56ac1e4d1dce Mon Sep 17 00:00:00 2001 From: Neil Mayhew Date: Fri, 11 Apr 2014 23:19:51 -0600 Subject: Add some unit tests for Writers.Docbook These are primarily aimed at testing the new treatment of line breaks, but hopefully other tests can be added more easily now as features and changes are implemented in the writer. Adapted from Tests.Writers.HTML.tests. 
--- tests/Tests/Writers/Docbook.hs | 52 ++++++++++++++++++++++++++++++++++++++++++ tests/test-pandoc.hs | 2 ++ 2 files changed, 54 insertions(+) create mode 100644 tests/Tests/Writers/Docbook.hs (limited to 'tests') diff --git a/tests/Tests/Writers/Docbook.hs b/tests/Tests/Writers/Docbook.hs new file mode 100644 index 000000000..e815b4f5a --- /dev/null +++ b/tests/Tests/Writers/Docbook.hs @@ -0,0 +1,52 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Writers.Docbook (tests) where + +import Test.Framework +import Text.Pandoc.Builder +import Text.Pandoc +import Tests.Helpers +import Tests.Arbitrary() + +docbook :: (ToString a, ToPandoc a) => a -> String +docbook = writeDocbook def{ writerWrapText = False } . toPandoc + +{- + "my test" =: X =?> Y + +is shorthand for + + test docbook "my test" $ X =?> Y + +which is in turn shorthand for + + test docbook "my test" (X,Y) +-} + +infix 4 =: +(=:) :: (ToString a, ToPandoc a) + => String -> (a, String) -> Test +(=:) = test docbook + +lineblock :: Blocks +lineblock = para ("some text" <> linebreak <> + "and more lines" <> linebreak <> + "and again") +lineblock_out :: String +lineblock_out = "some text\n" ++ + "and more lines\n" ++ + "and again" + +tests :: [Test] +tests = [ testGroup "line blocks" + [ "none" =: para "This is a test" + =?> "\n This is a test\n" + , "basic" =: lineblock + =?> lineblock_out + , "blockquote" =: blockQuote lineblock + =?> ("\n" ++ lineblock_out ++ "\n") + , "footnote" =: para ("This is a test" <> note lineblock <> " of footnotes") + =?> ("\n This is a test\n" ++ + lineblock_out ++ + "\n of footnotes\n") + ] + ] diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 74f8e5044..a7d4fca01 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -12,6 +12,7 @@ import qualified Tests.Readers.RST import qualified Tests.Writers.ConTeXt import qualified Tests.Writers.LaTeX import qualified Tests.Writers.HTML +import qualified Tests.Writers.Docbook import qualified Tests.Writers.Native 
import qualified Tests.Writers.Markdown import qualified Tests.Shared @@ -27,6 +28,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "ConTeXt" Tests.Writers.ConTeXt.tests , testGroup "LaTeX" Tests.Writers.LaTeX.tests , testGroup "HTML" Tests.Writers.HTML.tests + , testGroup "Docbook" Tests.Writers.Docbook.tests , testGroup "Markdown" Tests.Writers.Markdown.tests ] , testGroup "Readers" -- cgit v1.2.3 From 0672f58a445c289c58e42cffbbf32a273e801e39 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 6 Apr 2014 18:43:49 +0200 Subject: Org reader: Support footnotes --- src/Text/Pandoc/Readers/Org.hs | 66 ++++++++++++++++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 4 +++ 2 files changed, 68 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index bdff4869c..17f8a1c9e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -44,7 +44,7 @@ import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Arrow ((***)) import Control.Monad (foldM, guard, liftM, liftM2, when) -import Control.Monad.Reader (Reader, runReader) +import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default import Data.List (isPrefixOf, isSuffixOf) @@ -59,7 +59,7 @@ readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") type OrgParser = Parser [Char] OrgParserState -parseOrg:: OrgParser Pandoc +parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks st <- getState @@ -70,6 +70,9 @@ parseOrg = do -- Parser State for Org -- +type OrgNoteRecord = (String, F Blocks) +type OrgNoteTable = [OrgNoteRecord] + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions @@ -80,6 +83,7 @@ data OrgParserState = OrgParserState , orgStateLastStrPos :: Maybe SourcePos , orgStateMeta :: Meta , orgStateMeta' :: F Meta + , 
orgStateNotes' :: OrgNoteTable } instance HasReaderOptions OrgParserState where @@ -104,6 +108,7 @@ defaultOrgParserState = OrgParserState , orgStateLastStrPos = Nothing , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta + , orgStateNotes' = [] } updateLastStrPos :: OrgParser () @@ -146,6 +151,11 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } +addToNotesTable :: OrgNoteRecord -> OrgParser () +addToNotesTable note = do + oldnotes <- orgStateNotes' <$> getState + updateState $ \s -> s{ orgStateNotes' = note:oldnotes } + -- -- Adaptions and specializations of parsing utilities @@ -157,6 +167,12 @@ newtype F a = F { unF :: Reader OrgParserState a runF :: F a -> OrgParserState -> a runF = runReader . unF +askF :: F OrgParserState +askF = F ask + +asksF :: (OrgParserState -> a) -> F a +asksF f = F $ asks f + instance Monoid a => Monoid (F a) where mempty = return mempty mappend = liftM2 mappend @@ -191,6 +207,7 @@ block = choice [ mempty <$ blanklines , return <$> hline , list , table + , noteBlock , paraOrPlain ] "block" @@ -500,6 +517,16 @@ setAligns :: [Alignment] -> F OrgTable setAligns aligns t = return $ t{ orgTableAlignments = aligns } +-- +-- Footnote defintions +-- +noteBlock :: OrgParser (F Blocks) +noteBlock = try $ do + ref <- noteMarker + content <- skipSpaces *> paraOrPlain + addToNotesTable (ref, content) + return mempty + -- Paragraphs or Plain text paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ @@ -587,6 +614,7 @@ inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak + , footnote , linkOrImage , str , endline @@ -632,6 +660,7 @@ endline = try $ do notFollowedBy blankline notFollowedBy' exampleLine notFollowedBy' hline + notFollowedBy' noteMarker notFollowedBy' tableStart notFollowedBy' drawerStart notFollowedBy' headerStart @@ -644,6 +673,39 @@ endline = try $ do updateLastPreCharPos return . 
return $ B.space +footnote :: OrgParser (F Inlines) +footnote = try $ inlineNote <|> referencedNote + +inlineNote :: OrgParser (F Inlines) +inlineNote = try $ do + string "[fn:" + ref <- many alphaNum + char ':' + note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') + when (not $ null ref) $ + addToNotesTable ("fn:" ++ ref, note) + return $ B.note <$> note + +referencedNote :: OrgParser (F Inlines) +referencedNote = try $ do + ref <- noteMarker + return $ do + notes <- asksF orgStateNotes' + case lookup ref notes of + Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Just contents -> do + st <- askF + let contents' = runF contents st{ orgStateNotes' = [] } + return $ B.note contents' + +noteMarker :: OrgParser String +noteMarker = try $ do + char '[' + choice [ many1Till digit (char ']') + , (++) <$> string "fn:" + <*> many1Till (noneOf "\n\r\t ") (char ']') + ] + linkOrImage :: OrgParser (F Inlines) linkOrImage = explicitOrImageLink <|> selflinkOrImage "link or image" diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index f39bd7992..7f9c5f1d5 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -98,6 +98,10 @@ tests = "line \\\\ \nbreak" =?> para ("line" <> linebreak <> "break") + , "Inline note" =: + "[fn::Schreib mir eine E-Mail]" =?> + para (note $ para "Schreib mir eine E-Mail") + , "Markup-chars not occuring on word break are symbols" =: unlines [ "this+that+ +so+on" , "seven*eight* nine*" -- cgit v1.2.3 From 6d6724cf2c6ae6bcc0df312c476e45644c972a85 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 17 Apr 2014 18:09:27 +0200 Subject: Org reader: Support more types of '#+BEGIN_' blocks Support for standard org-blocks is improved. The parser now handles "HTML", "LATEX", "ASCII", "EXAMPLE", "QUOTE" and "VERSE" blocks in a sensible fashion. 
--- src/Text/Pandoc/Readers/Org.hs | 41 +++++++++++++++--- tests/Tests/Readers/Org.hs | 97 ++++++++++++++++++++++++++++++++---------- 2 files changed, 108 insertions(+), 30 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 17f8a1c9e..88e81f5fc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,6 +37,7 @@ import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker + , parseFromString , updateLastStrPos ) import Text.Pandoc.Shared (compactify') @@ -47,7 +48,7 @@ import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default -import Data.List (isPrefixOf, isSuffixOf) +import Data.List (intersperse, isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (Monoid, mconcat, mempty, mappend) @@ -156,6 +157,16 @@ addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState updateState $ \s -> s{ orgStateNotes' = note:oldnotes } +-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts +-- of the state saved and restored. 
+parseFromString :: OrgParser a -> String -> OrgParser a +parseFromString parser str' = do + oldLastPreCharPos <- orgStateLastPreCharPos <$> getState + updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } + result <- P.parseFromString parser str' + updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } + return result + -- -- Adaptions and specializations of parsing utilities @@ -218,13 +229,27 @@ block = choice [ mempty <$ blanklines orgBlock :: OrgParser (F Blocks) orgBlock = try $ do (indent, blockType, args) <- blockHeader - blockStr <- rawBlockContent indent blockType + content <- rawBlockContent indent blockType + contentBlocks <- parseFromString parseBlocks (content ++ "\n") let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] case blockType of "comment" -> return mempty - "src" -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr - _ -> fmap (B.divWith ("", [blockType], [])) - <$> parseFromString parseBlocks blockStr + "src" -> returnF $ B.codeBlockWith ("", classArgs, []) content + "html" -> returnF $ B.rawBlock "html" content + "latex" -> returnF $ B.rawBlock "latex" content + "ascii" -> returnF $ B.rawBlock "ascii" content + "example" -> returnF $ exampleCode content + "quote" -> return $ B.blockQuote <$> contentBlocks + "verse" -> parseVerse content + _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks + where + returnF :: a -> OrgParser (F a) + returnF = return . return + + parseVerse :: String -> OrgParser (F Blocks) + parseVerse cs = + fmap B.para . mconcat . intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (lines cs) blockHeader :: OrgParser (Int, String, [String]) blockHeader = (,,) <$> blockIndent @@ -270,8 +295,10 @@ commaEscaped cs = cs example :: OrgParser (F Blocks) example = try $ do - body <- unlines <$> many1 exampleLine - return . return $ B.codeBlockWith ("", ["example"], []) body + return . return . 
exampleCode =<< unlines <$> many1 exampleLine + +exampleCode :: String -> Blocks +exampleCode = B.codeBlockWith ("", ["example"], []) exampleLine :: OrgParser String exampleLine = try $ string ": " *> anyLine diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 7f9c5f1d5..7d5bfe650 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -363,29 +363,6 @@ tests = , "#+END_COMMENT"] =?> (mempty::Blocks) - , "Source Block in Text" =: - unlines [ "Low German greeting" - , " #+BEGIN_SRC haskell" - , " main = putStrLn greeting" - , " where greeting = \"moin\"" - , " #+END_SRC" ] =?> - let attr' = ("", ["haskell"], []) - code' = "main = putStrLn greeting\n" ++ - " where greeting = \"moin\"\n" - in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] - , codeBlockWith attr' code' - ] - - , "Source Block" =: - unlines [ " #+BEGIN_SRC haskell" - , " main = putStrLn greeting" - , " where greeting = \"moin\"" - , " #+END_SRC" ] =?> - let attr' = ("", ["haskell"], []) - code' = "main = putStrLn greeting\n" ++ - " where greeting = \"moin\"\n" - in codeBlockWith attr' code' - , "Figure" =: unlines [ "#+caption: A very courageous man." 
, "#+name: goodguy" @@ -661,4 +638,78 @@ tests = , [ plain "2" , plain mempty , plain mempty ] ] ] + + , testGroup "Blocks" + [ "Source block" =: + unlines [ " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in codeBlockWith attr' code' + + , "Source block between paragraphs" =: + unlines [ "Low German greeting" + , " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"Moin!\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"Moin!\"\n" + in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] + , codeBlockWith attr' code' + ] + + , "Example block" =: + unlines [ "#+begin_example" + , "A chosen representation of" + , "a rule." + , "#+eND_exAMPle" + ] =?> + codeBlockWith ("", ["example"], []) + "A chosen representation of\na rule.\n" + + , "HTML block" =: + unlines [ "#+BEGIN_HTML" + , "HTML5 is pretty nice." + , "#+END_HTML" + ] =?> + rawBlock "html" "HTML5 is pretty nice.\n" + + , "Quote block" =: + unlines [ "#+BEGIN_QUOTE" + , "/Niemand/ hat die Absicht, eine Mauer zu errichten!" + , "#+END_QUOTE" + ] =?> + blockQuote (para (spcSep [ emph "Niemand", "hat", "die", "Absicht," + , "eine", "Mauer", "zu", "errichten!" + ])) + + , "Verse block" =: + unlines [ "The first lines of Goethe's /Faust/:" + , "#+begin_verse" + , "Habe nun, ach! Philosophie," + , "Juristerei und Medizin," + , "Und leider auch Theologie!" + , "Durchaus studiert, mit heißem Bemühn." + , "#+end_verse" + ] =?> + mconcat + [ para $ spcSep [ "The", "first", "lines", "of" + , "Goethe's", emph "Faust" <> ":"] + , para $ mconcat + [ spcSep [ "Habe", "nun,", "ach!", "Philosophie," ] + , linebreak + , spcSep [ "Juristerei", "und", "Medizin," ] + , linebreak + , spcSep [ "Und", "leider", "auch", "Theologie!" 
] + , linebreak + , spcSep [ "Durchaus", "studiert,", "mit", "heißem", "Bemühn." ] + ] + ] + + ] ] -- cgit v1.2.3 From f19d7233d8d3e47912b760fc62a253e5baf8275a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 18 Apr 2014 08:33:25 +0200 Subject: Org reader: Fix parsing of loose lists Loose lists (i.e. lists with blankline separated items), were parsed as multiple lists, each containing a single item. This patch fixes this issue. --- src/Text/Pandoc/Readers/Org.hs | 11 +++++++---- tests/Tests/Readers/Org.hs | 21 ++++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 88e81f5fc..1fa8d4d5e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -605,9 +605,10 @@ definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") first <- anyLineNewline + blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inline term - contents' <- parseFromString parseBlocks $ first ++ cont + contents' <- parseFromString parseBlocks $ first ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' @@ -617,16 +618,18 @@ listItem :: OrgParser Int listItem start = try $ do markerLength <- try start firstLine <- anyLineNewline + blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) - parseFromString parseBlocks $ firstLine ++ rest + parseFromString parseBlocks $ firstLine ++ blank ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. 
listContinuation :: Int -> OrgParser String listContinuation markerLength = try $ - mappend <$> many blankline - <*> (concat <$> many1 listLine) + notFollowedBy' blankline + *> (mappend <$> (concat <$> many1 listLine) + <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline anyLineNewline :: OrgParser String diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 7d5bfe650..572fc501f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -518,13 +518,24 @@ tests = , ("TTL", [ plain $ "transistor-transistor" <> space <> "logic" ]) , ("PSK", [ mconcat - [ para $ "phase-shift" <> space <> "keying" - , plain $ spcSep [ "a", "digital" - , "modulation", "scheme" ] + [ para $ "phase-shift" <> space <> "keying" + , para $ spcSep [ "a", "digital" + , "modulation", "scheme" ] ] - ] - ) + ]) ] + + , "Loose bullet list" =: + unlines [ "- apple" + , "" + , "- orange" + , "" + , "- peach" + ] =?> + bulletList [ para "apple" + , para "orange" + , para "peach" + ] ] , testGroup "Tables" -- cgit v1.2.3 From 09441b65a83f372410394a88af7808f494c3aa57 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 18 Apr 2014 10:15:58 +0200 Subject: Org reader: Add support for plain LaTeX fragments This adds support for LaTeX fragments like the following: ``` \begin{equation} \int fg \mathrm{d}x \end{equation} ``` --- src/Text/Pandoc/Readers/Org.hs | 41 ++++++++++++++++++++++++++++++++++++++++- tests/Tests/Readers/Org.hs | 20 +++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 1fa8d4d5e..66cfe720e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -218,6 +218,7 @@ block = choice [ mempty <$ blanklines , return <$> hline , list , table + , latexFragment , noteBlock , paraOrPlain ] "block" @@ -544,6 +545,41 @@ setAligns :: [Alignment] -> F OrgTable setAligns aligns t = 
return $ t{ orgTableAlignments = aligns } + +-- +-- LaTeX fragments +-- +latexFragment :: OrgParser (F Blocks) +latexFragment = try $ do + envName <- latexEnvStart + content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) + return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) + where + c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" + , c + , "\\end{", e, "}\n" + ] + +latexEnvStart :: OrgParser String +latexEnvStart = try $ do + skipSpaces *> string "\\begin{" + *> latexEnvName + <* string "}" + <* blankline + +latexEnd :: String -> OrgParser () +latexEnd envName = try $ + () <$ skipSpaces + <* string ("\\end{" ++ envName ++ "}") + <* blankline + +-- | Parses a LaTeX environment name. +latexEnvName :: OrgParser String +latexEnvName = try $ do + mappend <$> many1 alphaNum + <*> option "" (string "*") + + -- -- Footnote defintions -- @@ -683,7 +719,9 @@ str :: OrgParser (F Inlines) str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos --- an endline character that can be treated as a space, not a structural break +-- | An endline character that can be treated as a space, not a structural +-- break. This should reflect the values of the Emacs variable +-- @org-element-pagaraph-separate@. 
endline :: OrgParser (F Inlines) endline = try $ do newline @@ -695,6 +733,7 @@ endline = try $ do notFollowedBy' drawerStart notFollowedBy' headerStart notFollowedBy' metaLineStart + notFollowedBy' latexEnvStart notFollowedBy' commentLineStart notFollowedBy' bulletListStart notFollowedBy' orderedListStart diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 572fc501f..1ac2c1fd8 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -650,7 +650,7 @@ tests = ] ] - , testGroup "Blocks" + , testGroup "Blocks and fragments" [ "Source block" =: unlines [ " #+BEGIN_SRC haskell" , " main = putStrLn greeting" @@ -722,5 +722,23 @@ tests = ] ] + , "LaTeX fragment" =: + unlines [ "\\begin{equation}" + , "X_i = \\begin{cases}" + , " G_{\\alpha(i)} & \\text{if }\\alpha(i-1) = \\alpha(i)\\\\" + , " C_{\\alpha(i)} & \\text{otherwise}" + , " \\end{cases}" + , "\\end{equation}" + ] =?> + rawBlock "latex" + (unlines [ "\\begin{equation}" + , "X_i = \\begin{cases}" + , " G_{\\alpha(i)} & \\text{if }\\alpha(i-1) =" ++ + " \\alpha(i)\\\\" + , " C_{\\alpha(i)} & \\text{otherwise}" + , " \\end{cases}" + , "\\end{equation}" + ]) + ] ] -- cgit v1.2.3 From 6ded3d41d94c1e90d1d30a1f99ddad62e62d9ce6 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 18 Apr 2014 20:47:50 +0200 Subject: Org reader: Apply captions to code blocks and tables The `Table` blocktype already takes the caption as an argument, while code blocks are wrapped in a `Div` block together with a labelling `Span`. 
--- src/Text/Pandoc/Readers/Org.hs | 134 +++++++++++++++++++++++++++++------------ tests/Tests/Readers/Org.hs | 31 +++++++++- 2 files changed, 124 insertions(+), 41 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 66cfe720e..025158060 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,13 +43,13 @@ import Text.Pandoc.Shared (compactify') import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) -import Control.Arrow ((***)) import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) -import Data.Maybe (listToMaybe, fromMaybe) +import qualified Data.Map as M +import Data.Maybe (listToMaybe, fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) -- | Parse org-mode string and return a Pandoc document. 
@@ -74,9 +74,12 @@ parseOrg = do type OrgNoteRecord = (String, F Blocks) type OrgNoteTable = [OrgNoteRecord] +type OrgBlockAttributes = M.Map String String + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions + , orgStateBlockAttributes :: OrgBlockAttributes , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int , orgStateLastForbiddenCharPos :: Maybe SourcePos @@ -102,6 +105,7 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateOptions = def + , orgStateBlockAttributes = M.empty , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing , orgStateLastForbiddenCharPos = Nothing @@ -112,6 +116,19 @@ defaultOrgParserState = OrgParserState , orgStateNotes' = [] } +addBlockAttribute :: String -> String -> OrgParser () +addBlockAttribute key val = updateState $ \s -> + let attrs = orgStateBlockAttributes s + in s{ orgStateBlockAttributes = M.insert key val attrs } + +lookupBlockAttribute :: String -> OrgParser (Maybe String) +lookupBlockAttribute key = + M.lookup key . orgStateBlockAttributes <$> getState + +resetBlockAttributes :: OrgParser () +resetBlockAttributes = updateState $ \s -> + s{ orgStateBlockAttributes = orgStateBlockAttributes def } + updateLastStrPos :: OrgParser () updateLastStrPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastStrPos = Just p } @@ -125,19 +142,19 @@ updateLastPreCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastPreCharPos = Just p} pushToInlineCharStack :: Char -> OrgParser () -pushToInlineCharStack c = updateState $ \st -> - st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st } +pushToInlineCharStack c = updateState $ \s -> + s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } popInlineCharStack :: OrgParser () -popInlineCharStack = updateState $ \st -> - st { orgStateEmphasisCharStack = drop 1 . 
orgStateEmphasisCharStack $ st } +popInlineCharStack = updateState $ \s -> + s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } surroundingEmphasisChar :: OrgParser [Char] surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState startEmphasisNewlinesCounting :: Int -> OrgParser () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> - s { orgStateEmphasisNewlines = Just maxNewlines } + s{ orgStateEmphasisNewlines = Just maxNewlines } decEmphasisNewlinesCount :: OrgParser () decEmphasisNewlinesCount = updateState $ \s -> @@ -209,20 +226,50 @@ parseBlocks = mconcat <$> manyTill block eof block :: OrgParser (F Blocks) block = choice [ mempty <$ blanklines - , orgBlock + , optionalAttributes $ choice + [ orgBlock + , figure + , table + ] , example , drawer - , figure , specialLine , header , return <$> hline , list - , table , latexFragment , noteBlock , paraOrPlain ] "block" +optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks) +optionalAttributes parser = try $ + resetBlockAttributes *> parseBlockAttributes *> parser + +parseBlockAttributes :: OrgParser () +parseBlockAttributes = do + attrs <- many attribute + () <$ mapM (uncurry parseAndAddAttribute) attrs + where + attribute :: OrgParser (String, String) + attribute = try $ do + key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':') + val <- skipSpaces *> anyLine + return (map toLower key, val) + +parseAndAddAttribute :: String -> String -> OrgParser () +parseAndAddAttribute key value = do + let key' = map toLower key + () <$ addBlockAttribute key' value + +lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines)) +lookupInlinesAttr attr = try $ do + val <- lookupBlockAttribute attr + maybe (return Nothing) + (fmap Just . parseFromString parseInlines) + val + + -- -- Org Blocks (#+BEGIN_... / #+END_...) -- @@ -235,13 +282,13 @@ orgBlock = try $ do let classArgs = [ translateLang . 
fromMaybe [] $ listToMaybe args ] case blockType of "comment" -> return mempty - "src" -> returnF $ B.codeBlockWith ("", classArgs, []) content "html" -> returnF $ B.rawBlock "html" content "latex" -> returnF $ B.rawBlock "latex" content "ascii" -> returnF $ B.rawBlock "ascii" content "example" -> returnF $ exampleCode content "quote" -> return $ B.blockQuote <$> contentBlocks "verse" -> parseVerse content + "src" -> codeBlockWithAttr classArgs content _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks where returnF :: a -> OrgParser (F a) @@ -260,6 +307,18 @@ blockHeader = (,,) <$> blockIndent blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline +codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks) +codeBlockWithAttr classArgs content = do + identifier <- fromMaybe "" <$> lookupBlockAttribute "name" + caption <- lookupInlinesAttr "caption" + let codeBlck = B.codeBlockWith (identifier, classArgs, []) content + return $ maybe (pure codeBlck) (labelDiv codeBlck) caption + where + labelDiv blk value = + B.divWith nullAttr <$> (mappend <$> labelledBlock value + <*> pure blk) + labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) + rawBlockContent :: Int -> String -> OrgParser String rawBlockContent indent blockType = unlines . 
map commaEscaped <$> manyTill indentedLine blockEnder @@ -333,38 +392,26 @@ drawerEnd = try $ -- Figures (Image on a line by itself, preceded by name and/or caption) figure :: OrgParser (F Blocks) figure = try $ do - (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty) - <$> nameAndOrCaption + (cap, nam) <- nameAndCaption src <- skipSpaces *> selfTarget <* skipSpaces <* newline guard (isImageFilename src) return $ do cap' <- cap - return $ B.para $ B.image src tit cap' - where withFigPrefix cs = if "fig:" `isPrefixOf` cs - then cs - else "fig:" ++ cs - -nameAndOrCaption :: OrgParser (Maybe String, Maybe (F Inlines)) -nameAndOrCaption = try $ nameFirst <|> captionFirst + return $ B.para $ B.image src nam cap' where - nameFirst = try $ do - n <- name - c <- optionMaybe caption - return (Just n, c) - captionFirst = try $ do - c <- caption - n <- optionMaybe name - return (n, Just c) - -caption :: OrgParser (F Inlines) -caption = try $ annotation "CAPTION" *> inlinesTillNewline - -name :: OrgParser String -name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline - -annotation :: String -> OrgParser String -annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':' + nameAndCaption = + do + maybeCap <- lookupInlinesAttr "caption" + maybeNam <- lookupBlockAttribute "name" + guard $ isJust maybeCap || isJust maybeNam + return ( fromMaybe mempty maybeCap + , maybe mempty withFigPrefix maybeNam ) + withFigPrefix cs = + if "fig:" `isPrefixOf` cs + then cs + else "fig:" ++ cs +-- -- Comments, Options and Metadata specialLine :: OrgParser (F Blocks) specialLine = fmap return . 
try $ metaLine <|> commentLine @@ -400,6 +447,10 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces +-- +-- Headers +-- + -- | Headers header :: OrgParser (F Blocks) header = try $ do @@ -411,6 +462,7 @@ headerStart :: OrgParser Int headerStart = try $ (length <$> many1 (char '*')) <* many1 (char ' ') + -- Don't use (or need) the reader wrapper here, we want hline to be -- @show@able. Otherwise we can't use it with @notFollowedBy'@. @@ -444,12 +496,14 @@ table = try $ do lookAhead tableStart do rows <- tableRows - return $ return . orgToPandocTable . normalizeTable =<< rowsToTable rows + cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption" + return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows orgToPandocTable :: OrgTable + -> Inlines -> Blocks -orgToPandocTable (OrgTable _ aligns heads lns) = - B.table "" (zip aligns $ repeat 0) heads lns +orgToPandocTable (OrgTable _ aligns heads lns) caption = + B.table caption (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char tableStart = try $ skipSpaces *> char '|' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 1ac2c1fd8..80a95d36b 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -8,7 +8,7 @@ import Tests.Arbitrary() import Text.Pandoc.Builder import Text.Pandoc import Data.List (intersperse) -import Data.Monoid (mempty, mconcat) +import Data.Monoid (mempty, mappend, mconcat) org :: String -> Pandoc org = readOrg def @@ -648,6 +648,18 @@ tests = [ [ plain "1" , plain "One" , plain "foo" ] , [ plain "2" , plain mempty , plain mempty ] ] + + , "Table with caption" =: + unlines [ "#+CAPTION: Hitchhiker's Multiplication Table" + , "| x | 6 |" + , "| 9 | 42 |" + ] =?> + table "Hitchhiker's Multiplication Table" + [(AlignDefault, 0), (AlignDefault, 0)] + [] + [ [ plain "x", plain "6" ] + , [ plain "9", plain "42" ] + ] ] , testGroup "Blocks and fragments" @@ -740,5 +752,22 @@ tests = , "\\end{equation}" ]) 
+ , "Code block with caption" =: + unlines [ "#+CAPTION: Functor laws in Haskell" + , "#+NAME: functor-laws" + , "#+BEGIN_SRC haskell" + , "fmap id = id" + , "fmap (p . q) = (fmap p) . (fmap q)" + , "#+END_SRC" + ] =?> + divWith + nullAttr + (mappend + (plain $ spanWith ("", ["label"], []) + (spcSep [ "Functor", "laws", "in", "Haskell" ])) + (codeBlockWith ("functor-laws", ["haskell"], []) + (unlines [ "fmap id = id" + , "fmap (p . q) = (fmap p) . (fmap q)" + ]))) ] ] -- cgit v1.2.3 From 8e91d362a392d1ee90a497f39cfcf90fee8d8da0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 19 Apr 2014 13:15:47 +0200 Subject: Org reader: Fix parsing of footnotes Footnotes can consist of multiple blocks and end only at a header or at the beginning of another footnote. This fixes the previous behavior, which restricted notes to a single paragraph. --- src/Text/Pandoc/Readers/Org.hs | 8 ++++++-- tests/Tests/Readers/Org.hs | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 66211b20e..0bc0a2668 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -639,10 +639,14 @@ latexEnvName = try $ do -- noteBlock :: OrgParser (F Blocks) noteBlock = try $ do - ref <- noteMarker - content <- skipSpaces *> paraOrPlain + ref <- noteMarker <* skipSpaces + content <- mconcat <$> blocksTillHeaderOrNote addToNotesTable (ref, content) return mempty + where + blocksTillHeaderOrNote = + many1Till block (eof <|> () <$ lookAhead noteMarker + <|> () <$ lookAhead headerStart) -- Paragraphs or Plain text paraOrPlain :: OrgParser (F Blocks) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 80a95d36b..4cc405c0f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -383,6 +383,48 @@ tests = ] =?> para (image "the-red-queen.jpg" "fig:redqueen" "Used as a metapher in evolutionary 
biology.") + + , "Footnote" =: + unlines [ "A footnote[1]" + , "" + , "[1] First paragraph" + , "" + , "second paragraph" + ] =?> + para (mconcat + [ "A", space, "footnote" + , note $ mconcat [ para ("First" <> space <> "paragraph") + , para ("second" <> space <> "paragraph") + ] + ]) + + , "Two footnotes" =: + unlines [ "Footnotes[fn:1][fn:2]" + , "" + , "[fn:1] First note." + , "" + , "[fn:2] Second note." + ] =?> + para (mconcat + [ "Footnotes" + , note $ para ("First" <> space <> "note.") + , note $ para ("Second" <> space <> "note.") + ]) + + , "Footnote followed by header" =: + unlines [ "Another note[fn:yay]" + , "" + , "[fn:yay] This is great!" + , "" + , "** Headline" + ] =?> + mconcat + [ para (mconcat + [ "Another", space, "note" + , note $ para ("This" <> space <> "is" <> space <> "great!") + ]) + , header 2 "Headline" + ] ] , testGroup "Lists" $ -- cgit v1.2.3 From 8276449520ba85c78f0b4e919fbc9bcf893a74f0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 19 Apr 2014 15:05:03 +0200 Subject: Org reader: Allow for compact definition lists Use `Text.Pandoc.Shared.compactify'DL` to allow for compact definition lists. 
--- src/Text/Pandoc/Readers/Org.hs | 4 ++-- tests/Tests/Readers/Org.hs | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0bc0a2668..c71cc24be 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString , updateLastStrPos ) -import Text.Pandoc.Shared (compactify') +import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) @@ -665,7 +665,7 @@ list :: OrgParser (F Blocks) list = choice [ definitionList, bulletList, orderedList ] <?> "list" definitionList :: OrgParser (F Blocks) -definitionList = fmap B.definitionList . sequence +definitionList = fmap B.definitionList . fmap compactify'DL . sequence <$> many1 (definitionListItem bulletListStart) bulletList :: OrgParser (F Blocks) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4cc405c0f..f62b73ce4 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -567,6 +567,18 @@ tests = ]) ] + , "Compact definition list" =: + unlines [ "- ATP :: adenosine 5' triphosphate" + , "- DNA :: deoxyribonucleic acid" + , "- PCR :: polymerase chain reaction" + , "" + ] =?> + definitionList + [ ("ATP", [ plain $ spcSep [ "adenosine", "5'", "triphosphate" ] ]) + , ("DNA", [ plain $ spcSep [ "deoxyribonucleic", "acid" ] ]) + , ("PCR", [ plain $ spcSep [ "polymerase", "chain", "reaction" ] ]) + ] + , "Loose bullet list" =: unlines [ "- apple" , "" -- cgit v1.2.3 From c128daba9dee096ce0e78b81a381f43337b74285 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 24 Apr 2014 17:42:01 +0200 Subject: Org reader: Recognize plain and angle links This adds support for plain links (like http://zeitlens.com) and angle links (like <http://moltkeplatz.de>). 
--- src/Text/Pandoc/Readers/Org.hs | 34 +++++++++++++++++++++++++--------- tests/Tests/Readers/Org.hs | 14 ++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c71cc24be..7a50b1db9 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -45,7 +45,7 @@ import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) -import Data.Char (toLower) +import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M @@ -209,6 +209,9 @@ instance Monoid a => Monoid (F a) where trimInlinesF :: F Inlines -> F Inlines trimInlinesF = liftM trimInlines +returnF :: a -> OrgParser (F a) +returnF = return . return + -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char @@ -291,9 +294,6 @@ orgBlock = try $ do "src" -> codeBlockWithAttr classArgs content _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks where - returnF :: a -> OrgParser (F a) - returnF = return . return - parseVerse :: String -> OrgParser (F Blocks) parseVerse cs = fmap B.para . mconcat . intersperse (pure B.linebreak) @@ -834,7 +834,11 @@ noteMarker = try $ do ] linkOrImage :: OrgParser (F Inlines) -linkOrImage = explicitOrImageLink <|> selflinkOrImage "link or image" +linkOrImage = explicitOrImageLink + <|> selflinkOrImage + <|> angleLink + <|> plainLink + "link or image" explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do @@ -851,15 +855,27 @@ explicitOrImageLink = try $ do selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return . 
return $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + returnF $ if isImageFilename src + then B.image src "" "" + else B.link src "" (B.str src) + +plainLink :: OrgParser (F Inlines) +plainLink = try $ do + (orig, src) <- uri + returnF $ B.link src "" (B.str orig) + +angleLink :: OrgParser (F Inlines) +angleLink = try $ do + char '<' + link <- plainLink + char '>' + return link selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String -linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") +linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") isImageFilename :: String -> Bool isImageFilename filename = diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index f62b73ce4..ed774f527 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -188,6 +188,20 @@ tests = , "Image link" =: "[[sunset.png][dusk.svg]]" =?> (para $ link "sunset.png" "" (image "dusk.svg" "" "")) + + , "Plain link" =: + "Posts on http://zeitlens.com/ can be funny at times." =?> + (para $ spcSep [ "Posts", "on" + , link "http://zeitlens.com/" "" "http://zeitlens.com/" + , "can", "be", "funny", "at", "times." + ]) + + , "Angle link" =: + "Look at for fnords." =?> + (para $ spcSep [ "Look", "at" + , link "http://moltkeplatz.de" "" "http://moltkeplatz.de" + , "for", "fnords." + ]) ] , testGroup "Meta Information" $ -- cgit v1.2.3 From e6333a9d7cfe8ec36acae59bd19a654d868f4b8d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 24 Apr 2014 16:44:49 -0700 Subject: Markdown writer: Use proper escapes to avoid unwanted lists. Previously we used 0-width spaces, an ugly hack. Closes #980. 
--- src/Text/Pandoc/Writers/Markdown.hs | 27 ++++++++++++++++----------- tests/writer.opml | 10 +++++----- 2 files changed, 21 insertions(+), 16 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index e8f976da1..95082add6 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -316,20 +316,25 @@ blockToMarkdown opts (Div attrs ils) = do contents <> blankline <> "" <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines - return $ contents <> cr + -- escape if para starts with ordered list marker + st <- get + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let rendered = render colwidth contents + let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs + | otherwise = x : escapeDelimiter xs + escapeDelimiter [] = [] + let contents' = if isEnabled Ext_all_symbols_escapable opts && + not (stPlain st) && beginsWithOrderedListMarker rendered + then text $ escapeDelimiter rendered + else contents + return $ contents' <> cr -- title beginning with fig: indicates figure blockToMarkdown opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = blockToMarkdown opts (Para [Image alt (src,tit)]) -blockToMarkdown opts (Para inlines) = do - contents <- inlineListToMarkdown opts inlines - -- escape if para starts with ordered list marker - st <- get - let esc = if isEnabled Ext_all_symbols_escapable opts && - not (stPlain st) && - beginsWithOrderedListMarker (render Nothing contents) - then text "\x200B" -- zero-width space, a hack - else empty - return $ esc <> contents <> blankline +blockToMarkdown opts (Para inlines) = + (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) blockToMarkdown opts (RawBlock f str) | f == "html" = do st <- get diff --git a/tests/writer.opml b/tests/writer.opml index 228cad247..34d161fb8 100644 --- a/tests/writer.opml +++ b/tests/writer.opml @@ -18,7 +18,7 
@@ - + @@ -55,18 +55,18 @@ - + - + - + - + -- cgit v1.2.3 From 2eec20d92fd0f498da5b66ac03cf6f8159392323 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 25 Apr 2014 15:29:28 +0200 Subject: Org reader: Enable internal links Internal links in Org are possible by using an anchor-name as the target of a link: [[some-anchor][This]] is an internal link. It links <<some-anchor>> here. --- src/Text/Pandoc/Readers/Org.hs | 50 ++++++++++++++++++++++++++++++++---------- tests/Tests/Readers/Org.hs | 25 +++++++++++++++++++++ 2 files changed, 63 insertions(+), 12 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 7f1893936..0e52bff90 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -79,6 +79,7 @@ type OrgBlockAttributes = M.Map String String -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions + , orgStateAnchorIds :: [String] , orgStateBlockAttributes :: OrgBlockAttributes , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int @@ -105,6 +106,7 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateOptions = def + , orgStateAnchorIds = [] , orgStateBlockAttributes = M.empty , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing @@ -116,6 +118,10 @@ defaultOrgParserState = OrgParserState , orgStateNotes' = [] } +recordAnchorId :: String -> OrgParser () +recordAnchorId i = updateState $ \s -> + s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + addBlockAttribute :: String -> String -> OrgParser () addBlockAttribute key val = updateState $ \s -> let attrs = orgStateBlockAttributes s @@ -848,17 +854,14 @@ explicitOrImageLink = try $ do title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ B.link src "" <$> - if isImageFilename src && isImageFilename title - then
return $ B.image title mempty mempty - else title' + return $ if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - returnF $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + return $ linkToInlinesF src (B.str src) plainLink :: OrgParser (F Inlines) plainLink = try $ do @@ -878,6 +881,26 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF s@('#':_) = pure . B.link s "" +linkToInlinesF s + | isImageFilename s = const . pure $ B.image s "" "" + | isUri s = pure . B.link s "" + | isRelativeUrl s = pure . B.link s "" +linkToInlinesF s = \title -> do + anchorB <- (s `elem`) <$> asksF orgStateAnchorIds + if anchorB + then pure $ B.link ('#':s) "" title + else pure $ B.emph title + +isRelativeUrl :: String -> Bool +isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s) + +isUri :: String -> Bool +isUri s = let (scheme, path) = break (== ':') s + in all (\c -> isAlphaNum c || c `elem` ".-") scheme + && not (null path) + isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && @@ -894,12 +917,15 @@ isImageFilename filename = -- an anchor. 
anchor :: OrgParser (F Inlines) -anchor = try $ pure <$> (B.spanWith <$> attributes <*> pure mempty) +anchor = try $ do + anchorId <- parseAnchor + recordAnchorId anchorId + returnF $ B.spanWith (solidify anchorId, [], []) mempty where - name = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") - <* string ">>" - attributes = name >>= \n -> return (solidify n, [], []) + parseAnchor = string "<<" + *> many1 (noneOf "\t\n\r<>\"' ") + <* string ">>" + <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors -- the org function @org-export-solidify-link-text@. diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ed774f527..96747d148 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -202,6 +202,11 @@ tests = , link "http://moltkeplatz.de" "" "http://moltkeplatz.de" , "for", "fnords." ]) + + , "Anchor" =: + "<> Link here later." =?> + (para $ spanWith ("anchor", [], []) mempty <> + "Link" <> space <> "here" <> space <> "later.") ] , testGroup "Meta Information" $ @@ -279,6 +284,26 @@ tests = , ":END:" ] =?> para (":FOO:" <> space <> ":END:") + + , "Anchor reference" =: + unlines [ "<> Target." + , "" + , "[[link-here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (link "#link-here" "" ("See" <> space <> "here!"))) + + , "Search links are read as emph" =: + "[[Wally][Where's Wally?]]" =?> + (para (emph $ "Where's" <> space <> "Wally?")) + + , "Link to nonexistent anchor" =: + unlines [ "<> Target." + , "" + , "[[link$here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (emph ("See" <> space <> "here!"))) ] , testGroup "Basic Blocks" $ -- cgit v1.2.3 From f81fc6c3bdfcb439892f92b69c02b77342277cc8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 26 Apr 2014 12:32:22 -0700 Subject: Update latex reader test for change in latex reader. We use 'author' for metadata, not 'authors'. 
--- tests/latex-reader.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/latex-reader.native b/tests/latex-reader.native index fcc3153cf..abc4b05a7 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [RawBlock (Format "latex") "\\maketitle" ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule -- cgit v1.2.3 From b6ae5d5e99a1575fcba3512b3a82ff77773a80ec Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 29 Apr 2014 18:14:39 -0700 Subject: Added SmallCaps to Arbitrary instance.
--- tests/Tests/Arbitrary.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/Tests/Arbitrary.hs b/tests/Tests/Arbitrary.hs index 31c0cb46a..82346d65f 100644 --- a/tests/Tests/Arbitrary.hs +++ b/tests/Tests/Arbitrary.hs @@ -49,7 +49,7 @@ arbInline n = frequency $ [ (60, liftM Str realString) , (10, liftM Strikeout $ arbInlines (n-1)) , (10, liftM Superscript $ arbInlines (n-1)) , (10, liftM Subscript $ arbInlines (n-1)) --- , (10, liftM SmallCaps $ arbInlines (n-1)) + , (10, liftM SmallCaps $ arbInlines (n-1)) , (10, do x1 <- arbitrary x2 <- arbInlines (n-1) return $ Quoted x1 x2) -- cgit v1.2.3 From f8a34f1694c9fda3319fcface155eed97608ded1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 29 Apr 2014 18:32:42 -0700 Subject: Added Cite to Arbitrary instance. See #1269. This reveals some test failures. --- tests/Tests/Arbitrary.hs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tests') diff --git a/tests/Tests/Arbitrary.hs b/tests/Tests/Arbitrary.hs index 82346d65f..3675d97bf 100644 --- a/tests/Tests/Arbitrary.hs +++ b/tests/Tests/Arbitrary.hs @@ -64,6 +64,7 @@ arbInline n = frequency $ [ (60, liftM Str realString) x3 <- realString x2 <- liftM escapeURI realString return $ Image x1 (x2,x3)) + , (2, liftM2 Cite arbitrary (arbInlines 1)) , (2, liftM Note $ resize 3 $ listOf1 $ arbBlock (n-1)) ] @@ -111,7 +112,6 @@ instance Arbitrary Pandoc where arbitrary = resize 8 $ liftM normalize $ liftM2 Pandoc arbitrary arbitrary -{- instance Arbitrary CitationMode where arbitrary = do x <- choose (0 :: Int, 2) @@ -123,14 +123,13 @@ instance Arbitrary CitationMode where instance Arbitrary Citation where arbitrary - = do x1 <- liftM (filter (`notElem` ",;]@ \t\n")) arbitrary - x2 <- arbitrary - x3 <- arbitrary + = do x1 <- listOf $ elements $ ['a'..'z'] ++ ['0'..'9'] ++ ['_'] + x2 <- arbInlines 1 + x3 <- arbInlines 1 x4 <- arbitrary x5 <- arbitrary x6 <- arbitrary return (Citation x1 x2 x3 x4 x5 x6) 
--} instance Arbitrary MathType where arbitrary -- cgit v1.2.3 From 093229dc35506bff88f4edc6f2ae5316d621f8ff Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 08:58:10 -0700 Subject: ConTeXt writer: Improved autolinks. Closes #1270. --- src/Text/Pandoc/Writers/ConTeXt.hs | 16 +++++----------- tests/writer.context | 10 ++++------ 2 files changed, 9 insertions(+), 17 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 3095cf508..cec420dcf 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Walk (query) import Text.Printf ( printf ) -import Data.List ( intercalate, isPrefixOf ) +import Data.List ( intercalate ) import Control.Monad.State import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate' ) @@ -283,14 +283,6 @@ inlineToConTeXt (RawInline "tex" str) = return $ text str inlineToConTeXt (RawInline _ _) = return empty inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr inlineToConTeXt Space = return space --- autolink -inlineToConTeXt (Link [Str str] (src, tit)) - | if "mailto:" `isPrefixOf` src - then src == escapeURI ("mailto:" ++ str) - else src == escapeURI str = - inlineToConTeXt (Link - [RawInline "context" "\\hyphenatedurl{", Str str, RawInline "context" "}"] - (src, tit)) -- Handle HTML-like internal document references to sections inlineToConTeXt (Link txt (('#' : ref), _)) = do opts <- gets stOptions @@ -305,6 +297,7 @@ inlineToConTeXt (Link txt (('#' : ref), _)) = do <> brackets (text ref) inlineToConTeXt (Link txt (src, _)) = do + let isAutolink = txt == [Str src] st <- get let next = stNextRef st put $ st {stNextRef = next + 1} @@ -313,8 +306,9 @@ inlineToConTeXt (Link txt (src, _)) = do return $ "\\useURL" <> brackets (text ref) <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) - <> 
brackets empty - <> brackets label + <> (if isAutolink + then empty + else brackets empty <> brackets label) <> "\\from" <> brackets (text ref) inlineToConTeXt (Image _ (src, _)) = do diff --git a/tests/writer.context b/tests/writer.context index 0b031fd76..0c5024d89 100644 --- a/tests/writer.context +++ b/tests/writer.context @@ -813,24 +813,22 @@ braces]\from[url26]. \subsection[autolinks]{Autolinks} -With an ampersand: -\useURL[url27][http://example.com/?foo=1&bar=2][][\hyphenatedurl{http://example.com/?foo=1&bar=2}]\from[url27] +With an ampersand: \useURL[url27][http://example.com/?foo=1&bar=2]\from[url27] \startitemize[packed] \item In a list? \item - \useURL[url28][http://example.com/][][\hyphenatedurl{http://example.com/}]\from[url28] + \useURL[url28][http://example.com/]\from[url28] \item It should. \stopitemize An e-mail address: -\useURL[url29][mailto:nobody@nowhere.net][][\hyphenatedurl{nobody@nowhere.net}]\from[url29] +\useURL[url29][mailto:nobody@nowhere.net][][nobody@nowhere.net]\from[url29] \startblockquote -Blockquoted: -\useURL[url30][http://example.com/][][\hyphenatedurl{http://example.com/}]\from[url30] +Blockquoted: \useURL[url30][http://example.com/]\from[url30] \stopblockquote Auto-links should not occur here: \type{} -- cgit v1.2.3 From eaba340b9381264f3706c780182711a8713b3def Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 11:28:18 -0700 Subject: RST reader: Some fixes to last change, and use "author" not "authors". 
(in metadata) --- src/Text/Pandoc/Readers/RST.hs | 13 +++++++------ tests/rst-reader.native | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4dc1fa006..54b6fa34a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -113,15 +113,16 @@ titleTransform (bs, meta) = metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta metaFromDefList ds meta = adjustAuthors $ foldr f meta ds where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) - adjustAuthors (Meta metamap) = Meta $ M.adjust toPlain "author" + adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" - $ M.adjust splitAuthors "authors" + $ M.mapKeys (\k -> if k == "authors" then "author" else k) $ metamap toPlain (MetaBlocks [Para xs]) = MetaInlines xs toPlain x = x - splitAuthors (MetaBlocks [Para xs]) = MetaList $ map MetaInlines - $ splitAuthors' xs + splitAuthors (MetaBlocks [Para xs]) + = MetaList $ map MetaInlines + $ splitAuthors' xs splitAuthors x = x splitAuthors' = map normalizeSpaces . splitOnSemi . 
concatMap factorSemi @@ -196,7 +197,7 @@ rawFieldListItem minIndent = try $ do rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" - return (name, trimr raw) + return (name, raw) fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) fieldListItem minIndent = try $ do @@ -577,7 +578,7 @@ directive' = do "code" -> codeblock (lookup "number-lines" fields) (trim top) body "code-block" -> codeblock (lookup "number-lines" fields) (trim top) body "aafig" -> do - let attribs = ("", ["aafig"], fields) + let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body diff --git a/tests/rst-reader.native b/tests/rst-reader.native index fd48bc60c..c77d15775 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Header 1 ("level-one-header",[],[]) [Str "Level",Space,Str "one",Space,Str "header"] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str 
"are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 2 ("level-two-header",[],[]) [Str "Level",Space,Str "two",Space,Str "header"] -- cgit v1.2.3 From ac104c4fdb957937c163a558348934a95fc13727 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 23:29:10 -0700 Subject: Updated tests for new LaTeX template. --- tests/writer.latex | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'tests') diff --git a/tests/writer.latex b/tests/writer.latex index 4cb989fba..a2f973854 100644 --- a/tests/writer.latex +++ b/tests/writer.latex @@ -22,24 +22,14 @@ \IfFileExists{microtype.sty}{\usepackage{microtype}}{} \usepackage{fancyvrb} \usepackage{graphicx} -% Redefine \includegraphics so that, unless explicit options are -% given, the image width will not exceed the width of the page. -% Images get their normal width if they fit onto the page, but -% are scaled down if they would overflow the margins. 
\makeatletter -\def\ScaleIfNeeded{% - \ifdim\Gin@nat@width>\linewidth - \linewidth - \else - \Gin@nat@width - \fi -} +\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} +\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} \makeatother -\let\Oldincludegraphics\includegraphics -{% - \catcode`\@=11\relax% - \gdef\includegraphics{\@ifnextchar[{\Oldincludegraphics}{\Oldincludegraphics[width=\ScaleIfNeeded]}}% -}% +% Scale images if necessary, so that they will not overflow the page +% margins by default, and it is still possible to overwrite the defaults +% using explicit options in \includegraphics[width, height, ...]{} +\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} \ifxetex \usepackage[setpagesize=false, % page size defined by xetex unicode=false, % unicode breaks when used with xetex -- cgit v1.2.3 From 8726eebcd363ccb33ea8c297b004feca7ef37ceb Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 30 Apr 2014 11:16:01 +0200 Subject: Org reader: Add support for custom link types Org allows users to define their own custom link types. E.g., in a document with a lot of links to Wikipedia articles, one can define a custom wikipedia link-type via #+LINK: wp https://en.wikipedia.org/wiki/ This allows to write [[wp:Org_mode][Org-mode]] instead of the equivalent [[https://en.wikipedia.org/wiki/Org_mode][Org-mode]].
--- src/Text/Pandoc/Readers/Org.hs | 68 +++++++++++++++++++++++++++++++++++++----- tests/Tests/Readers/Org.hs | 26 ++++++++++++++++ 2 files changed, 86 insertions(+), 8 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0e52bff90..d68ef45ef 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,7 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) -import Control.Monad (foldM, guard, liftM, liftM2, when) +import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default @@ -51,6 +51,7 @@ import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M import Data.Maybe (listToMaybe, fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) +import Network.HTTP (urlEncode) -- | Parse org-mode string and return a Pandoc document. 
readOrg :: ReaderOptions -- ^ Reader options @@ -76,6 +77,8 @@ type OrgNoteTable = [OrgNoteRecord] type OrgBlockAttributes = M.Map String String +type OrgLinkFormatters = M.Map String (String -> String) + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions @@ -86,6 +89,7 @@ data OrgParserState = OrgParserState , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos + , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable @@ -113,6 +117,7 @@ defaultOrgParserState = OrgParserState , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing + , orgStateLinkFormatters = M.empty , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] @@ -175,6 +180,13 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } +addLinkFormat :: String + -> (String -> String) + -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> + let fs = orgStateLinkFormatters s + in s{ orgStateLinkFormatters = M.insert key formatter fs } + addToNotesTable :: OrgNoteRecord -> OrgParser () addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState @@ -423,7 +435,8 @@ specialLine :: OrgParser (F Blocks) specialLine = fmap return . 
try $ metaLine <|> commentLine metaLine :: OrgParser Blocks -metaLine = try $ metaLineStart *> declarationLine +metaLine = try $ mempty + <$ (metaLineStart *> (optionLine <|> declarationLine)) commentLine :: OrgParser Blocks commentLine = try $ commentLineStart *> anyLine *> pure mempty @@ -436,14 +449,14 @@ metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" commentLineStart :: OrgParser String commentLineStart = try $ mappend <$> many spaceChar <*> string "# " -declarationLine :: OrgParser Blocks +declarationLine :: OrgParser () declarationLine = try $ do key <- metaKey inlinesF <- metaInlines updateState $ \st -> let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta in st { orgStateMeta' = orgStateMeta' st <> meta' } - return mempty + return () metaInlines :: OrgParser (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline @@ -453,6 +466,35 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces +optionLine :: OrgParser () +optionLine = try $ do + key <- metaKey + case key of + "link" -> parseLinkFormat >>= uncurry addLinkFormat + _ -> mzero + +parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat = try $ do + linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkSubst <- parseFormat + return (linkType, linkSubst) + +-- | An ad-hoc, single-argument-only implementation of a printf-style format +-- parser. +parseFormat :: OrgParser (String -> String) +parseFormat = try $ do + replacePlain <|> replaceUrl <|> justAppend + where + -- inefficient, but who cares + replacePlain = try $ (\x -> concat . flip intersperse x) + <$> sequence [tillSpecifier 's', rest] + replaceUrl = try $ (\x -> concat . flip intersperse x . 
urlEncode) + <$> sequence [tillSpecifier 'h', rest] + justAppend = try $ (++) <$> rest + + rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + -- -- Headers -- @@ -850,13 +892,15 @@ linkOrImage = explicitOrImageLink explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do char '[' - src <- linkTarget + srcF <- applyCustomLinkFormat =<< linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if isImageFilename src && isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + return $ do + src <- srcF + if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do @@ -881,6 +925,14 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +applyCustomLinkFormat :: String -> OrgParser (F String) +applyCustomLinkFormat link = do + let (linkType, rest) = break (== ':') link + return $ do + formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters + return $ maybe link ($ drop 1 rest) formatter + + linkToInlinesF :: String -> Inlines -> F Inlines linkToInlinesF s@('#':_) = pure . 
B.link s "" linkToInlinesF s diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 96747d148..78684f0f1 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -304,6 +304,32 @@ tests = ] =?> (para (spanWith ("link-here", [], []) mempty <> "Target.") <> para (emph ("See" <> space <> "here!"))) + + , "Link abbreviation" =: + unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s" + , "[[wp:Org_mode][Wikipedia on Org-mode]]" + ] =?> + (para (link "https://en.wikipedia.org/wiki/Org_mode" "" + ("Wikipedia" <> space <> "on" <> space <> "Org-mode"))) + + , "Link abbreviation, defined after first use" =: + unlines [ "[[zl:non-sense][Non-sense articles]]" + , "#+LINK: zl http://zeitlens.com/tags/%s.html" + ] =?> + (para (link "http://zeitlens.com/tags/non-sense.html" "" + ("Non-sense" <> space <> "articles"))) + + , "Link abbreviation, URL encoded arguments" =: + unlines [ "#+link: expl http://example.com/%h/foo" + , "[[expl:Hello, World!][Moin!]]" + ] =?> + (para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!")) + + , "Link abbreviation, append arguments" =: + unlines [ "#+link: expl http://example.com/" + , "[[expl:foo][bar]]" + ] =?> + (para (link "http://example.com/foo" "" "bar")) ] , testGroup "Basic Blocks" $ -- cgit v1.2.3 From 007eb96e06bc1fff12119addf2e03552ac992b2e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 1 May 2014 09:23:21 -0700 Subject: Markdown reader: Make one-column pipe tables work. Closes #1218. 
--- src/Text/Pandoc/Readers/Markdown.hs | 22 ++++++++++++++++------ tests/pipe-tables.native | 10 +++++++++- tests/pipe-tables.txt | 10 ++++++++++ 3 files changed, 35 insertions(+), 7 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d3ca8d26f..97a3168da 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1221,11 +1221,20 @@ removeOneLeadingSpace xs = gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines +pipeBreak :: MarkdownParser [Alignment] +pipeBreak = try $ do + nonindentSpaces + openPipe <- (True <$ char '|') <|> return False + first <- pipeTableHeaderPart + rest <- many $ sepPipe *> pipeTableHeaderPart + -- surrounding pipes needed for a one-column table: + guard $ not (null rest && not openPipe) + optional (char '|') + blankline + return (first:rest) + pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do - let pipeBreak = nonindentSpaces *> optional (char '|') *> - pipeTableHeaderPart `sepBy1` sepPipe <* - optional (char '|') <* blankline (heads,aligns) <- try ( pipeBreak >>= \als -> return (return $ replicate (length als) mempty, als)) <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> @@ -1244,12 +1253,13 @@ sepPipe = try $ do pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = do nonindentSpaces - optional (char '|') + openPipe <- (True <$ char '|') <|> return False let cell = mconcat <$> many (notFollowedBy (blankline <|> char '|') >> inline) first <- cell - sepPipe - rest <- cell `sepBy1` sepPipe + rest <- many $ sepPipe *> cell + -- surrounding pipes needed for a one-column table: + guard $ not (null rest && not openPipe) optional (char '|') blankline let cells = sequence (first:rest) diff --git a/tests/pipe-tables.native b/tests/pipe-tables.native index 5420a7bd3..eafd21d22 100644 --- a/tests/pipe-tables.native +++ b/tests/pipe-tables.native @@ -67,4 +67,12 @@ 
,[[Plain [Str "orange"]] ,[Plain [Str "17"]]] ,[[Plain [Str "pear"]] - ,[Plain [Str "302"]]]]] + ,[Plain [Str "302"]]]] +,Para [Str "One-column:"] +,Table [] [AlignDefault] [0.0] + [[Plain [Str "hi"]]] + [[[Plain [Str "lo"]]]] +,Para [Str "Header-less",Space,Str "one-column:"] +,Table [] [AlignCenter] [0.0] + [[]] + [[[Plain [Str "hi"]]]]] diff --git a/tests/pipe-tables.txt b/tests/pipe-tables.txt index 79d79200f..ee8d54d9f 100644 --- a/tests/pipe-tables.txt +++ b/tests/pipe-tables.txt @@ -40,3 +40,13 @@ apple | 5 orange| 17 pear | 302 +One-column: + +|hi| +|--| +|lo| + +Header-less one-column: + +|:-:| +|hi| -- cgit v1.2.3 From 4c4382420356928d73026395d4ab2f0f9957df08 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 2 May 2014 22:58:47 -0700 Subject: Fixed empty reference links. Closes #1186. Includes test. --- src/Text/Pandoc/Readers/Markdown.hs | 4 +--- tests/markdown-reader-more.native | 5 ++++- tests/markdown-reader-more.txt | 8 ++++++++ 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 97a3168da..36f73d847 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -349,10 +349,8 @@ referenceKey = try $ do char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') let sourceURL = liftM unwords $ many $ try $ do - notFollowedBy' referenceTitle - skipMany spaceChar - optional $ newline >> notFollowedBy blankline skipMany spaceChar + notFollowedBy' referenceTitle notFollowedBy' (() <$ reference) many1 $ notFollowedBy space >> litChar let betweenAngles = try $ char '<' >> diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 27f09dada..0d74c233d 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -136,4 +136,7 @@ ,Para [Link [Str "link"] ("/hithere)","")] ,Para [Link [Str "linky"] ("hi_(there_(nested))","")] 
,Header 2 ("reference-link-fallbacks",[],[]) [Str "Reference",Space,Str "link",Space,Str "fallbacks"] -,Para [Str "[",Emph [Str "not",Space,Str "a",Space,Str "link"],Str "]",Space,Str "[",Emph [Str "nope"],Str "]\8230"]] +,Para [Str "[",Emph [Str "not",Space,Str "a",Space,Str "link"],Str "]",Space,Str "[",Emph [Str "nope"],Str "]\8230"] +,Header 2 ("empty-reference-links",[],[]) [Str "Empty",Space,Str "reference",Space,Str "links"] +,Para [Str "bar"] +,Para [Link [Str "foo2"] ("","")]] diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index d133b3dbb..739543bfd 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -235,3 +235,11 @@ Empty cells ## Reference link fallbacks [*not a link*] [*nope*]... + +## Empty reference links + +[foo2]: + +bar + +[foo2] -- cgit v1.2.3 From ccbf4fc9c20ccdce0f7144845cd022ee8cfca1af Mon Sep 17 00:00:00 2001 From: Neil Mayhew Date: Mon, 14 Apr 2014 19:07:14 -0600 Subject: Distinguish tight and loose lists in Docbook output Determined by the first block of the first item being Plain. --- src/Text/Pandoc/Writers/Docbook.hs | 26 +++-- tests/Tests/Writers/Docbook.hs | 199 +++++++++++++++++++++++++++++++++++-- tests/writer.docbook | 60 +++++------ 3 files changed, 236 insertions(+), 49 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 2d6ce3020..e1b62f02d 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -185,10 +185,13 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = else languagesByExtension . 
map toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = - inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst + let attribs = case lst of + ((Plain _:_):_) -> [("spacing", "compact")] + _ -> [] + in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst blockToDocbook _ (OrderedList _ []) = empty blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = - let attribs = case numstyle of + let numeration = case numstyle of DefaultStyle -> [] Decimal -> [("numeration", "arabic")] Example -> [("numeration", "arabic")] @@ -196,14 +199,21 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerAlpha -> [("numeration", "loweralpha")] UpperRoman -> [("numeration", "upperroman")] LowerRoman -> [("numeration", "lowerroman")] - items = if start == 1 - then listItemsToDocbook opts (first:rest) - else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest + spacing = case first of + (Plain _:_) -> [("spacing", "compact")] + _ -> [] + attribs = numeration ++ spacing + items = if start == 1 + then listItemsToDocbook opts (first:rest) + else (inTags True "listitem" [("override",show start)] + (blocksToDocbook opts $ map plainToPara first)) $$ + listItemsToDocbook opts rest in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = - inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst + let attribs = case lst of + ((_, (Plain _:_):_):_) -> [("spacing", "compact")] + _ -> [] + in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst blockToDocbook _ (RawBlock f str) | f == "docbook" = text str -- raw XML block | f == "html" = text str -- allow html for backwards compatibility diff --git a/tests/Tests/Writers/Docbook.hs b/tests/Tests/Writers/Docbook.hs index e815b4f5a..97126b473 100644 --- a/tests/Tests/Writers/Docbook.hs +++ b/tests/Tests/Writers/Docbook.hs @@ -31,22 
+31,199 @@ lineblock :: Blocks lineblock = para ("some text" <> linebreak <> "and more lines" <> linebreak <> "and again") -lineblock_out :: String -lineblock_out = "some text\n" ++ - "and more lines\n" ++ - "and again" +lineblock_out :: [String] +lineblock_out = [ "some text" + , "and more lines" + , "and again" + ] tests :: [Test] tests = [ testGroup "line blocks" [ "none" =: para "This is a test" - =?> "\n This is a test\n" + =?> unlines + [ "" + , " This is a test" + , "" + ] , "basic" =: lineblock - =?> lineblock_out + =?> unlines lineblock_out , "blockquote" =: blockQuote lineblock - =?> ("\n" ++ lineblock_out ++ "\n") - , "footnote" =: para ("This is a test" <> note lineblock <> " of footnotes") - =?> ("\n This is a test\n" ++ - lineblock_out ++ - "\n of footnotes\n") + =?> unlines + ( [ "" ] ++ + lineblock_out ++ + [ "" ] + ) + , "footnote" =: para ("This is a test" <> + note lineblock <> + " of footnotes") + =?> unlines + ( [ "" + , " This is a test" ] ++ + lineblock_out ++ + [ " of footnotes" + , "" ] + ) + ] + , testGroup "compact lists" + [ testGroup "bullet" + [ "compact" =: bulletList [plain "a", plain "b", plain "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + , "loose" =: bulletList [para "a", para "b", para "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + ] + , testGroup "ordered" + [ "compact" =: orderedList [plain "a", plain "b", plain "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + , "loose" =: orderedList [para "a", para "b", para "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + ] + , testGroup 
"definition" + [ "compact" =: definitionList [ ("an", [plain "apple" ]) + , ("a", [plain "banana"]) + , ("an", [plain "orange"])] + =?> unlines + [ "" + , " " + , " " + , " an" + , " " + , " " + , " " + , " apple" + , " " + , " " + , " " + , " " + , " " + , " a" + , " " + , " " + , " " + , " banana" + , " " + , " " + , " " + , " " + , " " + , " an" + , " " + , " " + , " " + , " orange" + , " " + , " " + , " " + , "" + ] + , "loose" =: definitionList [ ("an", [para "apple" ]) + , ("a", [para "banana"]) + , ("an", [para "orange"])] + =?> unlines + [ "" + , " " + , " " + , " an" + , " " + , " " + , " " + , " apple" + , " " + , " " + , " " + , " " + , " " + , " a" + , " " + , " " + , " " + , " banana" + , " " + , " " + , " " + , " " + , " " + , " an" + , " " + , " " + , " " + , " orange" + , " " + , " " + , " " + , "" + ] + ] ] ] diff --git a/tests/writer.docbook b/tests/writer.docbook index 9cb9a5359..26dcbadaa 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -93,7 +93,7 @@ sub status { A list: - + item one @@ -156,7 +156,7 @@ These should not be escaped: \$ \\ \> \[ \{ Asterisks tight: - + asterisk 1 @@ -196,7 +196,7 @@ These should not be escaped: \$ \\ \> \[ \{ Pluses tight: - + Plus 1 @@ -236,7 +236,7 @@ These should not be escaped: \$ \\ \> \[ \{ Minuses tight: - + Minus 1 @@ -279,7 +279,7 @@ These should not be escaped: \$ \\ \> \[ \{ Tight: - + First @@ -299,7 +299,7 @@ These should not be escaped: \$ \\ \> \[ \{ and: - + One @@ -383,17 +383,17 @@ These should not be escaped: \$ \\ \> \[ \{ Nested - + Tab - + Tab - + Tab @@ -407,7 +407,7 @@ These should not be escaped: \$ \\ \> \[ \{ Here’s another: - + First @@ -417,7 +417,7 @@ These should not be escaped: \$ \\ \> \[ \{ Second: - + Fee @@ -454,7 +454,7 @@ These should not be escaped: \$ \\ \> \[ \{ Second: - + Fee @@ -508,7 +508,7 @@ These should not be escaped: \$ \\ \> \[ \{ Fancy list markers - + begins with 2 @@ -521,7 +521,7 @@ These should not be escaped: \$ \\ \> \[ \{ with a 
continuation - + sublist with roman numerals, starting with 4 @@ -531,7 +531,7 @@ These should not be escaped: \$ \\ \> \[ \{ more items - + a subsublist @@ -550,22 +550,22 @@ These should not be escaped: \$ \\ \> \[ \{ Nesting: - + Upper Alpha - + Upper Roman. - + Decimal start with 6 - + Lower alpha with paren @@ -581,7 +581,7 @@ These should not be escaped: \$ \\ \> \[ \{ Autonumbering: - + Autonumber. @@ -591,7 +591,7 @@ These should not be escaped: \$ \\ \> \[ \{ More. - + Nested. @@ -616,7 +616,7 @@ These should not be escaped: \$ \\ \> \[ \{ Tight using spaces: - + apple @@ -651,7 +651,7 @@ These should not be escaped: \$ \\ \> \[ \{ Tight using tabs: - + apple @@ -757,7 +757,7 @@ These should not be escaped: \$ \\ \> \[ \{ Multiple definitions, tight: - + apple @@ -841,7 +841,7 @@ These should not be escaped: \$ \\ \> \[ \{ orange fruit - + sublist @@ -1051,7 +1051,7 @@ These should not be escaped: \$ \\ \> \[ \{ LaTeX - + @@ -1097,7 +1097,7 @@ These should not be escaped: \$ \\ \> \[ \{ These shouldn’t be math: - + To get the famous equation, write $e = mc^2$. @@ -1130,7 +1130,7 @@ These should not be escaped: \$ \\ \> \[ \{ Here is some unicode: - + I hat: Î @@ -1316,7 +1316,7 @@ These should not be escaped: \$ \\ \> \[ \{ With an ampersand: http://example.com/?foo=1&bar=2 - + In a list? @@ -1414,7 +1414,7 @@ or here: <http://example.com/> - + And in list items. -- cgit v1.2.3 From abd3a039b9adcafa8aa1df6e0753a725f90c78fc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 20:45:05 -0700 Subject: DocBook writer: Small tweaks to last commit. * Use isTightList from Shared. * Adjust writer test, since isTightList is a bit different from what was used before. Closes #1250. 
--- src/Text/Pandoc/Writers/Docbook.hs | 12 +++--------- tests/writer.docbook | 2 +- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index e1b62f02d..1a8e58354 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -185,9 +185,7 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = - let attribs = case lst of - ((Plain _:_):_) -> [("spacing", "compact")] - _ -> [] + let attribs = [("spacing", "compact") | isTightList lst] in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst blockToDocbook _ (OrderedList _ []) = empty blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = @@ -199,9 +197,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerAlpha -> [("numeration", "loweralpha")] UpperRoman -> [("numeration", "upperroman")] LowerRoman -> [("numeration", "lowerroman")] - spacing = case first of - (Plain _:_) -> [("spacing", "compact")] - _ -> [] + spacing = [("spacing", "compact") | isTightList (first:rest)] attribs = numeration ++ spacing items = if start == 1 then listItemsToDocbook opts (first:rest) @@ -210,9 +206,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = listItemsToDocbook opts rest in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = - let attribs = case lst of - ((_, (Plain _:_):_):_) -> [("spacing", "compact")] - _ -> [] + let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst blockToDocbook _ (RawBlock f str) | f == "docbook" = text str -- raw XML block diff --git a/tests/writer.docbook b/tests/writer.docbook index 26dcbadaa..01daa2c30 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook 
@@ -508,7 +508,7 @@ These should not be escaped: \$ \\ \> \[ \{ Fancy list markers - + begins with 2 -- cgit v1.2.3 From 41c89d51c7b3d0099e72c0693bfd413bb4498de9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 21:24:20 -0700 Subject: AsciiDoc writer: Added test for empty table cells. --- pandoc.cabal | 1 + tests/tables.asciidoc | 11 ++++------- tests/test-pandoc.hs | 2 ++ 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'tests') diff --git a/pandoc.cabal b/pandoc.cabal index 002d1671c..3991d2d82 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -392,6 +392,7 @@ Test-Suite test-pandoc Tests.Writers.ConTeXt Tests.Writers.HTML Tests.Writers.Markdown + Tests.Writers.AsciiDoc Tests.Writers.LaTeX Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Default-Language: Haskell98 diff --git a/tests/tables.asciidoc b/tests/tables.asciidoc index 38daca192..ba647866a 100644 --- a/tests/tables.asciidoc +++ b/tests/tables.asciidoc @@ -52,20 +52,17 @@ Multiline table without caption: Table without column headers: [cols=">,<,^,>",] -|============================================================================= +|================== |12 |12 |12 |12 - |123 |123 |123 |123 - |1 |1 |1 |1 -|============================================================================= +|================== Multiline table without column headers: [width="78%",cols="^21%,<17%,>20%,42%",] -|============================================================================= +|======================================================================= |First |row |12.0 |Example of a row that spans multiple lines. - |Second |row |5.0 |Here's another one. Note the blank line between rows. 
-|============================================================================= +|======================================================================= diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index a7d4fca01..80d672589 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -15,6 +15,7 @@ import qualified Tests.Writers.HTML import qualified Tests.Writers.Docbook import qualified Tests.Writers.Native import qualified Tests.Writers.Markdown +import qualified Tests.Writers.AsciiDoc import qualified Tests.Shared import qualified Tests.Walk import Text.Pandoc.Shared (inDirectory) @@ -30,6 +31,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "HTML" Tests.Writers.HTML.tests , testGroup "Docbook" Tests.Writers.Docbook.tests , testGroup "Markdown" Tests.Writers.Markdown.tests + , testGroup "AsciiDoc" Tests.Writers.AsciiDoc.tests ] , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests -- cgit v1.2.3 From 6b532c2131f13f5c294dcea88a8d041f8be388dd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 22:33:36 -0700 Subject: Added Tests.Writer.AsciiDoc to repository. --- tests/Tests/Writers/AsciiDoc.hs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/Tests/Writers/AsciiDoc.hs (limited to 'tests') diff --git a/tests/Tests/Writers/AsciiDoc.hs b/tests/Tests/Writers/AsciiDoc.hs new file mode 100644 index 000000000..118e648d3 --- /dev/null +++ b/tests/Tests/Writers/AsciiDoc.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Writers.AsciiDoc (tests) where + +import Test.Framework +import Text.Pandoc.Builder +import Text.Pandoc +import Tests.Helpers +import Tests.Arbitrary() +import Data.Monoid + +asciidoc :: (ToString a, ToPandoc a) => a -> String +asciidoc = writeAsciiDoc def{ writerWrapText = False } . 
toPandoc + +tests :: [Test] +tests = [ testGroup "tables" + [ test asciidoc "empty cells" $ + simpleTable [] [[mempty],[mempty]] =?> unlines + [ "[cols=\"\",]" + , "|====" + , "|" + , "|" + , "|====" + ] + , test asciidoc "multiblock cells" $ + simpleTable [] [[para "Para 1" <> para "Para 2"]] + =?> unlines + [ "[cols=\"\",]" + , "|=====" + , "a|" + , "Para 1" + , "" + , "Para 2" + , "" + , "|=====" + ] + ] + ] -- cgit v1.2.3 From 1e5042489223edc4eb5fa428ee47ed525bc1f83f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 08:19:48 -0700 Subject: Added test for #1154. --- tests/Tests/Readers/Markdown.hs | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tests') diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index 492680a35..5a51fe759 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -216,4 +216,13 @@ tests = [ testGroup "inline code" -- , testGroup "round trip" -- [ property "p_markdown_round_trip" p_markdown_round_trip -- ] + , testGroup "lists" + [ "issue #1154" =: + " - \n first div breaks\n \n\n \n\n \n with this div too.\n \n" + =?> bulletList [divWith nullAttr (plain $ text "first div breaks") <> + rawBlock "html" "\n" <> + divWith nullAttr (plain $ text "with this div too.")] + ] ] -- cgit v1.2.3 From 71bd4fb2b3778d3906a63938625ebcadca40b8c8 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 5 May 2014 14:39:25 +0200 Subject: Org reader: Read inline code blocks Org's inline code blocks take forms like `src_haskell(print "hi")` and are frequently used to include results from computations called from within the document. The blocks are read as inline code and marked with the special class `rundoc-block`. Proper handling and execution of these blocks is the subject of a separate library, rundoc, which is work in progress. This closes #1278. 
--- src/Text/Pandoc/Readers/Org.hs | 43 +++++++++++++++++++++++++++++++++++++++--- tests/Tests/Readers/Org.hs | 18 ++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index d68ef45ef..dba61dfe0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) +import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) @@ -721,7 +722,6 @@ bulletList = fmap B.bulletList . fmap compactify' . sequence <$> many1 (listItem bulletListStart) orderedList :: OrgParser (F Blocks) --- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) orderedList = fmap B.orderedList . fmap compactify' . sequence <$> many1 (listItem orderedListStart) @@ -746,11 +746,11 @@ definitionListItem :: OrgParser Int definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") - first <- anyLineNewline + line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inline term - contents' <- parseFromString parseBlocks $ first ++ blank ++ cont + contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' @@ -789,6 +789,7 @@ inline = , footnote , linkOrImage , anchor + , inlineCodeBlock , str , endline , emph @@ -989,6 +990,42 @@ solidify = map replaceSpecialChar | c `elem` "_.-:" = c | otherwise = '-' +-- | Parses an inline code block and marks it as an babel block. 
+inlineCodeBlock :: OrgParser (F Inlines) +inlineCodeBlock = try $ do + string "src_" + lang <- many1 orgArgWordChar + opts <- option [] $ enclosedByPair '[' ']' blockOption + inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") + let attrClasses = [translateLang lang, rundocBlockClass] + let attrKeyVal = map toRundocAttrib (("language", lang) : opts) + returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode + where enclosedByPair s e p = char s *> many1Till p (char e) + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = "rundoc-block" + +blockOption :: OrgParser (String, String) +blockOption = try $ (,) <$> orgArgKey <*> orgArgValue + +orgArgKey :: OrgParser String +orgArgKey = try $ + skipSpaces *> char ':' + *> many1 orgArgWordChar + <* many1 spaceChar + +orgArgValue :: OrgParser String +orgArgValue = try $ + skipSpaces *> many1 orgArgWordChar + <* skipSpaces + +orgArgWordChar :: OrgParser Char +orgArgWordChar = alphaNum <|> oneOf "-_" + +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first ("rundoc-" ++) + emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 78684f0f1..949976aba 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -207,6 +207,24 @@ tests = "<> Link here later." 
=?> (para $ spanWith ("anchor", [], []) mempty <> "Link" <> space <> "here" <> space <> "later.") + + , "Inline code block" =: + "src_emacs-lisp{(message \"Hello\")}" =?> + (para $ codeWith ( "" + , [ "commonlisp", "rundoc-block" ] + , [ ("rundoc-language", "emacs-lisp") ]) + "(message \"Hello\")") + + , "Inline code block with arguments" =: + "src_sh[:export both :results output]{echo 'Hello, World'}" =?> + (para $ codeWith ( "" + , [ "bash", "rundoc-block" ] + , [ ("rundoc-language", "sh") + , ("rundoc-export", "both") + , ("rundoc-results", "output") + ] + ) + "echo 'Hello, World'") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From e7b42947bfa3d59ac59bf2b8d1e17415c24f518f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 20:20:28 -0700 Subject: Updated tests for template changes. --- tests/lhs-test.latex | 6 ++---- tests/lhs-test.latex+lhs | 6 ++---- tests/writer.latex | 4 ++-- 3 files changed, 6 insertions(+), 10 deletions(-) (limited to 'tests') diff --git a/tests/lhs-test.latex b/tests/lhs-test.latex index 78f072600..0ed6640bd 100644 --- a/tests/lhs-test.latex +++ b/tests/lhs-test.latex @@ -3,8 +3,6 @@ \usepackage{amssymb,amsmath} \usepackage{ifxetex,ifluatex} \usepackage{fixltx2e} % provides \textsubscript -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} @@ -18,6 +16,8 @@ \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} \newcommand{\euro}{€} \fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} % use microtype if available \IfFileExists{microtype.sty}{\usepackage{microtype}}{} \usepackage{color} @@ -63,8 +63,6 @@ \setlength{\emergencystretch}{3em} % prevent overfull lines \setcounter{secnumdepth}{0} -\author{} -\date{} \begin{document} diff --git a/tests/lhs-test.latex+lhs 
b/tests/lhs-test.latex+lhs index 50a0e15e1..67841d54b 100644 --- a/tests/lhs-test.latex+lhs +++ b/tests/lhs-test.latex+lhs @@ -3,8 +3,6 @@ \usepackage{amssymb,amsmath} \usepackage{ifxetex,ifluatex} \usepackage{fixltx2e} % provides \textsubscript -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} @@ -18,6 +16,8 @@ \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} \newcommand{\euro}{€} \fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} % use microtype if available \IfFileExists{microtype.sty}{\usepackage{microtype}}{} \usepackage{listings} @@ -44,8 +44,6 @@ \setlength{\emergencystretch}{3em} % prevent overfull lines \setcounter{secnumdepth}{0} -\author{} -\date{} \begin{document} diff --git a/tests/writer.latex b/tests/writer.latex index a2f973854..1ac79feca 100644 --- a/tests/writer.latex +++ b/tests/writer.latex @@ -3,8 +3,6 @@ \usepackage{amssymb,amsmath} \usepackage{ifxetex,ifluatex} \usepackage{fixltx2e} % provides \textsubscript -% use upquote if available, for straight quotes in verbatim environments -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} @@ -18,6 +16,8 @@ \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase} \newcommand{\euro}{€} \fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} % use microtype if available \IfFileExists{microtype.sty}{\usepackage{microtype}}{} \usepackage{fancyvrb} -- cgit v1.2.3 From 757c4f68f3f3cab99db9499936e3ae4775ebbddf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:07:37 +0200 Subject: Org reader: Support arguments for code blocks The general form of 
source block headers (`#+BEGIN_SRC <language> <switches> <header arguments>`) was not recognized by the reader. This patch adds support for the above form, adds header arguments to the block's key-value pairs and marks the block as a rundoc block if header arguments are present. This closes #1286. --- src/Text/Pandoc/Readers/Org.hs | 98 ++++++++++++++++++++++++------------------ tests/Tests/Readers/Org.hs | 14 ++++++ 2 files changed, 70 insertions(+), 42 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 9df8ce0b3..c05ac92d0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -276,7 +276,7 @@ parseBlockAttributes = do where attribute :: OrgParser (String, String) attribute = try $ do - key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':') + key <- metaLineStart *> many1Till nonspaceChar (char ':') val <- skipSpaces *> anyLine return (map toLower key, val) @@ -342,16 +342,11 @@ verseBlock blkProp = try $ do codeBlock :: BlockProperties -> OrgParser (F Blocks) codeBlock blkProp = do skipSpaces - language <- optionMaybe orgArgWord - (classes, kv) <- codeHeaderArgs + (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) id' <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" content <- rawBlockContent blkProp - let attr = ( id' - , maybe id (\l -> (l:)) language $ classes - , kv ) - let codeBlck = B.codeBlockWith attr content - return $ maybe (pure codeBlck) (labelDiv codeBlck) caption + let codeBlck = B.codeBlockWith ( id', classes, kv ) content + maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption" where labelDiv blk value = B.divWith nullAttr <$> (mappend <$> labelledBlock value @@ -383,12 +378,33 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +type SwitchOption = (Char, Maybe String) + orgArgWord :: OrgParser String orgArgWord = many1 orgArgWordChar +-- | Parse code 
block arguments +-- TODO: We currently don't handle switches. codeHeaderArgs :: OrgParser ([String], [(String, String)]) -codeHeaderArgs = - (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline +codeHeaderArgs = try $ do + language <- skipSpaces *> orgArgWord + _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + parameters <- manyTill blockOption newline + let pandocLang = translateLang language + return $ + if hasRundocParameters parameters + then ( [ pandocLang, rundocBlockClass ] + , map toRundocAttrib (("language", language) : parameters) + ) + else ([ pandocLang ], parameters) + where hasRundocParameters = not . null + +switch :: OrgParser SwitchOption +switch = try $ simpleSwitch <|> lineNumbersSwitch + where + simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) + lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> + (string "-l \"" *> many1Till nonspaceChar (char '"')) translateLang :: String -> String translateLang "C" = "c" @@ -401,6 +417,32 @@ translateLang "sh" = "bash" translateLang "sqlite" = "sql" translateLang cs = cs +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + +-- | The class-name used to mark rundoc blocks. 
+rundocBlockClass :: String +rundocBlockClass = rundocPrefix ++ "block" + +blockOption :: OrgParser (String, String) +blockOption = try $ (,) <$> orgArgKey <*> orgArgValue + +orgArgKey :: OrgParser String +orgArgKey = try $ + skipSpaces *> char ':' + *> many1 orgArgWordChar + +orgArgValue :: OrgParser String +orgArgValue = try $ + skipSpaces *> many1 orgArgWordChar <* skipSpaces + +orgArgWordChar :: OrgParser Char +orgArgWordChar = alphaNum <|> oneOf "-_" + +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first ("rundoc-" ++) + commaEscaped :: String -> String commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs @@ -425,7 +467,7 @@ drawer = try $ do drawerStart :: OrgParser String drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline + skipSpaces *> drawerName <* skipSpaces <* P.newline where drawerName = try $ char ':' *> validDrawerName <* char ':' validDrawerName = stringAnyCase "PROPERTIES" <|> stringAnyCase "LOGBOOK" @@ -435,7 +477,7 @@ drawerLine = try anyLine drawerEnd :: OrgParser String drawerEnd = try $ - skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline -- @@ -446,7 +488,7 @@ drawerEnd = try $ figure :: OrgParser (F Blocks) figure = try $ do (cap, nam) <- nameAndCaption - src <- skipSpaces *> selfTarget <* skipSpaces <* newline + src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline guard (isImageFilename src) return $ do cap' <- cap @@ -1036,34 +1078,6 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. 
-rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - -blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgArgValue - -orgArgKey :: OrgParser String -orgArgKey = try $ - skipSpaces *> char ':' - *> many1 orgArgWordChar - <* many1 spaceChar - -orgArgValue :: OrgParser String -orgArgValue = try $ - skipSpaces *> many1 orgArgWordChar - <* skipSpaces - -orgArgWordChar :: OrgParser Char -orgArgWordChar = alphaNum <|> oneOf "-_" - -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first ("rundoc-" ++) - emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 949976aba..a78e8861f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -822,6 +822,20 @@ tests = in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] , codeBlockWith attr' code' ] + , "Source block with rundoc/babel arguments" =: + unlines [ "#+BEGIN_SRC emacs-lisp :exports both" + , "(progn (message \"Hello, World!\")" + , " (+ 23 42))" + , "#+END_SRC" ] =?> + let classes = [ "commonlisp" -- as kate doesn't know emacs-lisp syntax + , "rundoc-block" + ] + params = [ ("rundoc-language", "emacs-lisp") + , ("rundoc-exports", "both") + ] + code' = unlines [ "(progn (message \"Hello, World!\")" + , " (+ 23 42))" ] + in codeBlockWith ("", classes, params) code' , "Example block" =: unlines [ "#+begin_example" -- cgit v1.2.3 From 07694b30184bcf2ed0e2998016df394f47a1996f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:23:23 +0200 Subject: Org reader: Fix parsing of blank lines within blocks Blank lines were parsed as two newlines instead of just one. Thanks to Xiao Hanyu (@xiaohanyu) for pointing this out. 
--- src/Text/Pandoc/Readers/Org.hs | 8 ++------ tests/Tests/Readers/Org.hs | 9 +++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c05ac92d0..0f218d43f 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -357,12 +357,8 @@ rawBlockContent :: BlockProperties -> OrgParser String rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ - choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ - indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) parsedBlockContent blkProps = try $ do diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index a78e8861f..87b0d0c90 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -920,5 +920,14 @@ tests = (unlines [ "fmap id = id" , "fmap (p . q) = (fmap p) . (fmap q)" ]))) + + , "Convert blank lines in blocks to single newlines" =: + unlines [ "#+begin_html" + , "" + , "boring" + , "" + , "#+end_html" + ] =?> + rawBlock "html" "\nboring\n\n" ] ] -- cgit v1.2.3 From c5fd631b550a3b05b60de1684c80387bc46a88cc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 10 May 2014 11:25:20 +0200 Subject: Org reader: Fix block parameter reader, relax constraints The reader produced wrong results for block containing non-letter chars in their parameter arguments. This patch relaxes constraints in that it allows block header arguments to contain any non-space character (except for ']' for inline blocks). Thanks to Xiao Hanyu for noticing this. 
--- src/Text/Pandoc/Readers/Org.hs | 19 +++++++++++++------ tests/Tests/Readers/Org.hs | 12 ++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0f218d43f..2e4a29beb 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -318,7 +318,7 @@ blockHeaderStart :: OrgParser (Int, String) blockHeaderStart = try $ (,) <$> indent <*> blockType where indent = length <$> many spaceChar - blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar) + blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) @@ -422,16 +422,23 @@ rundocBlockClass :: String rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgArgValue +blockOption = try $ (,) <$> orgArgKey <*> orgParamValue + +inlineBlockOption :: OrgParser (String, String) +inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue orgArgKey :: OrgParser String orgArgKey = try $ skipSpaces *> char ':' *> many1 orgArgWordChar -orgArgValue :: OrgParser String -orgArgValue = try $ - skipSpaces *> many1 orgArgWordChar <* skipSpaces +orgParamValue :: OrgParser String +orgParamValue = try $ + skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces + +orgInlineParamValue :: OrgParser String +orgInlineParamValue = try $ + skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" @@ -1067,7 +1074,7 @@ inlineCodeBlock :: OrgParser (F Inlines) inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar - opts <- option [] $ enclosedByPair '[' ']' blockOption + opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption inlineCode <- enclosedByPair '{' '}' (noneOf 
"\n\r") let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 87b0d0c90..4ef7a7731 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -929,5 +929,17 @@ tests = , "#+end_html" ] =?> rawBlock "html" "\nboring\n\n" + + , "Non-letter chars in source block parameters" =: + unlines [ "#+BEGIN_SRC C :tangle xxxx.c :city Zürich" + , "code body" + , "#+END_SRC" + ] =?> + let classes = [ "c", "rundoc-block" ] + params = [ ("rundoc-language", "C") + , ("rundoc-tangle", "xxxx.c") + , ("rundoc-city", "Zürich") + ] + in codeBlockWith ( "", classes, params) "code body\n" ] ] -- cgit v1.2.3 From ceeb701c254c6dc4c054e10dd151d9ef6f751ad7 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 14 May 2014 14:49:30 +0200 Subject: Org reader: support Pandocs citation extension Citations are defined via the "normal citation" syntax used in markdown, with the sole difference that newlines are not allowed between "[...]". This is for consistency, as org-mode generally disallows newlines between square brackets. The extension is turned on by default and can be turned off via the default syntax-extension mechanism, i.e. by specifying "org-citation" as the input format. Move `citeKey` from Readers.Markdown into Parsing The function can be used by other readers, so it is made accessible for all parsers. 
--- src/Text/Pandoc.hs | 2 +- src/Text/Pandoc/Readers/Org.hs | 55 ++++++++++++++++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 22 +++++++++++++++++ 3 files changed, 76 insertions(+), 3 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index dd5bc18f6..130338f0e 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = pandocExtensions +getDefaultExtensions "org" = Set.fromList [Ext_citations] getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex] getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] @@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode - diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5dbcaee98..86dda2732 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -869,6 +869,7 @@ inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak + , cite , footnote , linkOrImage , anchor @@ -933,6 +934,51 @@ endline = try $ do updateLastPreCharPos return . 
return $ B.space +cite :: OrgParser (F Inlines) +cite = try $ do + guardEnabled Ext_citations + (cs, raw) <- withRaw normalCite + return $ (flip B.cite (B.text raw)) <$> cs + +normalCite :: OrgParser (F [Citation]) +normalCite = try $ char '[' + *> skipSpaces + *> citeList + <* skipSpaces + <* char ']' + +citeList :: OrgParser (F [Citation]) +citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) + +citation :: OrgParser (F Citation) +citation = try $ do + pref <- prefix + (suppress_author, key) <- citeKey + suff <- suffix + return $ do + x <- pref + y <- suff + return $ Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + where + prefix = trimInlinesF . mconcat <$> + manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) + suffix = try $ do + hasSpace <- option False (notFollowedBy nonspaceChar >> return True) + skipSpaces + rest <- trimInlinesF . 
mconcat <$> + many (notFollowedBy (oneOf ";]") *> inline) + return $ if hasSpace + then (B.space <>) <$> rest + else rest + footnote :: OrgParser (F Inlines) footnote = try $ inlineNote <|> referencedNote @@ -1007,7 +1053,7 @@ selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String -linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") applyCustomLinkFormat :: String -> OrgParser (F String) applyCustomLinkFormat link = do @@ -1083,7 +1129,12 @@ inlineCodeBlock = try $ do let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode - where enclosedByPair s e p = char s *> many1Till p (char e) + +enclosedByPair :: Char -- ^ opening char + -> Char -- ^ closing char + -> OrgParser a -- ^ parser + -> OrgParser [a] +enclosedByPair s e p = char s *> many1Till p (char e) emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4ef7a7731..ca97ba348 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -225,6 +225,28 @@ tests = ] ) "echo 'Hello, World'") + + , "Citation" =: + "[@nonexistent]" =?> + let citation = Citation + { citationId = "nonexistent" + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0} + in (para $ cite [citation] "[@nonexistent]") + + , "Citation containing text" =: + "[see @item1 p. 34-35]" =?> + let citation = Citation + { citationId = "item1" + , citationPrefix = [Str "see"] + , citationSuffix = [Space ,Str "p.",Space,Str "34-35"] + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0} + in (para $ cite [citation] "[see @item1 p. 
34-35]") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 3238a2f9191b83864abd682261634a603ec89056 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 20 May 2014 22:29:21 +0200 Subject: Org reader: support for inline LaTeX Inline LaTeX is now accepted and parsed by the org-mode reader. Both, math symbols (like \tau) and LaTeX commands (like \cite{Coffee}), can be used without any further escaping. --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + src/Text/Pandoc/Readers/Org.hs | 32 +++++++++++++++++++++++++++++++- tests/Tests/Readers/Org.hs | 27 +++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6f870318f..7fc587882 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -31,6 +31,7 @@ Conversion of LaTeX to 'Pandoc' document. module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, rawLaTeXBlock, + inlineCommand, handleIncludes ) where diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 86dda2732..c3ea8d7c2 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,12 +39,15 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString , updateLastStrPos ) +import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) +import Text.Parsec.Pos (updatePosString) +import Text.TeXMath (texMathToPandoc, DisplayType(..)) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Arrow (first) -import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) +import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default @@ -886,6 +889,7 @@ inline = , verbatim , 
subscript , superscript + , inlineLaTeX , symbol ] <* (guard =<< newlinesCountWithinLimits) "inline" @@ -1351,3 +1355,29 @@ simpleSubOrSuperString = try $ , mappend <$> option [] ((:[]) <$> oneOf "+-") <*> many1 alphaNum ] + +inlineLaTeX :: OrgParser (F Inlines) +inlineLaTeX = try $ do + cmd <- inlineLaTeXCommand + maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd + where + parseAsMath :: String -> Maybe Inlines + parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs + + parseAsInlineLaTeX :: String -> Maybe Inlines + parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs + + state :: ParserState + state = def{ stateOptions = def{ readerParseRaw = True }} + +maybeRight :: Either a b -> Maybe b +maybeRight = either (const Nothing) Just + +inlineLaTeXCommand :: OrgParser String +inlineLaTeXCommand = try $ do + rest <- getInput + pos <- getPosition + case runParser rawLaTeXInline def "source" rest of + Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest) + <* (setPosition $ updatePosString pos cs) + _ -> mzero diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ca97ba348..4ed77887f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -247,6 +247,33 @@ tests = , citationNoteNum = 0 , citationHash = 0} in (para $ cite [citation] "[see @item1 p. 34-35]") + + , "Inline LaTeX symbol" =: + "\\dots" =?> + para "…" + + , "Inline LaTeX command" =: + "\\textit{Emphasised}" =?> + para (emph "Emphasised") + + , "Inline LaTeX math symbol" =: + "\\tau" =?> + para (emph "τ") + + , "Unknown inline LaTeX command" =: + "\\notacommand{foo}" =?> + para (rawInline "latex" "\\notacommand{foo}") + + , "LaTeX citation" =: + "\\cite{Coffee}" =?> + let citation = Citation + { citationId = "Coffee" + , citationPrefix = [] + , citationSuffix = [] + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0} + in (para . 
cite [citation] $ rawInline "latex" "\\cite{Coffee}") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 2e80613451651ec8f1945daa7540168a427f0507 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 May 2014 11:59:28 -0700 Subject: Markdown reader: inline math must have nonspace before final `$`. Closes #1313. --- src/Text/Pandoc/Parsing.hs | 10 ++++++---- tests/markdown-reader-more.native | 2 ++ tests/markdown-reader-more.txt | 4 ++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4cd6591c0..8bc042e28 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -464,11 +464,13 @@ mathInlineWith :: String -> String -> Parser [Char] st String mathInlineWith op cl = try $ do string op notFollowedBy space - words' <- many1Till (count 1 (noneOf "\n\\") + words' <- many1Till (count 1 (noneOf " \t\n\\") <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) - <|> count 1 newline <* notFollowedBy' blankline - *> return " ") - (try $ string cl) + <|> do (blankline <* notFollowedBy' blankline) <|> + (oneOf " \t" <* skipMany (oneOf " \t")) + notFollowedBy (char '$') + return " " + ) (try $ string cl) notFollowedBy digit -- to prevent capture of $5 return $ concat words' diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 0d74c233d..b4713bc93 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -16,6 +16,8 @@ ,Header 3 ("my-header",[],[]) [Str "my",Space,Str "header"] ,Header 2 ("in-math",[],[]) [Str "$",Space,Str "in",Space,Str "math"] ,Para [Math InlineMath "\\$2 + \\$3"] +,Para [Str "This",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "math:"] +,Para [Str "$PATH",Space,Str "90",Space,Str "$PATH"] ,Header 2 ("commented-out-list-item",[],[]) [Str "Commented-out",Space,Str "list",Space,Str "item"] ,BulletList [[Plain [Str "one",Space,RawInline (Format "html") ""]] 
diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index 739543bfd..4cd69c9d8 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -58,6 +58,10 @@ $\$2 + \$3$ +This should not be math: + +$PATH 90 $PATH + ## Commented-out list item - one -- cgit v1.2.3 From 0bd8d5f86b4733fdce89deb78471bbd7daa45f9e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 23:39:15 -0700 Subject: Updated tests for c8bc70a6bb12e0fbfadadb9480efafb3be4f11a5. --- tests/lhs-test.latex | 1 + tests/lhs-test.latex+lhs | 1 + 2 files changed, 2 insertions(+) (limited to 'tests') diff --git a/tests/lhs-test.latex b/tests/lhs-test.latex index 0ed6640bd..6600608fe 100644 --- a/tests/lhs-test.latex +++ b/tests/lhs-test.latex @@ -63,6 +63,7 @@ \setlength{\emergencystretch}{3em} % prevent overfull lines \setcounter{secnumdepth}{0} +\date{} \begin{document} diff --git a/tests/lhs-test.latex+lhs b/tests/lhs-test.latex+lhs index 67841d54b..d6cb7c497 100644 --- a/tests/lhs-test.latex+lhs +++ b/tests/lhs-test.latex+lhs @@ -44,6 +44,7 @@ \setlength{\emergencystretch}{3em} % prevent overfull lines \setcounter{secnumdepth}{0} +\date{} \begin{document} -- cgit v1.2.3 From f928e4c8dce694c47e0cda35f82bfadd55c0e4b2 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 15 Jun 2014 14:55:17 -0400 Subject: Add DocX automated tests. Note this makes use of input and output files in the tests/ dir. 
--- tests/Tests/Readers/DocX.hs | 68 ++++++++++++++++++++++++++++ tests/docx.block_quotes.docx | Bin 0 -> 41855 bytes tests/docx.block_quotes_parse_indent.native | 8 ++++ tests/docx.headers.docx | Bin 0 -> 30298 bytes tests/docx.headers.native | 5 ++ tests/docx.image.docx | Bin 0 -> 109656 bytes tests/docx.image_no_embed.native | 2 + tests/docx.inline_formatting.docx | Bin 0 -> 32322 bytes tests/docx.inline_formatting.native | 5 ++ tests/docx.links.docx | Bin 0 -> 41751 bytes tests/docx.links.native | 6 +++ tests/docx.lists.docx | Bin 0 -> 31775 bytes tests/docx.lists.native | 18 ++++++++ tests/docx.notes.docx | Bin 0 -> 30734 bytes tests/docx.notes.native | 2 + tests/docx.tables.docx | Bin 0 -> 42792 bytes tests/docx.tables.native | 24 ++++++++++ tests/docx.unicode.docx | Bin 0 -> 13098 bytes tests/docx.unicode.native | 1 + 19 files changed, 139 insertions(+) create mode 100644 tests/Tests/Readers/DocX.hs create mode 100644 tests/docx.block_quotes.docx create mode 100644 tests/docx.block_quotes_parse_indent.native create mode 100644 tests/docx.headers.docx create mode 100644 tests/docx.headers.native create mode 100644 tests/docx.image.docx create mode 100644 tests/docx.image_no_embed.native create mode 100644 tests/docx.inline_formatting.docx create mode 100644 tests/docx.inline_formatting.native create mode 100644 tests/docx.links.docx create mode 100644 tests/docx.links.native create mode 100644 tests/docx.lists.docx create mode 100644 tests/docx.lists.native create mode 100644 tests/docx.notes.docx create mode 100644 tests/docx.notes.native create mode 100644 tests/docx.tables.docx create mode 100644 tests/docx.tables.native create mode 100644 tests/docx.unicode.docx create mode 100644 tests/docx.unicode.native (limited to 'tests') diff --git a/tests/Tests/Readers/DocX.hs b/tests/Tests/Readers/DocX.hs new file mode 100644 index 000000000..f4564ea1d --- /dev/null +++ b/tests/Tests/Readers/DocX.hs @@ -0,0 +1,68 @@ +module Tests.Readers.DocX (tests) where + 
+import Text.Pandoc.Options +import Text.Pandoc.Readers.Native +import Text.Pandoc.Definition +import Tests.Helpers +import Test.Framework +import qualified Data.ByteString.Lazy as B +import Text.Pandoc.Readers.DocX + +compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) +compareOutput docxFile nativeFile = do + df <- B.readFile docxFile + nf <- Prelude.readFile nativeFile + return $ (readDocX def df, readNative nf) + +testCompare' :: String -> FilePath -> FilePath -> IO Test +testCompare' name docxFile nativeFile = do + (dp, np) <- compareOutput docxFile nativeFile + return $ test id name (dp, np) + +testCompare :: String -> FilePath -> FilePath -> Test +testCompare name docxFile nativeFile = + buildTest $ testCompare' name docxFile nativeFile + + +tests :: [Test] +tests = [ testGroup "inlines" + [ testCompare + "font formatting" + "docx.inline_formatting.docx" + "docx.inline_formatting.native" + , testCompare + "hyperlinks" + "docx.links.docx" + "docx.links.native" + , testCompare + "inline image with reference output" + "docx.image.docx" + "docx.image_no_embed.native" + , testCompare + "handling unicode input" + "docx.unicode.docx" + "docx.unicode.native"] + , testGroup "blocks" + [ testCompare + "headers" + "docx.headers.docx" + "docx.headers.native" + , testCompare + "lists" + "docx.lists.docx" + "docx.lists.native" + , testCompare + "footnotes and endnotes" + "docx.notes.docx" + "docx.notes.native" + , testCompare + "blockquotes (parsing indent as blockquote)" + "docx.block_quotes.docx" + "docx.block_quotes_parse_indent.native" + , testCompare + "tables" + "docx.tables.docx" + "docx.tables.native" + ] + ] + diff --git a/tests/docx.block_quotes.docx b/tests/docx.block_quotes.docx new file mode 100644 index 000000000..729ae1f43 Binary files /dev/null and b/tests/docx.block_quotes.docx differ diff --git a/tests/docx.block_quotes_parse_indent.native b/tests/docx.block_quotes_parse_indent.native new file mode 100644 index 000000000..da1cef110 --- /dev/null 
+++ b/tests/docx.block_quotes_parse_indent.native @@ -0,0 +1,8 @@ +[Header 2 ("",[],[]) [Str "Some",Space,Str "block",Space,Str "quotes,",Space,Str "in",Space,Str "different",Space,Str "ways"] +,Para [Str "This",Space,Str "is",Space,Str "the",Space,Str "proper",Space,Str "way,",Space,Str "with",Space,Str "a",Space,Str "style"] +,BlockQuote + [Para [Str "I",Space,Str "don\8217t",Space,Str "know",Space,Str "why",Space,Str "this",Space,Str "would",Space,Str "be",Space,Str "in",Space,Str "italics,",Space,Str "but",Space,Str "so",Space,Str "it",Space,Str "appears",Space,Str "to",Space,Str "be",Space,Str "on",Space,Str "my",Space,Str "screen."]] +,Para [Str "And",Space,Str "this",Space,Str "is",Space,Str "the",Space,Str "way",Space,Str "that",Space,Str "most",Space,Str "people",Space,Str "do",Space,Str "it:"] +,BlockQuote + [Para [Str "I",Space,Str "just",Space,Str "indented",Space,Str "this,",Space,Str "so",Space,Str "it",Space,Str "looks",Space,Str "like",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "I",Space,Str "think",Space,Str "this",Space,Str "is",Space,Str "how",Space,Str "most",Space,Str "people",Space,Str "do",Space,Str "block",Space,Str "quotes",Space,Str "in",Space,Str "their",Space,Str "documents."]] +,Para [Str "And",Space,Str "back",Space,Str "to",Space,Str "the",Space,Str "normal",Space,Str "style."]] diff --git a/tests/docx.headers.docx b/tests/docx.headers.docx new file mode 100644 index 000000000..630b6bfc5 Binary files /dev/null and b/tests/docx.headers.docx differ diff --git a/tests/docx.headers.native b/tests/docx.headers.native new file mode 100644 index 000000000..e4d4a4781 --- /dev/null +++ b/tests/docx.headers.native @@ -0,0 +1,5 @@ +[Header 1 ("",[],[]) [Str "A",Space,Str "Test",Space,Str "of",Space,Str "Headers"] +,Header 2 ("",[],[]) [Str "Second",Space,Str "Level"] +,Para [Str "Some",Space,Str "plain",Space,Str "text."] +,Header 3 ("",[],[]) [Str "Third",Space,Str "level"] +,Para [Str "Some",Space,Str "more",Space,Str 
"plain",Space,Str "text."]] diff --git a/tests/docx.image.docx b/tests/docx.image.docx new file mode 100644 index 000000000..060f2b204 Binary files /dev/null and b/tests/docx.image.docx differ diff --git a/tests/docx.image_no_embed.native b/tests/docx.image_no_embed.native new file mode 100644 index 000000000..18debf135 --- /dev/null +++ b/tests/docx.image_no_embed.native @@ -0,0 +1,2 @@ +[Header 2 ("",[],[]) [Str "An",Space,Str "image"] +,Para [Image [] ("word/media/image1.jpeg","")]] diff --git a/tests/docx.inline_formatting.docx b/tests/docx.inline_formatting.docx new file mode 100644 index 000000000..eccf26425 Binary files /dev/null and b/tests/docx.inline_formatting.docx differ diff --git a/tests/docx.inline_formatting.native b/tests/docx.inline_formatting.native new file mode 100644 index 000000000..dc8a3d19a --- /dev/null +++ b/tests/docx.inline_formatting.native @@ -0,0 +1,5 @@ +[Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."] +,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."] +,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Span ("",[],[("underline","single")]) [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."] +,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."] +,Para [Str "A",Space,Str "line",LineBreak,Str "break."]] diff --git a/tests/docx.links.docx b/tests/docx.links.docx new file mode 100644 index 000000000..10ec62fd7 Binary files /dev/null and b/tests/docx.links.docx differ diff --git a/tests/docx.links.native b/tests/docx.links.native new file mode 100644 index 000000000..98768de5a --- /dev/null +++ 
b/tests/docx.links.native @@ -0,0 +1,6 @@ +[Header 2 ("",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"] +,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://google.com",""),Space,Str "to",Space,Str "a",Space,Str "popular",Space,Str "website."] +,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#_A_section_for",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."] +,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#my_bookmark",""),Space,Str "to",Space,Str "a",Space,Str "bookmark."] +,Header 2 ("_A_section_for",[],[]) [Str "A",Space,Str "section",Space,Str "for",Space,Str "testing",Space,Str "link",Space,Str "targets"] +,Para [Str "A",Space,Str "bookmark",Space,Str "right",Space,Span ("my_bookmark",["anchor"],[]) [],Str "here"]] diff --git a/tests/docx.lists.docx b/tests/docx.lists.docx new file mode 100644 index 000000000..bf7fd8ae4 Binary files /dev/null and b/tests/docx.lists.docx differ diff --git a/tests/docx.lists.native b/tests/docx.lists.native new file mode 100644 index 000000000..e46bc140b --- /dev/null +++ b/tests/docx.lists.native @@ -0,0 +1,18 @@ +[Header 2 ("",[],[]) [Str "Some",Space,Str "nested",Space,Str "lists"] +,OrderedList (1,Decimal,Period) + [[Para [Str "one"]] + ,[Para [Str "two"] + ,OrderedList (1,LowerAlpha,DefaultDelim) + [[Para [Str "a"]] + ,[Para [Str "b"]]]]] +,BulletList + [[Para [Str "one"]] + ,[Para [Str "two"] + ,BulletList + [[Para [Str "three"] + ,BulletList + [[Para [Str "four"] + ,Para [Str "Sub",Space,Str "paragraph"]]]]]] + ,[Para [Str "Same",Space,Str "list"]]] +,BulletList + [[Para [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]] diff --git a/tests/docx.notes.docx b/tests/docx.notes.docx new file mode 100644 index 000000000..eb6fa12d4 Binary files /dev/null and b/tests/docx.notes.docx differ diff --git 
a/tests/docx.notes.native b/tests/docx.notes.native new file mode 100644 index 000000000..1e9b6bba4 --- /dev/null +++ b/tests/docx.notes.native @@ -0,0 +1,2 @@ +[Header 2 ("",[],[]) [Str "A",Space,Str "footnote"] +,Para [Str "Test",Space,Str "footnote.",Note [Para [Space,Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Space,Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] diff --git a/tests/docx.tables.docx b/tests/docx.tables.docx new file mode 100644 index 000000000..7dcff8d35 Binary files /dev/null and b/tests/docx.tables.docx differ diff --git a/tests/docx.tables.native b/tests/docx.tables.native new file mode 100644 index 000000000..8dbaabda7 --- /dev/null +++ b/tests/docx.tables.native @@ -0,0 +1,24 @@ +[Header 2 ("",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] +,Table [] [AlignDefault,AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0,0.0] + [[Para [Str "Name"]] + ,[Para [Str "Game"]] + ,[Para [Str "Fame"]] + ,[Para [Str "Blame"]]] + [[[Para [Str "Lebron",Space,Str "James"]] + ,[Para [Str "Basketball"]] + ,[Para [Str "Very",Space,Str "High"]] + ,[Para [Str "Leaving",Space,Str "Cleveland"]]] + ,[[Para [Str "Ryan",Space,Str "Braun"]] + ,[Para [Str "Baseball"]] + ,[Para [Str "Moderate"]] + ,[Para [Str "Steroids"]]] + ,[[Para [Str "Russell",Space,Str "Wilson"]] + ,[Para [Str "Football"]] + ,[Para [Str "High"]] + ,[Para [Str "Tacky",Space,Str "uniform"]]]] +,Table [] [AlignDefault,AlignDefault] [0.0,0.0] + [] + [[[Para [Str "Sinple"]] + ,[Para [Str "Table"]]] + ,[[Para [Str "Without"]] + ,[Para [Str "Header"]]]]] diff --git a/tests/docx.unicode.docx b/tests/docx.unicode.docx new file mode 100644 index 000000000..78d0107a1 Binary files /dev/null and b/tests/docx.unicode.docx differ diff --git a/tests/docx.unicode.native 
b/tests/docx.unicode.native new file mode 100644 index 000000000..e636355c7 --- /dev/null +++ b/tests/docx.unicode.native @@ -0,0 +1 @@ +[Para [Str "Hello,",Space,Str "\19990\30028.",Space,Str "This",Space,Str "costs",Space,Str "\8364\&10."]] -- cgit v1.2.3 From 6b939134e72ca736f6d1f95527c1a7cefb2c0837 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 15 Jun 2014 15:00:04 -0400 Subject: Add DocX tests to test-pandoc.hs --- tests/test-pandoc.hs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tests') diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 80d672589..9f9d85147 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -9,6 +9,7 @@ import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown import qualified Tests.Readers.Org import qualified Tests.Readers.RST +import qualified Tests.Readers.DocX import qualified Tests.Writers.ConTeXt import qualified Tests.Writers.LaTeX import qualified Tests.Writers.HTML @@ -38,6 +39,8 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Markdown" Tests.Readers.Markdown.tests , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests + , testGroup "DocX" Tests.Readers.DocX.tests + ] ] -- cgit v1.2.3 From 9090c549aab29552ee29e88ab2d38995a42a421e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 15:12:10 -0700 Subject: Added failing HTML reader test for table. See #1341. 
--- tests/html-reader.html | 20 +++++++++++++++++++- tests/html-reader.native | 13 ++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/tests/html-reader.html b/tests/html-reader.html index 1e104b00f..d059d7b4b 100644 --- a/tests/html-reader.html +++ b/tests/html-reader.html @@ -431,6 +431,24 @@ An e-mail address: nobody [at] nowhere.net Trailing space text text Leading spaces Trailing spaces text - +Tables + First + + + + + + + + + + + + + + + +X Y Z 1 2 3 4 5 6 + diff --git a/tests/html-reader.native b/tests/html-reader.native index 8fbecf34f..85866aef1 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -308,4 +308,15 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Para [Str "text",Space,Emph [Str "Leading",Space,Str "space"]] ,Para [Emph [Str "Trailing",Space,Str "space"],Space,Str "text"] ,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]] -,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"]] +,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"] +,Header 1 ("",[],[]) [Str "Tables"] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.3333333333333333,0.3333333333333333,0.3333333333333333] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]]] -- cgit v1.2.3 From e7d6b2e6ad26d90b1c07c57a6ad6a43064c52a28 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 15:15:35 -0700 Subject: Updated HTML reader table test - should be simple table. 
--- tests/html-reader.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/html-reader.native b/tests/html-reader.native index 85866aef1..c6ed36910 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -310,7 +310,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]] ,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"] ,Header 1 ("",[],[]) [Str "Tables"] -,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.3333333333333333,0.3333333333333333,0.3333333333333333] +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [[Plain [Str "X"]] ,[Plain [Str "Y"]] ,[Plain [Str "Z"]]] -- cgit v1.2.3 From 78ee2416d105bd25337819a49835623a8a296224 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 22:03:26 -0700 Subject: Org reader: make tildes create inline code. Closes #1345. Also relabeled 'code' and 'verbatim' parsers to accord with the org-mode manual. I'm not sure what the distinction between code and verbatim is supposed to be, but I'm pretty sure both should be represented as Code inlines in pandoc. The previous behavior resulted in the text not appearing in any output format. --- src/Text/Pandoc/Readers/Org.hs | 8 ++++---- tests/Tests/Readers/Org.hs | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0e872abf0..7a35e2ca0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1148,11 +1148,11 @@ strikeout = fmap B.strikeout <$> emphasisBetween '+' underline :: OrgParser (F Inlines) underline = fmap B.strong <$> emphasisBetween '_' -code :: OrgParser (F Inlines) -code = return . B.code <$> verbatimBetween '=' - verbatim :: OrgParser (F Inlines) -verbatim = return . B.rawInline "" <$> verbatimBetween '~' +verbatim = return . 
B.code <$> verbatimBetween '=' + +code :: OrgParser (F Inlines) +code = return . B.code <$> verbatimBetween '~' subscript :: OrgParser (F Inlines) subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4ed77887f..f8240ca3d 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -50,13 +50,13 @@ tests = "+Kill Bill+" =?> para (strikeout . spcSep $ [ "Kill", "Bill" ]) - , "Code" =: + , "Verbatim" =: "=Robot.rock()=" =?> para (code "Robot.rock()") - , "Verbatim" =: + , "Code" =: "~word for word~" =?> - para (rawInline "" "word for word") + para (code "word for word") , "Math $..$" =: "$E=mc^2$" =?> -- cgit v1.2.3 From bbe99003f8d25dc65ab12851907ecd5d9aad746c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 22:44:40 -0700 Subject: Naming: Use Docx instead of DocX. For consistency with the existing writer. --- pandoc.cabal | 8 +- src/Text/Pandoc.hs | 6 +- src/Text/Pandoc/Readers/DocX.hs | 479 --------------------------- src/Text/Pandoc/Readers/DocX/Lists.hs | 208 ------------ src/Text/Pandoc/Readers/DocX/Parse.hs | 604 ---------------------------------- src/Text/Pandoc/Readers/Docx.hs | 479 +++++++++++++++++++++++++++ src/Text/Pandoc/Readers/Docx/Lists.hs | 208 ++++++++++++ src/Text/Pandoc/Readers/Docx/Parse.hs | 604 ++++++++++++++++++++++++++++++++++ tests/Tests/Readers/DocX.hs | 68 ---- tests/Tests/Readers/Docx.hs | 68 ++++ tests/test-pandoc.hs | 4 +- 11 files changed, 1368 insertions(+), 1368 deletions(-) delete mode 100644 src/Text/Pandoc/Readers/DocX.hs delete mode 100644 src/Text/Pandoc/Readers/DocX/Lists.hs delete mode 100644 src/Text/Pandoc/Readers/DocX/Parse.hs create mode 100644 src/Text/Pandoc/Readers/Docx.hs create mode 100644 src/Text/Pandoc/Readers/Docx/Lists.hs create mode 100644 src/Text/Pandoc/Readers/Docx/Parse.hs delete mode 100644 tests/Tests/Readers/DocX.hs create mode 100644 tests/Tests/Readers/Docx.hs (limited to 'tests') 
diff --git a/pandoc.cabal b/pandoc.cabal index a6126a331..5898af5ad 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -293,7 +293,7 @@ Library Text.Pandoc.Readers.Textile, Text.Pandoc.Readers.Native, Text.Pandoc.Readers.Haddock, - Text.Pandoc.Readers.DocX, + Text.Pandoc.Readers.Docx, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, Text.Pandoc.Writers.OPML, @@ -324,8 +324,8 @@ Library Text.Pandoc.Process Other-Modules: Text.Pandoc.Readers.Haddock.Lex, Text.Pandoc.Readers.Haddock.Parse, - Text.Pandoc.Readers.DocX.Lists, - Text.Pandoc.Readers.DocX.Parse, + Text.Pandoc.Readers.Docx.Lists, + Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, @@ -411,7 +411,7 @@ Test-Suite test-pandoc Tests.Readers.Markdown Tests.Readers.Org Tests.Readers.RST - Tests.Readers.DocX + Tests.Readers.Docx Tests.Writers.Native Tests.Writers.ConTeXt Tests.Writers.HTML diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index aff471a3c..45c2f453b 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -63,7 +63,7 @@ module Text.Pandoc , writers -- * Readers: converting /to/ Pandoc format , Reader (..) 
- , readDocX + , readDocx , readMarkdown , readMediaWiki , readRST @@ -127,7 +127,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock -import Text.Pandoc.Readers.DocX +import Text.Pandoc.Readers.Docx import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST @@ -222,7 +222,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("html" , mkStringReader readHtml) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) - ,("docx" , mkBSReader readDocX) + ,("docx" , mkBSReader readDocx) ] data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) diff --git a/src/Text/Pandoc/Readers/DocX.hs b/src/Text/Pandoc/Readers/DocX.hs deleted file mode 100644 index 976e2e271..000000000 --- a/src/Text/Pandoc/Readers/DocX.hs +++ /dev/null @@ -1,479 +0,0 @@ -{- -Copyright (C) 2014 Jesse Rosenthal - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.DocX - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Conversion of DocX type (defined in Text.Pandoc.Readers.DocX.Parse) -to 'Pandoc' document. 
-} - -{- -Current state of implementation of DocX entities ([x] means -implemented, [-] means partially implemented): - -* Blocks - - - [X] Para - - [X] CodeBlock (styled with `SourceCode`) - - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally, - indented) - - [X] OrderedList - - [X] BulletList - - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`) - - [X] Header (styled with `Heading#`) - - [ ] HorizontalRule - - [-] Table (column widths and alignments not yet implemented) - -* Inlines - - - [X] Str - - [X] Emph (From italics. `underline` currently read as span. In - future, it might optionally be emph as well) - - [X] Strong - - [X] Strikeout - - [X] Superscript - - [X] Subscript - - [X] SmallCaps - - [ ] Quoted - - [ ] Cite - - [X] Code (styled with `VerbatimChar`) - - [X] Space - - [X] LineBreak (these are invisible in Word: entered with Shift-Return) - - [ ] Math - - [X] Link (links to an arbitrary bookmark create a span with the target as - id and "anchor" class) - - [-] Image (Links to path in archive. Future option for - data-encoded URI likely.) - - [X] Note (Footnotes and Endnotes are silently combined.) 
--} - -module Text.Pandoc.Readers.DocX - ( readDocX - ) where - -import Codec.Archive.Zip -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Builder (text, toList) -import Text.Pandoc.Generic (bottomUp) -import Text.Pandoc.MIME (getMimeType) -import Text.Pandoc.UTF8 (toString) -import Text.Pandoc.Readers.DocX.Parse -import Text.Pandoc.Readers.DocX.Lists -import Data.Maybe (mapMaybe, isJust, fromJust) -import Data.List (delete, isPrefixOf, (\\), intersect) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as B -import Data.ByteString.Base64 (encode) -import System.FilePath (combine) - -readDocX :: ReaderOptions - -> B.ByteString - -> Pandoc -readDocX opts bytes = - case archiveToDocX (toArchive bytes) of - Just docx -> Pandoc nullMeta (docxToBlocks opts docx) - Nothing -> error $ "couldn't parse docx file" - -runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) -runStyleToSpanAttr rPr = ("", - mapMaybe id [ - if isBold rPr then (Just "strong") else Nothing, - if isItalic rPr then (Just "emph") else Nothing, - if isSmallCaps rPr then (Just "smallcaps") else Nothing, - if isStrike rPr then (Just "strike") else Nothing, - if isSuperScript rPr then (Just "superscript") else Nothing, - if isSubScript rPr then (Just "subscript") else Nothing, - rStyle rPr], - case underline rPr of - Just fmt -> [("underline", fmt)] - _ -> [] - ) - -parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) -parStyleToDivAttr pPr = ("", - pStyle pPr, - case indent pPr of - Just n -> [("indent", (show n))] - Nothing -> [] - ) - -strToInlines :: String -> [Inline] -strToInlines = toList . 
text - -codeSpans :: [String] -codeSpans = ["VerbatimChar"] - -blockQuoteDivs :: [String] -blockQuoteDivs = ["Quote", "BlockQuote"] - -codeDivs :: [String] -codeDivs = ["SourceCode"] - -runElemToInlines :: RunElem -> [Inline] -runElemToInlines (TextRun s) = strToInlines s -runElemToInlines (LnBrk) = [LineBreak] - -runElemToString :: RunElem -> String -runElemToString (TextRun s) = s -runElemToString (LnBrk) = ['\n'] - -runElemsToString :: [RunElem] -> String -runElemsToString = concatMap runElemToString - -strNormalize :: [Inline] -> [Inline] -strNormalize [] = [] -strNormalize (Str "" : ils) = strNormalize ils -strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) -strNormalize (il:ils) = il : (strNormalize ils) - -runToInlines :: ReaderOptions -> DocX -> Run -> [Inline] -runToInlines _ _ (Run rs runElems) - | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = - case runStyleToSpanAttr rs == ("", [], []) of - True -> [Str (runElemsToString runElems)] - False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] - | otherwise = case runStyleToSpanAttr rs == ("", [], []) of - True -> concatMap runElemToInlines runElems - False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] -runToInlines opts docx@(DocX _ notes _ _ _ ) (Footnote fnId) = - case (getFootNote fnId notes) of - Just bodyParts -> - [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] - Nothing -> - [Note [Div ("", ["footnote"], []) []]] -runToInlines opts docx@(DocX _ notes _ _ _) (Endnote fnId) = - case (getEndNote fnId notes) of - Just bodyParts -> - [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] - Nothing -> - [Note [Div ("", ["endnote"], []) []]] - -parPartToInlines :: ReaderOptions -> DocX -> ParPart -> [Inline] -parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines _ _ (BookMark _ anchor) = - [Span (anchor, ["anchor"], []) []] 
-parPartToInlines _ (DocX _ _ _ rels _) (Drawing relid) = - case lookupRelationship relid rels of - Just target -> [Image [] (combine "word" target, "")] - Nothing -> [Image [] ("", "")] -parPartToInlines opts docx (InternalHyperLink anchor runs) = - [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] -parPartToInlines opts docx@(DocX _ _ _ rels _) (ExternalHyperLink relid runs) = - case lookupRelationship relid rels of - Just target -> - [Link (concatMap (runToInlines opts docx) runs) (target, "")] - Nothing -> - [Link (concatMap (runToInlines opts docx) runs) ("", "")] - -isAnchorSpan :: Inline -> Bool -isAnchorSpan (Span (ident, classes, kvs) ils) = - (not . null) ident && - classes == ["anchor"] && - null kvs && - null ils -isAnchorSpan _ = False - -dummyAnchors :: [String] -dummyAnchors = ["_GoBack"] - -makeHeaderAnchors :: Block -> Block -makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = - case filter isAnchorSpan ils of - [] -> h - (x@(Span (ident, _, _) _) : xs) -> - case ident `elem` dummyAnchors of - True -> h - False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) - _ -> h -makeHeaderAnchors blk = blk - - -parPartsToInlines :: ReaderOptions -> DocX -> [ParPart] -> [Inline] -parPartsToInlines opts docx parparts = - -- - -- We're going to skip data-uri's for now. It should be an option, - -- not mandatory. 
- -- - --bottomUp (makeImagesSelfContained docx) $ - bottomUp spanCorrect $ - bottomUp spanTrim $ - bottomUp spanReduce $ - concatMap (parPartToInlines opts docx) parparts - -cellToBlocks :: ReaderOptions -> DocX -> Cell -> [Block] -cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps - -rowToBlocksList :: ReaderOptions -> DocX -> Row -> [[Block]] -rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells - -bodyPartToBlock :: ReaderOptions -> DocX -> BodyPart -> Block -bodyPartToBlock opts docx (Paragraph pPr parparts) = - Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] -bodyPartToBlock opts docx@(DocX _ _ numbering _ _) (ListItem pPr numId lvl parparts) = - let - kvs = case lookupLevel numId lvl numbering of - Just (_, fmt, txt, Just start) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - , ("start", (show start)) - ] - - Just (_, fmt, txt, Nothing) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - ] - Nothing -> [] - in - Div - ("", ["list-item"], kvs) - [bodyPartToBlock opts docx (Paragraph pPr parparts)] -bodyPartToBlock _ _ (Tbl _ _ _ []) = - Para [] -bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = - let caption = strToInlines cap - (hdr, rows) = case firstRowFormatting look of - True -> (Just r, rs) - False -> (Nothing, r:rs) - hdrCells = case hdr of - Just r' -> rowToBlocksList opts docx r' - Nothing -> [] - cells = map (rowToBlocksList opts docx) rows - - size = case null hdrCells of - True -> length $ head cells - False -> length $ hdrCells - -- - -- The two following variables (horizontal column alignment and - -- relative column widths) go to the default at the - -- moment. Width information is in the TblGrid field of the Tbl, - -- so should be possible. Alignment might be more difficult, - -- since there doesn't seem to be a column entity in docx. 
- alignments = take size (repeat AlignDefault) - widths = take size (repeat 0) :: [Double] - in - Table caption alignments widths hdrCells cells - -makeImagesSelfContained :: DocX -> Inline -> Inline -makeImagesSelfContained (DocX _ _ _ _ media) i@(Image alt (uri, title)) = - case lookup uri media of - Just bs -> case getMimeType uri of - Just mime -> let data_uri = - "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) - in - Image alt (data_uri, title) - Nothing -> i - Nothing -> i -makeImagesSelfContained _ inline = inline - -bodyToBlocks :: ReaderOptions -> DocX -> Body -> [Block] -bodyToBlocks opts docx (Body bps) = - bottomUp removeEmptyPars $ - bottomUp strNormalize $ - bottomUp spanRemove $ - bottomUp divRemove $ - map (makeHeaderAnchors) $ - bottomUp divCorrect $ - bottomUp divReduce $ - bottomUp divCorrectPreReduce $ - bottomUp blocksToDefinitions $ - blocksToBullets $ - map (bodyPartToBlock opts docx) bps - -docxToBlocks :: ReaderOptions -> DocX -> [Block] -docxToBlocks opts d@(DocX (Document _ body) _ _ _ _) = bodyToBlocks opts d body - -spanReduce :: [Inline] -> [Inline] -spanReduce [] = [] -spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) - | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) -spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : - s2@(Span (id2, classes2, kvs2) ils2) : - ils) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> s1 : (spanReduce (s2 : ils)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : - ils) -spanReduce (il:ils) = il : (spanReduce ils) - -ilToCode :: Inline -> String -ilToCode (Str s) = s -ilToCode _ = "" - -spanRemove' :: Inline -> [Inline] 
-spanRemove' s@(Span (ident, classes, _) []) - -- "_GoBack" is automatically inserted. We don't want to keep it. - | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] -spanRemove' (Span (_, _, kvs) ils) = - case lookup "underline" kvs of - Just val -> [Span ("", [], [("underline", val)]) ils] - Nothing -> ils -spanRemove' il = [il] - -spanRemove :: [Inline] -> [Inline] -spanRemove = concatMap spanRemove' - -spanTrim' :: Inline -> [Inline] -spanTrim' il@(Span _ []) = [il] -spanTrim' il@(Span attr (il':[])) - | il' == Space = [Span attr [], Space] - | otherwise = [il] -spanTrim' (Span attr ils) - | head ils == Space && last ils == Space = - [Space, Span attr (init $ tail ils), Space] - | head ils == Space = [Space, Span attr (tail ils)] - | last ils == Space = [Span attr (init ils), Space] -spanTrim' il = [il] - -spanTrim :: [Inline] -> [Inline] -spanTrim = concatMap spanTrim' - -spanCorrect' :: Inline -> [Inline] -spanCorrect' (Span ("", [], []) ils) = ils -spanCorrect' (Span (ident, classes, kvs) ils) - | "emph" `elem` classes = - [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] - | "strong" `elem` classes = - [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] - | "smallcaps" `elem` classes = - [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] - | "strike" `elem` classes = - [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] - | "superscript" `elem` classes = - [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] - | "subscript" `elem` classes = - [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] - | (not . 
null) (codeSpans `intersect` classes) = - [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] - | otherwise = - [Span (ident, classes, kvs) ils] -spanCorrect' il = [il] - -spanCorrect :: [Inline] -> [Inline] -spanCorrect = concatMap spanCorrect' - -removeEmptyPars :: [Block] -> [Block] -removeEmptyPars blks = filter (\b -> b /= (Para [])) blks - -divReduce :: [Block] -> [Block] -divReduce [] = [] -divReduce ((Div (id1, classes1, kvs1) blks1) : blks) - | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) -divReduce (d1@(Div (id1, classes1, kvs1) blks1) : - d2@(Div (id2, classes2, kvs2) blks2) : - blks) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> d1 : (divReduce (d2 : blks)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : - blks) -divReduce (blk:blks) = blk : (divReduce blks) - -isHeaderClass :: String -> Maybe Int -isHeaderClass s | "Heading" `isPrefixOf` s = - case reads (drop (length "Heading") s) :: [(Int, String)] of - [] -> Nothing - ((n, "") : []) -> Just n - _ -> Nothing -isHeaderClass _ = Nothing - -findHeaderClass :: [String] -> Maybe Int -findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of - [] -> Nothing - n : _ -> Just n - -blksToInlines :: [Block] -> [Inline] -blksToInlines (Para ils : _) = ils -blksToInlines (Plain ils : _) = ils -blksToInlines _ = [] - -divCorrectPreReduce' :: Block -> [Block] -divCorrectPreReduce' (Div (ident, classes, kvs) blks) - | isJust $ findHeaderClass classes = - let n = fromJust $ findHeaderClass classes - in - [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] - | otherwise = [Div (ident, classes, kvs) 
blks] -divCorrectPreReduce' blk = [blk] - -divCorrectPreReduce :: [Block] -> [Block] -divCorrectPreReduce = concatMap divCorrectPreReduce' - -blkToCode :: Block -> String -blkToCode (Para []) = "" -blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) -blkToCode (Para ((Span (_, classes, _) ils'): ils)) - | (not . null) (codeSpans `intersect` classes) = - (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) -blkToCode _ = "" - -divRemove' :: Block -> [Block] -divRemove' (Div (_, _, kvs) blks) = - case lookup "indent" kvs of - Just val -> [Div ("", [], [("indent", val)]) blks] - Nothing -> blks -divRemove' blk = [blk] - -divRemove :: [Block] -> [Block] -divRemove = concatMap divRemove' - -divCorrect' :: Block -> [Block] -divCorrect' b@(Div (ident, classes, kvs) blks) - | (not . null) (blockQuoteDivs `intersect` classes) = - [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] - | (not . null) (codeDivs `intersect` classes) = - [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] - | otherwise = - case lookup "indent" kvs of - Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] - Just _ -> - [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] - Nothing -> [b] -divCorrect' blk = [blk] - -divCorrect :: [Block] -> [Block] -divCorrect = concatMap divCorrect' diff --git a/src/Text/Pandoc/Readers/DocX/Lists.hs b/src/Text/Pandoc/Readers/DocX/Lists.hs deleted file mode 100644 index b20679261..000000000 --- a/src/Text/Pandoc/Readers/DocX/Lists.hs +++ /dev/null @@ -1,208 +0,0 @@ -{- -Copyright (C) 2014 Jesse Rosenthal - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.DocX.Lists - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Functions for converting flat DocX paragraphs into nested lists. --} - -module Text.Pandoc.Readers.DocX.Lists ( blocksToBullets - , blocksToDefinitions) where - -import Text.Pandoc.JSON -import Text.Pandoc.Shared (trim) -import Control.Monad -import Data.List -import Data.Maybe - -isListItem :: Block -> Bool -isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True -isListItem _ = False - -getLevel :: Block -> Maybe Integer -getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs -getLevel _ = Nothing - -getLevelN :: Block -> Integer -getLevelN b = case getLevel b of - Just n -> n - Nothing -> -1 - -getNumId :: Block -> Maybe Integer -getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs -getNumId _ = Nothing - -getNumIdN :: Block -> Integer -getNumIdN b = case getNumId b of - Just n -> n - Nothing -> -1 - -getText :: Block -> Maybe String -getText (Div (_, _, kvs) _) = lookup "text" kvs -getText _ = Nothing - -data ListType = Itemized | Enumerated ListAttributes - -listStyleMap :: [(String, ListNumberStyle)] -listStyleMap = [("upperLetter", UpperAlpha), - ("lowerLetter", LowerAlpha), - ("upperRoman", UpperRoman), - ("lowerRoman", LowerRoman), - ("decimal", Decimal)] - -listDelimMap :: [(String, ListNumberDelim)] -listDelimMap = [("%1)", OneParen), - ("(%1)", TwoParens), - ("%1.", 
Period)] - -getListType :: Block -> Maybe ListType -getListType b@(Div (_, _, kvs) _) | isListItem b = - let - start = lookup "start" kvs - frmt = lookup "format" kvs - txt = lookup "text" kvs - in - case frmt of - Just "bullet" -> Just Itemized - Just f -> - case txt of - Just t -> Just $ Enumerated ( - read (fromMaybe "1" start) :: Int, - fromMaybe DefaultStyle (lookup f listStyleMap), - fromMaybe DefaultDelim (lookup t listDelimMap)) - Nothing -> Nothing - _ -> Nothing -getListType _ = Nothing - -listParagraphDivs :: [String] -listParagraphDivs = ["ListParagraph"] - --- This is a first stab at going through and attaching meaning to list --- paragraphs, without an item marker, following a list item. We --- assume that these are paragraphs in the same item. - -handleListParagraphs :: [Block] -> [Block] -handleListParagraphs [] = [] -handleListParagraphs ( - (Div attr1@(_, classes1, _) blks1) : - (Div (ident2, classes2, kvs2) blks2) : - blks - ) | "list-item" `elem` classes1 && - not ("list-item" `elem` classes2) && - (not . null) (listParagraphDivs `intersect` classes2) = - -- We don't want to keep this indent. - let newDiv2 = - (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) - in - handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) -handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) - -separateBlocks' :: Block -> [[Block]] -> [[Block]] -separateBlocks' blk ([] : []) = [[blk]] -separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] -separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] --- The following is for the invisible bullet lists. This is how --- pandoc-generated ooxml does multiparagraph item lists. 
-separateBlocks' b acc | liftM trim (getText b) == Just "" = - (init acc) ++ [(last acc) ++ [b]] -separateBlocks' b acc = acc ++ [[b]] - -separateBlocks :: [Block] -> [[Block]] -separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) - -flatToBullets' :: Integer -> [Block] -> [Block] -flatToBullets' _ [] = [] -flatToBullets' num xs@(b : elems) - | getLevelN b == num = b : (flatToBullets' num elems) - | otherwise = - let bNumId = getNumIdN b - bLevel = getLevelN b - (children, remaining) = - span - (\b' -> - ((getLevelN b') > bLevel || - ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) - xs - in - case getListType b of - Just (Enumerated attr) -> - (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) - _ -> - (BulletList (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) - -flatToBullets :: [Block] -> [Block] -flatToBullets elems = flatToBullets' (-1) elems - -blocksToBullets :: [Block] -> [Block] -blocksToBullets blks = - -- bottomUp removeListItemDivs $ - flatToBullets $ (handleListParagraphs blks) - - -plainParaInlines :: Block -> [Inline] -plainParaInlines (Plain ils) = ils -plainParaInlines (Para ils) = ils -plainParaInlines _ = [] - -blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] -blocksToDefinitions' [] acc [] = reverse acc -blocksToDefinitions' defAcc acc [] = - reverse $ (DefinitionList (reverse defAcc)) : acc -blocksToDefinitions' defAcc acc - ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) - | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = - let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - pair = case remainingAttr2 == ("", [], []) of - True -> (concatMap plainParaInlines blks1, [blks2]) - False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) - in - blocksToDefinitions' (pair : defAcc) acc blks -blocksToDefinitions' defAcc acc - ((Div 
(ident2, classes2, kvs2) blks2) : blks) - | (not . null) defAcc && "Definition" `elem` classes2 = - let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - defItems2 = case remainingAttr2 == ("", [], []) of - True -> blks2 - False -> [Div remainingAttr2 blks2] - ((defTerm, defItems):defs) = defAcc - defAcc' = case null defItems of - True -> (defTerm, [defItems2]) : defs - False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs - in - blocksToDefinitions' defAcc' acc blks -blocksToDefinitions' [] acc (b:blks) = - blocksToDefinitions' [] (b:acc) blks -blocksToDefinitions' defAcc acc (b:blks) = - blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks - - -blocksToDefinitions :: [Block] -> [Block] -blocksToDefinitions = blocksToDefinitions' [] [] - - - - diff --git a/src/Text/Pandoc/Readers/DocX/Parse.hs b/src/Text/Pandoc/Readers/DocX/Parse.hs deleted file mode 100644 index d7033d9e8..000000000 --- a/src/Text/Pandoc/Readers/DocX/Parse.hs +++ /dev/null @@ -1,604 +0,0 @@ -{- -Copyright (C) 2014 Jesse Rosenthal - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.DocX.Parse - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Conversion of DocX archive into DocX haskell type --} - - -module Text.Pandoc.Readers.DocX.Parse ( DocX(..) - , Document(..) - , Body(..) - , BodyPart(..) - , TblLook(..) - , ParPart(..) - , Run(..) - , RunElem(..) - , Notes - , Numbering - , Relationship - , Media - , RunStyle(..) - , ParagraphStyle(..) - , Row(..) - , Cell(..) - , getFootNote - , getEndNote - , lookupLevel - , lookupRelationship - , archiveToDocX - ) where -import Codec.Archive.Zip -import Text.XML.Light -import Data.Maybe -import Data.List -import System.FilePath -import Data.Bits ((.|.)) -import qualified Data.ByteString.Lazy as B -import qualified Text.Pandoc.UTF8 as UTF8 - -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - - -type NameSpaces = [(String, String)] - -data DocX = DocX Document Notes Numbering [Relationship] Media - deriving Show - -archiveToDocX :: Archive -> Maybe DocX -archiveToDocX archive = do - let notes = archiveToNotes archive - rels = archiveToRelationships archive - media = archiveToMedia archive - doc <- archiveToDocument archive - numbering <- archiveToNumbering archive - return $ DocX doc notes numbering rels media - -data Document = Document NameSpaces Body - deriving Show - -archiveToDocument :: Archive -> Maybe Document -archiveToDocument zf = do - entry <- findEntryByPath "word/document.xml" zf - docElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) - bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem - body <- elemToBody namespaces bodyElem - return $ Document namespaces body - -type Media = [(FilePath, B.ByteString)] - -filePathIsMedia :: FilePath -> Bool -filePathIsMedia fp = - let (dir, _) = splitFileName fp - in - (dir == "word/media/") - -getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) -getMediaPair zf fp = - case findEntryByPath fp zf of - Just e -> Just (fp, fromEntry e) - Nothing -> Nothing - -archiveToMedia :: Archive -> Media -archiveToMedia zf = - mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) - -data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] - deriving Show - -data Numb = Numb String String -- right now, only a key to an abstract num - deriving Show - -data AbstractNumb = AbstractNumb String [Level] - deriving Show - --- (ilvl, format, string, start) -type Level = (String, String, String, Maybe Integer) - -lookupLevel :: String -> String -> Numbering -> Maybe Level -lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do - absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs - lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs - lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls - return lvl - -numElemToNum :: NameSpaces -> Element -> Maybe Numb -numElemToNum ns element | - qName (elName element) == "num" && - qURI (elName element) == (lookup "w" ns) = do - numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element - absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - return $ Numb numId absNumId -numElemToNum _ _ = Nothing - -absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb -absNumElemToAbsNum ns element | - qName (elName element) == "abstractNum" && - qURI (elName 
element) == (lookup "w" ns) = do - absNumId <- findAttr - (QName "abstractNumId" (lookup "w" ns) (Just "w")) - element - let levelElems = findChildren - (QName "lvl" (lookup "w" ns) (Just "w")) - element - levels = mapMaybe id $ map (levelElemToLevel ns) levelElems - return $ AbstractNumb absNumId levels -absNumElemToAbsNum _ _ = Nothing - -levelElemToLevel :: NameSpaces -> Element -> Maybe Level -levelElemToLevel ns element | - qName (elName element) == "lvl" && - qURI (elName element) == (lookup "w" ns) = do - ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element - fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) - return (ilvl, fmt, txt, start) -levelElemToLevel _ _ = Nothing - -archiveToNumbering :: Archive -> Maybe Numbering -archiveToNumbering zf = - case findEntryByPath "word/numbering.xml" zf of - Nothing -> Just $ Numbering [] [] [] - Just entry -> do - numberingElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) - numElems = findChildren - (QName "num" (lookup "w" namespaces) (Just "w")) - numberingElem - absNumElems = findChildren - (QName "abstractNum" (lookup "w" namespaces) (Just "w")) - numberingElem - nums = mapMaybe id $ map (numElemToNum namespaces) numElems - absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems - return $ Numbering namespaces nums absNums - -data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) - deriving Show - -noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart]) -noteElemToNote ns element - | qName (elName element) `elem` ["endnote", "footnote"] && - qURI (elName element) == (lookup "w" ns) = - do - noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - let bps = map fromJust - $ filter isJust - $ map (elemToBodyPart ns) - $ filterChildrenName (isParOrTbl ns) element - return $ (noteId, bps) -noteElemToNote _ _ = Nothing - -getFootNote :: String -> Notes -> Maybe [BodyPart] -getFootNote s (Notes _ fns _) = fns >>= (lookup s) - -getEndNote :: String -> Notes -> Maybe [BodyPart] -getEndNote s (Notes _ _ ens) = ens >>= (lookup s) - -elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] -elemToNotes ns notetype element - | qName (elName element) == (notetype ++ "s") && - qURI (elName element) == (lookup "w" ns) = - Just $ map fromJust - $ filter isJust - $ map (noteElemToNote ns) - $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element -elemToNotes _ _ _ = Nothing - -archiveToNotes :: Archive -> Notes -archiveToNotes zf = - let fnElem = findEntryByPath "word/footnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) - enElem = findEntryByPath "word/endnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) - fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) - Nothing -> [] - en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) - Nothing -> [] - ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces - fn = fnElem >>= (elemToNotes ns "footnote") - en = enElem >>= (elemToNotes ns "endnote") - in - Notes ns fn en - - -data Relationship = Relationship (RelId, Target) - deriving Show - -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = - lookup relid (map (\(Relationship pair) -> pair) rels) - -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = - let (dir, name) = splitFileName fp - in - (dir == "word/_rels/") && ((takeExtension name) == ".rels") - -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = - do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - -archiveToRelationships :: Archive -> [Relationship] -archiveToRelationships archive = - let relPaths = filter filePathIsRel (filesInArchive archive) - entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths - relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entries - rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems - in - rels - -data Body = Body [BodyPart] - deriving Show - -isParOrTbl :: NameSpaces -> QName -> Bool -isParOrTbl ns q = qName q `elem` ["p", "tbl"] && - qURI q == (lookup "w" ns) - -elemToBody :: NameSpaces -> Element -> Maybe Body -elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = - Just $ Body - $ map fromJust - $ filter isJust - $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element -elemToBody _ _ = Nothing - -isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool -isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && - qURI q == (lookup "w" ns) - -elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) -elemToNumInfo ns element - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) = - do - pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element - numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr - lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - return (numId, lvl) -elemToNumInfo _ _ = Nothing - --- isBookMarkTag :: NameSpaces -> QName -> Bool --- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && --- qURI q == (lookup "w" ns) - --- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark --- parChildrenToBookmark ns (bms : bme : _) --- | qName (elName bms) == "bookmarkStart" && --- qURI (elName bms) == (lookup "w" ns) && --- qName (elName bme) == "bookmarkEnd" && --- qURI (elName bme) == (lookup "w" ns) = do --- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms --- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms --- return $ (bmId, bmName) 
--- parChildrenToBookmark _ _ = Nothing - -elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart -elemToBodyPart ns element - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) = - let parstyle = elemToParagraphStyle ns element - parparts = mapMaybe id - $ map (elemToParPart ns) - $ filterChildrenName (isRunOrLinkOrBookmark ns) element - in - case elemToNumInfo ns element of - Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts - Nothing -> Just $ Paragraph parstyle parparts - | qName (elName element) == "tbl" && - qURI (elName element) == (lookup "w" ns) = - let - caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element - >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w")) - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - grid = case - findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element - of - Just g -> elemToTblGrid ns g - Nothing -> [] - tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element - >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w")) - >>= elemToTblLook ns - in - Just $ Tbl - (fromMaybe "" caption) - grid - (fromMaybe defaultTblLook tblLook) - (mapMaybe (elemToRow ns) (elChildren element)) - | otherwise = Nothing - -elemToTblLook :: NameSpaces -> Element -> Maybe TblLook -elemToTblLook ns element - | qName (elName element) == "tblLook" && - qURI (elName element) == (lookup "w" ns) = - let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element - val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element - firstRowFmt = - case firstRow of - Just "1" -> True - Just _ -> False - Nothing -> case val of - Just bitMask -> testBitMask bitMask 0x020 - Nothing -> False - in - Just $ TblLook{firstRowFormatting = firstRowFmt} -elemToTblLook _ _ = Nothing - -testBitMask :: String -> Int -> Bool -testBitMask bitMaskS n = - case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of - [] -> False - ((n', _) : _) -> ((n' .|. 
n) /= 0) - -data ParagraphStyle = ParagraphStyle { pStyle :: [String] - , indent :: Maybe Integer - } - deriving Show - -defaultParagraphStyle :: ParagraphStyle -defaultParagraphStyle = ParagraphStyle { pStyle = [] - , indent = Nothing - } - -elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle -elemToParagraphStyle ns element = - case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of - Just pPr -> - ParagraphStyle - {pStyle = - mapMaybe id $ - map - (findAttr (QName "val" (lookup "w" ns) (Just "w"))) - (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) - , indent = - findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= - findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= - stringToInteger - } - Nothing -> defaultParagraphStyle - - -data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String [ParPart] - | Tbl String TblGrid TblLook [Row] - - deriving Show - -type TblGrid = [Integer] - -data TblLook = TblLook {firstRowFormatting::Bool} - deriving Show - -defaultTblLook :: TblLook -defaultTblLook = TblLook{firstRowFormatting = False} - -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) - -elemToTblGrid :: NameSpaces -> Element -> TblGrid -elemToTblGrid ns element - | qName (elName element) == "tblGrid" && - qURI (elName element) == (lookup "w" ns) = - let - cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element - in - mapMaybe (\e -> - findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e - >>= stringToInteger - ) - cols -elemToTblGrid _ _ = [] - -data Row = Row [Cell] - deriving Show - - -elemToRow :: NameSpaces -> Element -> Maybe Row -elemToRow ns element - | qName (elName element) == "tr" && - qURI (elName element) == (lookup "w" ns) = - let - cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element - in - Just $ Row (mapMaybe (elemToCell ns) cells) -elemToRow _ _ = Nothing - -data Cell = 
Cell [BodyPart] - deriving Show - -elemToCell :: NameSpaces -> Element -> Maybe Cell -elemToCell ns element - | qName (elName element) == "tc" && - qURI (elName element) == (lookup "w" ns) = - Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element)) -elemToCell _ _ = Nothing - -data ParPart = PlainRun Run - | BookMark BookMarkId Anchor - | InternalHyperLink Anchor [Run] - | ExternalHyperLink RelId [Run] - | Drawing String - deriving Show - -data Run = Run RunStyle [RunElem] - | Footnote String - | Endnote String - deriving Show - -data RunElem = TextRun String | LnBrk - deriving Show - -data RunStyle = RunStyle { isBold :: Bool - , isItalic :: Bool - , isSmallCaps :: Bool - , isStrike :: Bool - , isSuperScript :: Bool - , isSubScript :: Bool - , underline :: Maybe String - , rStyle :: Maybe String } - deriving Show - -defaultRunStyle :: RunStyle -defaultRunStyle = RunStyle { isBold = False - , isItalic = False - , isSmallCaps = False - , isStrike = False - , isSuperScript = False - , isSubScript = False - , underline = Nothing - , rStyle = Nothing - } - -elemToRunStyle :: NameSpaces -> Element -> RunStyle -elemToRunStyle ns element = - case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of - Just rPr -> - RunStyle - { - isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr - , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr - , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr - , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr - , isSuperScript = - (Just "superscript" == - (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")))) - , isSubScript = - (Just "subscript" == - (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")))) - , underline = - findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName 
"val" (lookup "w" ns) (Just "w")) - , rStyle = - findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - } - Nothing -> defaultRunStyle - -elemToRun :: NameSpaces -> Element -> Maybe Run -elemToRun ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - case - findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>= - findAttr (QName "id" (lookup "w" ns) (Just "w")) - of - Just s -> Just $ Footnote s - Nothing -> - case - findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>= - findAttr (QName "id" (lookup "w" ns) (Just "w")) - of - Just s -> Just $ Endnote s - Nothing -> Just $ - Run (elemToRunStyle ns element) - (elemToRunElems ns element) -elemToRun _ _ = Nothing - -elemToRunElem :: NameSpaces -> Element -> Maybe RunElem -elemToRunElem ns element - | qName (elName element) == "t" && - qURI (elName element) == (lookup "w" ns) = - Just $ TextRun (strContent element) - | qName (elName element) == "br" && - qURI (elName element) == (lookup "w" ns) = - Just $ LnBrk - | otherwise = Nothing - - -elemToRunElems :: NameSpaces -> Element -> [RunElem] -elemToRunElems ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - mapMaybe (elemToRunElem ns) (elChildren element) - | otherwise = [] - -elemToDrawing :: NameSpaces -> Element -> Maybe ParPart -elemToDrawing ns element - | qName (elName element) == "drawing" && - qURI (elName element) == (lookup "w" ns) = - let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - in - findElement (QName "blip" (Just a_ns) (Just "a")) element - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) - >>= (\s -> Just $ Drawing s) -elemToDrawing _ _ = Nothing - - -elemToParPart :: NameSpaces -> Element -> Maybe ParPart -elemToParPart ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - case findChild (QName 
"drawing" (lookup "w" ns) (Just "w")) element of - Just drawingElem -> elemToDrawing ns drawingElem - Nothing -> do - r <- elemToRun ns element - return $ PlainRun r -elemToParPart ns element - | qName (elName element) == "bookmarkStart" && - qURI (elName element) == (lookup "w" ns) = do - bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element - return $ BookMark bmId bmName -elemToParPart ns element - | qName (elName element) == "hyperlink" && - qURI (elName element) == (lookup "w" ns) = - let runs = map fromJust $ filter isJust $ map (elemToRun ns) - $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element - in - case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of - Just anchor -> - Just $ InternalHyperLink anchor runs - Nothing -> - case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of - Just relId -> Just $ ExternalHyperLink relId runs - Nothing -> Nothing -elemToParPart _ _ = Nothing - -type Target = String -type Anchor = String -type BookMarkId = String -type RelId = String - diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs new file mode 100644 index 000000000..df4be41ff --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -0,0 +1,479 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse) +to 'Pandoc' document. -} + +{- +Current state of implementation of Docx entities ([x] means +implemented, [-] means partially implemented): + +* Blocks + + - [X] Para + - [X] CodeBlock (styled with `SourceCode`) + - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally, + indented) + - [X] OrderedList + - [X] BulletList + - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`) + - [X] Header (styled with `Heading#`) + - [ ] HorizontalRule + - [-] Table (column widths and alignments not yet implemented) + +* Inlines + + - [X] Str + - [X] Emph (From italics. `underline` currently read as span. In + future, it might optionally be emph as well) + - [X] Strong + - [X] Strikeout + - [X] Superscript + - [X] Subscript + - [X] SmallCaps + - [ ] Quoted + - [ ] Cite + - [X] Code (styled with `VerbatimChar`) + - [X] Space + - [X] LineBreak (these are invisible in Word: entered with Shift-Return) + - [ ] Math + - [X] Link (links to an arbitrary bookmark create a span with the target as + id and "anchor" class) + - [-] Image (Links to path in archive. Future option for + data-encoded URI likely.) + - [X] Note (Footnotes and Endnotes are silently combined.) 
+-} + +module Text.Pandoc.Readers.Docx + ( readDocx + ) where + +import Codec.Archive.Zip +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Builder (text, toList) +import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.Readers.Docx.Parse +import Text.Pandoc.Readers.Docx.Lists +import Data.Maybe (mapMaybe, isJust, fromJust) +import Data.List (delete, isPrefixOf, (\\), intersect) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Base64 (encode) +import System.FilePath (combine) + +readDocx :: ReaderOptions + -> B.ByteString + -> Pandoc +readDocx opts bytes = + case archiveToDocx (toArchive bytes) of + Just docx -> Pandoc nullMeta (docxToBlocks opts docx) + Nothing -> error $ "couldn't parse docx file" + +runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) +runStyleToSpanAttr rPr = ("", + mapMaybe id [ + if isBold rPr then (Just "strong") else Nothing, + if isItalic rPr then (Just "emph") else Nothing, + if isSmallCaps rPr then (Just "smallcaps") else Nothing, + if isStrike rPr then (Just "strike") else Nothing, + if isSuperScript rPr then (Just "superscript") else Nothing, + if isSubScript rPr then (Just "subscript") else Nothing, + rStyle rPr], + case underline rPr of + Just fmt -> [("underline", fmt)] + _ -> [] + ) + +parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) +parStyleToDivAttr pPr = ("", + pStyle pPr, + case indent pPr of + Just n -> [("indent", (show n))] + Nothing -> [] + ) + +strToInlines :: String -> [Inline] +strToInlines = toList . 
text + +codeSpans :: [String] +codeSpans = ["VerbatimChar"] + +blockQuoteDivs :: [String] +blockQuoteDivs = ["Quote", "BlockQuote"] + +codeDivs :: [String] +codeDivs = ["SourceCode"] + +runElemToInlines :: RunElem -> [Inline] +runElemToInlines (TextRun s) = strToInlines s +runElemToInlines (LnBrk) = [LineBreak] + +runElemToString :: RunElem -> String +runElemToString (TextRun s) = s +runElemToString (LnBrk) = ['\n'] + +runElemsToString :: [RunElem] -> String +runElemsToString = concatMap runElemToString + +strNormalize :: [Inline] -> [Inline] +strNormalize [] = [] +strNormalize (Str "" : ils) = strNormalize ils +strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) +strNormalize (il:ils) = il : (strNormalize ils) + +runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] +runToInlines _ _ (Run rs runElems) + | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = + case runStyleToSpanAttr rs == ("", [], []) of + True -> [Str (runElemsToString runElems)] + False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] + | otherwise = case runStyleToSpanAttr rs == ("", [], []) of + True -> concatMap runElemToInlines runElems + False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] +runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) = + case (getFootNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["footnote"], []) []]] +runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = + case (getEndNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["endnote"], []) []]] + +parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] +parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r +parPartToInlines _ _ (BookMark _ anchor) = + [Span (anchor, ["anchor"], []) []] 
+parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = + case lookupRelationship relid rels of + Just target -> [Image [] (combine "word" target, "")] + Nothing -> [Image [] ("", "")] +parPartToInlines opts docx (InternalHyperLink anchor runs) = + [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] +parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) = + case lookupRelationship relid rels of + Just target -> + [Link (concatMap (runToInlines opts docx) runs) (target, "")] + Nothing -> + [Link (concatMap (runToInlines opts docx) runs) ("", "")] + +isAnchorSpan :: Inline -> Bool +isAnchorSpan (Span (ident, classes, kvs) ils) = + (not . null) ident && + classes == ["anchor"] && + null kvs && + null ils +isAnchorSpan _ = False + +dummyAnchors :: [String] +dummyAnchors = ["_GoBack"] + +makeHeaderAnchors :: Block -> Block +makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = + case filter isAnchorSpan ils of + [] -> h + (x@(Span (ident, _, _) _) : xs) -> + case ident `elem` dummyAnchors of + True -> h + False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) + _ -> h +makeHeaderAnchors blk = blk + + +parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline] +parPartsToInlines opts docx parparts = + -- + -- We're going to skip data-uri's for now. It should be an option, + -- not mandatory. 
+ -- + --bottomUp (makeImagesSelfContained docx) $ + bottomUp spanCorrect $ + bottomUp spanTrim $ + bottomUp spanReduce $ + concatMap (parPartToInlines opts docx) parparts + +cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block] +cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps + +rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] +rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells + +bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block +bodyPartToBlock opts docx (Paragraph pPr parparts) = + Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] +bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = + let + kvs = case lookupLevel numId lvl numbering of + Just (_, fmt, txt, Just start) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + , ("start", (show start)) + ] + + Just (_, fmt, txt, Nothing) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + ] + Nothing -> [] + in + Div + ("", ["list-item"], kvs) + [bodyPartToBlock opts docx (Paragraph pPr parparts)] +bodyPartToBlock _ _ (Tbl _ _ _ []) = + Para [] +bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = + let caption = strToInlines cap + (hdr, rows) = case firstRowFormatting look of + True -> (Just r, rs) + False -> (Nothing, r:rs) + hdrCells = case hdr of + Just r' -> rowToBlocksList opts docx r' + Nothing -> [] + cells = map (rowToBlocksList opts docx) rows + + size = case null hdrCells of + True -> length $ head cells + False -> length $ hdrCells + -- + -- The two following variables (horizontal column alignment and + -- relative column widths) go to the default at the + -- moment. Width information is in the TblGrid field of the Tbl, + -- so should be possible. Alignment might be more difficult, + -- since there doesn't seem to be a column entity in docx. 
+ alignments = take size (repeat AlignDefault) + widths = take size (repeat 0) :: [Double] + in + Table caption alignments widths hdrCells cells + +makeImagesSelfContained :: Docx -> Inline -> Inline +makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) = + case lookup uri media of + Just bs -> case getMimeType uri of + Just mime -> let data_uri = + "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) + in + Image alt (data_uri, title) + Nothing -> i + Nothing -> i +makeImagesSelfContained _ inline = inline + +bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] +bodyToBlocks opts docx (Body bps) = + bottomUp removeEmptyPars $ + bottomUp strNormalize $ + bottomUp spanRemove $ + bottomUp divRemove $ + map (makeHeaderAnchors) $ + bottomUp divCorrect $ + bottomUp divReduce $ + bottomUp divCorrectPreReduce $ + bottomUp blocksToDefinitions $ + blocksToBullets $ + map (bodyPartToBlock opts docx) bps + +docxToBlocks :: ReaderOptions -> Docx -> [Block] +docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body + +spanReduce :: [Inline] -> [Inline] +spanReduce [] = [] +spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) + | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) +spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : + s2@(Span (id2, classes2, kvs2) ils2) : + ils) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> s1 : (spanReduce (s2 : ils)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : + ils) +spanReduce (il:ils) = il : (spanReduce ils) + +ilToCode :: Inline -> String +ilToCode (Str s) = s +ilToCode _ = "" + +spanRemove' :: Inline -> [Inline] 
+spanRemove' s@(Span (ident, classes, _) []) + -- "_GoBack" is automatically inserted. We don't want to keep it. + | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] +spanRemove' (Span (_, _, kvs) ils) = + case lookup "underline" kvs of + Just val -> [Span ("", [], [("underline", val)]) ils] + Nothing -> ils +spanRemove' il = [il] + +spanRemove :: [Inline] -> [Inline] +spanRemove = concatMap spanRemove' + +spanTrim' :: Inline -> [Inline] +spanTrim' il@(Span _ []) = [il] +spanTrim' il@(Span attr (il':[])) + | il' == Space = [Span attr [], Space] + | otherwise = [il] +spanTrim' (Span attr ils) + | head ils == Space && last ils == Space = + [Space, Span attr (init $ tail ils), Space] + | head ils == Space = [Space, Span attr (tail ils)] + | last ils == Space = [Span attr (init ils), Space] +spanTrim' il = [il] + +spanTrim :: [Inline] -> [Inline] +spanTrim = concatMap spanTrim' + +spanCorrect' :: Inline -> [Inline] +spanCorrect' (Span ("", [], []) ils) = ils +spanCorrect' (Span (ident, classes, kvs) ils) + | "emph" `elem` classes = + [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] + | "strong" `elem` classes = + [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] + | "smallcaps" `elem` classes = + [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] + | "strike" `elem` classes = + [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] + | "superscript" `elem` classes = + [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] + | "subscript" `elem` classes = + [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] + | (not . 
null) (codeSpans `intersect` classes) = + [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] + | otherwise = + [Span (ident, classes, kvs) ils] +spanCorrect' il = [il] + +spanCorrect :: [Inline] -> [Inline] +spanCorrect = concatMap spanCorrect' + +removeEmptyPars :: [Block] -> [Block] +removeEmptyPars blks = filter (\b -> b /= (Para [])) blks + +divReduce :: [Block] -> [Block] +divReduce [] = [] +divReduce ((Div (id1, classes1, kvs1) blks1) : blks) + | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) +divReduce (d1@(Div (id1, classes1, kvs1) blks1) : + d2@(Div (id2, classes2, kvs2) blks2) : + blks) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> d1 : (divReduce (d2 : blks)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : + blks) +divReduce (blk:blks) = blk : (divReduce blks) + +isHeaderClass :: String -> Maybe Int +isHeaderClass s | "Heading" `isPrefixOf` s = + case reads (drop (length "Heading") s) :: [(Int, String)] of + [] -> Nothing + ((n, "") : []) -> Just n + _ -> Nothing +isHeaderClass _ = Nothing + +findHeaderClass :: [String] -> Maybe Int +findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of + [] -> Nothing + n : _ -> Just n + +blksToInlines :: [Block] -> [Inline] +blksToInlines (Para ils : _) = ils +blksToInlines (Plain ils : _) = ils +blksToInlines _ = [] + +divCorrectPreReduce' :: Block -> [Block] +divCorrectPreReduce' (Div (ident, classes, kvs) blks) + | isJust $ findHeaderClass classes = + let n = fromJust $ findHeaderClass classes + in + [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] + | otherwise = [Div (ident, classes, kvs) 
blks] +divCorrectPreReduce' blk = [blk] + +divCorrectPreReduce :: [Block] -> [Block] +divCorrectPreReduce = concatMap divCorrectPreReduce' + +blkToCode :: Block -> String +blkToCode (Para []) = "" +blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) +blkToCode (Para ((Span (_, classes, _) ils'): ils)) + | (not . null) (codeSpans `intersect` classes) = + (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) +blkToCode _ = "" + +divRemove' :: Block -> [Block] +divRemove' (Div (_, _, kvs) blks) = + case lookup "indent" kvs of + Just val -> [Div ("", [], [("indent", val)]) blks] + Nothing -> blks +divRemove' blk = [blk] + +divRemove :: [Block] -> [Block] +divRemove = concatMap divRemove' + +divCorrect' :: Block -> [Block] +divCorrect' b@(Div (ident, classes, kvs) blks) + | (not . null) (blockQuoteDivs `intersect` classes) = + [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] + | (not . null) (codeDivs `intersect` classes) = + [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] + | otherwise = + case lookup "indent" kvs of + Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] + Just _ -> + [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] + Nothing -> [b] +divCorrect' blk = [blk] + +divCorrect :: [Block] -> [Block] +divCorrect = concatMap divCorrect' diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs new file mode 100644 index 000000000..68559d98b --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -0,0 +1,208 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Lists + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Functions for converting flat docx paragraphs into nested lists. +-} + +module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets + , blocksToDefinitions) where + +import Text.Pandoc.JSON +import Text.Pandoc.Shared (trim) +import Control.Monad +import Data.List +import Data.Maybe + +isListItem :: Block -> Bool +isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True +isListItem _ = False + +getLevel :: Block -> Maybe Integer +getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs +getLevel _ = Nothing + +getLevelN :: Block -> Integer +getLevelN b = case getLevel b of + Just n -> n + Nothing -> -1 + +getNumId :: Block -> Maybe Integer +getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs +getNumId _ = Nothing + +getNumIdN :: Block -> Integer +getNumIdN b = case getNumId b of + Just n -> n + Nothing -> -1 + +getText :: Block -> Maybe String +getText (Div (_, _, kvs) _) = lookup "text" kvs +getText _ = Nothing + +data ListType = Itemized | Enumerated ListAttributes + +listStyleMap :: [(String, ListNumberStyle)] +listStyleMap = [("upperLetter", UpperAlpha), + ("lowerLetter", LowerAlpha), + ("upperRoman", UpperRoman), + ("lowerRoman", LowerRoman), + ("decimal", Decimal)] + +listDelimMap :: [(String, ListNumberDelim)] +listDelimMap = [("%1)", OneParen), + ("(%1)", TwoParens), + ("%1.", 
Period)] + +getListType :: Block -> Maybe ListType +getListType b@(Div (_, _, kvs) _) | isListItem b = + let + start = lookup "start" kvs + frmt = lookup "format" kvs + txt = lookup "text" kvs + in + case frmt of + Just "bullet" -> Just Itemized + Just f -> + case txt of + Just t -> Just $ Enumerated ( + read (fromMaybe "1" start) :: Int, + fromMaybe DefaultStyle (lookup f listStyleMap), + fromMaybe DefaultDelim (lookup t listDelimMap)) + Nothing -> Nothing + _ -> Nothing +getListType _ = Nothing + +listParagraphDivs :: [String] +listParagraphDivs = ["ListParagraph"] + +-- This is a first stab at going through and attaching meaning to list +-- paragraphs, without an item marker, following a list item. We +-- assume that these are paragraphs in the same item. + +handleListParagraphs :: [Block] -> [Block] +handleListParagraphs [] = [] +handleListParagraphs ( + (Div attr1@(_, classes1, _) blks1) : + (Div (ident2, classes2, kvs2) blks2) : + blks + ) | "list-item" `elem` classes1 && + not ("list-item" `elem` classes2) && + (not . null) (listParagraphDivs `intersect` classes2) = + -- We don't want to keep this indent. + let newDiv2 = + (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) + in + handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) +handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) + +separateBlocks' :: Block -> [[Block]] -> [[Block]] +separateBlocks' blk ([] : []) = [[blk]] +separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] +-- The following is for the invisible bullet lists. This is how +-- pandoc-generated ooxml does multiparagraph item lists. 
+separateBlocks' b acc | liftM trim (getText b) == Just "" = + (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b acc = acc ++ [[b]] + +separateBlocks :: [Block] -> [[Block]] +separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) + +flatToBullets' :: Integer -> [Block] -> [Block] +flatToBullets' _ [] = [] +flatToBullets' num xs@(b : elems) + | getLevelN b == num = b : (flatToBullets' num elems) + | otherwise = + let bNumId = getNumIdN b + bLevel = getLevelN b + (children, remaining) = + span + (\b' -> + ((getLevelN b') > bLevel || + ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) + xs + in + case getListType b of + Just (Enumerated attr) -> + (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + _ -> + (BulletList (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + +flatToBullets :: [Block] -> [Block] +flatToBullets elems = flatToBullets' (-1) elems + +blocksToBullets :: [Block] -> [Block] +blocksToBullets blks = + -- bottomUp removeListItemDivs $ + flatToBullets $ (handleListParagraphs blks) + + +plainParaInlines :: Block -> [Inline] +plainParaInlines (Plain ils) = ils +plainParaInlines (Para ils) = ils +plainParaInlines _ = [] + +blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] +blocksToDefinitions' [] acc [] = reverse acc +blocksToDefinitions' defAcc acc [] = + reverse $ (DefinitionList (reverse defAcc)) : acc +blocksToDefinitions' defAcc acc + ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) + | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + pair = case remainingAttr2 == ("", [], []) of + True -> (concatMap plainParaInlines blks1, [blks2]) + False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) + in + blocksToDefinitions' (pair : defAcc) acc blks +blocksToDefinitions' defAcc acc + ((Div 
(ident2, classes2, kvs2) blks2) : blks) + | (not . null) defAcc && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + defItems2 = case remainingAttr2 == ("", [], []) of + True -> blks2 + False -> [Div remainingAttr2 blks2] + ((defTerm, defItems):defs) = defAcc + defAcc' = case null defItems of + True -> (defTerm, [defItems2]) : defs + False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs + in + blocksToDefinitions' defAcc' acc blks +blocksToDefinitions' [] acc (b:blks) = + blocksToDefinitions' [] (b:acc) blks +blocksToDefinitions' defAcc acc (b:blks) = + blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks + + +blocksToDefinitions :: [Block] -> [Block] +blocksToDefinitions = blocksToDefinitions' [] [] + + + + diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs new file mode 100644 index 000000000..22e9dd909 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -0,0 +1,604 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Parse + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of docx archive into Docx haskell type +-} + + +module Text.Pandoc.Readers.Docx.Parse ( Docx(..) + , Document(..) + , Body(..) + , BodyPart(..) + , TblLook(..) + , ParPart(..) + , Run(..) + , RunElem(..) + , Notes + , Numbering + , Relationship + , Media + , RunStyle(..) + , ParagraphStyle(..) + , Row(..) + , Cell(..) + , getFootNote + , getEndNote + , lookupLevel + , lookupRelationship + , archiveToDocx + ) where +import Codec.Archive.Zip +import Text.XML.Light +import Data.Maybe +import Data.List +import System.FilePath +import Data.Bits ((.|.)) +import qualified Data.ByteString.Lazy as B +import qualified Text.Pandoc.UTF8 as UTF8 + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + + +type NameSpaces = [(String, String)] + +data Docx = Docx Document Notes Numbering [Relationship] Media + deriving Show + +archiveToDocx :: Archive -> Maybe Docx +archiveToDocx archive = do + let notes = archiveToNotes archive + rels = archiveToRelationships archive + media = archiveToMedia archive + doc <- archiveToDocument archive + numbering <- archiveToNumbering archive + return $ Docx doc notes numbering rels media + +data Document = Document NameSpaces Body + deriving Show + +archiveToDocument :: Archive -> Maybe Document +archiveToDocument zf = do + entry <- findEntryByPath "word/document.xml" zf + docElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem + body <- elemToBody namespaces bodyElem + return $ Document namespaces body + +type Media = [(FilePath, B.ByteString)] + +filePathIsMedia :: FilePath -> Bool +filePathIsMedia fp = + let (dir, _) = splitFileName fp + in + (dir == "word/media/") + +getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) +getMediaPair zf fp = + case findEntryByPath fp zf of + Just e -> Just (fp, fromEntry e) + Nothing -> Nothing + +archiveToMedia :: Archive -> Media +archiveToMedia zf = + mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) + +data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] + deriving Show + +data Numb = Numb String String -- right now, only a key to an abstract num + deriving Show + +data AbstractNumb = AbstractNumb String [Level] + deriving Show + +-- (ilvl, format, string, start) +type Level = (String, String, String, Maybe Integer) + +lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do + absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs + lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs + lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls + return lvl + +numElemToNum :: NameSpaces -> Element -> Maybe Numb +numElemToNum ns element | + qName (elName element) == "num" && + qURI (elName element) == (lookup "w" ns) = do + numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element + absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + return $ Numb numId absNumId +numElemToNum _ _ = Nothing + +absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb +absNumElemToAbsNum ns element | + qName (elName element) == "abstractNum" && + qURI (elName 
element) == (lookup "w" ns) = do + absNumId <- findAttr + (QName "abstractNumId" (lookup "w" ns) (Just "w")) + element + let levelElems = findChildren + (QName "lvl" (lookup "w" ns) (Just "w")) + element + levels = mapMaybe id $ map (levelElemToLevel ns) levelElems + return $ AbstractNumb absNumId levels +absNumElemToAbsNum _ _ = Nothing + +levelElemToLevel :: NameSpaces -> Element -> Maybe Level +levelElemToLevel ns element | + qName (elName element) == "lvl" && + qURI (elName element) == (lookup "w" ns) = do + ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element + fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + return (ilvl, fmt, txt, start) +levelElemToLevel _ _ = Nothing + +archiveToNumbering :: Archive -> Maybe Numbering +archiveToNumbering zf = + case findEntryByPath "word/numbering.xml" zf of + Nothing -> Just $ Numbering [] [] [] + Just entry -> do + numberingElem <- (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + numElems = findChildren + (QName "num" (lookup "w" namespaces) (Just "w")) + numberingElem + absNumElems = findChildren + (QName "abstractNum" (lookup "w" namespaces) (Just "w")) + numberingElem + nums = mapMaybe id $ map (numElemToNum namespaces) numElems + absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems + return $ Numbering namespaces nums absNums + +data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) + deriving Show + +noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart]) +noteElemToNote ns element + | qName (elName element) `elem` ["endnote", "footnote"] && + qURI (elName element) == (lookup "w" ns) = + do + noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + let bps = map fromJust + $ filter isJust + $ map (elemToBodyPart ns) + $ filterChildrenName (isParOrTbl ns) element + return $ (noteId, bps) +noteElemToNote _ _ = Nothing + +getFootNote :: String -> Notes -> Maybe [BodyPart] +getFootNote s (Notes _ fns _) = fns >>= (lookup s) + +getEndNote :: String -> Notes -> Maybe [BodyPart] +getEndNote s (Notes _ _ ens) = ens >>= (lookup s) + +elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] +elemToNotes ns notetype element + | qName (elName element) == (notetype ++ "s") && + qURI (elName element) == (lookup "w" ns) = + Just $ map fromJust + $ filter isJust + $ map (noteElemToNote ns) + $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element +elemToNotes _ _ _ = Nothing + +archiveToNotes :: Archive -> Notes +archiveToNotes zf = + let fnElem = findEntryByPath "word/footnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + enElem = findEntryByPath "word/endnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) + fn_namespaces = case fnElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + en_namespaces = case enElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces + fn = fnElem >>= (elemToNotes ns "footnote") + en = enElem >>= (elemToNotes ns "endnote") + in + Notes ns fn en + + +data Relationship = Relationship (RelId, Target) + deriving Show + +lookupRelationship :: RelId -> [Relationship] -> Maybe Target +lookupRelationship relid rels = + lookup relid (map (\(Relationship pair) -> pair) rels) + +filePathIsRel :: FilePath -> Bool +filePathIsRel fp = + let (dir, name) = splitFileName fp + in + (dir == "word/_rels/") && ((takeExtension name) == ".rels") + +relElemToRelationship :: Element -> Maybe Relationship +relElemToRelationship element | qName (elName element) == "Relationship" = + do + relId <- findAttr (QName "Id" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element + return $ Relationship (relId, target) +relElemToRelationship _ = Nothing + + +archiveToRelationships :: Archive -> [Relationship] +archiveToRelationships archive = + let relPaths = filter filePathIsRel (filesInArchive archive) + entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths + relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . 
fromEntry) entries + rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems + in + rels + +data Body = Body [BodyPart] + deriving Show + +isParOrTbl :: NameSpaces -> QName -> Bool +isParOrTbl ns q = qName q `elem` ["p", "tbl"] && + qURI q == (lookup "w" ns) + +elemToBody :: NameSpaces -> Element -> Maybe Body +elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = + Just $ Body + $ map fromJust + $ filter isJust + $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element +elemToBody _ _ = Nothing + +isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool +isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && + qURI q == (lookup "w" ns) + +elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) +elemToNumInfo ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + do + pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element + numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr + lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + return (numId, lvl) +elemToNumInfo _ _ = Nothing + +-- isBookMarkTag :: NameSpaces -> QName -> Bool +-- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && +-- qURI q == (lookup "w" ns) + +-- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark +-- parChildrenToBookmark ns (bms : bme : _) +-- | qName (elName bms) == "bookmarkStart" && +-- qURI (elName bms) == (lookup "w" ns) && +-- qName (elName bme) == "bookmarkEnd" && +-- qURI (elName bme) == (lookup "w" ns) = do +-- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms +-- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms +-- return $ (bmId, bmName) 
+-- parChildrenToBookmark _ _ = Nothing + +elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart +elemToBodyPart ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + let parstyle = elemToParagraphStyle ns element + parparts = mapMaybe id + $ map (elemToParPart ns) + $ filterChildrenName (isRunOrLinkOrBookmark ns) element + in + case elemToNumInfo ns element of + Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts + Nothing -> Just $ Paragraph parstyle parparts + | qName (elName element) == "tbl" && + qURI (elName element) == (lookup "w" ns) = + let + caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w")) + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + grid = case + findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element + of + Just g -> elemToTblGrid ns g + Nothing -> [] + tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w")) + >>= elemToTblLook ns + in + Just $ Tbl + (fromMaybe "" caption) + grid + (fromMaybe defaultTblLook tblLook) + (mapMaybe (elemToRow ns) (elChildren element)) + | otherwise = Nothing + +elemToTblLook :: NameSpaces -> Element -> Maybe TblLook +elemToTblLook ns element + | qName (elName element) == "tblLook" && + qURI (elName element) == (lookup "w" ns) = + let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element + val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element + firstRowFmt = + case firstRow of + Just "1" -> True + Just _ -> False + Nothing -> case val of + Just bitMask -> testBitMask bitMask 0x020 + Nothing -> False + in + Just $ TblLook{firstRowFormatting = firstRowFmt} +elemToTblLook _ _ = Nothing + +testBitMask :: String -> Int -> Bool +testBitMask bitMaskS n = + case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of + [] -> False + ((n', _) : _) -> ((n' .|. 
n) /= 0) + +data ParagraphStyle = ParagraphStyle { pStyle :: [String] + , indent :: Maybe Integer + } + deriving Show + +defaultParagraphStyle :: ParagraphStyle +defaultParagraphStyle = ParagraphStyle { pStyle = [] + , indent = Nothing + } + +elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle +elemToParagraphStyle ns element = + case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of + Just pPr -> + ParagraphStyle + {pStyle = + mapMaybe id $ + map + (findAttr (QName "val" (lookup "w" ns) (Just "w"))) + (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) + , indent = + findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= + findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= + stringToInteger + } + Nothing -> defaultParagraphStyle + + +data BodyPart = Paragraph ParagraphStyle [ParPart] + | ListItem ParagraphStyle String String [ParPart] + | Tbl String TblGrid TblLook [Row] + + deriving Show + +type TblGrid = [Integer] + +data TblLook = TblLook {firstRowFormatting::Bool} + deriving Show + +defaultTblLook :: TblLook +defaultTblLook = TblLook{firstRowFormatting = False} + +stringToInteger :: String -> Maybe Integer +stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) + +elemToTblGrid :: NameSpaces -> Element -> TblGrid +elemToTblGrid ns element + | qName (elName element) == "tblGrid" && + qURI (elName element) == (lookup "w" ns) = + let + cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element + in + mapMaybe (\e -> + findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e + >>= stringToInteger + ) + cols +elemToTblGrid _ _ = [] + +data Row = Row [Cell] + deriving Show + + +elemToRow :: NameSpaces -> Element -> Maybe Row +elemToRow ns element + | qName (elName element) == "tr" && + qURI (elName element) == (lookup "w" ns) = + let + cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element + in + Just $ Row (mapMaybe (elemToCell ns) cells) +elemToRow _ _ = Nothing + +data Cell = 
Cell [BodyPart] + deriving Show + +elemToCell :: NameSpaces -> Element -> Maybe Cell +elemToCell ns element + | qName (elName element) == "tc" && + qURI (elName element) == (lookup "w" ns) = + Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element)) +elemToCell _ _ = Nothing + +data ParPart = PlainRun Run + | BookMark BookMarkId Anchor + | InternalHyperLink Anchor [Run] + | ExternalHyperLink RelId [Run] + | Drawing String + deriving Show + +data Run = Run RunStyle [RunElem] + | Footnote String + | Endnote String + deriving Show + +data RunElem = TextRun String | LnBrk + deriving Show + +data RunStyle = RunStyle { isBold :: Bool + , isItalic :: Bool + , isSmallCaps :: Bool + , isStrike :: Bool + , isSuperScript :: Bool + , isSubScript :: Bool + , underline :: Maybe String + , rStyle :: Maybe String } + deriving Show + +defaultRunStyle :: RunStyle +defaultRunStyle = RunStyle { isBold = False + , isItalic = False + , isSmallCaps = False + , isStrike = False + , isSuperScript = False + , isSubScript = False + , underline = Nothing + , rStyle = Nothing + } + +elemToRunStyle :: NameSpaces -> Element -> RunStyle +elemToRunStyle ns element = + case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of + Just rPr -> + RunStyle + { + isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr + , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr + , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr + , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr + , isSuperScript = + (Just "superscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , isSubScript = + (Just "subscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , underline = + findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName 
"val" (lookup "w" ns) (Just "w")) + , rStyle = + findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + } + Nothing -> defaultRunStyle + +elemToRun :: NameSpaces -> Element -> Maybe Run +elemToRun ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case + findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Footnote s + Nothing -> + case + findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Endnote s + Nothing -> Just $ + Run (elemToRunStyle ns element) + (elemToRunElems ns element) +elemToRun _ _ = Nothing + +elemToRunElem :: NameSpaces -> Element -> Maybe RunElem +elemToRunElem ns element + | qName (elName element) == "t" && + qURI (elName element) == (lookup "w" ns) = + Just $ TextRun (strContent element) + | qName (elName element) == "br" && + qURI (elName element) == (lookup "w" ns) = + Just $ LnBrk + | otherwise = Nothing + + +elemToRunElems :: NameSpaces -> Element -> [RunElem] +elemToRunElems ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + mapMaybe (elemToRunElem ns) (elChildren element) + | otherwise = [] + +elemToDrawing :: NameSpaces -> Element -> Maybe ParPart +elemToDrawing ns element + | qName (elName element) == "drawing" && + qURI (elName element) == (lookup "w" ns) = + let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + in + findElement (QName "blip" (Just a_ns) (Just "a")) element + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= (\s -> Just $ Drawing s) +elemToDrawing _ _ = Nothing + + +elemToParPart :: NameSpaces -> Element -> Maybe ParPart +elemToParPart ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case findChild (QName 
"drawing" (lookup "w" ns) (Just "w")) element of + Just drawingElem -> elemToDrawing ns drawingElem + Nothing -> do + r <- elemToRun ns element + return $ PlainRun r +elemToParPart ns element + | qName (elName element) == "bookmarkStart" && + qURI (elName element) == (lookup "w" ns) = do + bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element + return $ BookMark bmId bmName +elemToParPart ns element + | qName (elName element) == "hyperlink" && + qURI (elName element) == (lookup "w" ns) = + let runs = map fromJust $ filter isJust $ map (elemToRun ns) + $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element + in + case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of + Just anchor -> + Just $ InternalHyperLink anchor runs + Nothing -> + case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of + Just relId -> Just $ ExternalHyperLink relId runs + Nothing -> Nothing +elemToParPart _ _ = Nothing + +type Target = String +type Anchor = String +type BookMarkId = String +type RelId = String + diff --git a/tests/Tests/Readers/DocX.hs b/tests/Tests/Readers/DocX.hs deleted file mode 100644 index f4564ea1d..000000000 --- a/tests/Tests/Readers/DocX.hs +++ /dev/null @@ -1,68 +0,0 @@ -module Tests.Readers.DocX (tests) where - -import Text.Pandoc.Options -import Text.Pandoc.Readers.Native -import Text.Pandoc.Definition -import Tests.Helpers -import Test.Framework -import qualified Data.ByteString.Lazy as B -import Text.Pandoc.Readers.DocX - -compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) -compareOutput docxFile nativeFile = do - df <- B.readFile docxFile - nf <- Prelude.readFile nativeFile - return $ (readDocX def df, readNative nf) - -testCompare' :: String -> FilePath -> FilePath -> IO Test -testCompare' name docxFile nativeFile = do - (dp, np) <- compareOutput docxFile nativeFile - return $ test id name (dp, np) - -testCompare :: String -> FilePath -> 
FilePath -> Test -testCompare name docxFile nativeFile = - buildTest $ testCompare' name docxFile nativeFile - - -tests :: [Test] -tests = [ testGroup "inlines" - [ testCompare - "font formatting" - "docx.inline_formatting.docx" - "docx.inline_formatting.native" - , testCompare - "hyperlinks" - "docx.links.docx" - "docx.links.native" - , testCompare - "inline image with reference output" - "docx.image.docx" - "docx.image_no_embed.native" - , testCompare - "handling unicode input" - "docx.unicode.docx" - "docx.unicode.native"] - , testGroup "blocks" - [ testCompare - "headers" - "docx.headers.docx" - "docx.headers.native" - , testCompare - "lists" - "docx.lists.docx" - "docx.lists.native" - , testCompare - "footnotes and endnotes" - "docx.notes.docx" - "docx.notes.native" - , testCompare - "blockquotes (parsing indent as blockquote)" - "docx.block_quotes.docx" - "docx.block_quotes_parse_indent.native" - , testCompare - "tables" - "docx.tables.docx" - "docx.tables.native" - ] - ] - diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs new file mode 100644 index 000000000..0a963ddc6 --- /dev/null +++ b/tests/Tests/Readers/Docx.hs @@ -0,0 +1,68 @@ +module Tests.Readers.Docx (tests) where + +import Text.Pandoc.Options +import Text.Pandoc.Readers.Native +import Text.Pandoc.Definition +import Tests.Helpers +import Test.Framework +import qualified Data.ByteString.Lazy as B +import Text.Pandoc.Readers.Docx + +compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) +compareOutput docxFile nativeFile = do + df <- B.readFile docxFile + nf <- Prelude.readFile nativeFile + return $ (readDocx def df, readNative nf) + +testCompare' :: String -> FilePath -> FilePath -> IO Test +testCompare' name docxFile nativeFile = do + (dp, np) <- compareOutput docxFile nativeFile + return $ test id name (dp, np) + +testCompare :: String -> FilePath -> FilePath -> Test +testCompare name docxFile nativeFile = + buildTest $ testCompare' name docxFile nativeFile + + +tests :: 
[Test] +tests = [ testGroup "inlines" + [ testCompare + "font formatting" + "docx.inline_formatting.docx" + "docx.inline_formatting.native" + , testCompare + "hyperlinks" + "docx.links.docx" + "docx.links.native" + , testCompare + "inline image with reference output" + "docx.image.docx" + "docx.image_no_embed.native" + , testCompare + "handling unicode input" + "docx.unicode.docx" + "docx.unicode.native"] + , testGroup "blocks" + [ testCompare + "headers" + "docx.headers.docx" + "docx.headers.native" + , testCompare + "lists" + "docx.lists.docx" + "docx.lists.native" + , testCompare + "footnotes and endnotes" + "docx.notes.docx" + "docx.notes.native" + , testCompare + "blockquotes (parsing indent as blockquote)" + "docx.block_quotes.docx" + "docx.block_quotes_parse_indent.native" + , testCompare + "tables" + "docx.tables.docx" + "docx.tables.native" + ] + ] + diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 9f9d85147..c07a51ec5 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -9,7 +9,7 @@ import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown import qualified Tests.Readers.Org import qualified Tests.Readers.RST -import qualified Tests.Readers.DocX +import qualified Tests.Readers.Docx import qualified Tests.Writers.ConTeXt import qualified Tests.Writers.LaTeX import qualified Tests.Writers.HTML @@ -39,7 +39,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Markdown" Tests.Readers.Markdown.tests , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests - , testGroup "DocX" Tests.Readers.DocX.tests + , testGroup "Docx" Tests.Readers.Docx.tests ] ] -- cgit v1.2.3 From 9fc5c8d7af31a47d8e3e8ea6dbb541178ec9ca66 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 12:27:27 -0700 Subject: Rewrote haddock reader to use haddock-library. This brings pandoc's rendering of haddock markup in line with the new haddock. 
Note that we preserve line breaks in `@` code blocks, unlike the earlier version. Modified tests pass. More tests would be good. --- src/Text/Pandoc/Readers/Haddock.hs | 124 ++++++++++++++++++++++++++++++------- tests/haddock-reader.haddock | 20 +++--- tests/haddock-reader.native | 6 +- 3 files changed, 115 insertions(+), 35 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 65d8de98f..a512f969d 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -15,10 +15,13 @@ module Text.Pandoc.Readers.Haddock import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Shared (trim, splitBy) import Data.Monoid +import Data.List (intersperse, stripPrefix) +import Data.Maybe (fromMaybe) import Text.Pandoc.Definition import Text.Pandoc.Options -import Documentation.Haddock.Parser (parseParas, Identifier) +import Documentation.Haddock.Parser import Documentation.Haddock.Types -- | Parse Haddock markup and return a 'Pandoc' document. @@ -27,25 +30,102 @@ readHaddock :: ReaderOptions -- ^ Reader options -> Pandoc readHaddock _ = B.doc . docHToBlocks . 
parseParas -docHToBlocks :: DocH mod Identifier -> Blocks -docHToBlocks d = - case d of +docHToBlocks :: DocH String Identifier -> Blocks +docHToBlocks d' = + case d' of + DocEmpty -> mempty DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) - DocParagraph ils -> B.para $ docHToInlines ils - -docHToInlines :: DocH mod Identifier -> Inlines -docHToInlines d = - case d of - DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2) - DocString s -> B.text s - --- similar to 'docAppend' in Haddock.Doc -mergeLists :: [Block] -> [Block] -mergeLists (BulletList xs : BulletList ys : blocks) - = mergeLists (BulletList (xs ++ ys) : blocks) -mergeLists (OrderedList _ xs : OrderedList a ys : blocks) - = mergeLists (OrderedList a (xs ++ ys) : blocks) -mergeLists (DefinitionList xs : DefinitionList ys : blocks) - = mergeLists (DefinitionList (xs ++ ys) : blocks) -mergeLists (x : blocks) = x : mergeLists blocks -mergeLists [] = [] + DocString _ -> inlineFallback + DocParagraph ils -> B.para $ docHToInlines False ils + DocIdentifier _ -> inlineFallback + DocIdentifierUnchecked _ -> inlineFallback + DocModule s -> B.plain $ docHToInlines False $ DocModule s + DocWarning _ -> mempty -- TODO + DocEmphasis _ -> inlineFallback + DocMonospaced _ -> inlineFallback + DocBold _ -> inlineFallback + DocHeader h -> B.header (headerLevel h) + (docHToInlines False $ headerTitle h) + DocUnorderedList items -> B.bulletList (map docHToBlocks items) + DocOrderedList items -> B.orderedList (map docHToBlocks items) + DocDefList items -> B.definitionList (map (\(d,t) -> + (docHToInlines False d, + [consolidatePlains $ docHToBlocks t])) items) + DocCodeBlock (DocString s) -> B.codeBlockWith ("",["haskell"],[]) s + DocCodeBlock d -> B.para $ docHToInlines True d + DocHyperlink _ -> inlineFallback + DocPic _ -> inlineFallback + DocAName _ -> inlineFallback + DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s) + DocExamples es -> mconcat $ map (\e -> + makeExample 
">>>" (exampleExpression e) (exampleResult e)) es + + where inlineFallback = B.plain $ docHToInlines False d' + consolidatePlains = B.fromList . consolidatePlains' . B.toList + consolidatePlains' zs@(Plain _ : _) = + let (xs, ys) = span isPlain zs in + Plain (concatMap extractContents xs) : consolidatePlains' ys + consolidatePlains' (x : xs) = x : consolidatePlains' xs + consolidatePlains' [] = [] + isPlain (Plain _) = True + isPlain _ = False + extractContents (Plain xs) = xs + extractContents _ = [] + +docHToInlines :: Bool -> DocH String Identifier -> Inlines +docHToInlines isCode d' = + case d' of + DocEmpty -> mempty + DocAppend d1 d2 -> mappend (docHToInlines isCode d1) + (docHToInlines isCode d2) + DocString s + | isCode -> mconcat $ intersperse B.linebreak + $ map B.code $ splitBy (=='\n') s + | otherwise -> B.text s + DocParagraph _ -> mempty + DocIdentifier (_,s,_) -> B.codeWith ("",["haskell"],[]) s + DocIdentifierUnchecked s -> B.codeWith ("",["haskell"],[]) s + DocModule s -> B.codeWith ("",["haskell"],[]) s + DocWarning _ -> mempty -- TODO + DocEmphasis d -> B.emph (docHToInlines isCode d) + DocMonospaced (DocString s) -> B.code s + DocMonospaced d -> docHToInlines True d + DocBold d -> B.strong (docHToInlines isCode d) + DocHeader _ -> mempty + DocUnorderedList _ -> mempty + DocOrderedList _ -> mempty + DocDefList _ -> mempty + DocCodeBlock _ -> mempty + DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h) + (maybe (B.text $ hyperlinkUrl h) B.text $ hyperlinkLabel h) + DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p) + (maybe mempty B.text $ pictureTitle p) + DocAName s -> B.spanWith (s,["anchor"],[]) mempty + DocProperty _ -> mempty + DocExamples _ -> mempty + +-- | Create an 'Example', stripping superfluous characters as appropriate +makeExample :: String -> String -> [String] -> Blocks +makeExample prompt expression result = + B.para $ B.codeWith ("",["prompt"],[]) prompt + <> B.space + <> B.codeWith ([], 
["haskell","expr"], []) (trim expression) + <> B.linebreak + <> (mconcat $ intersperse B.linebreak $ map coder result') + where + -- 1. drop trailing whitespace from the prompt, remember the prefix + prefix = takeWhile (`elem` " \t") prompt + + -- 2. drop, if possible, the exact same sequence of whitespace + -- characters from each result line + -- + -- 3. interpret lines that only contain the string "" as an + -- empty line + result' = map (substituteBlankLine . tryStripPrefix prefix) result + where + tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys + + substituteBlankLine "" = "" + substituteBlankLine line = line + coder = B.codeWith ([], ["result"], []) + diff --git a/tests/haddock-reader.haddock b/tests/haddock-reader.haddock index c4f6d6c36..c3ef0c9fc 100644 --- a/tests/haddock-reader.haddock +++ b/tests/haddock-reader.haddock @@ -18,10 +18,10 @@ This is a code block: This is another code block: @ - f x = x + x. - The \@...\@ code block /interprets markup normally/. - "Module.Foo" - \"Hello World\" +f x = x + x. +The \@...\@ code block /interprets markup normally/. +"Module.Foo" +\"Hello World\" @ Haddock supports REPL examples: @@ -42,21 +42,21 @@ This is a reference to the "Foo" module. This is a bulleted list: - * first item + * first item - * second item + * second item This is an enumerated list: - (1) first item + (1) first item - 2. second item + 2. second item This is a definition list: - [@foo@] The description of @foo@. + [@foo@] The description of @foo@. - [@bar@] The description of @bar@. + [@bar@] The description of @bar@. 
Here is a link: diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index 877719b50..8edb0b29a 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -4,13 +4,13 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "*",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph,",Space,Str "not",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str ">",Space,Str "This",Space,Str "sentence",Space,Str "is",Space,Str "not",Space,Str "code.",Space,Str ">>>",Space,Str "This",Space,Str "is",Space,Str "not",Space,Str "an",Space,Str "example."] ,Para [Str "The",Space,Str "references",Space,Str "\955,",Space,Str "\955",Space,Str "and",Space,Str "\955",Space,Str "all",Space,Str "represent",Space,Str "the",Space,Str "lower-case",Space,Str "letter",Space,Str "lambda."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "code",Space,Str "block:"] -,CodeBlock ("",["haskell"],[]) " map :: (a -> b) -> [a] -> [b]\n map _ [] = []\n map f (x:xs) = f x : map f xs\n" +,CodeBlock ("",["haskell"],[]) "map :: (a -> b) -> [a] -> [b]\nmap _ [] = []\nmap f (x:xs) = f x : map f xs" ,Para [Str "This",Space,Str "is",Space,Str "another",Space,Str "code",Space,Str "block:"] -,Para [Code ("",[],[]) "f",Space,Code ("",[],[]) "x",Space,Code ("",[],[]) "=",Space,Code ("",[],[]) "x",Space,Code ("",[],[]) "+",Space,Code ("",[],[]) "x.",Space,Code ("",[],[]) "The",Space,Code ("",[],[]) "@...@",Space,Code ("",[],[]) "code",Space,Code ("",[],[]) "block",Space,Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Space,Code ("",["haskell"],[]) "Module.Foo",Space,Code ("",[],[]) "\"Hello",Space,Code ("",[],[]) "World\""] +,Para [Code ("",[],[]) "f x = x + x.",LineBreak,Code ("",[],[]) "The @...@ code block ",Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Code ("",["haskell"],[]) "Module.Foo",Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\"Hello World\""] ,Para [Str "Haddock",Space,Str "supports",Space,Str 
"REPL",Space,Str "examples:"] ,Para [Code ("",["haskell","expr"],[]) "fib 10",LineBreak,Code ("",["result"],[]) "55"] ,Para [Code ("",["haskell","expr"],[]) "putStrLn \"foo\\nbar\"",LineBreak,Code ("",["result"],[]) "foo",LineBreak,Code ("",["result"],[]) "bar"] -,Para [Str "That",Space,Str "was",Space,Emph [Str "really cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib",Space,Code ("",[],[]) "10",Space,Code ("",[],[]) "=",Space,Code ("",[],[]) "55",Str "."] +,Para [Str "That",Space,Str "was",Space,Emph [Str "really",Space,Str "cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib 10 = 55",Str "."] ,Para [Str "This",Space,Str "module",Space,Str "defines",Space,Str "the",Space,Str "type",Space,Code ("",["haskell"],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",["haskell"],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",["haskell"],[]) "Foo",Space,Str "module."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "bulleted",Space,Str "list:"] ,BulletList -- cgit v1.2.3 From cf15b929f833ea31b35bafb40f782e113546caa0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 17:55:21 -0700 Subject: Added haddock writer tests. 
--- pandoc.cabal | 2 + tests/Tests/Old.hs | 3 +- tests/haddock-reader.native | 4 +- tests/tables.haddock | 72 +++++ tests/writer.haddock | 660 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 738 insertions(+), 3 deletions(-) create mode 100644 tests/tables.haddock create mode 100644 tests/writer.haddock (limited to 'tests') diff --git a/pandoc.cabal b/pandoc.cabal index b064f39a7..a0312e95b 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -135,6 +135,7 @@ Extra-Source-Files: tests/tables.opendocument, tests/tables.org, tests/tables.asciidoc, + tests/tables.haddock, tests/tables.texinfo, tests/tables.rst, tests/tables.rtf, @@ -156,6 +157,7 @@ Extra-Source-Files: tests/writer.opendocument, tests/writer.org, tests/writer.asciidoc, + tests/writer.haddock, tests/writer.rst, tests/writer.rtf, tests/writer.texinfo, diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 424e1b7c5..fa01b1358 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -131,7 +131,8 @@ tests = [ testGroup "markdown" "opml-reader.opml" "opml-reader.native" ] , testGroup "haddock" - [ test "reader" ["-r", "haddock", "-w", "native", "-s"] + [ testGroup "writer" $ writerTests "haddock" + , test "reader" ["-r", "haddock", "-w", "native", "-s"] "haddock-reader.haddock" "haddock-reader.native" ] , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f) diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index 8edb0b29a..f50fae4ec 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -4,14 +4,14 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "*",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph,",Space,Str "not",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str ">",Space,Str "This",Space,Str "sentence",Space,Str "is",Space,Str "not",Space,Str "code.",Space,Str ">>>",Space,Str "This",Space,Str "is",Space,Str "not",Space,Str "an",Space,Str "example."] ,Para [Str "The",Space,Str "references",Space,Str 
"\955,",Space,Str "\955",Space,Str "and",Space,Str "\955",Space,Str "all",Space,Str "represent",Space,Str "the",Space,Str "lower-case",Space,Str "letter",Space,Str "lambda."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "code",Space,Str "block:"] -,CodeBlock ("",["haskell"],[]) "map :: (a -> b) -> [a] -> [b]\nmap _ [] = []\nmap f (x:xs) = f x : map f xs" +,CodeBlock ("",[],[]) "map :: (a -> b) -> [a] -> [b]\nmap _ [] = []\nmap f (x:xs) = f x : map f xs" ,Para [Str "This",Space,Str "is",Space,Str "another",Space,Str "code",Space,Str "block:"] ,Para [Code ("",[],[]) "f x = x + x.",LineBreak,Code ("",[],[]) "The @...@ code block ",Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Code ("",["haskell"],[]) "Module.Foo",Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\"Hello World\""] ,Para [Str "Haddock",Space,Str "supports",Space,Str "REPL",Space,Str "examples:"] ,Para [Code ("",["haskell","expr"],[]) "fib 10",LineBreak,Code ("",["result"],[]) "55"] ,Para [Code ("",["haskell","expr"],[]) "putStrLn \"foo\\nbar\"",LineBreak,Code ("",["result"],[]) "foo",LineBreak,Code ("",["result"],[]) "bar"] ,Para [Str "That",Space,Str "was",Space,Emph [Str "really",Space,Str "cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib 10 = 55",Str "."] -,Para [Str "This",Space,Str "module",Space,Str "defines",Space,Str "the",Space,Str "type",Space,Code ("",["haskell"],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",["haskell"],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",["haskell"],[]) "Foo",Space,Str "module."] +,Para [Str "This",Space,Str "module",Space,Str 
"defines",Space,Str "the",Space,Str "type",Space,Code ("",[],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",[],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",[],[]) "Foo",Space,Str "module."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "bulleted",Space,Str "list:"] ,BulletList [[Para [Str "first",Space,Str "item"]] diff --git a/tests/tables.haddock b/tests/tables.haddock new file mode 100644 index 000000000..e054dc072 --- /dev/null +++ b/tests/tables.haddock @@ -0,0 +1,72 @@ +Simple table with caption: + +> Right Left Center Default +> ------- ------ -------- --------- +> 12 12 12 12 +> 123 123 123 123 +> 1 1 1 1 +> +Simple table without caption: + +> Right Left Center Default +> ------- ------ -------- --------- +> 12 12 12 12 +> 123 123 123 123 +> 1 1 1 1 +> +Simple table indented two spaces: + +> Right Left Center Default +> ------- ------ -------- --------- +> 12 12 12 12 +> 123 123 123 123 +> 1 1 1 1 +> +Multiline table with caption: + +> -------------------------------------------------------------- +> Centered Left Right Default aligned +> Header Aligned Aligned +> ----------- ---------- ------------ -------------------------- +> First row 12.0 Example of a row that +> spans multiple lines. +> +> Second row 5.0 Here\'s another one. Note +> the blank line between +> rows. 
+> -------------------------------------------------------------- +> +Multiline table without caption: + +> -------------------------------------------------------------- +> Centered Left Right Default aligned +> Header Aligned Aligned +> ----------- ---------- ------------ -------------------------- +> First row 12.0 Example of a row that +> spans multiple lines. +> +> Second row 5.0 Here\'s another one. Note +> the blank line between +> rows. +> -------------------------------------------------------------- +> +Table without column headers: + +> ----- ----- ----- ----- +> 12 12 12 12 +> 123 123 123 123 +> 1 1 1 1 +> ----- ----- ----- ----- +> +Multiline table without column headers: + +> ----------- ---------- ------------ -------------------------- +> First row 12.0 Example of a row that +> spans multiple lines. +> +> Second row 5.0 Here\'s another one. Note +> the blank line between +> rows. +> ----------- ---------- ------------ -------------------------- +> + diff --git a/tests/writer.haddock b/tests/writer.haddock new file mode 100644 index 000000000..7b0811220 --- /dev/null +++ b/tests/writer.haddock @@ -0,0 +1,660 @@ +This is a set of tests for pandoc. Most of them are adapted from John Gruber’s +markdown test suite. + +-------------- + += Headers +#headers# + +== Level 2 with an +#level-2-with-an-embedded-link# + +=== Level 3 with /emphasis/ +#level-3-with-emphasis# + +==== Level 4 +#level-4# + +===== Level 5 +#level-5# + += Level 1 +#level-1# + +== Level 2 with /emphasis/ +#level-2-with-emphasis# + +=== Level 3 +#level-3# + +with no blank line + +== Level 2 +#level-2# + +with no blank line + +-------------- + += Paragraphs +#paragraphs# + +Here’s a regular paragraph. + +In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. +Because a hard-wrapped line in the middle of a paragraph looked like a list +item. + +Here’s one with a bullet. * criminey. + +There should be a hard line break +here. 
+ +-------------- + += Block Quotes +#block-quotes# + +E-mail style: + +This is a block quote. It is pretty short. + +Code in a block quote: + +> sub status { +> print "working"; +> } + +A list: + +1. item one +2. item two + +Nested block quotes: + +nested + +nested + +This should not be a block quote: 2 > 1. + +And a following paragraph. + +-------------- + += Code Blocks +#code-blocks# + +Code: + +> ---- (should be four hyphens) +> +> sub status { +> print "working"; +> } +> +> this code block is indented by one tab + +And: + +> this code block is indented by two tabs +> +> These should not be escaped: \$ \\ \> \[ \{ + +-------------- + += Lists +#lists# + +== Unordered +#unordered# + +Asterisks tight: + +- asterisk 1 +- asterisk 2 +- asterisk 3 + +Asterisks loose: + +- asterisk 1 + +- asterisk 2 + +- asterisk 3 + +Pluses tight: + +- Plus 1 +- Plus 2 +- Plus 3 + +Pluses loose: + +- Plus 1 + +- Plus 2 + +- Plus 3 + +Minuses tight: + +- Minus 1 +- Minus 2 +- Minus 3 + +Minuses loose: + +- Minus 1 + +- Minus 2 + +- Minus 3 + +== Ordered +#ordered# + +Tight: + +1. First +2. Second +3. Third + +and: + +1. One +2. Two +3. Three + +Loose using tabs: + +1. First + +2. Second + +3. Third + +and using spaces: + +1. One + +2. Two + +3. Three + +Multiple paragraphs: + +1. Item 1, graf one. + + Item 1. graf two. The quick brown fox jumped over the lazy dog’s back. + +2. Item 2. + +3. Item 3. + +== Nested +#nested# + +- Tab + - Tab + - Tab + +Here’s another: + +1. First +2. Second: + - Fee + - Fie + - Foe + +3. Third + +Same thing but with paragraphs: + +1. First + +2. Second: + + - Fee + - Fie + - Foe + +3. Third + +== Tabs and spaces +#tabs-and-spaces# + +- this is a list item indented with tabs + +- this is a list item indented with spaces + + - this is an example list item indented with tabs + + - this is an example list item indented with spaces + +== Fancy list markers +#fancy-list-markers# + +(2) begins with 2 +(3) and now 3 + + with a continuation + + iv. 
sublist with roman numerals, starting with 4 + v. more items + (A) a subsublist + (B) a subsublist + +Nesting: + +A. Upper Alpha + I. Upper Roman. + (6) Decimal start with 6 + c) Lower alpha with paren + +Autonumbering: + +1. Autonumber. +2. More. + 1. Nested. + +Should not be a list item: + +M.A. 2007 + +B. Williams + +-------------- + += Definition Lists +#definition-lists# + +Tight using spaces: + +[apple] + red fruit +[orange] + orange fruit +[banana] + yellow fruit + +Tight using tabs: + +[apple] + red fruit +[orange] + orange fruit +[banana] + yellow fruit + +Loose: + +[apple] + red fruit + +[orange] + orange fruit + +[banana] + yellow fruit + +Multiple blocks with italics: + +[/apple/] + red fruit + + contains seeds, crisp, pleasant to taste + +[/orange/] + orange fruit + + > { orange code block } + + orange block quote + +Multiple definitions, tight: + +[apple] + red fruit + computer +[orange] + orange fruit + bank + +Multiple definitions, loose: + +[apple] + red fruit + + computer + +[orange] + orange fruit + + bank + +Blank line after term, indented marker, alternate markers: + +[apple] + red fruit + + computer + +[orange] + orange fruit + + 1. sublist + 2. sublist + += HTML Blocks +#html-blocks# + +Simple block on one line: + +foo + +And nested without indentation: + +foo + +bar + +Interpreted markdown in a table: + +This is /emphasized/ +And this is __strong__ +Here’s a simple block: + +foo + +This should be a code block, though: + +> +> foo +> + +As should this: + +> foo + +Now, nested: + +foo + +This should just be an HTML comment: + +Multiline: + +Code block: + +> + +Just plain comment, with trailing spaces on the line: + +Code: + +> + +Hr’s: + +-------------- + += Inline Markup +#inline-markup# + +This is /emphasized/, and so /is this/. + +This is __strong__, and so __is this__. + +An //. + +__/This is strong and em./__ + +So is __/this/__ word. + +__/This is strong and em./__ + +So is __/this/__ word. + +This is code: @>@, @$@, @\\@, @\\$@, @\@. 
+ +~~This is /strikeout/.~~ + +Superscripts: abcd a/hello/ ahello there. + +Subscripts: H2O, H23O, Hmany of themO. + +These should not be superscripts or subscripts, because of the unescaped +spaces: a^b c^d, a~b c~d. + +-------------- + += Smart quotes, ellipses, dashes +#smart-quotes-ellipses-dashes# + +“Hello,” said the spider. “‘Shelob’ is my name.” + +‘A’, ‘B’, and ‘C’ are letters. + +‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’ + +‘He said, “I want to go.”’ Were you alive in the 70’s? + +Here is some quoted ‘@code@’ and a +“”. + +Some dashes: one—two — three—four — five. + +Dashes between numbers: 5–7, 255–66, 1987–1999. + +Ellipses…and…and…. + +-------------- + += LaTeX +#latex# + +- +- 2 + 2 = 4 +- /x/ ∈ /y/ +- /α/ ∧ /ω/ +- 223 +- /p/-Tree +- Here’s some display math: + $$\\frac{d}{dx}f(x)=\\lim_{h\\to 0}\\frac{f(x+h)-f(x)}{h}$$ +- Here’s one that has a line break in it: /α/ + /ω/ × /x/2. + +These shouldn’t be math: + +- To get the famous equation, write @$e = mc^2$@. +- $22,000 is a /lot/ of money. So is $34,000. (It worked if “lot” is + emphasized.) +- Shoes ($20) and socks ($5). +- Escaped @$@: $73 /this should be emphasized/ 23$. + +Here’s a LaTeX table: + +-------------- + += Special Characters +#special-characters# + +Here is some unicode: + +- I hat: Î +- o umlaut: ö +- section: § +- set membership: ∈ +- copyright: © + +AT&T has an ampersand in their name. + +AT&T is another way to write it. + +This & that. + +4 \< 5. + +6 > 5. + +Backslash: \\ + +Backtick: \` + +Asterisk: * + +Underscore: _ + +Left brace: { + +Right brace: } + +Left bracket: [ + +Right bracket: ] + +Left paren: ( + +Right paren: ) + +Greater-than: > + +Hash: # + +Period: . + +Bang: ! + +Plus: + + +Minus: - + +-------------- + += Links +#links# + +== Explicit +#explicit# + +Just a . + +. + +. + +. + + + + + + + + + +< Empty>. + +== Reference +#reference# + +Foo . + +Foo . + +Foo . + +With . + + by itself should be a link. + +Indented . + +Indented . + +Indented . 
+ +This should [not][] be a link. + +> [not]: /url + +Foo . + +Foo . + +== With ampersands +#with-ampersands# + +Here’s a . + +Here’s a link with an amersand in the link text: . + +Here’s an . + +Here’s an . + +== Autolinks +#autolinks# + +With an ampersand: + +- In a list? +- +- It should. + +An e-mail address: + +Blockquoted: + +Auto-links should not occur here: @\@ + +> or here: + +-------------- + += Images +#images# + +From “Voyage dans la Lune” by Georges Melies (1902): + +<> + +Here is a movie <> icon. + +-------------- + += Footnotes +#footnotes# + +Here is a footnote reference,<#notes [1]> and another.<#notes [2]> This should +/not/ be a footnote reference, because it contains a space.[^my note] Here is +an inline note.<#notes [3]> + +Notes can go in quotes.<#notes [4]> + +1. And in list items.<#notes [5]> + +This paragraph should not be part of the note, as it is not indented. + +#notes# + +1. Here is the footnote. It can go anywhere after the footnote reference. It + need not be placed at the end of the document. + +2. Here’s the long note. This one contains multiple blocks. + + Subsequent blocks are indented to show that they belong to the footnote + (as with list items). + + > { } + + If you want, you can indent every line, but you can also be lazy and just + indent the first line of each block. + +3. This is /easier/ to type. Inline notes may contain + and @]@ verbatim characters, as well as + [bracketed text]. + +4. In quote. + +5. In list. -- cgit v1.2.3 From bc037b69a4e4868c9a5eab5b0ee12a41118da02a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 18:04:08 -0700 Subject: Revised haddock reader tests for changes in reader. 
--- tests/haddock-reader.native | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tests') diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index f50fae4ec..b62189046 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -6,12 +6,12 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "code",Space,Str "block:"] ,CodeBlock ("",[],[]) "map :: (a -> b) -> [a] -> [b]\nmap _ [] = []\nmap f (x:xs) = f x : map f xs" ,Para [Str "This",Space,Str "is",Space,Str "another",Space,Str "code",Space,Str "block:"] -,Para [Code ("",[],[]) "f x = x + x.",LineBreak,Code ("",[],[]) "The @...@ code block ",Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Code ("",["haskell"],[]) "Module.Foo",Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\"Hello World\""] +,Para [Code ("",[],[]) "f x = x + x.",LineBreak,Code ("",[],[]) "The @...@ code block ",Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Code ("",["haskell","module"],[]) "Module.Foo",Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\"Hello World\""] ,Para [Str "Haddock",Space,Str "supports",Space,Str "REPL",Space,Str "examples:"] -,Para [Code ("",["haskell","expr"],[]) "fib 10",LineBreak,Code ("",["result"],[]) "55"] -,Para [Code ("",["haskell","expr"],[]) "putStrLn \"foo\\nbar\"",LineBreak,Code ("",["result"],[]) "foo",LineBreak,Code ("",["result"],[]) "bar"] +,Para [Code ("",["prompt"],[]) ">>>",Space,Code ("",["haskell","expr"],[]) "fib 10",LineBreak,Code ("",["result"],[]) "55"] +,Para [Code ("",["prompt"],[]) ">>>",Space,Code ("",["haskell","expr"],[]) "putStrLn \"foo\\nbar\"",LineBreak,Code ("",["result"],[]) "foo",LineBreak,Code ("",["result"],[]) "bar"] ,Para [Str "That",Space,Str "was",Space,Emph [Str "really",Space,Str "cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib 10 = 55",Str "."] -,Para [Str 
"This",Space,Str "module",Space,Str "defines",Space,Str "the",Space,Str "type",Space,Code ("",[],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",[],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",[],[]) "Foo",Space,Str "module."] +,Para [Str "This",Space,Str "module",Space,Str "defines",Space,Str "the",Space,Str "type",Space,Code ("",["haskell","identifier"],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",["haskell","identifier"],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",["haskell","module"],[]) "Foo",Space,Str "module."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "bulleted",Space,Str "list:"] ,BulletList [[Para [Str "first",Space,Str "item"]] @@ -23,9 +23,9 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "definition",Space,Str "list:"] ,DefinitionList [([Code ("",[],[]) "foo"], - [[Plain [Str "The",Space,Str "description",Space,Str "of",Space,Code ("",[],[]) "foo",Str "."]]]) + [[Para [Str "The",Space,Str "description",Space,Str "of",Space,Code ("",[],[]) "foo",Str "."]]]) ,([Code ("",[],[]) "bar"], - [[Plain [Str "The",Space,Str "description",Space,Str "of",Space,Code ("",[],[]) "bar",Str "."]]])] + [[Para [Str "The",Space,Str "description",Space,Str "of",Space,Code ("",[],[]) "bar",Str "."]]])] ,Para [Str "Here",Space,Str 
"is",Space,Str "a",Space,Str "link:",Space,Link [Str "http://haskell.org"] ("http://haskell.org","http://haskell.org")] ,Para [Link [Str "Haskell"] ("http://haskell.org","http://haskell.org"),Space,Str "is",Space,Str "a",Space,Str "fun",Space,Str "language!"] ,Para [Link [Str "Click",Space,Str "Here!"] ("http://example.com","http://example.com")]] -- cgit v1.2.3 From c4182b39ca009f02fc4e0768056d37d64b93df7c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 18:08:41 -0700 Subject: Small fix to haddock "tables". --- src/Text/Pandoc/Writers/Haddock.hs | 4 ++-- tests/tables.haddock | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 36f57c2b7..1939d3a6d 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -129,7 +129,7 @@ blockToHaddock opts (BlockQuote blocks) = -- Haddock doesn't have tables. Use haddock tables in code. 
blockToHaddock opts (Table caption aligns widths headers rows) = do caption' <- inlineListToHaddock opts caption - let caption'' = if null caption || not (isEnabled Ext_table_captions opts) + let caption'' = if null caption then empty else blankline <> caption' <> blankline rawHeaders <- mapM (blockListToHaddock opts) headers @@ -148,7 +148,7 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do | otherwise -> fmap (id,) $ gridTable opts (all null headers) aligns widths rawHeaders rawRows - return $ prefixed "> " $ nst $ tbl $$ blankline $$ caption'' $$ blankline + return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline blockToHaddock opts (BulletList items) = do contents <- mapM (bulletListItemToHaddock opts) items return $ cat contents <> blankline diff --git a/tests/tables.haddock b/tests/tables.haddock index e054dc072..413ec97ad 100644 --- a/tests/tables.haddock +++ b/tests/tables.haddock @@ -6,6 +6,8 @@ Simple table with caption: > 123 123 123 123 > 1 1 1 1 > +> Demonstration of simple table syntax. + Simple table without caption: > Right Left Center Default @@ -13,7 +15,7 @@ Simple table without caption: > 12 12 12 12 > 123 123 123 123 > 1 1 1 1 -> + Simple table indented two spaces: > Right Left Center Default @@ -22,6 +24,8 @@ Simple table indented two spaces: > 123 123 123 123 > 1 1 1 1 > +> Demonstration of simple table syntax. + Multiline table with caption: > -------------------------------------------------------------- @@ -36,6 +40,8 @@ Multiline table with caption: > rows. > -------------------------------------------------------------- > +> Here\'s the caption. It may span multiple lines. + Multiline table without caption: > -------------------------------------------------------------- @@ -49,7 +55,7 @@ Multiline table without caption: > the blank line between > rows. 
> -------------------------------------------------------------- -> + Table without column headers: > ----- ----- ----- ----- @@ -57,7 +63,7 @@ Table without column headers: > 123 123 123 123 > 1 1 1 1 > ----- ----- ----- ----- -> + Multiline table without column headers: > ----------- ---------- ------------ -------------------------- @@ -68,5 +74,4 @@ Multiline table without column headers: > the blank line between > rows. > ----------- ---------- ------------ -------------------------- -> -- cgit v1.2.3 From 95b6ffcef6428318dd9ca25be6ce6e113ef3c499 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 18:11:08 -0700 Subject: Updated haddock writer tests. --- tests/writer.haddock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tests') diff --git a/tests/writer.haddock b/tests/writer.haddock index 7b0811220..129242153 100644 --- a/tests/writer.haddock +++ b/tests/writer.haddock @@ -240,17 +240,17 @@ Same thing but with paragraphs: with a continuation - iv. sublist with roman numerals, starting with 4 - v. more items - (A) a subsublist - (B) a subsublist + 4. sublist with roman numerals, starting with 4 + 5. more items + (1) a subsublist + (2) a subsublist Nesting: -A. Upper Alpha - I. Upper Roman. +1. Upper Alpha + 1. Upper Roman. (6) Decimal start with 6 - c) Lower alpha with paren + 3) Lower alpha with paren Autonumbering: -- cgit v1.2.3 From 00281559bf9c955ece6b18d48ef487fdc5f4406e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 00:28:23 -0700 Subject: Haddock writer: Use _____ for hrule. Avoids interpretation as list. 
--- src/Text/Pandoc/Writers/Haddock.hs | 4 ++-- tests/writer.haddock | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 59d979ea8..1c82839d0 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -112,8 +112,8 @@ blockToHaddock _ (RawBlock f str) | f == "haddock" = do return $ text str <> text "\n" | otherwise = return empty -blockToHaddock _ HorizontalRule = - return $ blankline <> text "--------------" <> blankline +blockToHaddock opts HorizontalRule = + return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline blockToHaddock opts (Header level (ident,_,_) inlines) = do contents <- inlineListToHaddock opts inlines let attr' = if null ident diff --git a/tests/writer.haddock b/tests/writer.haddock index 129242153..0772331e3 100644 --- a/tests/writer.haddock +++ b/tests/writer.haddock @@ -1,7 +1,7 @@ This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite. --------------- +______________________________________________________________________________ = Headers #headers# @@ -34,7 +34,7 @@ with no blank line with no blank line --------------- +______________________________________________________________________________ = Paragraphs #paragraphs# @@ -50,7 +50,7 @@ Here’s one with a bullet. * criminey. There should be a hard line break here. --------------- +______________________________________________________________________________ = Block Quotes #block-quotes# @@ -80,7 +80,7 @@ This should not be a block quote: 2 > 1. And a following paragraph. 
--------------- +______________________________________________________________________________ = Code Blocks #code-blocks# @@ -101,7 +101,7 @@ And: > > These should not be escaped: \$ \\ \> \[ \{ --------------- +______________________________________________________________________________ = Lists #lists# @@ -264,7 +264,7 @@ M.A. 2007 B. Williams --------------- +______________________________________________________________________________ = Definition Lists #definition-lists# @@ -397,7 +397,7 @@ Code: Hr’s: --------------- +______________________________________________________________________________ = Inline Markup #inline-markup# @@ -427,7 +427,7 @@ Subscripts: H2O, H23O, Hmany of themO. These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d. --------------- +______________________________________________________________________________ = Smart quotes, ellipses, dashes #smart-quotes-ellipses-dashes# @@ -449,7 +449,7 @@ Dashes between numbers: 5–7, 255–66, 1987–1999. Ellipses…and…and…. --------------- +______________________________________________________________________________ = LaTeX #latex# @@ -474,7 +474,7 @@ These shouldn’t be math: Here’s a LaTeX table: --------------- +______________________________________________________________________________ = Special Characters #special-characters# @@ -529,7 +529,7 @@ Plus: + Minus: - --------------- +______________________________________________________________________________ = Links #links# @@ -610,7 +610,7 @@ Auto-links should not occur here: @\@ > or here: --------------- +______________________________________________________________________________ = Images #images# @@ -621,7 +621,7 @@ From “Voyage dans la Lune” by Georges Melies (1902): Here is a movie <> icon. 
--------------- +______________________________________________________________________________ = Footnotes #footnotes# -- cgit v1.2.3 From ceb742b1246f975437d7a0083139d248c94036b5 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 12:05:16 -0400 Subject: Add ReaderOptions to the docx tests This will allow for testing different media embedding (in addition to any other applicable options.) --- tests/Tests/Readers/Docx.hs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 0a963ddc6..273f03f4d 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -8,20 +8,23 @@ import Test.Framework import qualified Data.ByteString.Lazy as B import Text.Pandoc.Readers.Docx -compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) -compareOutput docxFile nativeFile = do +compareOutput :: ReaderOptions -> FilePath -> FilePath -> IO (Pandoc, Pandoc) +compareOutput opts docxFile nativeFile = do df <- B.readFile docxFile nf <- Prelude.readFile nativeFile - return $ (readDocx def df, readNative nf) + return $ (readDocx opts df, readNative nf) -testCompare' :: String -> FilePath -> FilePath -> IO Test -testCompare' name docxFile nativeFile = do - (dp, np) <- compareOutput docxFile nativeFile +testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO Test +testCompareWithOptsIO opts name docxFile nativeFile = do + (dp, np) <- compareOutput opts docxFile nativeFile return $ test id name (dp, np) +testCompareWithOpts :: ReaderOptions -> String -> FilePath -> FilePath -> Test +testCompareWithOpts opts name docxFile nativeFile = + buildTest $ testCompareWithOptsIO opts name docxFile nativeFile + testCompare :: String -> FilePath -> FilePath -> Test -testCompare name docxFile nativeFile = - buildTest $ testCompare' name docxFile nativeFile +testCompare = testCompareWithOpts def tests :: [Test] -- cgit v1.2.3 
From d19996d7438fbd2ce56cf3ce46b99cd71437cacb Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:29:59 -0400 Subject: Fix notes test. This previously allowed spaces at the beginning of a paragraph. --- tests/docx.notes.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/docx.notes.native b/tests/docx.notes.native index 1e9b6bba4..5a94b1999 100644 --- a/tests/docx.notes.native +++ b/tests/docx.notes.native @@ -1,2 +1,2 @@ [Header 2 ("",[],[]) [Str "A",Space,Str "footnote"] -,Para [Str "Test",Space,Str "footnote.",Note [Para [Space,Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Space,Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] +,Para [Str "Test",Space,Str "footnote.",Note [Para [Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] -- cgit v1.2.3 From da0d1d27ac98ca28e66bc2df3de2bce738068fb8 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:33:22 -0400 Subject: Add tabs tests. 
--- tests/Tests/Readers/Docx.hs | 7 ++++++- tests/docx.tabs.docx | Bin 0 -> 12919 bytes tests/docx.tabs.native | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/docx.tabs.docx create mode 100644 tests/docx.tabs.native (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 273f03f4d..3a13641a9 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -44,7 +44,12 @@ tests = [ testGroup "inlines" , testCompare "handling unicode input" "docx.unicode.docx" - "docx.unicode.native"] + "docx.unicode.native" + , testCompare + "literal tabs" + "docx.tabs.docx" + "docx.tabs.native" + ] , testGroup "blocks" [ testCompare "headers" diff --git a/tests/docx.tabs.docx b/tests/docx.tabs.docx new file mode 100644 index 000000000..6ff5f4bb1 Binary files /dev/null and b/tests/docx.tabs.docx differ diff --git a/tests/docx.tabs.native b/tests/docx.tabs.native new file mode 100644 index 000000000..05461f20b --- /dev/null +++ b/tests/docx.tabs.native @@ -0,0 +1,2 @@ +[Para [Str "Some",Space,Str "text",Space,Str "separated",Space,Str "by",Space,Str "a",Space,Str "tab."] +,Para [Str "Tab-indented",Space,Str "text."]] -- cgit v1.2.3 From 12efffa85a257dbe81137f97334b2c6a7e072777 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 10:24:30 -0700 Subject: LaTeX writer: Fixed strikeout + highlighted code. Closes #1294. Previously strikeout highlighted code caused an error. --- src/Text/Pandoc/Writers/LaTeX.hs | 11 ++++++++++- tests/Tests/Writers/LaTeX.hs | 12 +++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index c221b318e..ed735242f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -678,7 +678,9 @@ inlineToLaTeX (Emph lst) = inlineToLaTeX (Strong lst) = inlineListToLaTeX lst >>= return . 
inCmd "textbf" inlineToLaTeX (Strikeout lst) = do - contents <- inlineListToLaTeX lst + -- we need to protect VERB in an mbox or we get an error + -- see #1294 + contents <- inlineListToLaTeX $ protectCode lst modify $ \s -> s{ stStrikeout = True } return $ inCmd "sout" contents inlineToLaTeX (Superscript lst) = @@ -784,6 +786,13 @@ inlineToLaTeX (Note contents) = do -- note: a \n before } needed when note ends with a Verbatim environment else "\\footnote" <> braces noteContents +protectCode :: [Inline] -> [Inline] +protectCode [] = [] +protectCode (x@(Code ("",[],[]) _) : xs) = x : protectCode xs +protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs + where ltx = RawInline (Format "latex") +protectCode (x : xs) = x : protectCode xs + citationsToNatbib :: [Citation] -> State WriterState Doc citationsToNatbib (one:[]) = citeCommand c p s k diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index 8a9519e2e..6db6542a0 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -8,7 +8,7 @@ import Tests.Helpers import Tests.Arbitrary() latex :: (ToString a, ToPandoc a) => a -> String -latex = writeLaTeX def . toPandoc +latex = writeLaTeX def{ writerHighlight = True } . toPandoc latexListing :: (ToString a, ToPandoc a) => a -> String latexListing = writeLaTeX def{ writerListings = True } . 
toPandoc @@ -54,4 +54,14 @@ tests = [ testGroup "code blocks" (text "Header 1" <> note (plain $ text "note")) =?> "\\section*{Header 1\\footnote{note}}\\label{foo}\n\\addcontentsline{toc}{section}{Header 1}\n" ] + , testGroup "inline code" + [ "struck out and highlighted" =: + strikeout (codeWith ("",["haskell"],[]) "foo" <> space + <> str "bar") =?> + "\\sout{\\mbox{\\VERB|\\NormalTok{foo}|} bar}" + , "struck out and not highlighted" =: + strikeout (code "foo" <> space + <> str "bar") =?> + "\\sout{\\texttt{foo} bar}" + ] ] -- cgit v1.2.3 From a4508d7fcff0fc80af7b9d03177679860f4d00e6 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 18:26:15 -0400 Subject: Docx reader tests: Introduce NoNormPandoc type. This is just a wrapper around Pandoc that doesn't normalize with `toString`. We want to make sure that our own normalization process works. If, in the future, we are able to hook into the builder's normalization, this will be removed. --- tests/Tests/Readers/Docx.hs | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 3a13641a9..e8fa33241 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -7,12 +7,37 @@ import Tests.Helpers import Test.Framework import qualified Data.ByteString.Lazy as B import Text.Pandoc.Readers.Docx +import Text.Pandoc.Writers.Native (writeNative) +import qualified Data.Map as M -compareOutput :: ReaderOptions -> FilePath -> FilePath -> IO (Pandoc, Pandoc) +-- We define a wrapper around pandoc that doesn't normalize in the +-- tests. Since we do our own normalization, we want to make sure +-- we're doing it right. 
+ +data NoNormPandoc = NoNormPandoc {unNoNorm :: Pandoc} + deriving Show + +noNorm :: Pandoc -> NoNormPandoc +noNorm = NoNormPandoc + +instance ToString NoNormPandoc where + toString d = writeNative def{ writerStandalone = s } $ toPandoc d + where s = case d of + NoNormPandoc (Pandoc (Meta m) _) + | M.null m -> False + | otherwise -> True + +instance ToPandoc NoNormPandoc where + toPandoc = unNoNorm + +compareOutput :: ReaderOptions + -> FilePath + -> FilePath + -> IO (NoNormPandoc, NoNormPandoc) compareOutput opts docxFile nativeFile = do df <- B.readFile docxFile nf <- Prelude.readFile nativeFile - return $ (readDocx opts df, readNative nf) + return $ (noNorm (readDocx opts df), noNorm (readNative nf)) testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO Test testCompareWithOptsIO opts name docxFile nativeFile = do -- cgit v1.2.3 From ca4add679ce6dd438cc3f6d58f82d04a9ad6305e Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 21 Jun 2014 17:58:32 -0400 Subject: Add normalization test. Add torture-test for new normalization functions. One problem that this test demonstrates is that word has a tendency to turn off formatting at a space, and then turn it back on after. I'm not sure yet whether this is something we should fix. 
--- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.normalize.docx | Bin 0 -> 25994 bytes tests/docx.normalize.native | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 tests/docx.normalize.docx create mode 100644 tests/docx.normalize.native (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index e8fa33241..74184efc6 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -74,6 +74,10 @@ tests = [ testGroup "inlines" "literal tabs" "docx.tabs.docx" "docx.tabs.native" + , testCompare + "normalizing inlines" + "docx.normalize.docx" + "docx.normalize.native" ] , testGroup "blocks" [ testCompare diff --git a/tests/docx.normalize.docx b/tests/docx.normalize.docx new file mode 100644 index 000000000..5e4370a47 Binary files /dev/null and b/tests/docx.normalize.docx differ diff --git a/tests/docx.normalize.native b/tests/docx.normalize.native new file mode 100644 index 000000000..fa34d0581 --- /dev/null +++ b/tests/docx.normalize.native @@ -0,0 +1,2 @@ +[Para [Str "These",Space,Str "are",Space,Str "different",Space,Str "fonts."] +,Para [Strong [Str "These",Space,Emph [Str "are"]],Space,Strong [Emph [Strikeout [Str "different"]],Space,Str "fonts."]]] -- cgit v1.2.3 From b3df3a38611fe4fd03fa2d4e38ba45ae7cf8fe08 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 22 Jun 2014 01:56:33 -0400 Subject: Docx reader tests: Correct normalize test. 
--- tests/docx.normalize.native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/docx.normalize.native b/tests/docx.normalize.native index fa34d0581..aeba672c4 100644 --- a/tests/docx.normalize.native +++ b/tests/docx.normalize.native @@ -1,2 +1,2 @@ [Para [Str "These",Space,Str "are",Space,Str "different",Space,Str "fonts."] -,Para [Strong [Str "These",Space,Emph [Str "are"]],Space,Strong [Emph [Strikeout [Str "different"]],Space,Str "fonts."]]] +,Para [Strong [Str "These",Space,Emph [Str "are",Space,Strikeout [Str "different"]],Space,Str "fonts."]]] -- cgit v1.2.3 From ed43513087b514a5240fde04784dbf8709182513 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 22 Jun 2014 01:58:41 -0400 Subject: Docx reader tests: add tests for normalization deep in blocks. --- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.deep_normalize.docx | Bin 0 -> 29246 bytes tests/docx.deep_normalize.native | 6 ++++++ 3 files changed, 10 insertions(+) create mode 100644 tests/docx.deep_normalize.docx create mode 100644 tests/docx.deep_normalize.native (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 74184efc6..ffb079eee 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -78,6 +78,10 @@ tests = [ testGroup "inlines" "normalizing inlines" "docx.normalize.docx" "docx.normalize.native" + , testCompare + "normalizing inlines deep inside blocks" + "docx.deep_normalize.docx" + "docx.deep_normalize.native" ] , testGroup "blocks" [ testCompare diff --git a/tests/docx.deep_normalize.docx b/tests/docx.deep_normalize.docx new file mode 100644 index 000000000..7626d59ce Binary files /dev/null and b/tests/docx.deep_normalize.docx differ diff --git a/tests/docx.deep_normalize.native b/tests/docx.deep_normalize.native new file mode 100644 index 000000000..9b2089ec8 --- /dev/null +++ b/tests/docx.deep_normalize.native @@ -0,0 +1,6 @@ +[OrderedList (1,Decimal,OneParen) + [[Para 
[Str "This",Space,Str "is",Space,Str "at",Space,Str "the",Space,Str "first",Space,Str "level"] + ,OrderedList (1,LowerAlpha,DefaultDelim) + [[Para [Str "This",Space,Str "is",Space,Str "at",Space,Str "the",Space,Str "second",Space,Str "level"] + ,OrderedList (1,LowerRoman,DefaultDelim) + [[Para [Str "This",Space,Str "is",Space,Emph [Str "at",Space,Strong [Str "the",Space,Str "third",Space,Str "level"],Str ",",Space,Str "and",Space,Str "I",Space,Str "want",Space,Str "to"],Space,Str "test",Space,Str "normalization",Space,Str "here."]]]]]]]] -- cgit v1.2.3 From 87ab01637e1dc0f583277828bc458567a72e38ce Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 23 Jun 2014 12:51:10 -0700 Subject: LaTeX writer: Use `\textquotesingle` for `'` in inline code. Otherwise we get curly quotes in the PDF output. Closes #1364. --- src/Text/Pandoc/Writers/LaTeX.hs | 1 + tests/Tests/Writers/LaTeX.hs | 2 ++ 2 files changed, 3 insertions(+) (limited to 'tests') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f2f7438c4..100bf900d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -220,6 +220,7 @@ stringToLaTeX ctx (x:xs) = do '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as ']' -> "{]}" ++ rest -- optional arguments + '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest '\160' -> "~" ++ rest '\x2026' -> "\\ldots{}" ++ rest '\x2018' | ligatures -> "`" ++ rest diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index 6db6542a0..c32ded36d 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -63,5 +63,7 @@ tests = [ testGroup "code blocks" strikeout (code "foo" <> space <> str "bar") =?> "\\sout{\\texttt{foo} bar}" + , "single quotes" =: + code "dog's" =?> "\\texttt{dog\\textquotesingle{}s}" ] ] -- cgit v1.2.3 From 9b954fa855158d99b4ddba7c3ffe7f2fed7ce25f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 
15:40:34 -0400 Subject: Add test for correctly trimming spaces in formatting. This used to be fixed in the tree-walking. We need to make sure we're doing it right now. --- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.trailing_spaces_in_formatting.docx | Bin 0 -> 12916 bytes tests/docx.trailing_spaces_in_formatting.native | 1 + 3 files changed, 5 insertions(+) create mode 100644 tests/docx.trailing_spaces_in_formatting.docx create mode 100644 tests/docx.trailing_spaces_in_formatting.native (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index ffb079eee..a42dc31e9 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -82,6 +82,10 @@ tests = [ testGroup "inlines" "normalizing inlines deep inside blocks" "docx.deep_normalize.docx" "docx.deep_normalize.native" + , testCompare + "move trailing spaces outside of formatting" + "docx.trailing_spaces_in_formatting.docx" + "docx.trailing_spaces_in_formatting.native" ] , testGroup "blocks" [ testCompare diff --git a/tests/docx.trailing_spaces_in_formatting.docx b/tests/docx.trailing_spaces_in_formatting.docx new file mode 100644 index 000000000..ebe7404a9 Binary files /dev/null and b/tests/docx.trailing_spaces_in_formatting.docx differ diff --git a/tests/docx.trailing_spaces_in_formatting.native b/tests/docx.trailing_spaces_in_formatting.native new file mode 100644 index 000000000..46ea9bca8 --- /dev/null +++ b/tests/docx.trailing_spaces_in_formatting.native @@ -0,0 +1 @@ +[Para [Str "Turn",Space,Str "my",Space,Emph [Str "formatting"],Space,Str "off",Space,Str "after",Space,Str "the",Space,Str "spaces."]] -- cgit v1.2.3 From 21295c5ab5567126c3112b9417f68c76b4f6debf Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 10:33:49 -0400 Subject: Docx reader: add failing tests for inline code and code blocks. 
--- tests/Tests/Readers/Docx.hs | 9 +++++++++ tests/docx.codeblock.docx | Bin 0 -> 8465 bytes tests/docx.codeblock.native | 3 +++ tests/docx.inline_code.docx | Bin 0 -> 8379 bytes tests/docx.inline_code.native | 1 + 5 files changed, 13 insertions(+) create mode 100644 tests/docx.codeblock.docx create mode 100644 tests/docx.codeblock.native create mode 100644 tests/docx.inline_code.docx create mode 100644 tests/docx.inline_code.native (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index a42dc31e9..c49bee14e 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -86,6 +86,10 @@ tests = [ testGroup "inlines" "move trailing spaces outside of formatting" "docx.trailing_spaces_in_formatting.docx" "docx.trailing_spaces_in_formatting.native" + , testCompare + "inline code (with VerbatimChar style)" + "docx.inline_code.docx" + "docx.inline_code.native" ] , testGroup "blocks" [ testCompare @@ -108,6 +112,11 @@ tests = [ testGroup "inlines" "tables" "docx.tables.docx" "docx.tables.native" + , testCompare + "code block" + "docx.codeblock.docx" + "docx.codeblock.native" + ] ] diff --git a/tests/docx.codeblock.docx b/tests/docx.codeblock.docx new file mode 100644 index 000000000..8ec00953c Binary files /dev/null and b/tests/docx.codeblock.docx differ diff --git a/tests/docx.codeblock.native b/tests/docx.codeblock.native new file mode 100644 index 000000000..441e33511 --- /dev/null +++ b/tests/docx.codeblock.native @@ -0,0 +1,3 @@ +[Para [Str "This",Space,Str "is",Space,Str "some",Space,Str "code:"] +,CodeBlock ("",[],[]) "readDocx :: ReaderOptions\n -> B.ByteString\n -> Pandoc" +,Para [Str "from",Space,Str "the",Space,Str "beginning",Space,Str "of",Space,Str "the",Space,Str "docx",Space,Str "reader."]] diff --git a/tests/docx.inline_code.docx b/tests/docx.inline_code.docx new file mode 100644 index 000000000..75c5ea3cb Binary files /dev/null and b/tests/docx.inline_code.docx differ diff --git 
a/tests/docx.inline_code.native b/tests/docx.inline_code.native new file mode 100644 index 000000000..11cf2777c --- /dev/null +++ b/tests/docx.inline_code.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "of",Space,Code ("",[],[]) "inline code",Space,Str "with",Space,Str "three",Space,Str "spaces."]] -- cgit v1.2.3 From 2621482d69d96a7f069133e57f6df8a479ad5111 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 11:48:23 -0400 Subject: Docx Reader: add failing definition list tests. --- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.definition_list.docx | Bin 0 -> 8455 bytes tests/docx.definition_list.native | 7 +++++++ 3 files changed, 11 insertions(+) create mode 100644 tests/docx.definition_list.docx create mode 100644 tests/docx.definition_list.native (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index c49bee14e..4d062bbc0 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -100,6 +100,10 @@ tests = [ testGroup "inlines" "lists" "docx.lists.docx" "docx.lists.native" + , testCompare + "definition lists" + "docx.definition_list.docx" + "docx.definition_list.native" , testCompare "footnotes and endnotes" "docx.notes.docx" diff --git a/tests/docx.definition_list.docx b/tests/docx.definition_list.docx new file mode 100644 index 000000000..a19edda45 Binary files /dev/null and b/tests/docx.definition_list.docx differ diff --git a/tests/docx.definition_list.native b/tests/docx.definition_list.native new file mode 100644 index 000000000..2e08ff1ac --- /dev/null +++ b/tests/docx.definition_list.native @@ -0,0 +1,7 @@ +[DefinitionList + [([Str "Term",Space,Str "1"], + [[Para [Str "Definition",Space,Str "1"]]]) + ,([Str "Term",Space,Str "2",Space,Str "with",Space,Emph [Str "inline",Space,Str "markup"]], + [[Para [Str "Definition",Space,Str "2"] + ,CodeBlock ("",[],[]) "{ some code, part of Definition 2 }" + ,Para [Str "Third",Space,Str 
"paragraph",Space,Str "of",Space,Str "definition",Space,Str "2."]]])]] -- cgit v1.2.3 From a2b6ab847cb1c997c6ae7b8ed36f543a7ed90ecd Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 11:09:28 -0400 Subject: Docx reader: Add tests for basic track changes This is what seems like the sensible default: read in insertions, and ignore deletions. In the future, it would be good if options were available for either taking in deletions or keeping both in some scriptable format. --- tests/Tests/Readers/Docx.hs | 10 ++++++++++ tests/docx.track_changes_deletion.docx | Bin 0 -> 13350 bytes tests/docx.track_changes_deletion_only_ins.native | 1 + tests/docx.track_changes_insertion.docx | Bin 0 -> 12956 bytes tests/docx.track_changes_insertion_only_ins.native | 1 + 5 files changed, 12 insertions(+) create mode 100644 tests/docx.track_changes_deletion.docx create mode 100644 tests/docx.track_changes_deletion_only_ins.native create mode 100644 tests/docx.track_changes_insertion.docx create mode 100644 tests/docx.track_changes_insertion_only_ins.native (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 4d062bbc0..f34e123ed 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -122,5 +122,15 @@ tests = [ testGroup "inlines" "docx.codeblock.native" ] + , testGroup "track changes" + [ testCompare + "insert insertion (insertions only)" + "docx.track_changes_insertion.docx" + "docx.track_changes_insertion_only_ins.native" + , testCompare + "skip deletion (insertions only)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_only_ins.native" + ] ] diff --git a/tests/docx.track_changes_deletion.docx b/tests/docx.track_changes_deletion.docx new file mode 100644 index 000000000..5cfdbeed8 Binary files /dev/null and b/tests/docx.track_changes_deletion.docx differ diff --git a/tests/docx.track_changes_deletion_only_ins.native b/tests/docx.track_changes_deletion_only_ins.native new file mode 
100644 index 000000000..205c67810 --- /dev/null +++ b/tests/docx.track_changes_deletion_only_ins.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "a",Space,Str "deletion."]] diff --git a/tests/docx.track_changes_insertion.docx b/tests/docx.track_changes_insertion.docx new file mode 100644 index 000000000..fbdc9003e Binary files /dev/null and b/tests/docx.track_changes_insertion.docx differ diff --git a/tests/docx.track_changes_insertion_only_ins.native b/tests/docx.track_changes_insertion_only_ins.native new file mode 100644 index 000000000..ca2e46df0 --- /dev/null +++ b/tests/docx.track_changes_insertion_only_ins.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "two",Space,Str "exciting",Space,Str "insertions."]] -- cgit v1.2.3 From afdc0af779d245e781ec5de4cf7b3afcef47190b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 16:13:59 -0400 Subject: Track changes tests. 
--- tests/Tests/Readers/Docx.hs | 32 +++++++++++++++++++--- tests/docx.track_changes_deletion_accept.native | 1 + tests/docx.track_changes_deletion_all.native | 1 + tests/docx.track_changes_deletion_only_ins.native | 1 - tests/docx.track_changes_deletion_reject.native | 1 + tests/docx.track_changes_insertion_accept.native | 1 + tests/docx.track_changes_insertion_all.native | 1 + tests/docx.track_changes_insertion_only_ins.native | 1 - tests/docx.track_changes_insertion_reject.native | 1 + 9 files changed, 34 insertions(+), 6 deletions(-) create mode 100644 tests/docx.track_changes_deletion_accept.native create mode 100644 tests/docx.track_changes_deletion_all.native delete mode 100644 tests/docx.track_changes_deletion_only_ins.native create mode 100644 tests/docx.track_changes_deletion_reject.native create mode 100644 tests/docx.track_changes_insertion_accept.native create mode 100644 tests/docx.track_changes_insertion_all.native delete mode 100644 tests/docx.track_changes_insertion_only_ins.native create mode 100644 tests/docx.track_changes_insertion_reject.native (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index f34e123ed..8c51217cf 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -124,13 +124,37 @@ tests = [ testGroup "inlines" ] , testGroup "track changes" [ testCompare - "insert insertion (insertions only)" + "insertion (default)" "docx.track_changes_insertion.docx" - "docx.track_changes_insertion_only_ins.native" + "docx.track_changes_insertion_accept.native" + , testCompareWithOpts def{readerTrackChanges=AcceptChanges} + "insert insertion (accept)" + "docx.track_changes_insertion.docx" + "docx.track_changes_insertion_accept.native" + , testCompareWithOpts def{readerTrackChanges=RejectChanges} + "remove insertion (reject)" + "docx.track_changes_insertion.docx" + "docx.track_changes_insertion_reject.native" , testCompare - "skip deletion (insertions only)" + "deletion (default)" + 
"docx.track_changes_deletion.docx" + "docx.track_changes_deletion_accept.native" + , testCompareWithOpts def{readerTrackChanges=AcceptChanges} + "remove deletion (accept)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_accept.native" + , testCompareWithOpts def{readerTrackChanges=RejectChanges} + "insert deletion (reject)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_reject.native" + , testCompareWithOpts def{readerTrackChanges=AllChanges} + "keep insertion (all)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_all.native" + , testCompareWithOpts def{readerTrackChanges=AllChanges} + "keep deletion (all)" "docx.track_changes_deletion.docx" - "docx.track_changes_deletion_only_ins.native" + "docx.track_changes_deletion_all.native" ] ] diff --git a/tests/docx.track_changes_deletion_accept.native b/tests/docx.track_changes_deletion_accept.native new file mode 100644 index 000000000..205c67810 --- /dev/null +++ b/tests/docx.track_changes_deletion_accept.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "a",Space,Str "deletion."]] diff --git a/tests/docx.track_changes_deletion_all.native b/tests/docx.track_changes_deletion_all.native new file mode 100644 index 000000000..7f4ed2a90 --- /dev/null +++ b/tests/docx.track_changes_deletion_all.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "a",Span ("",["deletion"],[("author","eng-dept"),("date","2014-06-25T10:42:00Z")]) [Str "n",Space,Str "excessively",Space,Str "modified"],Space,Str "deletion."]] diff --git a/tests/docx.track_changes_deletion_only_ins.native b/tests/docx.track_changes_deletion_only_ins.native deleted file mode 100644 index 205c67810..000000000 --- a/tests/docx.track_changes_deletion_only_ins.native +++ /dev/null @@ -1 +0,0 @@ -[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str 
"a",Space,Str "deletion."]] diff --git a/tests/docx.track_changes_deletion_reject.native b/tests/docx.track_changes_deletion_reject.native new file mode 100644 index 000000000..04283bee5 --- /dev/null +++ b/tests/docx.track_changes_deletion_reject.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "an",Space,Str "excessively",Space,Str "modified",Space,Str "deletion."]] diff --git a/tests/docx.track_changes_insertion_accept.native b/tests/docx.track_changes_insertion_accept.native new file mode 100644 index 000000000..ca2e46df0 --- /dev/null +++ b/tests/docx.track_changes_insertion_accept.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "two",Space,Str "exciting",Space,Str "insertions."]] diff --git a/tests/docx.track_changes_insertion_all.native b/tests/docx.track_changes_insertion_all.native new file mode 100644 index 000000000..12664e425 --- /dev/null +++ b/tests/docx.track_changes_insertion_all.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Span ("",["insertion"],[("author","eng-dept"),("date","2014-06-25T10:40:00Z")]) [Str "two",Space,Str "exciting"],Space,Str "insertions."]] diff --git a/tests/docx.track_changes_insertion_only_ins.native b/tests/docx.track_changes_insertion_only_ins.native deleted file mode 100644 index ca2e46df0..000000000 --- a/tests/docx.track_changes_insertion_only_ins.native +++ /dev/null @@ -1 +0,0 @@ -[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "two",Space,Str "exciting",Space,Str "insertions."]] diff --git a/tests/docx.track_changes_insertion_reject.native b/tests/docx.track_changes_insertion_reject.native new file mode 100644 index 000000000..def000abd --- /dev/null +++ b/tests/docx.track_changes_insertion_reject.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str 
"text",Space,Str "with",Space,Str "insertions."]] -- cgit v1.2.3 From b152145d6d4154a59f9ce36d5fc6f1c60aa0928c Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 03:57:38 -0400 Subject: Change test result to match new behavior. --- tests/docx.links.native | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/tests/docx.links.native b/tests/docx.links.native index 98768de5a..fb95863a4 100644 --- a/tests/docx.links.native +++ b/tests/docx.links.native @@ -1,6 +1,6 @@ [Header 2 ("",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"] ,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://google.com",""),Space,Str "to",Space,Str "a",Space,Str "popular",Space,Str "website."] -,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#_A_section_for",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."] +,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#a-section-for-testing-link-targets",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."] ,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#my_bookmark",""),Space,Str "to",Space,Str "a",Space,Str "bookmark."] -,Header 2 ("_A_section_for",[],[]) [Str "A",Space,Str "section",Space,Str "for",Space,Str "testing",Space,Str "link",Space,Str "targets"] +,Header 2 ("a-section-for-testing-link-targets",[],[]) [Str "A",Space,Str "section",Space,Str "for",Space,Str "testing",Space,Str "link",Space,Str "targets"] ,Para [Str "A",Space,Str "bookmark",Space,Str "right",Space,Span ("my_bookmark",["anchor"],[]) [],Str "here"]] -- cgit v1.2.3 From 7fb74d88fb3a55a91340c00b5f1e3c006314769d Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 09:35:17 -0400 Subject: Update docx tests to reflect new expected behavior Now doing auto ids for all headers, so tests should reflect that. 
--- tests/docx.block_quotes_parse_indent.native | 2 +- tests/docx.headers.native | 6 +++--- tests/docx.image_no_embed.native | 2 +- tests/docx.links.native | 2 +- tests/docx.lists.native | 2 +- tests/docx.notes.native | 2 +- tests/docx.tables.native | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) (limited to 'tests') diff --git a/tests/docx.block_quotes_parse_indent.native b/tests/docx.block_quotes_parse_indent.native index da1cef110..842b3606a 100644 --- a/tests/docx.block_quotes_parse_indent.native +++ b/tests/docx.block_quotes_parse_indent.native @@ -1,4 +1,4 @@ -[Header 2 ("",[],[]) [Str "Some",Space,Str "block",Space,Str "quotes,",Space,Str "in",Space,Str "different",Space,Str "ways"] +[Header 2 ("some-block-quotes-in-different-ways",[],[]) [Str "Some",Space,Str "block",Space,Str "quotes,",Space,Str "in",Space,Str "different",Space,Str "ways"] ,Para [Str "This",Space,Str "is",Space,Str "the",Space,Str "proper",Space,Str "way,",Space,Str "with",Space,Str "a",Space,Str "style"] ,BlockQuote [Para [Str "I",Space,Str "don\8217t",Space,Str "know",Space,Str "why",Space,Str "this",Space,Str "would",Space,Str "be",Space,Str "in",Space,Str "italics,",Space,Str "but",Space,Str "so",Space,Str "it",Space,Str "appears",Space,Str "to",Space,Str "be",Space,Str "on",Space,Str "my",Space,Str "screen."]] diff --git a/tests/docx.headers.native b/tests/docx.headers.native index e4d4a4781..03f967728 100644 --- a/tests/docx.headers.native +++ b/tests/docx.headers.native @@ -1,5 +1,5 @@ -[Header 1 ("",[],[]) [Str "A",Space,Str "Test",Space,Str "of",Space,Str "Headers"] -,Header 2 ("",[],[]) [Str "Second",Space,Str "Level"] +[Header 1 ("a-test-of-headers",[],[]) [Str "A",Space,Str "Test",Space,Str "of",Space,Str "Headers"] +,Header 2 ("second-level",[],[]) [Str "Second",Space,Str "Level"] ,Para [Str "Some",Space,Str "plain",Space,Str "text."] -,Header 3 ("",[],[]) [Str "Third",Space,Str "level"] +,Header 3 ("third-level",[],[]) [Str "Third",Space,Str "level"] ,Para [Str 
"Some",Space,Str "more",Space,Str "plain",Space,Str "text."]] diff --git a/tests/docx.image_no_embed.native b/tests/docx.image_no_embed.native index 18debf135..063958bc7 100644 --- a/tests/docx.image_no_embed.native +++ b/tests/docx.image_no_embed.native @@ -1,2 +1,2 @@ -[Header 2 ("",[],[]) [Str "An",Space,Str "image"] +[Header 2 ("an-image",[],[]) [Str "An",Space,Str "image"] ,Para [Image [] ("word/media/image1.jpeg","")]] diff --git a/tests/docx.links.native b/tests/docx.links.native index fb95863a4..c741fe875 100644 --- a/tests/docx.links.native +++ b/tests/docx.links.native @@ -1,4 +1,4 @@ -[Header 2 ("",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"] +[Header 2 ("an-internal-link-and-an-external-link",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"] ,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://google.com",""),Space,Str "to",Space,Str "a",Space,Str "popular",Space,Str "website."] ,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#a-section-for-testing-link-targets",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."] ,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#my_bookmark",""),Space,Str "to",Space,Str "a",Space,Str "bookmark."] diff --git a/tests/docx.lists.native b/tests/docx.lists.native index e46bc140b..af922b335 100644 --- a/tests/docx.lists.native +++ b/tests/docx.lists.native @@ -1,4 +1,4 @@ -[Header 2 ("",[],[]) [Str "Some",Space,Str "nested",Space,Str "lists"] +[Header 2 ("some-nested-lists",[],[]) [Str "Some",Space,Str "nested",Space,Str "lists"] ,OrderedList (1,Decimal,Period) [[Para [Str "one"]] ,[Para [Str "two"] diff --git a/tests/docx.notes.native b/tests/docx.notes.native index 5a94b1999..ec1b414b6 100644 --- a/tests/docx.notes.native +++ b/tests/docx.notes.native @@ -1,2 +1,2 @@ -[Header 2 ("",[],[]) [Str 
"A",Space,Str "footnote"] +[Header 2 ("a-footnote",[],[]) [Str "A",Space,Str "footnote"] ,Para [Str "Test",Space,Str "footnote.",Note [Para [Str "My",Space,Str "note."]],Space,Str "Test",Space,Str "endnote.",Note [Para [Str "This",Space,Str "is",Space,Str "an",Space,Str "endnote",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]]]] diff --git a/tests/docx.tables.native b/tests/docx.tables.native index 8dbaabda7..2564afcec 100644 --- a/tests/docx.tables.native +++ b/tests/docx.tables.native @@ -1,4 +1,4 @@ -[Header 2 ("",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] +[Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] ,Table [] [AlignDefault,AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0,0.0] [[Para [Str "Name"]] ,[Para [Str "Game"]] -- cgit v1.2.3