aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2006-11-26 07:01:37 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2006-11-26 07:01:37 +0000
commit986c1f9deec5acf01fbb594db00b2ec174296c77 (patch)
tree5af2a32672ed628cae694291b970d37c4e0cadb0 /src
parente417ceaa8d7064ad81adbeabceb7204483aac1a5 (diff)
downloadpandoc-986c1f9deec5acf01fbb594db00b2ec174296c77.tar.gz
Pandoc bug fixes:
+ LaTeX reader did not parse metadata correctly. Now the title, author, and date are parsed correctly, and everything else in the preamble is skipped. + Simplified parsing of LaTeX command arguments and options. The function commandArgs now returns a list of arguments OR options (in whatever order they appear). The brackets are included, and a new stripFirstAndLast function is provided to strip them off when needed. This fixes a problem in dealing with \newcommand, etc. + Added a "try" before "parser" in definition of notFollowedBy' combinator. Adjusted the code using this combinator accordingly. + Changed handling of code blocks. Previously, some readers allowed trailing newlines, while others stripped them. Now, all readers strip trailing newlines in code blocks; writers insert a newline at the end of code blocks as needed. + Changed test suite to reflect these changes. git-svn-id: https://pandoc.googlecode.com/svn/trunk@137 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs10
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs91
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs21
-rw-r--r--src/Text/Pandoc/Readers/RST.hs2
-rw-r--r--src/Text/Pandoc/Shared.hs5
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs2
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs2
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs2
-rw-r--r--src/Text/Pandoc/Writers/RST.hs2
-rw-r--r--src/Text/ParserCombinators/Pandoc.hs5
10 files changed, 72 insertions, 70 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 054d9eb72..f9a738e94 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -153,15 +153,15 @@ htmlScript = try (do
rest <- manyTill anyChar (htmlEndTag "script")
return (open ++ rest ++ "</script>"))
-rawHtmlBlock = do
- notFollowedBy (do {choice [htmlTag "/body", htmlTag "/html"]; return ' '})
+rawHtmlBlock = try (do
+ notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"])
body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition]
sp <- (many space)
state <- getState
if stateParseRaw state then
return (RawHtml (body ++ sp))
else
- return Null
+ return Null)
htmlComment = try (do
string "<!--"
@@ -284,12 +284,12 @@ preCodeBlock = try (do
result <- manyTill anyChar (htmlEndTag "code")
spaces
htmlEndTag "pre"
- return (CodeBlock (decodeEntities result)))
+ return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
bareCodeBlock = try (do
htmlTag "code"
result <- manyTill anyChar (htmlEndTag "code")
- return (CodeBlock (decodeEntities result)))
+ return (CodeBlock (stripTrailingNewlines (decodeEntities result))))
--
-- block quotes
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3bf3dfd23..bd91c5014 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -39,37 +39,33 @@ normalizeDashes = gsub "([0-9])--([0-9])" "\\1-\\2"
normalizePunctuation :: String -> String
normalizePunctuation = normalizeDashes . normalizeQuotes
--- | Returns command option (between []) if any, or empty string.
-commandOpt = option "" (between (char '[') (char ']') (many1 (noneOf "]")))
-
-- | Returns text between brackets and its matching pair.
-bracketedText = try (do
- char '{'
- result <- many (choice [ try (do{ char '\\';
- b <- oneOf "{}";
- return (['\\', b])}), -- escaped bracket
- count 1 (noneOf "{}"),
- do {text <- bracketedText; return ("{" ++ text ++ "}")} ])
- char '}'
- return (concat result))
+bracketedText openB closeB = try (do
+ char openB
+ result <- many (choice [ oneOfStrings [ ['\\', openB], ['\\', closeB] ],
+ count 1 (noneOf [openB, closeB]),
+ bracketedText openB closeB ])
+ char closeB
+ return ([openB] ++ (concat result) ++ [closeB]))
--- | Parses list of arguments of LaTeX command.
-commandArgs = many bracketedText
+-- | Returns an option or argument of a LaTeX command
+optOrArg = choice [ (bracketedText '{' '}'), (bracketedText '[' ']') ]
--- | Parses LaTeX command, returns (name, star, option, list of arguments).
+-- | Returns list of options and arguments of a LaTeX command
+commandArgs = many optOrArg
+
+-- | Parses LaTeX command, returns (name, star, list of options/arguments).
command = try (do
char '\\'
name <- many1 alphaNum
star <- option "" (string "*") -- some commands have starred versions
- opt <- commandOpt
args <- commandArgs
- return (name, star, opt, args))
+ return (name, star, args))
begin name = try (do
string "\\begin{"
string name
char '}'
- option "" commandOpt
option [] commandArgs
spaces
return name)
@@ -93,7 +89,6 @@ anyEnvironment = try (do
name <- many alphaNum
star <- option "" (string "*") -- some environments have starred variants
char '}'
- option "" commandOpt
option [] commandArgs
spaces
contents <- manyTill block (end (name ++ star))
@@ -103,15 +98,14 @@ anyEnvironment = try (do
-- parsing documents
--
--- | Skip everything up through \begin{document}
-skipLaTeXHeader = try (do
- manyTill anyChar (begin "document")
+-- | Process LaTeX preamble, extracting metadata
+processLaTeXPreamble = do
+ manyTill (choice [bibliographic, comment, unknownCommand]) (try (string "\\begin{document}"))
spaces
- return "")
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX = do
- option "" skipLaTeXHeader -- if parsing a fragment, this might not be present
+ option () processLaTeXPreamble -- preamble might not be present, if a fragment
blocks <- parseBlocks
spaces
option "" (string "\\end{document}") -- if parsing a fragment, this might not be present
@@ -121,7 +115,10 @@ parseLaTeX = do
let keyBlocks = stateKeyBlocks state
let noteBlocks = stateNoteBlocks state
let blocks' = filter (/= Null) blocks
- return (Pandoc (Meta [] [] "") (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
+ let title' = stateTitle state
+ let authors' = stateAuthors state
+ let date' = stateDate state
+ return (Pandoc (Meta title' authors' date') (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
--
-- parsing blocks
@@ -209,7 +206,7 @@ mathBlockWith start end = try (do
list = bulletList <|> orderedList <?> "list"
listItem = try (do
- ("item", _, _, _) <- command
+ ("item", _, _) <- command
spaces
state <- getState
let oldParserContext = stateParserContext state
@@ -265,7 +262,7 @@ authors = try (do
string "\\author{"
authors <- manyTill anyChar (char '}')
spaces
- let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\" "\n" authors
+ let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\\\\\" "\n" authors
updateState (\state -> state { stateAuthors = authors' })
return Null)
@@ -283,15 +280,15 @@ date = try (do
-- this forces items to be parsed in different blocks
itemBlock = try (do
- ("item", _, opt, _) <- command
+ ("item", _, args) <- command
state <- getState
if (stateParserContext state == ListItemState) then
fail "item should be handled by list block"
else
- if null opt then
+ if null args then
return Null
else
- return (Plain [Str opt]))
+ return (Plain [Str (stripFirstAndLast (head args))]))
--
-- raw LaTeX
@@ -312,15 +309,13 @@ rawLaTeXEnvironment = try (do
star <- option "" (string "*") -- for starred variants
let name' = name ++ star
char '}'
- opt <- option "" commandOpt
args <- option [] commandArgs
- let optStr = if (null opt) then "" else "[" ++ opt ++ "]"
- let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args
+ let argStr = concat args
contents <- manyTill (choice [(many1 (noneOf "\\")),
(do{ (Para [TeX str]) <- rawLaTeXEnvironment; return str }),
string "\\"]) (end name')
spaces
- return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ optStr ++ argStr ++
+ return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++
(concat contents) ++ "\\end{" ++ name' ++ "}")]))
unknownEnvironment = try (do
@@ -335,17 +330,16 @@ unknownCommand = try (do
notFollowedBy' (string "\\end{itemize}")
notFollowedBy' (string "\\end{enumerate}")
notFollowedBy' (string "\\end{document}")
- (name, star, opt, args) <- command
+ (name, star, args) <- command
spaces
- let optStr = if null opt then "" else "[" ++ opt ++ "]"
- let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args
+ let argStr = concat args
state <- getState
if (name == "item") && ((stateParserContext state) == ListItemState) then
fail "should not be parsed as raw"
else
string ""
if stateParseRaw state then
- return (Plain [TeX ("\\" ++ name ++ star ++ optStr ++ argStr)])
+ return (Plain [TeX ("\\" ++ name ++ star ++ argStr)])
else
return (Plain [Str (joinWithSep " " args)]))
@@ -554,13 +548,19 @@ link = try (do
return (Link (normalizeSpaces label) ref))
image = try (do
- ("includegraphics", _, _, (src:lst)) <- command
- return (Image [Str "image"] (Src src "")))
+ ("includegraphics", _, args) <- command
+ let args' = filter (\arg -> (take 1 arg) /= "[") args
+ let src = if null args' then
+ Src "" ""
+ else
+ Src (stripFirstAndLast (head args')) ""
+ return (Image [Str "image"] src))
footnote = try (do
- ("footnote", _, _, (contents:[])) <- command
+ ("footnote", _, (contents:[])) <- command
+ let contents' = stripFirstAndLast contents
let blocks = case runParser parseBlocks defaultParserState "footnote" contents of
- Left err -> error $ "Input:\n" ++ show contents ++
+ Left err -> error $ "Input:\n" ++ show contents' ++
"\nError:\n" ++ show err
Right result -> result
state <- getState
@@ -574,12 +574,11 @@ footnote = try (do
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try (do
- (name, star, opt, args) <- command
- let optStr = if (null opt) then "" else "[" ++ opt ++ "]"
- let argStr = concatMap (\arg -> "{" ++ arg ++ "}") args
+ (name, star, args) <- command
+ let argStr = concat args
state <- getState
if ((name == "begin") || (name == "end") || (name == "item")) then
fail "not an inline command"
else
string ""
- return (TeX ("\\" ++ name ++ star ++ optStr ++ argStr)))
+ return (TeX ("\\" ++ name ++ star ++ argStr)))
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 60ac40fd7..df2f43e87 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -136,8 +136,7 @@ header = choice [ setextHeader, atxHeader ] <?> "header"
atxHeader = try (do
lead <- many1 (char atxHChar)
skipSpaces
- txt <- many1 (do {notFollowedBy' atxClosing; inline})
- atxClosing
+ txt <- manyTill inline atxClosing
return (Header (length lead) (normalizeSpaces txt)))
atxClosing = try (do
@@ -195,7 +194,7 @@ indentedBlock = try (do
codeBlock = do
result <- choice [indentedBlock, indentedLine]
option "" blanklines
- return (CodeBlock result)
+ return (CodeBlock (stripTrailingNewlines result))
--
-- note block
@@ -286,9 +285,9 @@ orderedListStart =
listLine start = try (do
notFollowedBy' start
notFollowedBy blankline
- notFollowedBy' (try (do{ indentSpaces;
- many (spaceChar);
- choice [bulletListStart, orderedListStart]}))
+ notFollowedBy' (do{ indentSpaces;
+ many (spaceChar);
+ choice [bulletListStart, orderedListStart]})
line <- manyTill anyChar newline
return (line ++ "\n"))
@@ -311,7 +310,7 @@ listContinuation start =
return ((concat result) ++ blanks))
listContinuationLine start = try (do
- notFollowedBy blankline
+ notFollowedBy' blankline
notFollowedBy' start
option "" indentSpaces
result <- manyTill anyChar newline
@@ -404,10 +403,10 @@ special = choice [ link, referenceLink, rawHtmlInline, autoLink,
escapedChar = escaped anyChar
-ltSign = do
+ltSign = try (do
notFollowedBy' rawHtmlBlocks -- don't return < if it starts html
char '<'
- return (Str ['<'])
+ return (Str ['<']))
specialCharsMinusLt = filter (/= '<') specialChars
@@ -487,7 +486,7 @@ endline =
-- next line would allow block quotes without preceding blank line
-- Markdown.pl does allow this, but there's a chance of a wrapped
-- greater-than sign triggering a block quote by accident...
--- notFollowedBy (try (do { choice [emailBlockQuoteStart, string ",----"]; return ' ' }))
+-- notFollowedBy' (choice [emailBlockQuoteStart, string ",----"])
notFollowedBy blankline
-- parse potential list starts at beginning of line differently if in a list:
st <- getState
@@ -560,7 +559,7 @@ referenceLinkSingle = -- a link like [this]
autoLink = -- a link <like.this.com>
try (do
- notFollowedBy (do {anyHtmlBlockTag; return ' '})
+ notFollowedBy' anyHtmlBlockTag
src <- between (char autoLinkStart) (char autoLinkEnd)
(many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd])))
case (matchRegex emailAddress src) of
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 82e5ea303..69c7d9baa 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -292,7 +292,7 @@ indentedBlock variable = try (do
codeBlock = try (do
codeBlockStart
result <- indentedBlock False -- the False means we want one tab stop indent on each line
- return (CodeBlock result))
+ return (CodeBlock (stripTrailingNewlines result)))
--
-- raw html
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index c35e33d01..07afba00e 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -11,6 +11,7 @@ module Text.Pandoc.Shared (
removeLeadingTrailingSpace,
removeLeadingSpace,
removeTrailingSpace,
+ stripFirstAndLast,
-- * Parsing
readWith,
testStringWith,
@@ -227,6 +228,10 @@ removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') || (x == '\t'))
removeTrailingSpace :: String -> String
removeTrailingSpace = reverse . removeLeadingSpace . reverse
+-- | Strip leading and trailing characters from string
+stripFirstAndLast str =
+ drop 1 $ take ((length str) - 1) str
+
-- | Split list of inlines into groups separated by a space.
splitBySpace :: [Inline] -> [[Inline]]
splitBySpace lst = filter (\a -> (/= Space) (head a))
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 9eecf2761..6b0696817 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -132,7 +132,7 @@ blockToHtml options (Note ref lst) =
"<div class=\"pandocNote\">\n" ++ contents' ++ "</div>\n"
blockToHtml options (Key _ _) = ""
blockToHtml options (CodeBlock str) = "<pre><code>" ++ (codeStringToHtml str) ++
- "</code></pre>\n"
+ "\n</code></pre>\n"
blockToHtml options (RawHtml str) = str
blockToHtml options (BulletList lst) =
let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 0674652cf..13eac31c2 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -107,7 +107,7 @@ blockToLaTeX notes (BlockQuote lst) =
"\\begin{quote}\n" ++ (concatMap (blockToLaTeX notes) lst) ++ "\\end{quote}\n"
blockToLaTeX notes (Note ref lst) = ""
blockToLaTeX notes (Key _ _) = ""
-blockToLaTeX notes (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++ "\\end{verbatim}\n"
+blockToLaTeX notes (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++ "\n\\end{verbatim}\n"
blockToLaTeX notes (RawHtml str) = ""
blockToLaTeX notes (BulletList lst) =
"\\begin{itemize}\n" ++ (concatMap (listItemToLaTeX notes) lst) ++ "\\end{itemize}\n"
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index b73090f62..4ca131455 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -83,7 +83,7 @@ blockToMarkdown tabStop (Key txt (Src src tit)) =
text " " <> char '[' <> inlineListToMarkdown txt <> char ']' <> text ": " <> text src <>
(if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty)
blockToMarkdown tabStop (CodeBlock str) = (nest tabStop $ vcat $ map text (lines str)) <>
- (if (endsWith '\n' str) then empty else text "\n") <> text "\n"
+ text "\n"
blockToMarkdown tabStop (RawHtml str) = text str
blockToMarkdown tabStop (BulletList lst) =
vcat (map (bulletListItemToMarkdown tabStop) lst) <> text "\n"
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 37d895336..7d08d152d 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -91,7 +91,7 @@ blockToRST tabStop (Note ref blocks) =
blockToRST tabStop (Key txt (Src src tit)) =
(text "ERROR - KEY FOUND", empty) -- shouldn't have a key here
blockToRST tabStop (CodeBlock str) =
- (hang (text "::\n") tabStop (vcat $ map text (lines ('\n':(str ++ "\n")))), empty)
+ (hang (text "::\n") tabStop (vcat $ map text (lines ('\n':(str ++ "\n\n")))), empty)
blockToRST tabStop (RawHtml str) =
let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
(hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str')), empty)
diff --git a/src/Text/ParserCombinators/Pandoc.hs b/src/Text/ParserCombinators/Pandoc.hs
index 9bf0a76f7..a78b776d3 100644
--- a/src/Text/ParserCombinators/Pandoc.hs
+++ b/src/Text/ParserCombinators/Pandoc.hs
@@ -82,9 +82,8 @@ many1Till p end = try (do
-- | A more general form of @notFollowedBy@. This one allows any type of parser to
-- be specified, and succeeds only if that parser fails. It does not consume any input.
notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
-notFollowedBy' parser = try (do{ c <- parser; unexpected (show c) }
- <|> return ()
- )
+notFollowedBy' parser = try (do { c <- try parser; unexpected (show c) }
+ <|> return ())
-- | The inverse of @notFollowedBy'@. Fails if parser will fail, otherwise
-- returns @()@ (but does not consume any input).