From 4a841bfc5464907adea4cdd655485565565b40ae Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Sat, 3 Nov 2007 22:14:03 +0000
Subject: Use template haskell to avoid the need for templates: + Added library
 Text.Pandoc.Include, with a template haskell   function $(includeStrFrom
 fname) to include a file as a string   constant at compile time. + This
 removes the need for the 'templates' directory or Makefile   target.  These
 have been removed. + The base source directory has been changed from src to .
 + A new 'data' directory has been added, containing the ASCIIMathML.js  
 script, writer headers, and S5 files. + The src/wrappers directory has been
 moved to 'wrappers'. + The Text.Pandoc.ASCIIMathML library is no longer
 needed, since   Text.Pandoc.Writers.HTML can use includeStrFrom to include
 the   ASCIIMathML.js code directly.  It has been removed.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1063 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 Text/Pandoc/Readers/HTML.hs     | 496 ++++++++++++++++++++++
 Text/Pandoc/Readers/LaTeX.hs    | 651 ++++++++++++++++++++++++++++
 Text/Pandoc/Readers/Markdown.hs | 909 ++++++++++++++++++++++++++++++++++++++++
 Text/Pandoc/Readers/RST.hs      | 640 ++++++++++++++++++++++++++++
 4 files changed, 2696 insertions(+)
 create mode 100644 Text/Pandoc/Readers/HTML.hs
 create mode 100644 Text/Pandoc/Readers/LaTeX.hs
 create mode 100644 Text/Pandoc/Readers/Markdown.hs
 create mode 100644 Text/Pandoc/Readers/RST.hs

(limited to 'Text/Pandoc/Readers')

diff --git a/Text/Pandoc/Readers/HTML.hs b/Text/Pandoc/Readers/HTML.hs
new file mode 100644
index 000000000..70a071152
--- /dev/null
+++ b/Text/Pandoc/Readers/HTML.hs
@@ -0,0 +1,496 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.HTML
+   Copyright   : Copyright (C) 2006-7 John MacFarlane
+   License     : GNU GPL, version 2 or above 
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha 
+   Portability : portable
+
+Conversion of HTML to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.HTML ( 
+                                 readHtml, 
+                                 rawHtmlInline, 
+                                 rawHtmlBlock, 
+                                 anyHtmlBlockTag, 
+                                 anyHtmlInlineTag,  
+                                 anyHtmlTag,
+                                 anyHtmlEndTag,
+                                 htmlEndTag,
+                                 extractTagType,
+                                 htmlBlockElement 
+                                ) where
+
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared 
+import Text.Pandoc.CharacterReferences ( characterReference, 
+                                         decodeCharacterReferences )
+import Data.Maybe ( fromMaybe )
+import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf )
+import Data.Char ( toUpper, toLower, isAlphaNum )
+
+-- | Convert HTML-formatted string to 'Pandoc' document.
+readHtml :: ParserState   -- ^ Parser state
+         -> String        -- ^ String to parse
+         -> Pandoc
+readHtml = readWith parseHtml
+
+--
+-- Constants
+--
+
+eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
+                  "map", "area", "object", "script"]
+
+inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
+                  "br", "cite", "code", "dfn", "em", "font", "i", "img",
+                  "input", "kbd", "label", "q", "s", "samp", "select",
+                  "small", "span", "strike", "strong", "sub", "sup",
+                  "textarea", "tt", "u", "var"] ++ eitherBlockOrInline
+
+blockHtmlTags = ["address", "blockquote", "center", "dir", "div",
+                 "dl", "fieldset", "form", "h1", "h2", "h3", "h4",
+                 "h5", "h6", "hr", "isindex", "menu", "noframes",
+                 "noscript", "ol", "p", "pre", "table", "ul", "dd",
+                 "dt", "frameset", "li", "tbody", "td", "tfoot",
+                 "th", "thead", "tr"] ++ eitherBlockOrInline
+
+--
+-- HTML utility functions
+--
+
+-- | Read blocks until end tag.
+blocksTilEnd tag = do
+  blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
+  return $ filter (/= Null) blocks
+
+-- | Read inlines until end tag.
+inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
+
+-- | Parse blocks between open and close tag.
+blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
+
+-- | Parse inlines between open and close tag.
+inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
+
+-- | Extract type from a tag:  e.g. @br@ from @\<br\>@
+extractTagType :: String -> String
+extractTagType ('<':rest) = 
+  let isSpaceOrSlash c = c `elem` "/ \n\t" in
+  map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
+extractTagType _ = ""
+
+-- | Parse any HTML tag (opening or self-closing) and return text of tag
+anyHtmlTag = try $ do
+  char '<'
+  spaces
+  tag <- many1 alphaNum
+  attribs <- many htmlAttribute
+  spaces
+  ender <- option "" (string "/")
+  let ender' = if null ender then "" else " /"
+  spaces
+  char '>'
+  return $ "<" ++ tag ++ 
+           concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
+
+anyHtmlEndTag = try $ do
+  char '<'   
+  spaces
+  char '/'
+  spaces
+  tagType <- many1 alphaNum
+  spaces
+  char '>'
+  return $ "</" ++ tagType ++ ">"
+
+htmlTag :: String -> GenParser Char st (String, [(String, String)])
+htmlTag tag = try $ do
+  char '<'
+  spaces
+  stringAnyCase tag
+  attribs <- many htmlAttribute
+  spaces
+  optional (string "/")
+  spaces
+  char '>'
+  return (tag, (map (\(name, content, raw) -> (name, content)) attribs))
+
+-- parses a quoted html attribute value
+quoted quoteChar = do
+  result <- between (char quoteChar) (char quoteChar) 
+                    (many (noneOf [quoteChar]))
+  return (result, [quoteChar])
+
+htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
+
+-- minimized boolean attribute
+htmlMinimizedAttribute = try $ do
+  many1 space
+  name <- many1 (choice [letter, oneOf ".-_:"])
+  return (name, name, name)
+
+htmlRegularAttribute = try $ do
+  many1 space
+  name <- many1 (choice [letter, oneOf ".-_:"])
+  spaces
+  char '='
+  spaces
+  (content, quoteStr) <- choice [ (quoted '\''), 
+                                  (quoted '"'), 
+                                  (do
+                                     a <- many (alphaNum <|> (oneOf "-._:"))
+                                     return (a,"")) ]
+  return (name, content,
+          (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
+
+-- | Parse an end tag of type 'tag'
+htmlEndTag tag = try $ do
+  char '<'   
+  spaces
+  char '/'
+  spaces
+  stringAnyCase tag
+  spaces
+  char '>'
+  return $ "</" ++ tag ++ ">"
+
+-- | Returns @True@ if the tag is (or can be) an inline tag.
+isInline tag = (extractTagType tag) `elem` inlineHtmlTags
+
+-- | Returns @True@ if the tag is (or can be) a block tag.
+isBlock tag = (extractTagType tag) `elem` blockHtmlTags 
+
+anyHtmlBlockTag = try $ do
+  tag <- anyHtmlTag <|> anyHtmlEndTag
+  if isBlock tag then return tag else fail "inline tag"
+
+anyHtmlInlineTag = try $ do
+  tag <- anyHtmlTag <|> anyHtmlEndTag
+  if isInline tag then return tag else fail "not an inline tag"
+
+-- | Parses material between script tags.
+-- Scripts must be treated differently, because they can contain '<>' etc.
+htmlScript = try $ do
+  open <- string "<script"
+  rest <- manyTill anyChar (htmlEndTag "script")
+  return $ open ++ rest ++ "</script>"
+
+htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]
+
+rawHtmlBlock = try $ do
+  notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
+  body <- htmlBlockElement <|> anyHtmlTag <|> anyHtmlEndTag
+  sp <- many space
+  state <- getState
+  if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null
+
+-- | Parses an HTML comment.
+htmlComment = try $ do
+  string "<!--"
+  comment <- manyTill anyChar (try (string "-->"))
+  return $ "<!--" ++ comment ++ "-->"
+
+--
+-- parsing documents
+--
+
+xmlDec = try $ do
+  string "<?"
+  rest <- manyTill anyChar (char '>')
+  return $ "<?" ++ rest ++ ">"
+
+definition = try $ do
+  string "<!"
+  rest <- manyTill anyChar (char '>')
+  return $ "<!" ++ rest ++ ">"
+
+nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >>
+  ((rawHtmlBlock >> return ' ') <|> anyChar)
+
+parseTitle = try $ do
+  (tag, _) <- htmlTag "title"
+  contents <- inlinesTilEnd tag
+  spaces
+  return contents
+
+-- parse header and return meta-information (for now, just title)
+parseHead = try $ do
+  htmlTag "head"
+  spaces
+  skipMany nonTitleNonHead
+  contents <- option [] parseTitle
+  skipMany nonTitleNonHead
+  htmlTag "/head"
+  return (contents, [], "")
+
+skipHtmlTag tag = optional (htmlTag tag)
+
+-- h1 class="title" representation of title in body
+bodyTitle = try $ do
+  (tag, attribs) <- htmlTag "h1"  
+  cl <- case (extractAttribute "class" attribs) of
+          Just "title" -> return ""
+          otherwise    -> fail "not title"
+  inlinesTilEnd "h1"
+
+parseHtml = do
+  sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
+  skipHtmlTag "html"
+  spaces
+  (title, authors, date) <- option ([], [], "") parseHead 
+  spaces
+  skipHtmlTag "body"
+  spaces
+  optional bodyTitle  -- skip title in body, because it's represented in meta
+  blocks <- parseBlocks
+  spaces
+  optional (htmlEndTag "body")
+  spaces
+  optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html>
+  eof
+  return $ Pandoc (Meta title authors date) blocks
+
+--
+-- parsing blocks
+--
+
+parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
+
+block = choice [ codeBlock
+               , header
+               , hrule
+               , list
+               , blockQuote
+               , para
+               , plain
+               , rawHtmlBlock ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
+
+headerLevel n = try $ do
+    let level = "h" ++ show n
+    (tag, attribs) <- htmlTag level
+    contents <- inlinesTilEnd level
+    return $ Header n (normalizeSpaces contents)
+
+--
+-- hrule block
+--
+
+hrule = try  $ do
+  (tag, attribs) <- htmlTag "hr"
+  state <- getState
+  if not (null attribs) && stateParseRaw state
+     then unexpected "attributes in hr" -- parse as raw in this case
+     else return HorizontalRule
+
+--
+-- code blocks
+--
+
+-- Note:  HTML tags in code blocks (e.g. for syntax highlighting) are 
+-- skipped, because they are not portable to output formats other than HTML.
+codeBlock = try $ do
+    htmlTag "pre" 
+    result <- manyTill 
+              (many1 (satisfy (/= '<')) <|> 
+               ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
+              (htmlEndTag "pre")
+    let result' = concat result
+    -- drop leading newline if any
+    let result'' = if "\n" `isPrefixOf` result'
+                      then drop 1 result'
+                      else result'
+    -- drop trailing newline if any
+    let result''' = if "\n" `isSuffixOf` result''
+                       then init result''
+                       else result''
+    return $ CodeBlock $ decodeCharacterReferences result'''
+
+--
+-- block quotes
+--
+
+blockQuote = try $ htmlTag "blockquote" >> spaces >> 
+                   blocksTilEnd "blockquote" >>= (return . BlockQuote)
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+orderedList = try $ do
+  (_, attribs) <- htmlTag "ol"
+  (start, style) <- option (1, DefaultStyle) $
+                           do failIfStrict
+                              let sta = fromMaybe "1" $ 
+                                        lookup "start" attribs
+                              let sty = fromMaybe (fromMaybe "" $
+                                        lookup "style" attribs) $
+                                        lookup "class" attribs
+                              let sty' = case sty of
+                                          "lower-roman"  -> LowerRoman
+                                          "upper-roman"  -> UpperRoman
+                                          "lower-alpha"  -> LowerAlpha
+                                          "upper-alpha"  -> UpperAlpha
+                                          "decimal"      -> Decimal
+                                          _              -> DefaultStyle
+                              return (read sta, sty')
+  spaces
+  items <- sepEndBy1 (blocksIn "li") spaces
+  htmlEndTag "ol"
+  return $ OrderedList (start, style, DefaultDelim) items
+
+bulletList = try $ do
+  htmlTag "ul"
+  spaces
+  items <- sepEndBy1 (blocksIn "li") spaces
+  htmlEndTag "ul"
+  return $ BulletList items
+
+definitionList = try $ do
+  failIfStrict  -- def lists not part of standard markdown
+  tag <- htmlTag "dl"
+  spaces
+  items <- sepEndBy1 definitionListItem spaces
+  htmlEndTag "dl"
+  return $ DefinitionList items
+
+definitionListItem = try $ do
+  terms <- sepEndBy1 (inlinesIn "dt") spaces
+  defs <- sepEndBy1 (blocksIn "dd") spaces
+  let term = joinWithSep [LineBreak] terms
+  return (term, concat defs)
+
+--
+-- paragraph block
+--
+
+para = try $ htmlTag "p" >> inlinesTilEnd "p" >>= 
+             return . Para . normalizeSpaces
+
+-- 
+-- plain block
+--
+
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+-- 
+-- inline
+--
+
+inline = choice [ charRef
+                , strong
+                , emph
+                , superscript
+                , subscript
+                , strikeout
+                , spanStrikeout
+                , code
+                , str
+                , linebreak
+                , whitespace
+                , link
+                , image
+                , rawHtmlInline
+                ] <?> "inline"
+
+code = try $ do 
+  htmlTag "code"
+  result <- manyTill anyChar (htmlEndTag "code")
+  -- remove internal line breaks, leading and trailing space,
+  -- and decode character references
+  return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ 
+                  joinWithSep " " $ lines result 
+
+rawHtmlInline = do
+  result <- htmlScript <|> htmlComment <|> anyHtmlInlineTag
+  state <- getState
+  if stateParseRaw state then return (HtmlInline result) else return (Str "")
+
+betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= 
+                        return . normalizeSpaces
+
+emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph
+
+strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
+
+superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
+
+subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
+
+strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
+            return . Strikeout
+
+spanStrikeout = try $ do
+  failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
+  (tag, attributes) <- htmlTag "span" 
+  result <- case (extractAttribute "class" attributes) of
+              Just "strikeout" -> inlinesTilEnd "span"
+              _                -> fail "not a strikeout"
+  return $ Strikeout result
+
+whitespace = many1 space >> return Space
+
+-- hard line break
+linebreak = htmlTag "br" >> optional newline >> return LineBreak
+
+str = many1 (noneOf "<& \t\n") >>= return . Str
+
+--
+-- links and images
+--
+
+-- extract contents of attribute (attribute names are case-insensitive)
+extractAttribute name [] = Nothing
+extractAttribute name ((attrName, contents):rest) = 
+  let name'     = map toLower name 
+      attrName' = map toLower attrName
+  in  if attrName' == name'
+         then Just (decodeCharacterReferences contents)
+         else extractAttribute name rest
+
+link = try $ do
+  (tag, attributes) <- htmlTag "a"  
+  url <- case (extractAttribute "href" attributes) of
+           Just url -> return url
+           Nothing  -> fail "no href"
+  let title = fromMaybe "" $ extractAttribute "title" attributes
+  label <- inlinesTilEnd "a"
+  return $ Link (normalizeSpaces label) (url, title)
+
+image = try $ do
+  (tag, attributes) <- htmlTag "img" 
+  url <- case (extractAttribute "src" attributes) of
+           Just url -> return url
+           Nothing  -> fail "no src"
+  let title = fromMaybe "" $ extractAttribute "title" attributes
+  let alt = fromMaybe "" (extractAttribute "alt" attributes)
+  return $ Image [Str alt] (url, title)
+
diff --git a/Text/Pandoc/Readers/LaTeX.hs b/Text/Pandoc/Readers/LaTeX.hs
new file mode 100644
index 000000000..37cc2bfe4
--- /dev/null
+++ b/Text/Pandoc/Readers/LaTeX.hs
@@ -0,0 +1,651 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX
+   Copyright   : Copyright (C) 2006-7 John MacFarlane
+   License     : GNU GPL, version 2 or above 
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of LaTeX to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.LaTeX ( 
+                                  readLaTeX,
+                                  rawLaTeXInline,
+                                  rawLaTeXEnvironment
+                                 ) where
+
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared 
+import Data.Maybe ( fromMaybe )
+import Data.Char ( chr )
+import Data.List ( isPrefixOf, isSuffixOf )
+
+-- | Parse LaTeX from string and return 'Pandoc' document.
+readLaTeX :: ParserState   -- ^ Parser state, including options for parser
+          -> String        -- ^ String to parse
+          -> Pandoc
+readLaTeX = readWith parseLaTeX
+
+-- characters with special meaning
+specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"
+
+--
+-- utility functions
+--
+
+-- | Returns text between brackets and its matching pair.
+bracketedText openB closeB = do
+  result <- charsInBalanced' openB closeB
+  return $ [openB] ++ result ++ [closeB]
+
+-- | Returns an option or argument of a LaTeX command.
+optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
+
+-- | True if the string begins with '{'.
+isArg ('{':rest) = True
+isArg other = False
+
+-- | Returns list of options and arguments of a LaTeX command.
+commandArgs = many optOrArg
+
+-- | Parses LaTeX command, returns (name, star, list of options or arguments).
+command = do
+  char '\\'
+  name <- many1 letter
+  star <- option "" (string "*")  -- some commands have starred versions
+  args <- commandArgs
+  return (name, star, args)
+
+begin name = try $ do
+  string $ "\\begin{" ++ name ++ "}"
+  optional commandArgs
+  spaces
+  return name
+
+end name = try $ do
+  string $ "\\end{" ++ name ++ "}"
+  spaces
+  return name
+
+-- | Returns a list of block elements containing the contents of an
+-- environment.
+environment name = try $ begin name >> spaces >> manyTill block (end name)
+
+anyEnvironment =  try $ do
+  string "\\begin{"
+  name <- many letter
+  star <- option "" (string "*") -- some environments have starred variants
+  char '}'
+  optional commandArgs
+  spaces
+  contents <- manyTill block (end (name ++ star))
+  return $ BlockQuote contents
+
+--
+-- parsing documents
+--
+
+-- | Process LaTeX preamble, extracting metadata.
+processLaTeXPreamble = try $ manyTill 
+  (choice [bibliographic, comment, unknownCommand, nullBlock]) 
+  (try (string "\\begin{document}")) >> 
+  spaces
+
+-- | Parse LaTeX and return 'Pandoc'.
+parseLaTeX = do
+  optional processLaTeXPreamble -- preamble might not be present (fragment)
+  spaces
+  blocks <- parseBlocks
+  spaces
+  optional $ try (string "\\end{document}" >> many anyChar) 
+  -- might not be present (fragment)
+  spaces
+  eof
+  state <- getState
+  let blocks' = filter (/= Null) blocks
+  let title' = stateTitle state
+  let authors' = stateAuthors state
+  let date' = stateDate state
+  return $ Pandoc (Meta title' authors' date')  blocks'
+
+--
+-- parsing blocks
+--
+
+parseBlocks = spaces >> many block
+
+block = choice [ hrule
+               , codeBlock
+               , header
+               , list
+               , blockQuote
+               , mathBlock
+               , comment
+               , bibliographic
+               , para
+               , specialEnvironment
+               , itemBlock
+               , unknownEnvironment
+               , unknownCommand ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = try $ do
+  char '\\'
+  subs <- many (try (string "sub"))
+  string "section"
+  optional (char '*')
+  char '{'
+  title <- manyTill inline (char '}')
+  spaces
+  return $ Header (length subs + 1) (normalizeSpaces title)
+
+--
+-- hrule block
+--
+
+hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", 
+                       "\\newpage" ] >> spaces >> return HorizontalRule
+
+--
+-- code blocks
+--
+
+codeBlock = codeBlock1 <|> codeBlock2
+
+codeBlock1 = try $ do
+  string "\\begin{verbatim}"  -- don't use begin function because it 
+                              -- gobbles whitespace
+  optional blanklines         -- we want to gobble blank lines, but not 
+                              -- leading space
+  contents <- manyTill anyChar (try (string "\\end{verbatim}"))
+  spaces
+  return $ CodeBlock (stripTrailingNewlines contents)
+
+codeBlock2 = try $ do
+  string "\\begin{Verbatim}"  -- used by fancyvrb package
+  option "" blanklines
+  contents <- manyTill anyChar (try (string "\\end{Verbatim}"))
+  spaces
+  return $ CodeBlock (stripTrailingNewlines contents)
+
+--
+-- block quotes
+--
+
+blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= 
+             return . BlockQuote
+
+--
+-- math block
+--
+
+mathBlock = mathBlockWith (begin "equation") (end "equation") <|> 
+            mathBlockWith (begin "displaymath") (end "displaymath") <|>
+            mathBlockWith (string "\\[") (string "\\]") <?> "math block"
+
+mathBlockWith start end = try $ do
+  start
+  spaces
+  result <- manyTill anyChar end
+  spaces
+  return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]]
+
+--
+-- list blocks
+--
+
+list = bulletList <|> orderedList <|> definitionList <?> "list"
+
+listItem = try $ do
+  ("item", _, args) <- command
+  spaces
+  state <- getState
+  let oldParserContext = stateParserContext state
+  updateState (\state -> state {stateParserContext = ListItemState})
+  blocks <- many block
+  updateState (\state -> state {stateParserContext = oldParserContext})
+  opt <- case args of
+           ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> 
+                       parseFromString (many inline) $ tail $ init x
+           _        -> return []
+  return (opt, blocks)
+
+orderedList = try $ do
+  string "\\begin{enumerate}"
+  (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+                              try $ do failIfStrict
+                                       char '['
+                                       res <- anyOrderedListMarker
+                                       char ']'
+                                       return res
+  spaces
+  option "" $ try $ do string "\\setlength{\\itemindent}"
+                       char '{'
+                       manyTill anyChar (char '}')
+  spaces
+  start <- option 1 $ try $ do failIfStrict
+                               string "\\setcounter{enum"
+                               many1 (oneOf "iv")
+                               string "}{"
+                               num <- many1 digit
+                               char '}' 
+                               spaces
+                               return $ (read num) + 1
+  items <- many listItem
+  end "enumerate"
+  spaces
+  return $ OrderedList (start, style, delim) $ map snd items
+
+bulletList = try $ do
+  begin "itemize"
+  spaces
+  items <- many listItem
+  end "itemize"
+  spaces
+  return (BulletList $ map snd items)
+
+definitionList = try $ do
+  begin "description"
+  spaces
+  items <- many listItem
+  end "description"
+  spaces
+  return (DefinitionList items)
+
+--
+-- paragraph block
+--
+
+para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
+
+--
+-- title authors date
+--
+
+bibliographic = choice [ maketitle, title, authors, date ]
+
+maketitle = try (string "\\maketitle") >> spaces >> return Null
+
+title = try $ do
+  string "\\title{"
+  tit <- manyTill inline (char '}')
+  spaces
+  updateState (\state -> state { stateTitle = tit })
+  return Null
+
+authors = try $ do
+  string "\\author{"
+  authors <- manyTill anyChar (char '}')
+  spaces
+  let authors' = map removeLeadingTrailingSpace $ lines $
+                 substitute "\\\\" "\n" authors
+  updateState (\state -> state { stateAuthors = authors' })
+  return Null
+
+date = try $ do
+  string "\\date{"
+  date' <- manyTill anyChar (char '}')
+  spaces
+  updateState (\state -> state { stateDate = date' })
+  return Null
+
+--
+-- item block
+-- for use in unknown environments that aren't being parsed as raw latex
+--
+
+-- this forces items to be parsed in different blocks
+itemBlock = try $ do
+  ("item", _, args) <- command
+  state <- getState
+  if (stateParserContext state == ListItemState)
+     then fail "item should be handled by list block"
+     else if null args 
+             then return Null
+             else return $ Plain [Str (stripFirstAndLast (head args))]
+
+--
+-- raw LaTeX 
+--
+
+specialEnvironment = do  -- these are always parsed as raw
+  lookAhead (choice (map (\name -> begin name)  ["tabular", "figure",
+              "tabbing", "eqnarry", "picture", "table", "verse", "theorem"]))
+  rawLaTeXEnvironment
+
+-- | Parse any LaTeX environment and return a Para block containing
+-- the whole literal environment as raw TeX.
+rawLaTeXEnvironment :: GenParser Char st Block
+rawLaTeXEnvironment = try $ do
+  string "\\begin{"
+  name <- many1 letter
+  star <- option "" (string "*") -- for starred variants
+  let name' = name ++ star
+  char '}'
+  args <- option [] commandArgs
+  let argStr = concat args
+  contents <- manyTill (choice [ (many1 (noneOf "\\")), 
+                                 (do 
+                                    (Para [TeX str]) <- rawLaTeXEnvironment
+                                    return str),
+                                 string "\\" ]) 
+                       (end name')
+  spaces
+  return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++ 
+                 concat contents ++ "\\end{" ++ name' ++ "}"]
+
+unknownEnvironment = try $ do
+  state <- getState
+  result <- if stateParseRaw state -- check whether we should include raw TeX 
+               then rawLaTeXEnvironment -- if so, get whole raw environment
+               else anyEnvironment      -- otherwise just the contents
+  return result
+
+unknownCommand = try $ do
+  notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", 
+                                     "document"]
+  (name, star, args) <- command
+  spaces
+  let argStr = concat args
+  state <- getState
+  if name == "item" && (stateParserContext state) == ListItemState
+     then fail "should not be parsed as raw"
+     else string ""
+  if stateParseRaw state
+     then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
+     else return $ Plain [Str (joinWithSep " " args)]
+
+-- latex comment
+comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
+
+-- 
+-- inline
+--
+
+inline =  choice [ str
+                 , endline
+                 , whitespace
+                 , quoted
+                 , apostrophe
+                 , spacer
+                 , strong
+                 , math
+                 , ellipses
+                 , emDash
+                 , enDash
+                 , hyphen
+                 , emph
+                 , strikeout
+                 , superscript
+                 , subscript
+                 , ref
+                 , lab
+                 , code
+                 , url
+                 , link
+                 , image
+                 , footnote
+                 , linebreak
+                 , accentedChar
+                 , specialChar
+                 , rawLaTeXInline
+                 , escapedChar
+                 , unescapedChar
+                 ] <?> "inline"
+
+accentedChar = normalAccentedChar <|> specialAccentedChar
+
+normalAccentedChar = try $ do
+  char '\\'
+  accent <- oneOf "'`^\"~"
+  character <- (try $ char '{' >> letter >>~ char '}') <|> letter
+  let table = fromMaybe [] $ lookup character accentTable 
+  let result = case lookup accent table of
+                 Just num  -> chr num
+                 Nothing   -> '?'
+  return $ Str [result]
+
+-- an association list of letters and association list of accents
+-- and decimal character numbers.
+accentTable = 
+  [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
+    ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
+    ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
+    ('N', [('~', 209)]),
+    ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
+    ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
+    ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
+    ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
+    ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
+    ('n', [('~', 241)]),
+    ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
+    ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
+
+specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
+                               oslash, pound, euro, copyright, sect ]
+
+ccedil = try $ do
+  char '\\'
+  letter <- oneOfStrings ["cc", "cC"]
+  let num = if letter == "cc" then 231 else 199
+  return $ Str [chr num]
+
+aring = try $ do
+  char '\\'
+  letter <- oneOfStrings ["aa", "AA"]
+  let num = if letter == "aa" then 229 else 197
+  return $ Str [chr num]
+
+iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> 
+       return (Str [chr 239])
+
+icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >>
+        return (Str [chr 238])
+
+szlig = try (string "\\ss") >> return (Str [chr 223])
+
+oslash = try $ do
+  char '\\'
+  letter <- choice [char 'o', char 'O']
+  let num = if letter == 'o' then 248 else 216
+  return $ Str [chr num]
+
+aelig = try $ do
+  char '\\'
+  letter <- oneOfStrings ["ae", "AE"]
+  let num = if letter == "ae" then 230 else 198
+  return $ Str [chr num]
+
+pound = try (string "\\pounds") >> return (Str [chr 163])
+
+euro = try (string "\\euro") >> return (Str [chr 8364])
+
+copyright = try (string "\\copyright") >> return (Str [chr 169])
+
+sect = try (string "\\S") >> return (Str [chr 167])
+
+escapedChar = do
+  result <- escaped (oneOf " $%&_#{}\n")
+  return $ if result == Str "\n" then Str " " else result
+
+-- ignore standalone, nonescaped special characters
+unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "")
+
+specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
+
+backslash = try (string "\\textbackslash") >> return (Str "\\")
+
+tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
+
+caret = try (string "\\^{}") >> return (Str "^")
+
+bar = try (string "\\textbar") >> return (Str "\\")
+
+lt = try (string "\\textless") >> return (Str "<")
+
+gt = try (string "\\textgreater") >> return (Str ">")
+
+doubleQuote = char '"' >> return (Str "\"")
+
+code = code1 <|> code2
+
+code1 = try $ do 
+  string "\\verb"
+  marker <- anyChar
+  result <- manyTill anyChar (char marker)
+  return $ Code $ removeLeadingTrailingSpace result
+
+code2 = try $ do
+  string "\\texttt{"
+  result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
+  return $ Code result
+
+emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
+             manyTill inline (char '}') >>= return . Emph
+
+strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
+                  return . Strikeout
+
+superscript = try $ string "\\textsuperscript{" >> 
+                    manyTill inline (char '}') >>= return . Superscript
+
+-- note: \textsubscript isn't a standard latex command, but we use
+-- a defined version in pandoc.
+subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
+                  return . Subscript
+
+apostrophe = char '\'' >> return Apostrophe
+
+quoted = doubleQuoted <|> singleQuoted
+
+singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
+               return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
+               return . Quoted DoubleQuote . normalizeSpaces
+
+singleQuoteStart = char '`'
+
+singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
+
+doubleQuoteStart = string "``"
+
+doubleQuoteEnd = try $ string "''"
+
+ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >>
+                 return Ellipses
+
+enDash = try (string "--") >> return EnDash
+
+emDash = try (string "---") >> return EmDash
+
+hyphen = char '-' >> return (Str "-")
+
+lab = try $ do
+  string "\\label{"
+  result <- manyTill anyChar (char '}')
+  return $ Str $ "(" ++ result ++ ")"
+
+ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
+
+strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
+         return . Strong
+
+whitespace = many1 (oneOf "~ \t") >> return Space
+
+-- hard line break
+linebreak = try (string "\\\\") >> return LineBreak
+
+spacer = try (string "\\,") >> return (Str "")
+
+str = many1 (noneOf specialChars) >>= return . Str
+
+-- endline internal to paragraph
+endline = try $ newline >> notFollowedBy blankline >> return Space
+
+-- math
+math = math1 <|> math2 <?> "math"
+
+math1 = try $ do
+  char '$'
+  result <- many (noneOf "$")
+  char '$'
+  return $ TeX ("$" ++ result ++ "$")
+
+math2 = try $ do
+  string "\\("
+  result <- many (noneOf "$")
+  string "\\)"
+  return $ TeX ("$" ++ result ++ "$")
+
+--
+-- links and images
+--
+
+url = try $ do
+  string "\\url"
+  url <- charsInBalanced '{' '}'
+  return $ Link [Code url] (url, "")
+
+link = try $ do
+  string "\\href{"
+  url <- manyTill anyChar (char '}')
+  char '{'
+  label <- manyTill inline (char '}') 
+  return $ Link (normalizeSpaces label) (url, "")
+
+image = try $ do
+  ("includegraphics", _, args) <- command
+  let args' = filter isArg args -- filter out options
+  let src = if null args' then
+              ("", "")
+            else
+              (stripFirstAndLast (head args'), "")
+  return $ Image [Str "image"] src
+
+footnote = try $ do
+  (name, _, (contents:[])) <- command
+  if ((name == "footnote") || (name == "thanks"))
+     then string ""
+     else fail "not a footnote or thanks command"
+  let contents' = stripFirstAndLast contents
+  -- parse the extracted block, which may contain various block elements:
+  rest <- getInput
+  setInput $ contents'
+  blocks <- parseBlocks
+  setInput rest
+  return $ Note blocks
+
+-- | Parse any LaTeX command and return it in a raw TeX inline element.
+rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline = try $ do
+  (name, star, args) <- command
+  state <- getState
+  if ((name == "begin") || (name == "end") || (name == "item"))
+     then fail "not an inline command" 
+     else string ""
+  return $ TeX ("\\" ++ name ++ star ++ concat args)
+
diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs
new file mode 100644
index 000000000..df84c0ac7
--- /dev/null
+++ b/Text/Pandoc/Readers/Markdown.hs
@@ -0,0 +1,909 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Markdown
+   Copyright   : Copyright (C) 2006-7 John MacFarlane
+   License     : GNU GPL, version 2 or above 
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of markdown-formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Markdown ( 
+                                     readMarkdown 
+                                    ) where
+
+import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy )
+import Data.Ord ( comparing )
+import Data.Char ( isAlphaNum )
+import Network.URI ( isURI )
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared 
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
+import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, 
+                                  anyHtmlInlineTag, anyHtmlTag,
+                                  anyHtmlEndTag, htmlEndTag, extractTagType,
+                                  htmlBlockElement )
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.ParserCombinators.Parsec
+
+-- | Read markdown from an input string and return a Pandoc document.
+readMarkdown :: ParserState -> String -> Pandoc
+readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
+
+--
+-- Constants and data structure definitions
+--
+
+spaceChars = " \t"
+bulletListMarkers = "*+-"
+hruleChars = "*-_"
+setextHChars = "=-"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars = "\\[]*_~`<>$!^-.&'\""
+
+--
+-- auxiliary functions
+--
+
+indentSpaces = try $ do
+  state <- getState
+  let tabStop = stateTabStop state
+  try (count tabStop (char ' ')) <|> 
+    (many (char ' ') >> string "\t") <?> "indentation"
+
+nonindentSpaces = do
+  state <- getState
+  let tabStop = stateTabStop state
+  sps <- many (char ' ')
+  if length sps < tabStop 
+     then return sps
+     else unexpected "indented line"
+
+-- | Fail unless we're at beginning of a line.
+failUnlessBeginningOfLine = do
+  pos <- getPosition
+  if sourceColumn pos == 1 then return () else fail "not beginning of line"
+
+-- | Fail unless we're in "smart typography" mode.
+failUnlessSmart = do
+  state <- getState
+  if stateSmart state then return () else fail "Smart typography feature"
+
+-- | Parse an inline Str element with a given content.
+inlineString str = try $ do 
+  (Str res) <- inline 
+  if res == str then return res else fail $ "unexpected Str content"
+
+-- | Parse a sequence of inline elements between a string
+--  @opener@ and a string @closer@, including inlines
+--  between balanced pairs of @opener@ and a @closer@.
+inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
+inlinesInBalanced opener closer = try $ do
+  string opener
+  result <- manyTill ( (do lookAhead (inlineString opener)
+                           -- because it might be a link...
+                           bal <- inlinesInBalanced opener closer 
+                           return $ [Str opener] ++ bal ++ [Str closer])
+                       <|> (count 1 inline)) 
+                     (try (string closer))
+  return $ concat result
+
+--
+-- document structure
+--
+
+titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
+
+authorsLine = try $ do 
+  char '%'
+  skipSpaces
+  authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
+  newline
+  return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
+
+dateLine = try $ do
+  char '%'
+  skipSpaces
+  date <- many (noneOf "\n")
+  newline
+  return $ decodeCharacterReferences $ removeTrailingSpace date
+
+titleBlock = try $ do
+  failIfStrict
+  title <- option [] titleLine
+  author <- option [] authorsLine
+  date <- option "" dateLine
+  optional blanklines
+  return (title, author, date)
+
+parseMarkdown = do
+  -- markdown allows raw HTML
+  updateState (\state -> state { stateParseRaw = True })
+  startPos <- getPosition
+  -- go through once just to get list of reference keys
+  -- docMinusKeys is the raw document with blanks where the keys were...
+  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= 
+                  return . concat
+  setInput docMinusKeys
+  setPosition startPos
+  st <- getState
+  -- go through again for notes unless strict...
+  if stateStrict st
+     then return ()
+     else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= 
+                              return . concat
+             st <- getState
+             let reversedNotes = stateNotes st
+             updateState $ \st -> st { stateNotes = reverse reversedNotes }
+             setInput docMinusNotes
+             setPosition startPos
+  -- now parse it for real...
+  (title, author, date) <- option ([],[],"") titleBlock
+  blocks <- parseBlocks 
+  return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
+
+-- 
+-- initial pass for references and notes
+--
+
+referenceKey = try $ do
+  startPos <- getPosition
+  nonindentSpaces
+  label <- reference
+  char ':'
+  skipSpaces
+  optional (char '<')
+  src <- many (noneOf "> \n\t")
+  optional (char '>')
+  tit <- option "" referenceTitle
+  blanklines
+  endPos <- getPosition
+  let newkey = (label, (removeTrailingSpace src,  tit))
+  st <- getState
+  let oldkeys = stateKeys st
+  updateState $ \st -> st { stateKeys = newkey : oldkeys }
+  -- return blanks so line count isn't affected
+  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+referenceTitle = try $ do 
+  (many1 spaceChar >> option '\n' newline) <|> newline
+  skipSpaces
+  tit <-    (charsInBalanced '(' ')' >>= return . unwords . words)
+        <|> do delim <- char '\'' <|> char '"'
+               manyTill anyChar (try (char delim >> skipSpaces >>
+                                      notFollowedBy (noneOf ")\n")))
+  return $ decodeCharacterReferences tit
+
+noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']')
+
+rawLine = do
+  notFollowedBy blankline
+  notFollowedBy' noteMarker
+  contents <- many1 nonEndline
+  end <- option "" (newline >> optional indentSpaces >> return "\n") 
+  return $ contents ++ end
+
+rawLines = many1 rawLine >>= return . concat
+
+noteBlock = try $ do
+  startPos <- getPosition
+  ref <- noteMarker
+  char ':'
+  optional blankline
+  optional indentSpaces
+  raw <- sepBy rawLines (try (blankline >> indentSpaces))
+  optional blanklines
+  endPos <- getPosition
+  -- parse the extracted text, which may contain various block elements:
+  contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
+  let newnote = (ref, contents)
+  st <- getState
+  let oldnotes = stateNotes st
+  updateState $ \st -> st { stateNotes = newnote : oldnotes }
+  -- return blanks so line count isn't affected
+  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+--
+-- parsing blocks
+--
+
+parseBlocks = manyTill block eof
+
+block = choice [ header 
+               , table
+               , codeBlock
+               , hrule
+               , list
+               , blockQuote
+               , htmlBlock
+               , rawLaTeXEnvironment'
+               , para
+               , plain
+               , nullBlock ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = atxHeader <|> setextHeader <?> "header"
+
+atxHeader = try $ do
+  level <- many1 (char '#') >>= return . length
+  notFollowedBy (char '.' <|> char ')') -- this would be a list
+  skipSpaces
+  text <- manyTill inline atxClosing >>= return . normalizeSpaces
+  return $ Header level text
+
+atxClosing = try $ skipMany (char '#') >> blanklines
+
+setextHeader = try $ do
+  -- first, see if this block has any chance of being a setextHeader:
+  lookAhead (anyLine >> oneOf setextHChars)
+  text <- many1Till inline newline >>= return . normalizeSpaces
+  level <- choice $ zipWith 
+           (\ch lev -> try (many1 $ char ch) >> blanklines >> return lev)
+           setextHChars [1..(length setextHChars)]
+  return $ Header level text
+
+--
+-- hrule block
+--
+
+hrule = try $ do
+  skipSpaces
+  start <- oneOf hruleChars
+  count 2 (skipSpaces >> char start)
+  skipMany (skipSpaces >> char start)
+  newline
+  optional blanklines
+  return HorizontalRule
+
+--
+-- code blocks
+--
+
+indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
+
+codeBlock = do
+  contents <- many1 (indentedLine <|> 
+                     try (do b <- blanklines
+                             l <- indentedLine
+                             return $ b ++ l))
+  optional blanklines
+  return $ CodeBlock $ stripTrailingNewlines $ concat contents
+
+--
+-- block quotes
+--
+
+emacsBoxQuote = try $ do
+  failIfStrict
+  string ",----"
+  manyTill anyChar newline
+  raw <- manyTill 
+         (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
+         (try (string "`----"))
+  blanklines
+  return raw
+
+emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
+
+emailBlockQuote = try $ do
+  emailBlockQuoteStart
+  raw <- sepBy (many (nonEndline <|> 
+                      (try (endline >> notFollowedBy emailBlockQuoteStart >>
+                       return '\n'))))
+               (try (newline >> emailBlockQuoteStart))
+  newline <|> (eof >> return '\n')
+  optional blanklines
+  return raw
+
+blockQuote = do 
+  raw <- emailBlockQuote <|> emacsBoxQuote
+  -- parse the extracted block, which may contain various block elements:
+  contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
+  return $ BlockQuote contents
+ 
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+bulletListStart = try $ do
+  optional newline -- if preceded by a Plain block in a list context
+  nonindentSpaces
+  notFollowedBy' hrule     -- because hrules start out just like lists
+  oneOf bulletListMarkers
+  spaceChar
+  skipSpaces
+
+anyOrderedListStart = try $ do
+  optional newline -- if preceded by a Plain block in a list context
+  nonindentSpaces
+  notFollowedBy $ string "p." >> spaceChar >> digit  -- page number
+  state <- getState
+  if stateStrict state
+     then do many1 digit
+             char '.'
+             spaceChar
+             return (1, DefaultStyle, DefaultDelim)
+     else anyOrderedListMarker >>~ spaceChar
+
+orderedListStart style delim = try $ do
+  optional newline -- if preceded by a Plain block in a list context
+  nonindentSpaces
+  state <- getState
+  num <- if stateStrict state
+            then do many1 digit
+                    char '.'
+                    return 1
+            else orderedListMarker style delim 
+  if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
+     num `elem` [1, 5, 10, 50, 100, 500, 1000]))
+     then char '\t' <|> (spaceChar >> spaceChar)
+     else spaceChar
+  skipSpaces
+
+-- parse a line of a list item (start = parser for beginning of list item)
+listLine start = try $ do
+  notFollowedBy' start
+  notFollowedBy blankline
+  notFollowedBy' (do indentSpaces
+                     many (spaceChar)
+                     bulletListStart <|> (anyOrderedListStart >> return ()))
+  line <- manyTill anyChar newline
+  return $ line ++ "\n"
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem start = try $ do
+  start
+  result <- many1 (listLine start)
+  blanks <- many blankline
+  return $ concat result ++ blanks
+
+-- continuation of a list item - indented and separated by blankline 
+-- or (in compact lists) endline.
+-- note: nested lists are parsed as continuations
+listContinuation start = try $ do
+  lookAhead indentSpaces
+  result <- many1 (listContinuationLine start)
+  blanks <- many blankline
+  return $ concat result ++ blanks
+
+listContinuationLine start = try $ do
+  notFollowedBy blankline
+  notFollowedBy' start
+  optional indentSpaces
+  result <- manyTill anyChar newline
+  return $ result ++ "\n"
+
+listItem start = try $ do 
+  first <- rawListItem start
+  continuations <- many (listContinuation start)
+  -- parsing with ListItemState forces markers at beginning of lines to
+  -- count as list item markers, even if not separated by blank space.
+  -- see definition of "endline"
+  state <- getState
+  let oldContext = stateParserContext state
+  setState $ state {stateParserContext = ListItemState}
+  -- parse the extracted block, which may contain various block elements:
+  let raw = concat (first:continuations)
+  contents <- parseFromString parseBlocks raw
+  updateState (\st -> st {stateParserContext = oldContext})
+  return contents
+
+orderedList = try $ do
+  (start, style, delim) <- lookAhead anyOrderedListStart
+  items <- many1 (listItem (orderedListStart style delim))
+  return $ OrderedList (start, style, delim) $ compactify items
+
+bulletList = many1 (listItem bulletListStart) >>= 
+             return . BulletList . compactify
+
+-- definition lists
+
+definitionListItem = try $ do
+  notFollowedBy blankline
+  notFollowedBy' indentSpaces
+  -- first, see if this has any chance of being a definition list:
+  lookAhead (anyLine >> char ':')
+  term <- manyTill inline newline
+  raw <- many1 defRawBlock
+  state <- getState
+  let oldContext = stateParserContext state
+  -- parse the extracted block, which may contain various block elements:
+  contents <- parseFromString parseBlocks $ concat raw
+  updateState (\st -> st {stateParserContext = oldContext})
+  return ((normalizeSpaces term), contents)
+
+defRawBlock = try $ do
+  char ':'
+  state <- getState
+  let tabStop = stateTabStop state
+  try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
+  firstline <- anyLine
+  rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
+  trailing <- option "" blanklines
+  return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
+
+definitionList = do
+  failIfStrict
+  items <- many1 definitionListItem
+  let (terms, defs) = unzip items
+  let defs' = compactify defs
+  let items' = zip terms defs'
+  return $ DefinitionList items'
+
+--
+-- paragraph block
+--
+
+para = try $ do 
+  result <- many1 inline
+  newline
+  blanklines <|> do st <- getState
+                    if stateStrict st
+                       then lookAhead (blockQuote <|> header) >> return ""
+                       else lookAhead emacsBoxQuote >> return ""
+  return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces 
+
+-- 
+-- raw html
+--
+
+htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
+
+htmlBlock = do
+  st <- getState
+  if stateStrict st
+    then try $ do failUnlessBeginningOfLine
+                  first <- htmlElement
+                  finalSpace <- many (oneOf spaceChars)
+                  finalNewlines <- many newline
+                  return $ RawHtml $ first ++ finalSpace ++ finalNewlines
+    else rawHtmlBlocks
+
+-- True if tag is self-closing
+isSelfClosing tag = 
+  isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
+
+strictHtmlBlock = try $ do
+  tag <- anyHtmlBlockTag 
+  let tag' = extractTagType tag
+  if isSelfClosing tag || tag' == "hr" 
+     then return tag
+     else do contents <- many (notFollowedBy' (htmlEndTag tag') >> 
+                               (htmlElement <|> (count 1 anyChar)))
+             end <- htmlEndTag tag'
+             return $ tag ++ concat contents ++ end
+
+rawHtmlBlocks = do
+  htmlBlocks <- many1 rawHtmlBlock    
+  let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
+  let combined' = if not (null combined) && last combined == '\n'
+                     then init combined  -- strip extra newline 
+                     else combined 
+  return $ RawHtml combined'
+
+--
+-- LaTeX
+--
+
+rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
+
+--
+-- Tables
+-- 
+
+-- Parse a dashed line with optional trailing spaces; return its length
+-- and the length including trailing space.
+dashedLine ch = do
+  dashes <- many1 (char ch)
+  sp     <- many spaceChar
+  return $ (length dashes, length $ dashes ++ sp)
+
+-- Parse a table header with dashed lines of '-' preceded by 
+-- one line of text.
+simpleTableHeader = try $ do
+  rawContent  <- anyLine
+  initSp      <- nonindentSpaces
+  dashes      <- many1 (dashedLine '-')
+  newline
+  let (lengths, lines) = unzip dashes
+  let indices  = scanl (+) (length initSp) lines
+  let rawHeads = tail $ splitByIndices (init indices) rawContent
+  let aligns   = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+  return (rawHeads, aligns, indices)
+
+-- Parse a table footer - dashed lines followed by blank line.
+tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
+
+-- Parse a table separator - dashed line.
+tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
+
+-- Parse a raw line and split it into chunks by indices.
+rawTableLine indices = do
+  notFollowedBy' (blanklines <|> tableFooter)
+  line <- many1Till anyChar newline
+  return $ map removeLeadingTrailingSpace $ tail $ 
+           splitByIndices (init indices) line
+
+-- Parse a table line and return a list of lists of blocks (columns).
+tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
+
+-- Parse a multiline table row and return a list of blocks (columns).
+multilineRow indices = do
+  colLines <- many1 (rawTableLine indices)
+  optional blanklines
+  let cols = map unlines $ transpose colLines
+  mapM (parseFromString (many plain)) cols
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int     -- Number of columns on terminal
+                  -> [Int]   -- Indices
+                  -> [Float] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []  
+widthsFromIndices numColumns indices = 
+  let lengths = zipWith (-) indices (0:indices)
+      totLength = sum lengths
+      quotient = if totLength > numColumns
+                   then fromIntegral totLength
+                   else fromIntegral numColumns
+      fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+  tail fracs
+
+-- Parses a table caption:  inlines beginning with 'Table:'
+-- and followed by blank lines.
+tableCaption = try $ do
+  nonindentSpaces
+  string "Table:"
+  result <- many1 inline
+  blanklines
+  return $ normalizeSpaces result
+
+-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
+tableWith headerParser lineParser footerParser = try $ do
+    (rawHeads, aligns, indices) <- headerParser
+    lines <- many1Till (lineParser indices) footerParser
+    caption <- option [] tableCaption
+    heads <- mapM (parseFromString (many plain)) rawHeads
+    state <- getState
+    let numColumns = stateColumns state
+    let widths = widthsFromIndices numColumns indices
+    return $ Table caption aligns widths heads lines
+
+-- Parse a simple table with '---' header and one line per row.
+simpleTable = tableWith simpleTableHeader tableLine blanklines
+
+-- Parse a multiline table:  starts with row of '-' on top, then header
+-- (which may be multiline), then the rows,
+-- which may be multiline, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+multilineTable = tableWith multilineTableHeader multilineRow tableFooter
+
+multilineTableHeader = try $ do
+  tableSep 
+  rawContent  <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+  initSp      <- nonindentSpaces
+  dashes      <- many1 (dashedLine '-')
+  newline
+  let (lengths, lines) = unzip dashes
+  let indices  = scanl (+) (length initSp) lines
+  let rawHeadsList = transpose $ map 
+                     (\ln -> tail $ splitByIndices (init indices) ln)
+                     rawContent
+  let rawHeads = map (joinWithSep " ") rawHeadsList
+  let aligns   = zipWith alignType rawHeadsList lengths
+  return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
+
+-- Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String] -> Int -> Alignment
+alignType []  len = AlignDefault
+alignType strLst len =
+  let str        = head $ sortBy (comparing length) $ 
+                          map removeTrailingSpace strLst
+      leftSpace  = if null str then False else (str !! 0) `elem` " \t"
+      rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
+  in  case (leftSpace, rightSpace) of
+        (True,  False)   -> AlignRight
+        (False, True)    -> AlignLeft
+        (True,  True)    -> AlignCenter
+        (False, False)   -> AlignDefault
+
+table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
+
+-- 
+-- inline
+--
+
+inline = choice [ str
+                , smartPunctuation
+                , whitespace
+                , endline
+                , code
+                , charRef
+                , strong
+                , emph
+                , note
+                , inlineNote
+                , link
+                , image
+                , math
+                , strikeout
+                , superscript
+                , subscript
+                , autoLink
+                , rawHtmlInline'
+                , rawLaTeXInline'
+                , escapedChar
+                , symbol
+                , ltSign ] <?> "inline"
+
+escapedChar = do
+  char '\\'
+  state <- getState
+  result <- option '\\' $ if stateStrict state 
+                             then oneOf "\\`*_{}[]()>#+-.!~"
+                             else satisfy (not . isAlphaNum)
+  return $ Str [result]
+
+ltSign = do
+  st <- getState
+  if stateStrict st
+     then char '<'
+     else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
+  return $ Str ['<']
+
+specialCharsMinusLt = filter (/= '<') specialChars
+
+symbol = do 
+  result <- oneOf specialCharsMinusLt
+  return $ Str [result]
+
+-- parses inline code, between n `s and n `s
+code = try $ do 
+  starts <- many1 (char '`')
+  skipSpaces
+  result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+                       (char '\n' >> return " ")) 
+                      (try (skipSpaces >> count (length starts) (char '`') >> 
+                      notFollowedBy (char '`')))
+  return $ Code $ removeLeadingTrailingSpace $ concat result
+
+mathWord = many1 ((noneOf " \t\n\\$") <|>
+                  (try (char '\\') >>~ notFollowedBy (char '$')))
+
+math = try $ do
+  failIfStrict
+  char '$'
+  notFollowedBy space
+  words <- sepBy1 mathWord (many1 space)
+  char '$'
+  return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
+
+emph = ((enclosed (char '*') (char '*') inline) <|>
+        (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>= 
+        return . Emph . normalizeSpaces
+
+strong = ((enclosed (string "**") (try $ string "**") inline) <|> 
+          (enclosed (string "__") (try $ string "__") inline)) >>=
+         return . Strong . normalizeSpaces
+
+strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
+            return . Strikeout . normalizeSpaces
+
+superscript = failIfStrict >> enclosed (char '^') (char '^') 
+              (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+              return . Superscript
+
+subscript = failIfStrict >> enclosed (char '~') (char '~')
+            (notFollowedBy' whitespace >> inline) >>=  -- may not contain Space
+            return . Subscript 
+
+smartPunctuation = failUnlessSmart >> 
+                   choice [ quoted, apostrophe, dash, ellipses ]
+
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+
+quoted = doubleQuoted <|> singleQuoted 
+
+withQuoteContext context parser = do
+  oldState <- getState
+  let oldQuoteContext = stateQuoteContext oldState
+  setState oldState { stateQuoteContext = context }
+  result <- parser
+  newState <- getState
+  setState newState { stateQuoteContext = oldQuoteContext }
+  return result
+
+singleQuoted = try $ do
+  singleQuoteStart
+  withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
+    return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted = try $ do 
+  doubleQuoteStart
+  withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
+    return . Quoted DoubleQuote . normalizeSpaces
+
+failIfInQuoteContext context = do
+  st <- getState
+  if stateQuoteContext st == context
+     then fail "already inside quotes"
+     else return ()
+
+singleQuoteStart = do 
+  failIfInQuoteContext InSingleQuote
+  char '\8216' <|> 
+     do char '\''  
+        notFollowedBy (oneOf ")!],.;:-? \t\n")
+        notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+                            satisfy (not . isAlphaNum))) -- possess/contraction
+        return '\''
+
+singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum
+
+doubleQuoteStart = failIfInQuoteContext InDoubleQuote >>
+                   (char '"' <|> char '\8220') >>
+                   notFollowedBy (oneOf " \t\n")
+
+doubleQuoteEnd = char '"' <|> char '\8221'
+
+ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
+
+dash = enDash <|> emDash
+
+enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
+
+emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >>
+               skipSpaces >> return EmDash
+
+whitespace = do
+  sps <- many1 (oneOf spaceChars)
+  if length sps >= 2
+     then option Space (endline >> return LineBreak)
+     else return Space <?> "whitespace"
+
+nonEndline = satisfy (/='\n')
+
+strChar = noneOf (specialChars ++ spaceChars ++ "\n")
+
+str = many1 strChar >>= return . Str
+
+-- an endline character that can be treated as a space, not a structural break
+endline = try $ do
+  newline
+  notFollowedBy blankline
+  st <- getState
+  if stateStrict st 
+    then do notFollowedBy emailBlockQuoteStart
+            notFollowedBy (char '#')  -- atx header
+    else return () 
+  -- parse potential list-starts differently if in a list:
+  if stateParserContext st == ListItemState
+     then notFollowedBy' (bulletListStart <|> 
+                          (anyOrderedListStart >> return ()))
+     else return ()
+  return Space
+
+--
+-- links
+--
+
+-- a reference label for a link
+reference = notFollowedBy' (string "[^") >>  -- footnote reference
+            inlinesInBalanced "[" "]" >>= (return . normalizeSpaces)
+
+-- source for a link, with optional title
+source = try $ do 
+  char '('
+  optional (char '<')
+  src <- many (noneOf ")> \t\n")
+  optional (char '>')
+  tit <- option "" linkTitle
+  skipSpaces
+  char ')'
+  return (removeTrailingSpace src, tit)
+
+linkTitle = try $ do 
+  (many1 spaceChar >> option '\n' newline) <|> newline
+  skipSpaces
+  delim <- char '\'' <|> char '"'
+  tit <-   manyTill anyChar (try (char delim >> skipSpaces >>
+                                  notFollowedBy (noneOf ")\n")))
+  return $ decodeCharacterReferences tit
+
+link = try $ do
+  label <- reference
+  src <- source <|> referenceLink label
+  return $ Link label src
+
+-- a link like [this][ref] or [this][] or [this]
+referenceLink label = do
+  ref <- option [] (try (optional (char ' ') >> 
+                         optional (newline >> skipSpaces) >> reference))
+  let ref' = if null ref then label else ref
+  state <- getState
+  case lookupKeySrc (stateKeys state) ref' of
+     Nothing     -> fail "no corresponding key" 
+     Just target -> return target 
+
+emailAddress = try $ do
+  name <- many1 (alphaNum <|> char '+')
+  char '@'
+  first <- many1 alphaNum
+  rest <- many1 (char '.' >> many1 alphaNum)
+  return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest)
+
+uri = try $ do
+  str <- many1 (noneOf "\n\t >")
+  if isURI str
+     then return str
+     else fail "not a URI"
+
+autoLink = try $ do
+  char '<'
+  src <- uri <|> emailAddress
+  char '>'
+  let src' = if "mailto:" `isPrefixOf` src
+                then drop 7 src
+                else src 
+  st <- getState
+  return $ if stateStrict st
+              then Link [Str src'] (src, "")
+              else Link [Code src'] (src, "")
+
+image = try $ do
+  char '!'
+  (Link label src) <- link
+  return $ Image label src
+
+note = try $ do
+  failIfStrict
+  ref <- noteMarker
+  state <- getState
+  let notes = stateNotes state
+  case lookup ref notes of
+    Nothing       -> fail "note not found"
+    Just contents -> return $ Note contents
+
+inlineNote = try $ do
+  failIfStrict
+  char '^'
+  contents <- inlinesInBalanced "[" "]"
+  return $ Note [Para contents]
+
+rawLaTeXInline' = failIfStrict >> rawLaTeXInline
+
+rawHtmlInline' = do
+  st <- getState
+  result <- choice $ if stateStrict st
+                        then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] 
+                        else [htmlBlockElement, anyHtmlInlineTag]
+  return $ HtmlInline result
+
diff --git a/Text/Pandoc/Readers/RST.hs b/Text/Pandoc/Readers/RST.hs
new file mode 100644
index 000000000..1239eb688
--- /dev/null
+++ b/Text/Pandoc/Readers/RST.hs
@@ -0,0 +1,640 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.RST 
+   Copyright   : Copyright (C) 2006-7 John MacFarlane
+   License     : GNU GPL, version 2 or above 
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion from reStructuredText to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.RST ( 
+                                readRST
+                               ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared 
+import Text.ParserCombinators.Parsec
+import Data.List ( findIndex, delete )
+
+-- | Parse reStructuredText string and return Pandoc document.
+readRST :: ParserState -> String -> Pandoc
+readRST state str = (readWith parseRST) state (str ++ "\n\n")
+
+--
+-- Constants and data structure definitions
+---
+
+bulletListMarkers = "*+-"
+underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars = "\\`|*_<>$:[-"
+
+--
+-- parsing documents
+--
+
+isAnonKey (ref, src) = ref == [Str "_"]
+
+isHeader :: Int -> Block -> Bool
+isHeader n (Header x _) = x == n
+isHeader _ _ = False
+
+-- | Promote all headers in a list of blocks.  (Part of
+-- title transformation for RST.)
+promoteHeaders :: Int -> [Block] -> [Block]
+promoteHeaders num ((Header level text):rest) = 
+    (Header (level - num) text):(promoteHeaders num rest)
+promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
+promoteHeaders num [] = []
+
+-- | If list of blocks starts with a header (or a header and subheader)
+-- of level that are not found elsewhere, return it as a title and
+-- promote all the other headers. 
+titleTransform :: [Block]              -- ^ list of blocks
+               -> ([Block], [Inline])  -- ^ modified list of blocks, title
+titleTransform ((Header 1 head1):(Header 2 head2):rest) =  -- title subtitle
+    if (any (isHeader 1) rest) || (any (isHeader 2) rest)
+       then ((Header 1 head1):(Header 2 head2):rest, [])
+       else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
+titleTransform ((Header 1 head1):rest) =       -- title, no subtitle
+    if (any (isHeader 1) rest)
+       then ((Header 1 head1):rest, [])
+       else ((promoteHeaders 1 rest), head1)
+titleTransform blocks = (blocks, [])
+
+parseRST = do
+  startPos <- getPosition
+  -- go through once just to get list of reference keys
+  -- docMinusKeys is the raw document with blanks where the keys were...
+  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
+  setInput docMinusKeys
+  setPosition startPos
+  st <- getState
+  let reversedKeys = stateKeys st
+  updateState $ \st -> st { stateKeys = reverse reversedKeys }
+  -- now parse it for real...
+  blocks <- parseBlocks 
+  let blocks' = filter (/= Null) blocks
+  state <- getState
+  let (blocks'', title) = if stateStandalone state
+                              then titleTransform blocks'
+                              else (blocks', [])
+  let authors = stateAuthors state
+  let date = stateDate state
+  let title' = if (null title) then (stateTitle state) else title
+  return $ Pandoc (Meta title' authors date) blocks''
+
+--
+-- parsing blocks
+--
+
+parseBlocks = manyTill block eof
+
+block = choice [ codeBlock
+               , rawHtmlBlock
+               , rawLaTeXBlock
+               , fieldList
+               , blockQuote
+               , imageBlock
+               , unknownDirective
+               , header
+               , hrule
+               , list
+               , lineBlock
+               , para
+               , plain
+               , nullBlock ] <?> "block"
+
+--
+-- field list
+--
+
+fieldListItem indent = try $ do
+  string indent
+  char ':'
+  name <- many1 alphaNum
+  string ": "
+  skipSpaces
+  first <- manyTill anyChar newline
+  rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >> 
+                            indentedBlock
+  return (name, joinWithSep " " (first:(lines rest)))
+
+fieldList = try $ do
+  indent <- lookAhead $ many (oneOf " \t")
+  items <- many1 $ fieldListItem indent
+  blanklines
+  let authors = case lookup "Authors" items of
+                  Just auth -> [auth]
+                  Nothing  -> map snd (filter (\(x,y) -> x == "Author") items)
+  if null authors 
+     then return () 
+     else updateState $ \st -> st {stateAuthors = authors}
+  case (lookup "Date" items) of
+           Just dat -> updateState $ \st -> st {stateDate = dat}
+           Nothing  -> return ()
+  case (lookup "Title" items) of
+           Just tit -> parseFromString (many inline) tit >>=
+                       \t -> updateState $ \st -> st {stateTitle = t}
+           Nothing  -> return ()
+  let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && 
+                  (x /= "Date") && (x /= "Title")) items
+  if null remaining
+              then return Null
+              else do terms <- mapM (return . (:[]) . Str . fst) remaining
+                      defs  <- mapM (parseFromString (many block) . snd) 
+                                    remaining
+                      return $ DefinitionList $ zip terms defs
+
+--
+-- line block
+--
+
+lineBlockLine = try $ do
+  string "| "
+  white <- many (oneOf " \t")
+  line <- manyTill inline newline
+  return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
+
+lineBlock = try $ do
+  lines <- many1 lineBlockLine
+  blanklines
+  return $ Para (concat lines)
+
+--
+-- paragraph block
+--
+
+para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
+
+codeBlockStart = string "::" >> blankline >> blankline
+
+-- paragraph that ends in a :: starting a code block
+paraBeforeCodeBlock = try $ do
+  result <- many1 (notFollowedBy' codeBlockStart >> inline)
+  lookAhead (string "::")
+  return $ Para $ if last result == Space
+                     then normalizeSpaces result
+                     else (normalizeSpaces result) ++ [Str ":"]
+
+-- regular paragraph
+paraNormal = try $ do 
+  result <- many1 inline
+  newline
+  blanklines
+  return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces 
+
+--
+-- image block
+--
+
+imageBlock = try $ do
+  string ".. image:: "
+  src <- manyTill anyChar newline
+  fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
+                           many1 $ fieldListItem indent
+  optional blanklines
+  case lookup "alt" fields of
+        Just alt -> return $ Plain [Image [Str alt] (src, alt)]
+        Nothing  -> return $ Plain [Image [Str "image"] (src, "")]
+--
+-- header blocks
+--
+
+header = doubleHeader <|> singleHeader <?> "header"
+
+-- a header with lines on top and bottom
+doubleHeader = try $ do
+  c <- oneOf underlineChars
+  rest <- many (char c)  -- the top line
+  let lenTop = length (c:rest)
+  skipSpaces
+  newline
+  txt <- many1 (notFollowedBy blankline >> inline)
+  pos <- getPosition
+  let len = (sourceColumn pos) - 1
+  if (len > lenTop) then fail "title longer than border" else return ()
+  blankline              -- spaces and newline
+  count lenTop (char c)  -- the bottom line
+  blanklines
+  -- check to see if we've had this kind of header before.  
+  -- if so, get appropriate level.  if not, add to list.
+  state <- getState
+  let headerTable = stateHeaderTable state
+  let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
+        Just ind -> (headerTable, ind + 1)
+        Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
+  setState (state { stateHeaderTable = headerTable' })
+  return $ Header level (normalizeSpaces txt)
+
+-- a header with line on the bottom only
+singleHeader = try $ do 
+  notFollowedBy' whitespace
+  txt <- many1 (do {notFollowedBy blankline; inline})
+  pos <- getPosition
+  let len = (sourceColumn pos) - 1
+  blankline
+  c <- oneOf underlineChars
+  rest <- count (len - 1) (char c)
+  many (char c)
+  blanklines
+  state <- getState
+  let headerTable = stateHeaderTable state
+  let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
+        Just ind -> (headerTable, ind + 1)
+        Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
+  setState (state { stateHeaderTable = headerTable' })
+  return $ Header level (normalizeSpaces txt)
+
+--
+-- hrule block
+--
+
+hrule = try $ do
+  chr <- oneOf underlineChars
+  count 3 (char chr)
+  skipMany (char chr)
+  blankline
+  blanklines
+  return HorizontalRule
+
+--
+-- code blocks
+--
+
+-- read a line indented by a given string
+indentedLine indents = try $ do
+  string indents
+  result <- manyTill anyChar newline
+  return $ result ++ "\n"
+
+-- two or more indented lines, possibly separated by blank lines.
+-- any amount of indentation will work.
+indentedBlock = do 
+  indents <- lookAhead $ many1 (oneOf " \t")
+  lns <- many $ choice $ [ indentedLine indents,
+                           try $ do b <- blanklines
+                                    l <- indentedLine indents
+                                    return (b ++ l) ]
+  optional blanklines 
+  return $ concat lns
+
+codeBlock = try $ do
+  codeBlockStart
+  result <- indentedBlock
+  return $ CodeBlock $ stripTrailingNewlines result
+
+--
+-- raw html
+--
+
+rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
+                     indentedBlock >>= return . RawHtml
+
+--
+-- raw latex
+--
+
+rawLaTeXBlock = try $ do
+  string ".. raw:: latex"
+  blanklines
+  result <- indentedBlock
+  return $ Para [(TeX result)]
+
+--
+-- block quotes
+--
+
+blockQuote = do
+  raw <- indentedBlock
+  -- parse the extracted block, which may contain various block elements:
+  contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+  return $ BlockQuote contents
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+definitionListItem = try $ do
+  term <- many1Till inline endline
+  raw <- indentedBlock
+  -- parse the extracted block, which may contain various block elements:
+  contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+  return (normalizeSpaces term, contents)
+
+definitionList = many1 definitionListItem >>= return . DefinitionList
+
+-- parses bullet list start and returns its length (inc. following whitespace)
+bulletListStart = try $ do
+  notFollowedBy' hrule  -- because hrules start out just like lists
+  marker <- oneOf bulletListMarkers
+  white <- many1 spaceChar
+  return $ length (marker:white)
+
+-- parses ordered list start and returns its length (inc following whitespace)
+orderedListStart style delim = try $ do
+  (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
+  white <- many1 spaceChar
+  return $ markerLen + length white
+
+-- parse a line of a list item
+listLine markerLength = try $ do
+  notFollowedBy blankline
+  indentWith markerLength
+  line <- manyTill anyChar newline
+  return $ line ++ "\n"
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith num = do
+  state <- getState
+  let tabStop = stateTabStop state
+  if (num < tabStop)
+     then count num  (char ' ')
+     else choice [ try (count num (char ' ')), 
+                   (try (char '\t' >> count (num - tabStop) (char ' '))) ] 
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem start = do
+  markerLength <- start
+  firstLine <- manyTill anyChar newline
+  restLines <- many (listLine markerLength)
+  return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+
+-- continuation of a list item - indented and separated by blankline or 
+-- (in compact lists) endline.  
+-- Note: nested lists are parsed as continuations.
+listContinuation markerLength = try $ do
+  blanks <- many1 blankline
+  result <- many1 (listLine markerLength)
+  return $ blanks ++ concat result
+
+listItem start = try $ do 
+  (markerLength, first) <- rawListItem start
+  rest <- many (listContinuation markerLength)
+  blanks <- choice [ try (many blankline >>~ lookAhead start),
+                     many1 blankline ]  -- whole list must end with blank.
+  -- parsing with ListItemState forces markers at beginning of lines to
+  -- count as list item markers, even if not separated by blank space.
+  -- see definition of "endline"
+  state <- getState
+  let oldContext = stateParserContext state
+  setState $ state {stateParserContext = ListItemState}
+  -- parse the extracted block, which may itself contain block elements
+  parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
+  updateState (\st -> st {stateParserContext = oldContext})
+  return parsed
+
+orderedList = do
+  (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
+  items <- many1 (listItem (orderedListStart style delim))
+  let items' = compactify items
+  return $ OrderedList (start, style, delim) items'
+
+bulletList = many1 (listItem bulletListStart) >>= 
+             return . BulletList . compactify
+
+--
+-- unknown directive (e.g. comment)
+--
+
+unknownDirective = try $ do
+  string ".. "
+  manyTill anyChar newline
+  many (string "   :" >> many1 (noneOf "\n:") >> char ':' >>
+        many1 (noneOf "\n") >> newline)
+  optional blanklines
+  return Null
+
+-- 
+-- reference key
+--
+
+referenceKey = do
+  startPos <- getPosition
+  key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
+  st <- getState
+  let oldkeys = stateKeys st
+  updateState $ \st -> st { stateKeys = key : oldkeys }
+  optional blanklines
+  endPos <- getPosition
+  -- return enough blanks to replace key
+  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+targetURI = do
+  skipSpaces
+  optional newline
+  contents <- many1 (try (many spaceChar >> newline >> 
+                          many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
+  blanklines
+  return contents
+
+imageKey = try $ do
+  string ".. |"
+  ref <- manyTill inline (char '|')
+  skipSpaces
+  string "image::"
+  src <- targetURI
+  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+anonymousKey = try $ do
+  oneOfStrings [".. __:", "__"]
+  src <- targetURI
+  state <- getState
+  return ([Str "_"], (removeLeadingTrailingSpace src, ""))
+
+regularKeyQuoted = try $ do
+  string ".. _`"
+  ref <- manyTill inline (char '`')
+  char ':'
+  src <- targetURI
+  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+regularKey = try $ do
+  string ".. _"
+  ref <- manyTill inline (char ':')
+  src <- targetURI
+  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+ -- 
+ -- inline
+ --
+
+inline = choice [ link
+                , str
+                , whitespace
+                , endline
+                , strong
+                , emph
+                , code
+                , image
+                , hyphens
+                , superscript
+                , subscript
+                , escapedChar
+                , symbol ] <?> "inline"
+
+hyphens = do
+  result <- many1 (char '-')
+  option Space endline 
+  -- don't want to treat endline after hyphen or dash as a space
+  return $ Str result
+
+escapedChar = escaped anyChar
+
+symbol = do 
+  result <- oneOf specialChars
+  return $ Str [result]
+
+-- parses inline code, between codeStart and codeEnd
+code = try $ do 
+  string "``"
+  result <- manyTill anyChar (try (string "``"))
+  return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+
+emph = enclosed (char '*') (char '*') inline >>= 
+       return . Emph . normalizeSpaces
+
+strong = enclosed (string "**") (try $ string "**") inline >>= 
+         return . Strong . normalizeSpaces
+
+interpreted role = try $ do
+  optional $ try $ string "\\ "
+  result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
+  nextChar <- lookAhead anyChar
+  try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
+  return [Str result]
+
+superscript = interpreted "sup" >>= (return . Superscript)
+
+subscript = interpreted "sub" >>= (return . Subscript)
+
+whitespace = many1 spaceChar >> return Space <?> "whitespace"
+
+str = notFollowedBy' oneWordReference >> 
+      many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
+
+-- an endline character that can be treated as a space, not a structural break
+endline = try $ do
+  newline
+  notFollowedBy blankline
+  -- parse potential list-starts at beginning of line differently in a list:
+  st <- getState
+  if (stateParserContext st) == ListItemState
+     then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
+          notFollowedBy' bulletListStart
+     else return ()
+  return Space
+
+--
+-- links
+--
+
+link = choice [explicitLink, referenceLink, autoLink]  <?> "link"
+
+explicitLink = try $ do
+  char '`'
+  notFollowedBy (char '`') -- `` is marks start of inline code
+  label <- manyTill inline (try (do {spaces; char '<'}))
+  src <- manyTill (noneOf ">\n ") (char '>')
+  skipSpaces
+  string "`_"
+  return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "")
+
+reference = try $ do
+  char '`'
+  notFollowedBy (char '`')
+  label <- many1Till inline (char '`') 
+  char '_'
+  return label
+
+oneWordReference = do
+  raw <- many1 alphaNum
+  char '_'
+  notFollowedBy alphaNum    -- because this_is_not a link
+  return [Str raw]
+
+referenceLink = try $ do
+  label <- reference <|> oneWordReference
+  key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link
+  state <- getState
+  let keyTable = stateKeys state
+  src <- case lookupKeySrc keyTable key of
+           Nothing     -> fail "no corresponding key"
+           Just target -> return target
+  -- if anonymous link, remove first anon key so it won't be used again
+  let keyTable' = if (key == [Str "_"]) -- anonymous link? 
+                    then delete ([Str "_"], src) keyTable -- remove first anon key 
+                    else keyTable                    
+  setState $ state { stateKeys = keyTable' }
+  return $ Link (normalizeSpaces label) src 
+
+uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", 
+                           "mailto:", "news:", "telnet:" ]
+
+uri = try $ do
+  scheme <- uriScheme
+  identifier <- many1 (noneOf " \t\n")
+  return $ scheme ++ identifier
+
+autoURI = do
+  src <- uri
+  return $ Link [Str src] (src, "")
+
+emailChar = alphaNum <|> oneOf "-+_."
+
+emailAddress = try $ do
+  firstLetter <- alphaNum
+  restAddr <- many emailChar
+  let addr = firstLetter:restAddr
+  char '@'
+  dom <- domain
+  return $ addr ++ '@':dom
+
+domainChar = alphaNum <|> char '-'
+
+domain = do
+  first <- many1 domainChar
+  dom <- many1 (try (do{ char '.'; many1 domainChar }))
+  return $ joinWithSep "." (first:dom)
+
+autoEmail = do
+  src <- emailAddress
+  return $ Link [Str src] ("mailto:" ++ src, "")
+
+autoLink = autoURI <|> autoEmail
+
+-- For now, we assume that all substitution references are for images.
+image = try $ do
+  char '|'
+  ref <- manyTill inline (char '|')
+  state <- getState
+  let keyTable = stateKeys state
+  src <- case lookupKeySrc keyTable ref of
+           Nothing     -> fail "no corresponding key"
+           Just target -> return target
+  return $ Image (normalizeSpaces ref) src
+
-- 
cgit v1.2.3