Removed Text directory. This is a remnant of an experiment

moving the contents of src/ to the top level, and should have been deleted long ago. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1097 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-11-28 21:01:17 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-11-28 21:01:17 +0000
commit: 47a4a3ab897ab748a4b7eab2ccb95cd9cb0e3864 (patch)
tree: ead5f33d20aa3a88c78f11bb8e598a4f741247f1 /Text/Pandoc/Readers
parent: 3dfe4cb708ac14369aab19383c1008b2cf4adbc4 (diff)
download: pandoc-47a4a3ab897ab748a4b7eab2ccb95cd9cb0e3864.tar.gz
4 files changed, 0 insertions, 2696 deletions
diff --git a/Text/Pandoc/Readers/HTML.hs b/Text/Pandoc/Readers/HTML.hs
deleted file mode 100644
index 70a071152..000000000
--- a/Text/Pandoc/Readers/HTML.hs
+++ /dev/null
@@ -1,496 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Readers.HTML
-   Copyright   : Copyright (C) 2006-7 John MacFarlane
-   License     : GNU GPL, version 2 or above 
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha 
-   Portability : portable
-
-Conversion of HTML to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.HTML ( 
-                                 readHtml, 
-                                 rawHtmlInline, 
-                                 rawHtmlBlock, 
-                                 anyHtmlBlockTag, 
-                                 anyHtmlInlineTag,  
-                                 anyHtmlTag,
-                                 anyHtmlEndTag,
-                                 htmlEndTag,
-                                 extractTagType,
-                                 htmlBlockElement 
-                                ) where
-
-import Text.ParserCombinators.Parsec
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared 
-import Text.Pandoc.CharacterReferences ( characterReference, 
-                                         decodeCharacterReferences )
-import Data.Maybe ( fromMaybe )
-import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf )
-import Data.Char ( toUpper, toLower, isAlphaNum )
-
--- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: ParserState   -- ^ Parser state
-         -> String        -- ^ String to parse
-         -> Pandoc
-readHtml = readWith parseHtml
-
---
--- Constants
---
-
-eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
-                  "map", "area", "object", "script"]
-
-inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
-                  "br", "cite", "code", "dfn", "em", "font", "i", "img",
-                  "input", "kbd", "label", "q", "s", "samp", "select",
-                  "small", "span", "strike", "strong", "sub", "sup",
-                  "textarea", "tt", "u", "var"] ++ eitherBlockOrInline
-
-blockHtmlTags = ["address", "blockquote", "center", "dir", "div",
-                 "dl", "fieldset", "form", "h1", "h2", "h3", "h4",
-                 "h5", "h6", "hr", "isindex", "menu", "noframes",
-                 "noscript", "ol", "p", "pre", "table", "ul", "dd",
-                 "dt", "frameset", "li", "tbody", "td", "tfoot",
-                 "th", "thead", "tr"] ++ eitherBlockOrInline
-
---
--- HTML utility functions
---
-
--- | Read blocks until end tag.
-blocksTilEnd tag = do
-  blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
-  return $ filter (/= Null) blocks
-
--- | Read inlines until end tag.
-inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-
--- | Parse blocks between open and close tag.
-blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
-
--- | Parse inlines between open and close tag.
-inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
-
--- | Extract type from a tag:  e.g. @br@ from @\<br\>@
-extractTagType :: String -> String
-extractTagType ('<':rest) = 
-  let isSpaceOrSlash c = c `elem` "/ \n\t" in
-  map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
-extractTagType _ = ""
-
--- | Parse any HTML tag (opening or self-closing) and return text of tag
-anyHtmlTag = try $ do
-  char '<'
-  spaces
-  tag <- many1 alphaNum
-  attribs <- many htmlAttribute
-  spaces
-  ender <- option "" (string "/")
-  let ender' = if null ender then "" else " /"
-  spaces
-  char '>'
-  return $ "<" ++ tag ++ 
-           concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
-
-anyHtmlEndTag = try $ do
-  char '<'   
-  spaces
-  char '/'
-  spaces
-  tagType <- many1 alphaNum
-  spaces
-  char '>'
-  return $ "</" ++ tagType ++ ">"
-
-htmlTag :: String -> GenParser Char st (String, [(String, String)])
-htmlTag tag = try $ do
-  char '<'
-  spaces
-  stringAnyCase tag
-  attribs <- many htmlAttribute
-  spaces
-  optional (string "/")
-  spaces
-  char '>'
-  return (tag, (map (\(name, content, raw) -> (name, content)) attribs))
-
--- parses a quoted html attribute value
-quoted quoteChar = do
-  result <- between (char quoteChar) (char quoteChar) 
-                    (many (noneOf [quoteChar]))
-  return (result, [quoteChar])
-
-htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
-
--- minimized boolean attribute
-htmlMinimizedAttribute = try $ do
-  many1 space
-  name <- many1 (choice [letter, oneOf ".-_:"])
-  return (name, name, name)
-
-htmlRegularAttribute = try $ do
-  many1 space
-  name <- many1 (choice [letter, oneOf ".-_:"])
-  spaces
-  char '='
-  spaces
-  (content, quoteStr) <- choice [ (quoted '\''), 
-                                  (quoted '"'), 
-                                  (do
-                                     a <- many (alphaNum <|> (oneOf "-._:"))
-                                     return (a,"")) ]
-  return (name, content,
-          (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
-
--- | Parse an end tag of type 'tag'
-htmlEndTag tag = try $ do
-  char '<'   
-  spaces
-  char '/'
-  spaces
-  stringAnyCase tag
-  spaces
-  char '>'
-  return $ "</" ++ tag ++ ">"
-
--- | Returns @True@ if the tag is (or can be) an inline tag.
-isInline tag = (extractTagType tag) `elem` inlineHtmlTags
-
--- | Returns @True@ if the tag is (or can be) a block tag.
-isBlock tag = (extractTagType tag) `elem` blockHtmlTags 
-
-anyHtmlBlockTag = try $ do
-  tag <- anyHtmlTag <|> anyHtmlEndTag
-  if isBlock tag then return tag else fail "inline tag"
-
-anyHtmlInlineTag = try $ do
-  tag <- anyHtmlTag <|> anyHtmlEndTag
-  if isInline tag then return tag else fail "not an inline tag"
-
--- | Parses material between script tags.
--- Scripts must be treated differently, because they can contain '<>' etc.
-htmlScript = try $ do
-  open <- string "<script"
-  rest <- manyTill anyChar (htmlEndTag "script")
-  return $ open ++ rest ++ "</script>"
-
-htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]
-
-rawHtmlBlock = try $ do
-  notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
-  body <- htmlBlockElement <|> anyHtmlTag <|> anyHtmlEndTag
-  sp <- many space
-  state <- getState
-  if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null
-
--- | Parses an HTML comment.
-htmlComment = try $ do
-  string "<!--"
-  comment <- manyTill anyChar (try (string "-->"))
-  return $ "<!--" ++ comment ++ "-->"
-
---
--- parsing documents
---
-
-xmlDec = try $ do
-  string "<?"
-  rest <- manyTill anyChar (char '>')
-  return $ "<?" ++ rest ++ ">"
-
-definition = try $ do
-  string "<!"
-  rest <- manyTill anyChar (char '>')
-  return $ "<!" ++ rest ++ ">"
-
-nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >>
-  ((rawHtmlBlock >> return ' ') <|> anyChar)
-
-parseTitle = try $ do
-  (tag, _) <- htmlTag "title"
-  contents <- inlinesTilEnd tag
-  spaces
-  return contents
-
--- parse header and return meta-information (for now, just title)
-parseHead = try $ do
-  htmlTag "head"
-  spaces
-  skipMany nonTitleNonHead
-  contents <- option [] parseTitle
-  skipMany nonTitleNonHead
-  htmlTag "/head"
-  return (contents, [], "")
-
-skipHtmlTag tag = optional (htmlTag tag)
-
--- h1 class="title" representation of title in body
-bodyTitle = try $ do
-  (tag, attribs) <- htmlTag "h1"  
-  cl <- case (extractAttribute "class" attribs) of
-          Just "title" -> return ""
-          otherwise    -> fail "not title"
-  inlinesTilEnd "h1"
-
-parseHtml = do
-  sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
-  skipHtmlTag "html"
-  spaces
-  (title, authors, date) <- option ([], [], "") parseHead 
-  spaces
-  skipHtmlTag "body"
-  spaces
-  optional bodyTitle  -- skip title in body, because it's represented in meta
-  blocks <- parseBlocks
-  spaces
-  optional (htmlEndTag "body")
-  spaces
-  optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html>
-  eof
-  return $ Pandoc (Meta title authors date) blocks
-
---
--- parsing blocks
---
-
-parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
-
-block = choice [ codeBlock
-               , header
-               , hrule
-               , list
-               , blockQuote
-               , para
-               , plain
-               , rawHtmlBlock ] <?> "block"
-
---
--- header blocks
---
-
-header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
-
-headerLevel n = try $ do
-    let level = "h" ++ show n
-    (tag, attribs) <- htmlTag level
-    contents <- inlinesTilEnd level
-    return $ Header n (normalizeSpaces contents)
-
---
--- hrule block
---
-
-hrule = try  $ do
-  (tag, attribs) <- htmlTag "hr"
-  state <- getState
-  if not (null attribs) && stateParseRaw state
-     then unexpected "attributes in hr" -- parse as raw in this case
-     else return HorizontalRule
-
---
--- code blocks
---
-
--- Note:  HTML tags in code blocks (e.g. for syntax highlighting) are 
--- skipped, because they are not portable to output formats other than HTML.
-codeBlock = try $ do
-    htmlTag "pre" 
-    result <- manyTill 
-              (many1 (satisfy (/= '<')) <|> 
-               ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
-              (htmlEndTag "pre")
-    let result' = concat result
-    -- drop leading newline if any
-    let result'' = if "\n" `isPrefixOf` result'
-                      then drop 1 result'
-                      else result'
-    -- drop trailing newline if any
-    let result''' = if "\n" `isSuffixOf` result''
-                       then init result''
-                       else result''
-    return $ CodeBlock $ decodeCharacterReferences result'''
-
---
--- block quotes
---
-
-blockQuote = try $ htmlTag "blockquote" >> spaces >> 
-                   blocksTilEnd "blockquote" >>= (return . BlockQuote)
-
---
--- list blocks
---
-
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-orderedList = try $ do
-  (_, attribs) <- htmlTag "ol"
-  (start, style) <- option (1, DefaultStyle) $
-                           do failIfStrict
-                              let sta = fromMaybe "1" $ 
-                                        lookup "start" attribs
-                              let sty = fromMaybe (fromMaybe "" $
-                                        lookup "style" attribs) $
-                                        lookup "class" attribs
-                              let sty' = case sty of
-                                          "lower-roman"  -> LowerRoman
-                                          "upper-roman"  -> UpperRoman
-                                          "lower-alpha"  -> LowerAlpha
-                                          "upper-alpha"  -> UpperAlpha
-                                          "decimal"      -> Decimal
-                                          _              -> DefaultStyle
-                              return (read sta, sty')
-  spaces
-  items <- sepEndBy1 (blocksIn "li") spaces
-  htmlEndTag "ol"
-  return $ OrderedList (start, style, DefaultDelim) items
-
-bulletList = try $ do
-  htmlTag "ul"
-  spaces
-  items <- sepEndBy1 (blocksIn "li") spaces
-  htmlEndTag "ul"
-  return $ BulletList items
-
-definitionList = try $ do
-  failIfStrict  -- def lists not part of standard markdown
-  tag <- htmlTag "dl"
-  spaces
-  items <- sepEndBy1 definitionListItem spaces
-  htmlEndTag "dl"
-  return $ DefinitionList items
-
-definitionListItem = try $ do
-  terms <- sepEndBy1 (inlinesIn "dt") spaces
-  defs <- sepEndBy1 (blocksIn "dd") spaces
-  let term = joinWithSep [LineBreak] terms
-  return (term, concat defs)
-
---
--- paragraph block
---
-
-para = try $ htmlTag "p" >> inlinesTilEnd "p" >>= 
-             return . Para . normalizeSpaces
-
--- 
--- plain block
---
-
-plain = many1 inline >>= return . Plain . normalizeSpaces
-
--- 
--- inline
---
-
-inline = choice [ charRef
-                , strong
-                , emph
-                , superscript
-                , subscript
-                , strikeout
-                , spanStrikeout
-                , code
-                , str
-                , linebreak
-                , whitespace
-                , link
-                , image
-                , rawHtmlInline
-                ] <?> "inline"
-
-code = try $ do 
-  htmlTag "code"
-  result <- manyTill anyChar (htmlEndTag "code")
-  -- remove internal line breaks, leading and trailing space,
-  -- and decode character references
-  return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ 
-                  joinWithSep " " $ lines result 
-
-rawHtmlInline = do
-  result <- htmlScript <|> htmlComment <|> anyHtmlInlineTag
-  state <- getState
-  if stateParseRaw state then return (HtmlInline result) else return (Str "")
-
-betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= 
-                        return . normalizeSpaces
-
-emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph
-
-strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
-
-superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
-
-subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
-
-strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
-            return . Strikeout
-
-spanStrikeout = try $ do
-  failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
-  (tag, attributes) <- htmlTag "span" 
-  result <- case (extractAttribute "class" attributes) of
-              Just "strikeout" -> inlinesTilEnd "span"
-              _                -> fail "not a strikeout"
-  return $ Strikeout result
-
-whitespace = many1 space >> return Space
-
--- hard line break
-linebreak = htmlTag "br" >> optional newline >> return LineBreak
-
-str = many1 (noneOf "<& \t\n") >>= return . Str
-
---
--- links and images
---
-
--- extract contents of attribute (attribute names are case-insensitive)
-extractAttribute name [] = Nothing
-extractAttribute name ((attrName, contents):rest) = 
-  let name'     = map toLower name 
-      attrName' = map toLower attrName
-  in  if attrName' == name'
-         then Just (decodeCharacterReferences contents)
-         else extractAttribute name rest
-
-link = try $ do
-  (tag, attributes) <- htmlTag "a"  
-  url <- case (extractAttribute "href" attributes) of
-           Just url -> return url
-           Nothing  -> fail "no href"
-  let title = fromMaybe "" $ extractAttribute "title" attributes
-  label <- inlinesTilEnd "a"
-  return $ Link (normalizeSpaces label) (url, title)
-
-image = try $ do
-  (tag, attributes) <- htmlTag "img" 
-  url <- case (extractAttribute "src" attributes) of
-           Just url -> return url
-           Nothing  -> fail "no src"
-  let title = fromMaybe "" $ extractAttribute "title" attributes
-  let alt = fromMaybe "" (extractAttribute "alt" attributes)
-  return $ Image [Str alt] (url, title)
-
diff --git a/Text/Pandoc/Readers/LaTeX.hs b/Text/Pandoc/Readers/LaTeX.hs
deleted file mode 100644
index 37cc2bfe4..000000000
--- a/Text/Pandoc/Readers/LaTeX.hs
+++ /dev/null
@@ -1,651 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Readers.LaTeX
-   Copyright   : Copyright (C) 2006-7 John MacFarlane
-   License     : GNU GPL, version 2 or above 
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Conversion of LaTeX to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.LaTeX ( 
-                                  readLaTeX,
-                                  rawLaTeXInline,
-                                  rawLaTeXEnvironment
-                                 ) where
-
-import Text.ParserCombinators.Parsec
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared 
-import Data.Maybe ( fromMaybe )
-import Data.Char ( chr )
-import Data.List ( isPrefixOf, isSuffixOf )
-
--- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: ParserState   -- ^ Parser state, including options for parser
-          -> String        -- ^ String to parse
-          -> Pandoc
-readLaTeX = readWith parseLaTeX
-
--- characters with special meaning
-specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"
-
---
--- utility functions
---
-
--- | Returns text between brackets and its matching pair.
-bracketedText openB closeB = do
-  result <- charsInBalanced' openB closeB
-  return $ [openB] ++ result ++ [closeB]
-
--- | Returns an option or argument of a LaTeX command.
-optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
-
--- | True if the string begins with '{'.
-isArg ('{':rest) = True
-isArg other = False
-
--- | Returns list of options and arguments of a LaTeX command.
-commandArgs = many optOrArg
-
--- | Parses LaTeX command, returns (name, star, list of options or arguments).
-command = do
-  char '\\'
-  name <- many1 letter
-  star <- option "" (string "*")  -- some commands have starred versions
-  args <- commandArgs
-  return (name, star, args)
-
-begin name = try $ do
-  string $ "\\begin{" ++ name ++ "}"
-  optional commandArgs
-  spaces
-  return name
-
-end name = try $ do
-  string $ "\\end{" ++ name ++ "}"
-  spaces
-  return name
-
--- | Returns a list of block elements containing the contents of an
--- environment.
-environment name = try $ begin name >> spaces >> manyTill block (end name)
-
-anyEnvironment =  try $ do
-  string "\\begin{"
-  name <- many letter
-  star <- option "" (string "*") -- some environments have starred variants
-  char '}'
-  optional commandArgs
-  spaces
-  contents <- manyTill block (end (name ++ star))
-  return $ BlockQuote contents
-
---
--- parsing documents
---
-
--- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble = try $ manyTill 
-  (choice [bibliographic, comment, unknownCommand, nullBlock]) 
-  (try (string "\\begin{document}")) >> 
-  spaces
-
--- | Parse LaTeX and return 'Pandoc'.
-parseLaTeX = do
-  optional processLaTeXPreamble -- preamble might not be present (fragment)
-  spaces
-  blocks <- parseBlocks
-  spaces
-  optional $ try (string "\\end{document}" >> many anyChar) 
-  -- might not be present (fragment)
-  spaces
-  eof
-  state <- getState
-  let blocks' = filter (/= Null) blocks
-  let title' = stateTitle state
-  let authors' = stateAuthors state
-  let date' = stateDate state
-  return $ Pandoc (Meta title' authors' date')  blocks'
-
---
--- parsing blocks
---
-
-parseBlocks = spaces >> many block
-
-block = choice [ hrule
-               , codeBlock
-               , header
-               , list
-               , blockQuote
-               , mathBlock
-               , comment
-               , bibliographic
-               , para
-               , specialEnvironment
-               , itemBlock
-               , unknownEnvironment
-               , unknownCommand ] <?> "block"
-
---
--- header blocks
---
-
-header = try $ do
-  char '\\'
-  subs <- many (try (string "sub"))
-  string "section"
-  optional (char '*')
-  char '{'
-  title <- manyTill inline (char '}')
-  spaces
-  return $ Header (length subs + 1) (normalizeSpaces title)
-
---
--- hrule block
---
-
-hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", 
-                       "\\newpage" ] >> spaces >> return HorizontalRule
-
---
--- code blocks
---
-
-codeBlock = codeBlock1 <|> codeBlock2
-
-codeBlock1 = try $ do
-  string "\\begin{verbatim}"  -- don't use begin function because it 
-                              -- gobbles whitespace
-  optional blanklines         -- we want to gobble blank lines, but not 
-                              -- leading space
-  contents <- manyTill anyChar (try (string "\\end{verbatim}"))
-  spaces
-  return $ CodeBlock (stripTrailingNewlines contents)
-
-codeBlock2 = try $ do
-  string "\\begin{Verbatim}"  -- used by fancyvrb package
-  option "" blanklines
-  contents <- manyTill anyChar (try (string "\\end{Verbatim}"))
-  spaces
-  return $ CodeBlock (stripTrailingNewlines contents)
-
---
--- block quotes
---
-
-blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= 
-             return . BlockQuote
-
---
--- math block
---
-
-mathBlock = mathBlockWith (begin "equation") (end "equation") <|> 
-            mathBlockWith (begin "displaymath") (end "displaymath") <|>
-            mathBlockWith (string "\\[") (string "\\]") <?> "math block"
-
-mathBlockWith start end = try $ do
-  start
-  spaces
-  result <- manyTill anyChar end
-  spaces
-  return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]]
-
---
--- list blocks
---
-
-list = bulletList <|> orderedList <|> definitionList <?> "list"
-
-listItem = try $ do
-  ("item", _, args) <- command
-  spaces
-  state <- getState
-  let oldParserContext = stateParserContext state
-  updateState (\state -> state {stateParserContext = ListItemState})
-  blocks <- many block
-  updateState (\state -> state {stateParserContext = oldParserContext})
-  opt <- case args of
-           ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> 
-                       parseFromString (many inline) $ tail $ init x
-           _        -> return []
-  return (opt, blocks)
-
-orderedList = try $ do
-  string "\\begin{enumerate}"
-  (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
-                              try $ do failIfStrict
-                                       char '['
-                                       res <- anyOrderedListMarker
-                                       char ']'
-                                       return res
-  spaces
-  option "" $ try $ do string "\\setlength{\\itemindent}"
-                       char '{'
-                       manyTill anyChar (char '}')
-  spaces
-  start <- option 1 $ try $ do failIfStrict
-                               string "\\setcounter{enum"
-                               many1 (oneOf "iv")
-                               string "}{"
-                               num <- many1 digit
-                               char '}' 
-                               spaces
-                               return $ (read num) + 1
-  items <- many listItem
-  end "enumerate"
-  spaces
-  return $ OrderedList (start, style, delim) $ map snd items
-
-bulletList = try $ do
-  begin "itemize"
-  spaces
-  items <- many listItem
-  end "itemize"
-  spaces
-  return (BulletList $ map snd items)
-
-definitionList = try $ do
-  begin "description"
-  spaces
-  items <- many listItem
-  end "description"
-  spaces
-  return (DefinitionList items)
-
---
--- paragraph block
---
-
-para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
-
---
--- title authors date
---
-
-bibliographic = choice [ maketitle, title, authors, date ]
-
-maketitle = try (string "\\maketitle") >> spaces >> return Null
-
-title = try $ do
-  string "\\title{"
-  tit <- manyTill inline (char '}')
-  spaces
-  updateState (\state -> state { stateTitle = tit })
-  return Null
-
-authors = try $ do
-  string "\\author{"
-  authors <- manyTill anyChar (char '}')
-  spaces
-  let authors' = map removeLeadingTrailingSpace $ lines $
-                 substitute "\\\\" "\n" authors
-  updateState (\state -> state { stateAuthors = authors' })
-  return Null
-
-date = try $ do
-  string "\\date{"
-  date' <- manyTill anyChar (char '}')
-  spaces
-  updateState (\state -> state { stateDate = date' })
-  return Null
-
---
--- item block
--- for use in unknown environments that aren't being parsed as raw latex
---
-
--- this forces items to be parsed in different blocks
-itemBlock = try $ do
-  ("item", _, args) <- command
-  state <- getState
-  if (stateParserContext state == ListItemState)
-     then fail "item should be handled by list block"
-     else if null args 
-             then return Null
-             else return $ Plain [Str (stripFirstAndLast (head args))]
-
---
--- raw LaTeX 
---
-
-specialEnvironment = do  -- these are always parsed as raw
-  lookAhead (choice (map (\name -> begin name)  ["tabular", "figure",
-              "tabbing", "eqnarry", "picture", "table", "verse", "theorem"]))
-  rawLaTeXEnvironment
-
--- | Parse any LaTeX environment and return a Para block containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment :: GenParser Char st Block
-rawLaTeXEnvironment = try $ do
-  string "\\begin{"
-  name <- many1 letter
-  star <- option "" (string "*") -- for starred variants
-  let name' = name ++ star
-  char '}'
-  args <- option [] commandArgs
-  let argStr = concat args
-  contents <- manyTill (choice [ (many1 (noneOf "\\")), 
-                                 (do 
-                                    (Para [TeX str]) <- rawLaTeXEnvironment
-                                    return str),
-                                 string "\\" ]) 
-                       (end name')
-  spaces
-  return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++ 
-                 concat contents ++ "\\end{" ++ name' ++ "}"]
-
-unknownEnvironment = try $ do
-  state <- getState
-  result <- if stateParseRaw state -- check whether we should include raw TeX 
-               then rawLaTeXEnvironment -- if so, get whole raw environment
-               else anyEnvironment      -- otherwise just the contents
-  return result
-
-unknownCommand = try $ do
-  notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", 
-                                     "document"]
-  (name, star, args) <- command
-  spaces
-  let argStr = concat args
-  state <- getState
-  if name == "item" && (stateParserContext state) == ListItemState
-     then fail "should not be parsed as raw"
-     else string ""
-  if stateParseRaw state
-     then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
-     else return $ Plain [Str (joinWithSep " " args)]
-
--- latex comment
-comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
-
--- 
--- inline
---
-
-inline =  choice [ str
-                 , endline
-                 , whitespace
-                 , quoted
-                 , apostrophe
-                 , spacer
-                 , strong
-                 , math
-                 , ellipses
-                 , emDash
-                 , enDash
-                 , hyphen
-                 , emph
-                 , strikeout
-                 , superscript
-                 , subscript
-                 , ref
-                 , lab
-                 , code
-                 , url
-                 , link
-                 , image
-                 , footnote
-                 , linebreak
-                 , accentedChar
-                 , specialChar
-                 , rawLaTeXInline
-                 , escapedChar
-                 , unescapedChar
-                 ] <?> "inline"
-
-accentedChar = normalAccentedChar <|> specialAccentedChar
-
-normalAccentedChar = try $ do
-  char '\\'
-  accent <- oneOf "'`^\"~"
-  character <- (try $ char '{' >> letter >>~ char '}') <|> letter
-  let table = fromMaybe [] $ lookup character accentTable 
-  let result = case lookup accent table of
-                 Just num  -> chr num
-                 Nothing   -> '?'
-  return $ Str [result]
-
--- an association list of letters and association list of accents
--- and decimal character numbers.
-accentTable = 
-  [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
-    ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
-    ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
-    ('N', [('~', 209)]),
-    ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
-    ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
-    ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
-    ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
-    ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
-    ('n', [('~', 241)]),
-    ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
-    ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
-
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
-                               oslash, pound, euro, copyright, sect ]
-
-ccedil = try $ do
-  char '\\'
-  letter <- oneOfStrings ["cc", "cC"]
-  let num = if letter == "cc" then 231 else 199
-  return $ Str [chr num]
-
-aring = try $ do
-  char '\\'
-  letter <- oneOfStrings ["aa", "AA"]
-  let num = if letter == "aa" then 229 else 197
-  return $ Str [chr num]
-
-iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> 
-       return (Str [chr 239])
-
-icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >>
-        return (Str [chr 238])
-
-szlig = try (string "\\ss") >> return (Str [chr 223])
-
-oslash = try $ do
-  char '\\'
-  letter <- choice [char 'o', char 'O']
-  let num = if letter == 'o' then 248 else 216
-  return $ Str [chr num]
-
-aelig = try $ do
-  char '\\'
-  letter <- oneOfStrings ["ae", "AE"]
-  let num = if letter == "ae" then 230 else 198
-  return $ Str [chr num]
-
-pound = try (string "\\pounds") >> return (Str [chr 163])
-
-euro = try (string "\\euro") >> return (Str [chr 8364])
-
-copyright = try (string "\\copyright") >> return (Str [chr 169])
-
-sect = try (string "\\S") >> return (Str [chr 167])
-
-escapedChar = do
-  result <- escaped (oneOf " $%&_#{}\n")
-  return $ if result == Str "\n" then Str " " else result
-
--- ignore standalone, nonescaped special characters
-unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "")
-
-specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
-
-backslash = try (string "\\textbackslash") >> return (Str "\\")
-
-tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
-
-caret = try (string "\\^{}") >> return (Str "^")
-
-bar = try (string "\\textbar") >> return (Str "\\")
-
-lt = try (string "\\textless") >> return (Str "<")
-
-gt = try (string "\\textgreater") >> return (Str ">")
-
-doubleQuote = char '"' >> return (Str "\"")
-
-code = code1 <|> code2
-
-code1 = try $ do 
-  string "\\verb"
-  marker <- anyChar
-  result <- manyTill anyChar (char marker)
-  return $ Code $ removeLeadingTrailingSpace result
-
-code2 = try $ do
-  string "\\texttt{"
-  result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
-  return $ Code result
-
-emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
-             manyTill inline (char '}') >>= return . Emph
-
-strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
-                  return . Strikeout
-
-superscript = try $ string "\\textsuperscript{" >> 
-                    manyTill inline (char '}') >>= return . Superscript
-
--- note: \textsubscript isn't a standard latex command, but we use
--- a defined version in pandoc.
-subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
-                  return . Subscript
-
-apostrophe = char '\'' >> return Apostrophe
-
-quoted = doubleQuoted <|> singleQuoted
-
-singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
-               return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
-               return . Quoted DoubleQuote . normalizeSpaces
-
-singleQuoteStart = char '`'
-
-singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
-
-doubleQuoteStart = string "``"
-
-doubleQuoteEnd = try $ string "''"
-
-ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >>
-                 return Ellipses
-
-enDash = try (string "--") >> return EnDash
-
-emDash = try (string "---") >> return EmDash
-
-hyphen = char '-' >> return (Str "-")
-
-lab = try $ do
-  string "\\label{"
-  result <- manyTill anyChar (char '}')
-  return $ Str $ "(" ++ result ++ ")"
-
-ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
-
-strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
-         return . Strong
-
-whitespace = many1 (oneOf "~ \t") >> return Space
-
--- hard line break
-linebreak = try (string "\\\\") >> return LineBreak
-
-spacer = try (string "\\,") >> return (Str "")
-
-str = many1 (noneOf specialChars) >>= return . Str
-
--- endline internal to paragraph
-endline = try $ newline >> notFollowedBy blankline >> return Space
-
--- math
-math = math1 <|> math2 <?> "math"
-
-math1 = try $ do
-  char '$'
-  result <- many (noneOf "$")
-  char '$'
-  return $ TeX ("$" ++ result ++ "$")
-
-math2 = try $ do
-  string "\\("
-  result <- many (noneOf "$")
-  string "\\)"
-  return $ TeX ("$" ++ result ++ "$")
-
---
--- links and images
---
-
-url = try $ do
-  string "\\url"
-  url <- charsInBalanced '{' '}'
-  return $ Link [Code url] (url, "")
-
-link = try $ do
-  string "\\href{"
-  url <- manyTill anyChar (char '}')
-  char '{'
-  label <- manyTill inline (char '}') 
-  return $ Link (normalizeSpaces label) (url, "")
-
-image = try $ do
-  ("includegraphics", _, args) <- command
-  let args' = filter isArg args -- filter out options
-  let src = if null args' then
-              ("", "")
-            else
-              (stripFirstAndLast (head args'), "")
-  return $ Image [Str "image"] src
-
-footnote = try $ do
-  (name, _, (contents:[])) <- command
-  if ((name == "footnote") || (name == "thanks"))
-     then string ""
-     else fail "not a footnote or thanks command"
-  let contents' = stripFirstAndLast contents
-  -- parse the extracted block, which may contain various block elements:
-  rest <- getInput
-  setInput $ contents'
-  blocks <- parseBlocks
-  setInput rest
-  return $ Note blocks
-
--- | Parse any LaTeX command and return it in a raw TeX inline element.
-rawLaTeXInline :: GenParser Char ParserState Inline
-rawLaTeXInline = try $ do
-  (name, star, args) <- command
-  state <- getState
-  if ((name == "begin") || (name == "end") || (name == "item"))
-     then fail "not an inline command" 
-     else string ""
-  return $ TeX ("\\" ++ name ++ star ++ concat args)
-
diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs
deleted file mode 100644
index df84c0ac7..000000000
--- a/Text/Pandoc/Readers/Markdown.hs
+++ /dev/null
@@ -1,909 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Readers.Markdown
-   Copyright   : Copyright (C) 2006-7 John MacFarlane
-   License     : GNU GPL, version 2 or above 
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Conversion of markdown-formatted plain text to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.Markdown ( 
-                                     readMarkdown 
-                                    ) where
-
-import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy )
-import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum )
-import Network.URI ( isURI )
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared 
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
-import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, 
-                                  anyHtmlInlineTag, anyHtmlTag,
-                                  anyHtmlEndTag, htmlEndTag, extractTagType,
-                                  htmlBlockElement )
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
-import Text.ParserCombinators.Parsec
-
--- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: ParserState -> String -> Pandoc
-readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
-
---
--- Constants and data structure definitions
---
-
-spaceChars = " \t"
-bulletListMarkers = "*+-"
-hruleChars = "*-_"
-setextHChars = "=-"
-
--- treat these as potentially non-text when parsing inline:
-specialChars = "\\[]*_~`<>$!^-.&'\""
-
---
--- auxiliary functions
---
-
-indentSpaces = try $ do
-  state <- getState
-  let tabStop = stateTabStop state
-  try (count tabStop (char ' ')) <|> 
-    (many (char ' ') >> string "\t") <?> "indentation"
-
-nonindentSpaces = do
-  state <- getState
-  let tabStop = stateTabStop state
-  sps <- many (char ' ')
-  if length sps < tabStop 
-     then return sps
-     else unexpected "indented line"
-
--- | Fail unless we're at beginning of a line.
-failUnlessBeginningOfLine = do
-  pos <- getPosition
-  if sourceColumn pos == 1 then return () else fail "not beginning of line"
-
--- | Fail unless we're in "smart typography" mode.
-failUnlessSmart = do
-  state <- getState
-  if stateSmart state then return () else fail "Smart typography feature"
-
--- | Parse an inline Str element with a given content.
-inlineString str = try $ do 
-  (Str res) <- inline 
-  if res == str then return res else fail $ "unexpected Str content"
-
--- | Parse a sequence of inline elements between a string
---  @opener@ and a string @closer@, including inlines
---  between balanced pairs of @opener@ and a @closer@.
-inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
-inlinesInBalanced opener closer = try $ do
-  string opener
-  result <- manyTill ( (do lookAhead (inlineString opener)
-                           -- because it might be a link...
-                           bal <- inlinesInBalanced opener closer 
-                           return $ [Str opener] ++ bal ++ [Str closer])
-                       <|> (count 1 inline)) 
-                     (try (string closer))
-  return $ concat result
-
---
--- document structure
---
-
-titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
-
-authorsLine = try $ do 
-  char '%'
-  skipSpaces
-  authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
-  newline
-  return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
-
-dateLine = try $ do
-  char '%'
-  skipSpaces
-  date <- many (noneOf "\n")
-  newline
-  return $ decodeCharacterReferences $ removeTrailingSpace date
-
-titleBlock = try $ do
-  failIfStrict
-  title <- option [] titleLine
-  author <- option [] authorsLine
-  date <- option "" dateLine
-  optional blanklines
-  return (title, author, date)
-
-parseMarkdown = do
-  -- markdown allows raw HTML
-  updateState (\state -> state { stateParseRaw = True })
-  startPos <- getPosition
-  -- go through once just to get list of reference keys
-  -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= 
-                  return . concat
-  setInput docMinusKeys
-  setPosition startPos
-  st <- getState
-  -- go through again for notes unless strict...
-  if stateStrict st
-     then return ()
-     else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= 
-                              return . concat
-             st <- getState
-             let reversedNotes = stateNotes st
-             updateState $ \st -> st { stateNotes = reverse reversedNotes }
-             setInput docMinusNotes
-             setPosition startPos
-  -- now parse it for real...
-  (title, author, date) <- option ([],[],"") titleBlock
-  blocks <- parseBlocks 
-  return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
-
--- 
--- initial pass for references and notes
---
-
-referenceKey = try $ do
-  startPos <- getPosition
-  nonindentSpaces
-  label <- reference
-  char ':'
-  skipSpaces
-  optional (char '<')
-  src <- many (noneOf "> \n\t")
-  optional (char '>')
-  tit <- option "" referenceTitle
-  blanklines
-  endPos <- getPosition
-  let newkey = (label, (removeTrailingSpace src,  tit))
-  st <- getState
-  let oldkeys = stateKeys st
-  updateState $ \st -> st { stateKeys = newkey : oldkeys }
-  -- return blanks so line count isn't affected
-  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
-referenceTitle = try $ do 
-  (many1 spaceChar >> option '\n' newline) <|> newline
-  skipSpaces
-  tit <-    (charsInBalanced '(' ')' >>= return . unwords . words)
-        <|> do delim <- char '\'' <|> char '"'
-               manyTill anyChar (try (char delim >> skipSpaces >>
-                                      notFollowedBy (noneOf ")\n")))
-  return $ decodeCharacterReferences tit
-
-noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']')
-
-rawLine = do
-  notFollowedBy blankline
-  notFollowedBy' noteMarker
-  contents <- many1 nonEndline
-  end <- option "" (newline >> optional indentSpaces >> return "\n") 
-  return $ contents ++ end
-
-rawLines = many1 rawLine >>= return . concat
-
-noteBlock = try $ do
-  startPos <- getPosition
-  ref <- noteMarker
-  char ':'
-  optional blankline
-  optional indentSpaces
-  raw <- sepBy rawLines (try (blankline >> indentSpaces))
-  optional blanklines
-  endPos <- getPosition
-  -- parse the extracted text, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
-  let newnote = (ref, contents)
-  st <- getState
-  let oldnotes = stateNotes st
-  updateState $ \st -> st { stateNotes = newnote : oldnotes }
-  -- return blanks so line count isn't affected
-  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
---
--- parsing blocks
---
-
-parseBlocks = manyTill block eof
-
-block = choice [ header 
-               , table
-               , codeBlock
-               , hrule
-               , list
-               , blockQuote
-               , htmlBlock
-               , rawLaTeXEnvironment'
-               , para
-               , plain
-               , nullBlock ] <?> "block"
-
---
--- header blocks
---
-
-header = atxHeader <|> setextHeader <?> "header"
-
-atxHeader = try $ do
-  level <- many1 (char '#') >>= return . length
-  notFollowedBy (char '.' <|> char ')') -- this would be a list
-  skipSpaces
-  text <- manyTill inline atxClosing >>= return . normalizeSpaces
-  return $ Header level text
-
-atxClosing = try $ skipMany (char '#') >> blanklines
-
-setextHeader = try $ do
-  -- first, see if this block has any chance of being a setextHeader:
-  lookAhead (anyLine >> oneOf setextHChars)
-  text <- many1Till inline newline >>= return . normalizeSpaces
-  level <- choice $ zipWith 
-           (\ch lev -> try (many1 $ char ch) >> blanklines >> return lev)
-           setextHChars [1..(length setextHChars)]
-  return $ Header level text
-
---
--- hrule block
---
-
-hrule = try $ do
-  skipSpaces
-  start <- oneOf hruleChars
-  count 2 (skipSpaces >> char start)
-  skipMany (skipSpaces >> char start)
-  newline
-  optional blanklines
-  return HorizontalRule
-
---
--- code blocks
---
-
-indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
-
-codeBlock = do
-  contents <- many1 (indentedLine <|> 
-                     try (do b <- blanklines
-                             l <- indentedLine
-                             return $ b ++ l))
-  optional blanklines
-  return $ CodeBlock $ stripTrailingNewlines $ concat contents
-
---
--- block quotes
---
-
-emacsBoxQuote = try $ do
-  failIfStrict
-  string ",----"
-  manyTill anyChar newline
-  raw <- manyTill 
-         (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
-         (try (string "`----"))
-  blanklines
-  return raw
-
-emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
-
-emailBlockQuote = try $ do
-  emailBlockQuoteStart
-  raw <- sepBy (many (nonEndline <|> 
-                      (try (endline >> notFollowedBy emailBlockQuoteStart >>
-                       return '\n'))))
-               (try (newline >> emailBlockQuoteStart))
-  newline <|> (eof >> return '\n')
-  optional blanklines
-  return raw
-
-blockQuote = do 
-  raw <- emailBlockQuote <|> emacsBoxQuote
-  -- parse the extracted block, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
-  return $ BlockQuote contents
- 
---
--- list blocks
---
-
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-bulletListStart = try $ do
-  optional newline -- if preceded by a Plain block in a list context
-  nonindentSpaces
-  notFollowedBy' hrule     -- because hrules start out just like lists
-  oneOf bulletListMarkers
-  spaceChar
-  skipSpaces
-
-anyOrderedListStart = try $ do
-  optional newline -- if preceded by a Plain block in a list context
-  nonindentSpaces
-  notFollowedBy $ string "p." >> spaceChar >> digit  -- page number
-  state <- getState
-  if stateStrict state
-     then do many1 digit
-             char '.'
-             spaceChar
-             return (1, DefaultStyle, DefaultDelim)
-     else anyOrderedListMarker >>~ spaceChar
-
-orderedListStart style delim = try $ do
-  optional newline -- if preceded by a Plain block in a list context
-  nonindentSpaces
-  state <- getState
-  num <- if stateStrict state
-            then do many1 digit
-                    char '.'
-                    return 1
-            else orderedListMarker style delim 
-  if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
-     num `elem` [1, 5, 10, 50, 100, 500, 1000]))
-     then char '\t' <|> (spaceChar >> spaceChar)
-     else spaceChar
-  skipSpaces
-
--- parse a line of a list item (start = parser for beginning of list item)
-listLine start = try $ do
-  notFollowedBy' start
-  notFollowedBy blankline
-  notFollowedBy' (do indentSpaces
-                     many (spaceChar)
-                     bulletListStart <|> (anyOrderedListStart >> return ()))
-  line <- manyTill anyChar newline
-  return $ line ++ "\n"
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem start = try $ do
-  start
-  result <- many1 (listLine start)
-  blanks <- many blankline
-  return $ concat result ++ blanks
-
--- continuation of a list item - indented and separated by blankline 
--- or (in compact lists) endline.
--- note: nested lists are parsed as continuations
-listContinuation start = try $ do
-  lookAhead indentSpaces
-  result <- many1 (listContinuationLine start)
-  blanks <- many blankline
-  return $ concat result ++ blanks
-
-listContinuationLine start = try $ do
-  notFollowedBy blankline
-  notFollowedBy' start
-  optional indentSpaces
-  result <- manyTill anyChar newline
-  return $ result ++ "\n"
-
-listItem start = try $ do 
-  first <- rawListItem start
-  continuations <- many (listContinuation start)
-  -- parsing with ListItemState forces markers at beginning of lines to
-  -- count as list item markers, even if not separated by blank space.
-  -- see definition of "endline"
-  state <- getState
-  let oldContext = stateParserContext state
-  setState $ state {stateParserContext = ListItemState}
-  -- parse the extracted block, which may contain various block elements:
-  let raw = concat (first:continuations)
-  contents <- parseFromString parseBlocks raw
-  updateState (\st -> st {stateParserContext = oldContext})
-  return contents
-
-orderedList = try $ do
-  (start, style, delim) <- lookAhead anyOrderedListStart
-  items <- many1 (listItem (orderedListStart style delim))
-  return $ OrderedList (start, style, delim) $ compactify items
-
-bulletList = many1 (listItem bulletListStart) >>= 
-             return . BulletList . compactify
-
--- definition lists
-
-definitionListItem = try $ do
-  notFollowedBy blankline
-  notFollowedBy' indentSpaces
-  -- first, see if this has any chance of being a definition list:
-  lookAhead (anyLine >> char ':')
-  term <- manyTill inline newline
-  raw <- many1 defRawBlock
-  state <- getState
-  let oldContext = stateParserContext state
-  -- parse the extracted block, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ concat raw
-  updateState (\st -> st {stateParserContext = oldContext})
-  return ((normalizeSpaces term), contents)
-
-defRawBlock = try $ do
-  char ':'
-  state <- getState
-  let tabStop = stateTabStop state
-  try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
-  firstline <- anyLine
-  rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
-  trailing <- option "" blanklines
-  return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
-
-definitionList = do
-  failIfStrict
-  items <- many1 definitionListItem
-  let (terms, defs) = unzip items
-  let defs' = compactify defs
-  let items' = zip terms defs'
-  return $ DefinitionList items'
-
---
--- paragraph block
---
-
-para = try $ do 
-  result <- many1 inline
-  newline
-  blanklines <|> do st <- getState
-                    if stateStrict st
-                       then lookAhead (blockQuote <|> header) >> return ""
-                       else lookAhead emacsBoxQuote >> return ""
-  return $ Para $ normalizeSpaces result
-
-plain = many1 inline >>= return . Plain . normalizeSpaces 
-
--- 
--- raw html
---
-
-htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
-
-htmlBlock = do
-  st <- getState
-  if stateStrict st
-    then try $ do failUnlessBeginningOfLine
-                  first <- htmlElement
-                  finalSpace <- many (oneOf spaceChars)
-                  finalNewlines <- many newline
-                  return $ RawHtml $ first ++ finalSpace ++ finalNewlines
-    else rawHtmlBlocks
-
--- True if tag is self-closing
-isSelfClosing tag = 
-  isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
-
-strictHtmlBlock = try $ do
-  tag <- anyHtmlBlockTag 
-  let tag' = extractTagType tag
-  if isSelfClosing tag || tag' == "hr" 
-     then return tag
-     else do contents <- many (notFollowedBy' (htmlEndTag tag') >> 
-                               (htmlElement <|> (count 1 anyChar)))
-             end <- htmlEndTag tag'
-             return $ tag ++ concat contents ++ end
-
-rawHtmlBlocks = do
-  htmlBlocks <- many1 rawHtmlBlock    
-  let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
-  let combined' = if not (null combined) && last combined == '\n'
-                     then init combined  -- strip extra newline 
-                     else combined 
-  return $ RawHtml combined'
-
---
--- LaTeX
---
-
-rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
-
---
--- Tables
--- 
-
--- Parse a dashed line with optional trailing spaces; return its length
--- and the length including trailing space.
-dashedLine ch = do
-  dashes <- many1 (char ch)
-  sp     <- many spaceChar
-  return $ (length dashes, length $ dashes ++ sp)
-
--- Parse a table header with dashed lines of '-' preceded by 
--- one line of text.
-simpleTableHeader = try $ do
-  rawContent  <- anyLine
-  initSp      <- nonindentSpaces
-  dashes      <- many1 (dashedLine '-')
-  newline
-  let (lengths, lines) = unzip dashes
-  let indices  = scanl (+) (length initSp) lines
-  let rawHeads = tail $ splitByIndices (init indices) rawContent
-  let aligns   = zipWith alignType (map (\a -> [a]) rawHeads) lengths
-  return (rawHeads, aligns, indices)
-
--- Parse a table footer - dashed lines followed by blank line.
-tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
-
--- Parse a table separator - dashed line.
-tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
-
--- Parse a raw line and split it into chunks by indices.
-rawTableLine indices = do
-  notFollowedBy' (blanklines <|> tableFooter)
-  line <- many1Till anyChar newline
-  return $ map removeLeadingTrailingSpace $ tail $ 
-           splitByIndices (init indices) line
-
--- Parse a table line and return a list of lists of blocks (columns).
-tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
-
--- Parse a multiline table row and return a list of blocks (columns).
-multilineRow indices = do
-  colLines <- many1 (rawTableLine indices)
-  optional blanklines
-  let cols = map unlines $ transpose colLines
-  mapM (parseFromString (many plain)) cols
-
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int     -- Number of columns on terminal
-                  -> [Int]   -- Indices
-                  -> [Float] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []  
-widthsFromIndices numColumns indices = 
-  let lengths = zipWith (-) indices (0:indices)
-      totLength = sum lengths
-      quotient = if totLength > numColumns
-                   then fromIntegral totLength
-                   else fromIntegral numColumns
-      fracs = map (\l -> (fromIntegral l) / quotient) lengths in
-  tail fracs
-
--- Parses a table caption:  inlines beginning with 'Table:'
--- and followed by blank lines.
-tableCaption = try $ do
-  nonindentSpaces
-  string "Table:"
-  result <- many1 inline
-  blanklines
-  return $ normalizeSpaces result
-
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith headerParser lineParser footerParser = try $ do
-    (rawHeads, aligns, indices) <- headerParser
-    lines <- many1Till (lineParser indices) footerParser
-    caption <- option [] tableCaption
-    heads <- mapM (parseFromString (many plain)) rawHeads
-    state <- getState
-    let numColumns = stateColumns state
-    let widths = widthsFromIndices numColumns indices
-    return $ Table caption aligns widths heads lines
-
--- Parse a simple table with '---' header and one line per row.
-simpleTable = tableWith simpleTableHeader tableLine blanklines
-
--- Parse a multiline table:  starts with row of '-' on top, then header
--- (which may be multiline), then the rows,
--- which may be multiline, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
-multilineTable = tableWith multilineTableHeader multilineRow tableFooter
-
-multilineTableHeader = try $ do
-  tableSep 
-  rawContent  <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
-  initSp      <- nonindentSpaces
-  dashes      <- many1 (dashedLine '-')
-  newline
-  let (lengths, lines) = unzip dashes
-  let indices  = scanl (+) (length initSp) lines
-  let rawHeadsList = transpose $ map 
-                     (\ln -> tail $ splitByIndices (init indices) ln)
-                     rawContent
-  let rawHeads = map (joinWithSep " ") rawHeadsList
-  let aligns   = zipWith alignType rawHeadsList lengths
-  return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
-
--- Returns an alignment type for a table, based on a list of strings
--- (the rows of the column header) and a number (the length of the
--- dashed line under the rows.
-alignType :: [String] -> Int -> Alignment
-alignType []  len = AlignDefault
-alignType strLst len =
-  let str        = head $ sortBy (comparing length) $ 
-                          map removeTrailingSpace strLst
-      leftSpace  = if null str then False else (str !! 0) `elem` " \t"
-      rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
-  in  case (leftSpace, rightSpace) of
-        (True,  False)   -> AlignRight
-        (False, True)    -> AlignLeft
-        (True,  True)    -> AlignCenter
-        (False, False)   -> AlignDefault
-
-table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
-
--- 
--- inline
---
-
-inline = choice [ str
-                , smartPunctuation
-                , whitespace
-                , endline
-                , code
-                , charRef
-                , strong
-                , emph
-                , note
-                , inlineNote
-                , link
-                , image
-                , math
-                , strikeout
-                , superscript
-                , subscript
-                , autoLink
-                , rawHtmlInline'
-                , rawLaTeXInline'
-                , escapedChar
-                , symbol
-                , ltSign ] <?> "inline"
-
-escapedChar = do
-  char '\\'
-  state <- getState
-  result <- option '\\' $ if stateStrict state 
-                             then oneOf "\\`*_{}[]()>#+-.!~"
-                             else satisfy (not . isAlphaNum)
-  return $ Str [result]
-
-ltSign = do
-  st <- getState
-  if stateStrict st
-     then char '<'
-     else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
-  return $ Str ['<']
-
-specialCharsMinusLt = filter (/= '<') specialChars
-
-symbol = do 
-  result <- oneOf specialCharsMinusLt
-  return $ Str [result]
-
--- parses inline code, between n `s and n `s
-code = try $ do 
-  starts <- many1 (char '`')
-  skipSpaces
-  result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
-                       (char '\n' >> return " ")) 
-                      (try (skipSpaces >> count (length starts) (char '`') >> 
-                      notFollowedBy (char '`')))
-  return $ Code $ removeLeadingTrailingSpace $ concat result
-
-mathWord = many1 ((noneOf " \t\n\\$") <|>
-                  (try (char '\\') >>~ notFollowedBy (char '$')))
-
-math = try $ do
-  failIfStrict
-  char '$'
-  notFollowedBy space
-  words <- sepBy1 mathWord (many1 space)
-  char '$'
-  return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
-
-emph = ((enclosed (char '*') (char '*') inline) <|>
-        (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>= 
-        return . Emph . normalizeSpaces
-
-strong = ((enclosed (string "**") (try $ string "**") inline) <|> 
-          (enclosed (string "__") (try $ string "__") inline)) >>=
-         return . Strong . normalizeSpaces
-
-strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
-            return . Strikeout . normalizeSpaces
-
-superscript = failIfStrict >> enclosed (char '^') (char '^') 
-              (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
-              return . Superscript
-
-subscript = failIfStrict >> enclosed (char '~') (char '~')
-            (notFollowedBy' whitespace >> inline) >>=  -- may not contain Space
-            return . Subscript 
-
-smartPunctuation = failUnlessSmart >> 
-                   choice [ quoted, apostrophe, dash, ellipses ]
-
-apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
-
-quoted = doubleQuoted <|> singleQuoted 
-
-withQuoteContext context parser = do
-  oldState <- getState
-  let oldQuoteContext = stateQuoteContext oldState
-  setState oldState { stateQuoteContext = context }
-  result <- parser
-  newState <- getState
-  setState newState { stateQuoteContext = oldQuoteContext }
-  return result
-
-singleQuoted = try $ do
-  singleQuoteStart
-  withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
-    return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted = try $ do 
-  doubleQuoteStart
-  withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
-    return . Quoted DoubleQuote . normalizeSpaces
-
-failIfInQuoteContext context = do
-  st <- getState
-  if stateQuoteContext st == context
-     then fail "already inside quotes"
-     else return ()
-
-singleQuoteStart = do 
-  failIfInQuoteContext InSingleQuote
-  char '\8216' <|> 
-     do char '\''  
-        notFollowedBy (oneOf ")!],.;:-? \t\n")
-        notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
-                            satisfy (not . isAlphaNum))) -- possess/contraction
-        return '\''
-
-singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum
-
-doubleQuoteStart = failIfInQuoteContext InDoubleQuote >>
-                   (char '"' <|> char '\8220') >>
-                   notFollowedBy (oneOf " \t\n")
-
-doubleQuoteEnd = char '"' <|> char '\8221'
-
-ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
-
-dash = enDash <|> emDash
-
-enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
-
-emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >>
-               skipSpaces >> return EmDash
-
-whitespace = do
-  sps <- many1 (oneOf spaceChars)
-  if length sps >= 2
-     then option Space (endline >> return LineBreak)
-     else return Space <?> "whitespace"
-
-nonEndline = satisfy (/='\n')
-
-strChar = noneOf (specialChars ++ spaceChars ++ "\n")
-
-str = many1 strChar >>= return . Str
-
--- an endline character that can be treated as a space, not a structural break
-endline = try $ do
-  newline
-  notFollowedBy blankline
-  st <- getState
-  if stateStrict st 
-    then do notFollowedBy emailBlockQuoteStart
-            notFollowedBy (char '#')  -- atx header
-    else return () 
-  -- parse potential list-starts differently if in a list:
-  if stateParserContext st == ListItemState
-     then notFollowedBy' (bulletListStart <|> 
-                          (anyOrderedListStart >> return ()))
-     else return ()
-  return Space
-
---
--- links
---
-
--- a reference label for a link
-reference = notFollowedBy' (string "[^") >>  -- footnote reference
-            inlinesInBalanced "[" "]" >>= (return . normalizeSpaces)
-
--- source for a link, with optional title
-source = try $ do 
-  char '('
-  optional (char '<')
-  src <- many (noneOf ")> \t\n")
-  optional (char '>')
-  tit <- option "" linkTitle
-  skipSpaces
-  char ')'
-  return (removeTrailingSpace src, tit)
-
-linkTitle = try $ do 
-  (many1 spaceChar >> option '\n' newline) <|> newline
-  skipSpaces
-  delim <- char '\'' <|> char '"'
-  tit <-   manyTill anyChar (try (char delim >> skipSpaces >>
-                                  notFollowedBy (noneOf ")\n")))
-  return $ decodeCharacterReferences tit
-
-link = try $ do
-  label <- reference
-  src <- source <|> referenceLink label
-  return $ Link label src
-
--- a link like [this][ref] or [this][] or [this]
-referenceLink label = do
-  ref <- option [] (try (optional (char ' ') >> 
-                         optional (newline >> skipSpaces) >> reference))
-  let ref' = if null ref then label else ref
-  state <- getState
-  case lookupKeySrc (stateKeys state) ref' of
-     Nothing     -> fail "no corresponding key" 
-     Just target -> return target 
-
-emailAddress = try $ do
-  name <- many1 (alphaNum <|> char '+')
-  char '@'
-  first <- many1 alphaNum
-  rest <- many1 (char '.' >> many1 alphaNum)
-  return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest)
-
-uri = try $ do
-  str <- many1 (noneOf "\n\t >")
-  if isURI str
-     then return str
-     else fail "not a URI"
-
-autoLink = try $ do
-  char '<'
-  src <- uri <|> emailAddress
-  char '>'
-  let src' = if "mailto:" `isPrefixOf` src
-                then drop 7 src
-                else src 
-  st <- getState
-  return $ if stateStrict st
-              then Link [Str src'] (src, "")
-              else Link [Code src'] (src, "")
-
-image = try $ do
-  char '!'
-  (Link label src) <- link
-  return $ Image label src
-
-note = try $ do
-  failIfStrict
-  ref <- noteMarker
-  state <- getState
-  let notes = stateNotes state
-  case lookup ref notes of
-    Nothing       -> fail "note not found"
-    Just contents -> return $ Note contents
-
-inlineNote = try $ do
-  failIfStrict
-  char '^'
-  contents <- inlinesInBalanced "[" "]"
-  return $ Note [Para contents]
-
-rawLaTeXInline' = failIfStrict >> rawLaTeXInline
-
-rawHtmlInline' = do
-  st <- getState
-  result <- choice $ if stateStrict st
-                        then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] 
-                        else [htmlBlockElement, anyHtmlInlineTag]
-  return $ HtmlInline result
-
diff --git a/Text/Pandoc/Readers/RST.hs b/Text/Pandoc/Readers/RST.hs
deleted file mode 100644
index 1239eb688..000000000
--- a/Text/Pandoc/Readers/RST.hs
+++ /dev/null
@@ -1,640 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Readers.RST 
-   Copyright   : Copyright (C) 2006-7 John MacFarlane
-   License     : GNU GPL, version 2 or above 
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Conversion from reStructuredText to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.RST ( 
-                                readRST
-                               ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared 
-import Text.ParserCombinators.Parsec
-import Data.List ( findIndex, delete )
-
--- | Parse reStructuredText string and return Pandoc document.
-readRST :: ParserState -> String -> Pandoc
-readRST state str = (readWith parseRST) state (str ++ "\n\n")
-
---
--- Constants and data structure definitions
----
-
-bulletListMarkers = "*+-"
-underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
-
--- treat these as potentially non-text when parsing inline:
-specialChars = "\\`|*_<>$:[-"
-
---
--- parsing documents
---
-
-isAnonKey (ref, src) = ref == [Str "_"]
-
-isHeader :: Int -> Block -> Bool
-isHeader n (Header x _) = x == n
-isHeader _ _ = False
-
--- | Promote all headers in a list of blocks.  (Part of
--- title transformation for RST.)
-promoteHeaders :: Int -> [Block] -> [Block]
-promoteHeaders num ((Header level text):rest) = 
-    (Header (level - num) text):(promoteHeaders num rest)
-promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
-promoteHeaders num [] = []
-
--- | If list of blocks starts with a header (or a header and subheader)
--- of level that are not found elsewhere, return it as a title and
--- promote all the other headers. 
-titleTransform :: [Block]              -- ^ list of blocks
-               -> ([Block], [Inline])  -- ^ modified list of blocks, title
-titleTransform ((Header 1 head1):(Header 2 head2):rest) =  -- title subtitle
-    if (any (isHeader 1) rest) || (any (isHeader 2) rest)
-       then ((Header 1 head1):(Header 2 head2):rest, [])
-       else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
-titleTransform ((Header 1 head1):rest) =       -- title, no subtitle
-    if (any (isHeader 1) rest)
-       then ((Header 1 head1):rest, [])
-       else ((promoteHeaders 1 rest), head1)
-titleTransform blocks = (blocks, [])
-
-parseRST = do
-  startPos <- getPosition
-  -- go through once just to get list of reference keys
-  -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
-  setInput docMinusKeys
-  setPosition startPos
-  st <- getState
-  let reversedKeys = stateKeys st
-  updateState $ \st -> st { stateKeys = reverse reversedKeys }
-  -- now parse it for real...
-  blocks <- parseBlocks 
-  let blocks' = filter (/= Null) blocks
-  state <- getState
-  let (blocks'', title) = if stateStandalone state
-                              then titleTransform blocks'
-                              else (blocks', [])
-  let authors = stateAuthors state
-  let date = stateDate state
-  let title' = if (null title) then (stateTitle state) else title
-  return $ Pandoc (Meta title' authors date) blocks''
-
---
--- parsing blocks
---
-
-parseBlocks = manyTill block eof
-
-block = choice [ codeBlock
-               , rawHtmlBlock
-               , rawLaTeXBlock
-               , fieldList
-               , blockQuote
-               , imageBlock
-               , unknownDirective
-               , header
-               , hrule
-               , list
-               , lineBlock
-               , para
-               , plain
-               , nullBlock ] <?> "block"
-
---
--- field list
---
-
-fieldListItem indent = try $ do
-  string indent
-  char ':'
-  name <- many1 alphaNum
-  string ": "
-  skipSpaces
-  first <- manyTill anyChar newline
-  rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >> 
-                            indentedBlock
-  return (name, joinWithSep " " (first:(lines rest)))
-
-fieldList = try $ do
-  indent <- lookAhead $ many (oneOf " \t")
-  items <- many1 $ fieldListItem indent
-  blanklines
-  let authors = case lookup "Authors" items of
-                  Just auth -> [auth]
-                  Nothing  -> map snd (filter (\(x,y) -> x == "Author") items)
-  if null authors 
-     then return () 
-     else updateState $ \st -> st {stateAuthors = authors}
-  case (lookup "Date" items) of
-           Just dat -> updateState $ \st -> st {stateDate = dat}
-           Nothing  -> return ()
-  case (lookup "Title" items) of
-           Just tit -> parseFromString (many inline) tit >>=
-                       \t -> updateState $ \st -> st {stateTitle = t}
-           Nothing  -> return ()
-  let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && 
-                  (x /= "Date") && (x /= "Title")) items
-  if null remaining
-              then return Null
-              else do terms <- mapM (return . (:[]) . Str . fst) remaining
-                      defs  <- mapM (parseFromString (many block) . snd) 
-                                    remaining
-                      return $ DefinitionList $ zip terms defs
-
---
--- line block
---
-
-lineBlockLine = try $ do
-  string "| "
-  white <- many (oneOf " \t")
-  line <- manyTill inline newline
-  return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
-
-lineBlock = try $ do
-  lines <- many1 lineBlockLine
-  blanklines
-  return $ Para (concat lines)
-
---
--- paragraph block
---
-
-para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
-
-codeBlockStart = string "::" >> blankline >> blankline
-
--- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock = try $ do
-  result <- many1 (notFollowedBy' codeBlockStart >> inline)
-  lookAhead (string "::")
-  return $ Para $ if last result == Space
-                     then normalizeSpaces result
-                     else (normalizeSpaces result) ++ [Str ":"]
-
--- regular paragraph
-paraNormal = try $ do 
-  result <- many1 inline
-  newline
-  blanklines
-  return $ Para $ normalizeSpaces result
-
-plain = many1 inline >>= return . Plain . normalizeSpaces 
-
---
--- image block
---
-
-imageBlock = try $ do
-  string ".. image:: "
-  src <- manyTill anyChar newline
-  fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
-                           many1 $ fieldListItem indent
-  optional blanklines
-  case lookup "alt" fields of
-        Just alt -> return $ Plain [Image [Str alt] (src, alt)]
-        Nothing  -> return $ Plain [Image [Str "image"] (src, "")]
---
--- header blocks
---
-
-header = doubleHeader <|> singleHeader <?> "header"
-
--- a header with lines on top and bottom
-doubleHeader = try $ do
-  c <- oneOf underlineChars
-  rest <- many (char c)  -- the top line
-  let lenTop = length (c:rest)
-  skipSpaces
-  newline
-  txt <- many1 (notFollowedBy blankline >> inline)
-  pos <- getPosition
-  let len = (sourceColumn pos) - 1
-  if (len > lenTop) then fail "title longer than border" else return ()
-  blankline              -- spaces and newline
-  count lenTop (char c)  -- the bottom line
-  blanklines
-  -- check to see if we've had this kind of header before.  
-  -- if so, get appropriate level.  if not, add to list.
-  state <- getState
-  let headerTable = stateHeaderTable state
-  let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
-        Just ind -> (headerTable, ind + 1)
-        Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
-  setState (state { stateHeaderTable = headerTable' })
-  return $ Header level (normalizeSpaces txt)
-
--- a header with line on the bottom only
-singleHeader = try $ do 
-  notFollowedBy' whitespace
-  txt <- many1 (do {notFollowedBy blankline; inline})
-  pos <- getPosition
-  let len = (sourceColumn pos) - 1
-  blankline
-  c <- oneOf underlineChars
-  rest <- count (len - 1) (char c)
-  many (char c)
-  blanklines
-  state <- getState
-  let headerTable = stateHeaderTable state
-  let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
-        Just ind -> (headerTable, ind + 1)
-        Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
-  setState (state { stateHeaderTable = headerTable' })
-  return $ Header level (normalizeSpaces txt)
-
---
--- hrule block
---
-
-hrule = try $ do
-  chr <- oneOf underlineChars
-  count 3 (char chr)
-  skipMany (char chr)
-  blankline
-  blanklines
-  return HorizontalRule
-
---
--- code blocks
---
-
--- read a line indented by a given string
-indentedLine indents = try $ do
-  string indents
-  result <- manyTill anyChar newline
-  return $ result ++ "\n"
-
--- two or more indented lines, possibly separated by blank lines.
--- any amount of indentation will work.
-indentedBlock = do 
-  indents <- lookAhead $ many1 (oneOf " \t")
-  lns <- many $ choice $ [ indentedLine indents,
-                           try $ do b <- blanklines
-                                    l <- indentedLine indents
-                                    return (b ++ l) ]
-  optional blanklines 
-  return $ concat lns
-
-codeBlock = try $ do
-  codeBlockStart
-  result <- indentedBlock
-  return $ CodeBlock $ stripTrailingNewlines result
-
---
--- raw html
---
-
-rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
-                     indentedBlock >>= return . RawHtml
-
---
--- raw latex
---
-
-rawLaTeXBlock = try $ do
-  string ".. raw:: latex"
-  blanklines
-  result <- indentedBlock
-  return $ Para [(TeX result)]
-
---
--- block quotes
---
-
-blockQuote = do
-  raw <- indentedBlock
-  -- parse the extracted block, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ raw ++ "\n\n"
-  return $ BlockQuote contents
-
---
--- list blocks
---
-
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-definitionListItem = try $ do
-  term <- many1Till inline endline
-  raw <- indentedBlock
-  -- parse the extracted block, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ raw ++ "\n\n"
-  return (normalizeSpaces term, contents)
-
-definitionList = many1 definitionListItem >>= return . DefinitionList
-
--- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart = try $ do
-  notFollowedBy' hrule  -- because hrules start out just like lists
-  marker <- oneOf bulletListMarkers
-  white <- many1 spaceChar
-  return $ length (marker:white)
-
--- parses ordered list start and returns its length (inc following whitespace)
-orderedListStart style delim = try $ do
-  (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
-  white <- many1 spaceChar
-  return $ markerLen + length white
-
--- parse a line of a list item
-listLine markerLength = try $ do
-  notFollowedBy blankline
-  indentWith markerLength
-  line <- manyTill anyChar newline
-  return $ line ++ "\n"
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith num = do
-  state <- getState
-  let tabStop = stateTabStop state
-  if (num < tabStop)
-     then count num  (char ' ')
-     else choice [ try (count num (char ' ')), 
-                   (try (char '\t' >> count (num - tabStop) (char ' '))) ] 
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem start = do
-  markerLength <- start
-  firstLine <- manyTill anyChar newline
-  restLines <- many (listLine markerLength)
-  return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
-
--- continuation of a list item - indented and separated by blankline or 
--- (in compact lists) endline.  
--- Note: nested lists are parsed as continuations.
-listContinuation markerLength = try $ do
-  blanks <- many1 blankline
-  result <- many1 (listLine markerLength)
-  return $ blanks ++ concat result
-
-listItem start = try $ do 
-  (markerLength, first) <- rawListItem start
-  rest <- many (listContinuation markerLength)
-  blanks <- choice [ try (many blankline >>~ lookAhead start),
-                     many1 blankline ]  -- whole list must end with blank.
-  -- parsing with ListItemState forces markers at beginning of lines to
-  -- count as list item markers, even if not separated by blank space.
-  -- see definition of "endline"
-  state <- getState
-  let oldContext = stateParserContext state
-  setState $ state {stateParserContext = ListItemState}
-  -- parse the extracted block, which may itself contain block elements
-  parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
-  updateState (\st -> st {stateParserContext = oldContext})
-  return parsed
-
-orderedList = do
-  (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
-  items <- many1 (listItem (orderedListStart style delim))
-  let items' = compactify items
-  return $ OrderedList (start, style, delim) items'
-
-bulletList = many1 (listItem bulletListStart) >>= 
-             return . BulletList . compactify
-
---
--- unknown directive (e.g. comment)
---
-
-unknownDirective = try $ do
-  string ".. "
-  manyTill anyChar newline
-  many (string "   :" >> many1 (noneOf "\n:") >> char ':' >>
-        many1 (noneOf "\n") >> newline)
-  optional blanklines
-  return Null
-
--- 
--- reference key
---
-
-referenceKey = do
-  startPos <- getPosition
-  key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
-  st <- getState
-  let oldkeys = stateKeys st
-  updateState $ \st -> st { stateKeys = key : oldkeys }
-  optional blanklines
-  endPos <- getPosition
-  -- return enough blanks to replace key
-  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
-targetURI = do
-  skipSpaces
-  optional newline
-  contents <- many1 (try (many spaceChar >> newline >> 
-                          many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
-  blanklines
-  return contents
-
-imageKey = try $ do
-  string ".. |"
-  ref <- manyTill inline (char '|')
-  skipSpaces
-  string "image::"
-  src <- targetURI
-  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
-anonymousKey = try $ do
-  oneOfStrings [".. __:", "__"]
-  src <- targetURI
-  state <- getState
-  return ([Str "_"], (removeLeadingTrailingSpace src, ""))
-
-regularKeyQuoted = try $ do
-  string ".. _`"
-  ref <- manyTill inline (char '`')
-  char ':'
-  src <- targetURI
-  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
-regularKey = try $ do
-  string ".. _"
-  ref <- manyTill inline (char ':')
-  src <- targetURI
-  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
- -- 
- -- inline
- --
-
-inline = choice [ link
-                , str
-                , whitespace
-                , endline
-                , strong
-                , emph
-                , code
-                , image
-                , hyphens
-                , superscript
-                , subscript
-                , escapedChar
-                , symbol ] <?> "inline"
-
-hyphens = do
-  result <- many1 (char '-')
-  option Space endline 
-  -- don't want to treat endline after hyphen or dash as a space
-  return $ Str result
-
-escapedChar = escaped anyChar
-
-symbol = do 
-  result <- oneOf specialChars
-  return $ Str [result]
-
--- parses inline code, between codeStart and codeEnd
-code = try $ do 
-  string "``"
-  result <- manyTill anyChar (try (string "``"))
-  return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
-
-emph = enclosed (char '*') (char '*') inline >>= 
-       return . Emph . normalizeSpaces
-
-strong = enclosed (string "**") (try $ string "**") inline >>= 
-         return . Strong . normalizeSpaces
-
-interpreted role = try $ do
-  optional $ try $ string "\\ "
-  result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
-  nextChar <- lookAhead anyChar
-  try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
-  return [Str result]
-
-superscript = interpreted "sup" >>= (return . Superscript)
-
-subscript = interpreted "sub" >>= (return . Subscript)
-
-whitespace = many1 spaceChar >> return Space <?> "whitespace"
-
-str = notFollowedBy' oneWordReference >> 
-      many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
-
--- an endline character that can be treated as a space, not a structural break
-endline = try $ do
-  newline
-  notFollowedBy blankline
-  -- parse potential list-starts at beginning of line differently in a list:
-  st <- getState
-  if (stateParserContext st) == ListItemState
-     then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
-          notFollowedBy' bulletListStart
-     else return ()
-  return Space
-
---
--- links
---
-
-link = choice [explicitLink, referenceLink, autoLink]  <?> "link"
-
-explicitLink = try $ do
-  char '`'
-  notFollowedBy (char '`') -- `` is marks start of inline code
-  label <- manyTill inline (try (do {spaces; char '<'}))
-  src <- manyTill (noneOf ">\n ") (char '>')
-  skipSpaces
-  string "`_"
-  return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "")
-
-reference = try $ do
-  char '`'
-  notFollowedBy (char '`')
-  label <- many1Till inline (char '`') 
-  char '_'
-  return label
-
-oneWordReference = do
-  raw <- many1 alphaNum
-  char '_'
-  notFollowedBy alphaNum    -- because this_is_not a link
-  return [Str raw]
-
-referenceLink = try $ do
-  label <- reference <|> oneWordReference
-  key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link
-  state <- getState
-  let keyTable = stateKeys state
-  src <- case lookupKeySrc keyTable key of
-           Nothing     -> fail "no corresponding key"
-           Just target -> return target
-  -- if anonymous link, remove first anon key so it won't be used again
-  let keyTable' = if (key == [Str "_"]) -- anonymous link? 
-                    then delete ([Str "_"], src) keyTable -- remove first anon key 
-                    else keyTable                    
-  setState $ state { stateKeys = keyTable' }
-  return $ Link (normalizeSpaces label) src 
-
-uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", 
-                           "mailto:", "news:", "telnet:" ]
-
-uri = try $ do
-  scheme <- uriScheme
-  identifier <- many1 (noneOf " \t\n")
-  return $ scheme ++ identifier
-
-autoURI = do
-  src <- uri
-  return $ Link [Str src] (src, "")
-
-emailChar = alphaNum <|> oneOf "-+_."
-
-emailAddress = try $ do
-  firstLetter <- alphaNum
-  restAddr <- many emailChar
-  let addr = firstLetter:restAddr
-  char '@'
-  dom <- domain
-  return $ addr ++ '@':dom
-
-domainChar = alphaNum <|> char '-'
-
-domain = do
-  first <- many1 domainChar
-  dom <- many1 (try (do{ char '.'; many1 domainChar }))
-  return $ joinWithSep "." (first:dom)
-
-autoEmail = do
-  src <- emailAddress
-  return $ Link [Str src] ("mailto:" ++ src, "")
-
-autoLink = autoURI <|> autoEmail
-
--- For now, we assume that all substitution references are for images.
-image = try $ do
-  char '|'
-  ref <- manyTill inline (char '|')
-  state <- getState
-  let keyTable = stateKeys state
-  src <- case lookupKeySrc keyTable ref of
-           Nothing     -> fail "no corresponding key"
-           Just target -> return target
-  return $ Image (normalizeSpaces ref) src
-
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-11-28 21:01:17 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-11-28 21:01:17 +0000
commit	47a4a3ab897ab748a4b7eab2ccb95cd9cb0e3864 (patch)
tree	ead5f33d20aa3a88c78f11bb8e598a4f741247f1 /Text/Pandoc/Readers
parent	3dfe4cb708ac14369aab19383c1008b2cf4adbc4 (diff)
download	pandoc-47a4a3ab897ab748a4b7eab2ccb95cd9cb0e3864.tar.gz