Removed Text directory. This is a remnant of an experiment

moving the contents of src/ to the top level, and should have been deleted long ago. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1097 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-11-28 21:01:17 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-11-28 21:01:17 +0000
commit: 47a4a3ab897ab748a4b7eab2ccb95cd9cb0e3864 (patch)
tree: ead5f33d20aa3a88c78f11bb8e598a4f741247f1 /Text/Pandoc/Readers/RST.hs
parent: 3dfe4cb708ac14369aab19383c1008b2cf4adbc4 (diff)
download: pandoc-47a4a3ab897ab748a4b7eab2ccb95cd9cb0e3864.tar.gz
1 files changed, 0 insertions, 640 deletions
diff --git a/Text/Pandoc/Readers/RST.hs b/Text/Pandoc/Readers/RST.hs
deleted file mode 100644
index 1239eb688..000000000
--- a/Text/Pandoc/Readers/RST.hs
+++ /dev/null
@@ -1,640 +0,0 @@
-{-
-Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Readers.RST 
-   Copyright   : Copyright (C) 2006-7 John MacFarlane
-   License     : GNU GPL, version 2 or above 
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Conversion from reStructuredText to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.RST ( 
-                                readRST
-                               ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared 
-import Text.ParserCombinators.Parsec
-import Data.List ( findIndex, delete )
-
--- | Parse reStructuredText string and return Pandoc document.
-readRST :: ParserState -> String -> Pandoc
-readRST state str = (readWith parseRST) state (str ++ "\n\n")
-
---
--- Constants and data structure definitions
----
-
-bulletListMarkers = "*+-"
-underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
-
--- treat these as potentially non-text when parsing inline:
-specialChars = "\\`|*_<>$:[-"
-
---
--- parsing documents
---
-
-isAnonKey (ref, src) = ref == [Str "_"]
-
-isHeader :: Int -> Block -> Bool
-isHeader n (Header x _) = x == n
-isHeader _ _ = False
-
--- | Promote all headers in a list of blocks.  (Part of
--- title transformation for RST.)
-promoteHeaders :: Int -> [Block] -> [Block]
-promoteHeaders num ((Header level text):rest) = 
-    (Header (level - num) text):(promoteHeaders num rest)
-promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
-promoteHeaders num [] = []
-
--- | If list of blocks starts with a header (or a header and subheader)
--- of level that are not found elsewhere, return it as a title and
--- promote all the other headers. 
-titleTransform :: [Block]              -- ^ list of blocks
-               -> ([Block], [Inline])  -- ^ modified list of blocks, title
-titleTransform ((Header 1 head1):(Header 2 head2):rest) =  -- title subtitle
-    if (any (isHeader 1) rest) || (any (isHeader 2) rest)
-       then ((Header 1 head1):(Header 2 head2):rest, [])
-       else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
-titleTransform ((Header 1 head1):rest) =       -- title, no subtitle
-    if (any (isHeader 1) rest)
-       then ((Header 1 head1):rest, [])
-       else ((promoteHeaders 1 rest), head1)
-titleTransform blocks = (blocks, [])
-
-parseRST = do
-  startPos <- getPosition
-  -- go through once just to get list of reference keys
-  -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
-  setInput docMinusKeys
-  setPosition startPos
-  st <- getState
-  let reversedKeys = stateKeys st
-  updateState $ \st -> st { stateKeys = reverse reversedKeys }
-  -- now parse it for real...
-  blocks <- parseBlocks 
-  let blocks' = filter (/= Null) blocks
-  state <- getState
-  let (blocks'', title) = if stateStandalone state
-                              then titleTransform blocks'
-                              else (blocks', [])
-  let authors = stateAuthors state
-  let date = stateDate state
-  let title' = if (null title) then (stateTitle state) else title
-  return $ Pandoc (Meta title' authors date) blocks''
-
---
--- parsing blocks
---
-
-parseBlocks = manyTill block eof
-
-block = choice [ codeBlock
-               , rawHtmlBlock
-               , rawLaTeXBlock
-               , fieldList
-               , blockQuote
-               , imageBlock
-               , unknownDirective
-               , header
-               , hrule
-               , list
-               , lineBlock
-               , para
-               , plain
-               , nullBlock ] <?> "block"
-
---
--- field list
---
-
-fieldListItem indent = try $ do
-  string indent
-  char ':'
-  name <- many1 alphaNum
-  string ": "
-  skipSpaces
-  first <- manyTill anyChar newline
-  rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >> 
-                            indentedBlock
-  return (name, joinWithSep " " (first:(lines rest)))
-
-fieldList = try $ do
-  indent <- lookAhead $ many (oneOf " \t")
-  items <- many1 $ fieldListItem indent
-  blanklines
-  let authors = case lookup "Authors" items of
-                  Just auth -> [auth]
-                  Nothing  -> map snd (filter (\(x,y) -> x == "Author") items)
-  if null authors 
-     then return () 
-     else updateState $ \st -> st {stateAuthors = authors}
-  case (lookup "Date" items) of
-           Just dat -> updateState $ \st -> st {stateDate = dat}
-           Nothing  -> return ()
-  case (lookup "Title" items) of
-           Just tit -> parseFromString (many inline) tit >>=
-                       \t -> updateState $ \st -> st {stateTitle = t}
-           Nothing  -> return ()
-  let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && 
-                  (x /= "Date") && (x /= "Title")) items
-  if null remaining
-              then return Null
-              else do terms <- mapM (return . (:[]) . Str . fst) remaining
-                      defs  <- mapM (parseFromString (many block) . snd) 
-                                    remaining
-                      return $ DefinitionList $ zip terms defs
-
---
--- line block
---
-
-lineBlockLine = try $ do
-  string "| "
-  white <- many (oneOf " \t")
-  line <- manyTill inline newline
-  return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
-
-lineBlock = try $ do
-  lines <- many1 lineBlockLine
-  blanklines
-  return $ Para (concat lines)
-
---
--- paragraph block
---
-
-para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
-
-codeBlockStart = string "::" >> blankline >> blankline
-
--- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock = try $ do
-  result <- many1 (notFollowedBy' codeBlockStart >> inline)
-  lookAhead (string "::")
-  return $ Para $ if last result == Space
-                     then normalizeSpaces result
-                     else (normalizeSpaces result) ++ [Str ":"]
-
--- regular paragraph
-paraNormal = try $ do 
-  result <- many1 inline
-  newline
-  blanklines
-  return $ Para $ normalizeSpaces result
-
-plain = many1 inline >>= return . Plain . normalizeSpaces 
-
---
--- image block
---
-
-imageBlock = try $ do
-  string ".. image:: "
-  src <- manyTill anyChar newline
-  fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
-                           many1 $ fieldListItem indent
-  optional blanklines
-  case lookup "alt" fields of
-        Just alt -> return $ Plain [Image [Str alt] (src, alt)]
-        Nothing  -> return $ Plain [Image [Str "image"] (src, "")]
---
--- header blocks
---
-
-header = doubleHeader <|> singleHeader <?> "header"
-
--- a header with lines on top and bottom
-doubleHeader = try $ do
-  c <- oneOf underlineChars
-  rest <- many (char c)  -- the top line
-  let lenTop = length (c:rest)
-  skipSpaces
-  newline
-  txt <- many1 (notFollowedBy blankline >> inline)
-  pos <- getPosition
-  let len = (sourceColumn pos) - 1
-  if (len > lenTop) then fail "title longer than border" else return ()
-  blankline              -- spaces and newline
-  count lenTop (char c)  -- the bottom line
-  blanklines
-  -- check to see if we've had this kind of header before.  
-  -- if so, get appropriate level.  if not, add to list.
-  state <- getState
-  let headerTable = stateHeaderTable state
-  let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
-        Just ind -> (headerTable, ind + 1)
-        Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
-  setState (state { stateHeaderTable = headerTable' })
-  return $ Header level (normalizeSpaces txt)
-
--- a header with line on the bottom only
-singleHeader = try $ do 
-  notFollowedBy' whitespace
-  txt <- many1 (do {notFollowedBy blankline; inline})
-  pos <- getPosition
-  let len = (sourceColumn pos) - 1
-  blankline
-  c <- oneOf underlineChars
-  rest <- count (len - 1) (char c)
-  many (char c)
-  blanklines
-  state <- getState
-  let headerTable = stateHeaderTable state
-  let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
-        Just ind -> (headerTable, ind + 1)
-        Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
-  setState (state { stateHeaderTable = headerTable' })
-  return $ Header level (normalizeSpaces txt)
-
---
--- hrule block
---
-
-hrule = try $ do
-  chr <- oneOf underlineChars
-  count 3 (char chr)
-  skipMany (char chr)
-  blankline
-  blanklines
-  return HorizontalRule
-
---
--- code blocks
---
-
--- read a line indented by a given string
-indentedLine indents = try $ do
-  string indents
-  result <- manyTill anyChar newline
-  return $ result ++ "\n"
-
--- two or more indented lines, possibly separated by blank lines.
--- any amount of indentation will work.
-indentedBlock = do 
-  indents <- lookAhead $ many1 (oneOf " \t")
-  lns <- many $ choice $ [ indentedLine indents,
-                           try $ do b <- blanklines
-                                    l <- indentedLine indents
-                                    return (b ++ l) ]
-  optional blanklines 
-  return $ concat lns
-
-codeBlock = try $ do
-  codeBlockStart
-  result <- indentedBlock
-  return $ CodeBlock $ stripTrailingNewlines result
-
---
--- raw html
---
-
-rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
-                     indentedBlock >>= return . RawHtml
-
---
--- raw latex
---
-
-rawLaTeXBlock = try $ do
-  string ".. raw:: latex"
-  blanklines
-  result <- indentedBlock
-  return $ Para [(TeX result)]
-
---
--- block quotes
---
-
-blockQuote = do
-  raw <- indentedBlock
-  -- parse the extracted block, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ raw ++ "\n\n"
-  return $ BlockQuote contents
-
---
--- list blocks
---
-
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-definitionListItem = try $ do
-  term <- many1Till inline endline
-  raw <- indentedBlock
-  -- parse the extracted block, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ raw ++ "\n\n"
-  return (normalizeSpaces term, contents)
-
-definitionList = many1 definitionListItem >>= return . DefinitionList
-
--- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart = try $ do
-  notFollowedBy' hrule  -- because hrules start out just like lists
-  marker <- oneOf bulletListMarkers
-  white <- many1 spaceChar
-  return $ length (marker:white)
-
--- parses ordered list start and returns its length (inc following whitespace)
-orderedListStart style delim = try $ do
-  (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
-  white <- many1 spaceChar
-  return $ markerLen + length white
-
--- parse a line of a list item
-listLine markerLength = try $ do
-  notFollowedBy blankline
-  indentWith markerLength
-  line <- manyTill anyChar newline
-  return $ line ++ "\n"
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith num = do
-  state <- getState
-  let tabStop = stateTabStop state
-  if (num < tabStop)
-     then count num  (char ' ')
-     else choice [ try (count num (char ' ')), 
-                   (try (char '\t' >> count (num - tabStop) (char ' '))) ] 
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem start = do
-  markerLength <- start
-  firstLine <- manyTill anyChar newline
-  restLines <- many (listLine markerLength)
-  return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
-
--- continuation of a list item - indented and separated by blankline or 
--- (in compact lists) endline.  
--- Note: nested lists are parsed as continuations.
-listContinuation markerLength = try $ do
-  blanks <- many1 blankline
-  result <- many1 (listLine markerLength)
-  return $ blanks ++ concat result
-
-listItem start = try $ do 
-  (markerLength, first) <- rawListItem start
-  rest <- many (listContinuation markerLength)
-  blanks <- choice [ try (many blankline >>~ lookAhead start),
-                     many1 blankline ]  -- whole list must end with blank.
-  -- parsing with ListItemState forces markers at beginning of lines to
-  -- count as list item markers, even if not separated by blank space.
-  -- see definition of "endline"
-  state <- getState
-  let oldContext = stateParserContext state
-  setState $ state {stateParserContext = ListItemState}
-  -- parse the extracted block, which may itself contain block elements
-  parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
-  updateState (\st -> st {stateParserContext = oldContext})
-  return parsed
-
-orderedList = do
-  (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
-  items <- many1 (listItem (orderedListStart style delim))
-  let items' = compactify items
-  return $ OrderedList (start, style, delim) items'
-
-bulletList = many1 (listItem bulletListStart) >>= 
-             return . BulletList . compactify
-
---
--- unknown directive (e.g. comment)
---
-
-unknownDirective = try $ do
-  string ".. "
-  manyTill anyChar newline
-  many (string "   :" >> many1 (noneOf "\n:") >> char ':' >>
-        many1 (noneOf "\n") >> newline)
-  optional blanklines
-  return Null
-
--- 
--- reference key
---
-
-referenceKey = do
-  startPos <- getPosition
-  key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
-  st <- getState
-  let oldkeys = stateKeys st
-  updateState $ \st -> st { stateKeys = key : oldkeys }
-  optional blanklines
-  endPos <- getPosition
-  -- return enough blanks to replace key
-  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
-targetURI = do
-  skipSpaces
-  optional newline
-  contents <- many1 (try (many spaceChar >> newline >> 
-                          many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
-  blanklines
-  return contents
-
-imageKey = try $ do
-  string ".. |"
-  ref <- manyTill inline (char '|')
-  skipSpaces
-  string "image::"
-  src <- targetURI
-  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
-anonymousKey = try $ do
-  oneOfStrings [".. __:", "__"]
-  src <- targetURI
-  state <- getState
-  return ([Str "_"], (removeLeadingTrailingSpace src, ""))
-
-regularKeyQuoted = try $ do
-  string ".. _`"
-  ref <- manyTill inline (char '`')
-  char ':'
-  src <- targetURI
-  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
-regularKey = try $ do
-  string ".. _"
-  ref <- manyTill inline (char ':')
-  src <- targetURI
-  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
- -- 
- -- inline
- --
-
-inline = choice [ link
-                , str
-                , whitespace
-                , endline
-                , strong
-                , emph
-                , code
-                , image
-                , hyphens
-                , superscript
-                , subscript
-                , escapedChar
-                , symbol ] <?> "inline"
-
-hyphens = do
-  result <- many1 (char '-')
-  option Space endline 
-  -- don't want to treat endline after hyphen or dash as a space
-  return $ Str result
-
-escapedChar = escaped anyChar
-
-symbol = do 
-  result <- oneOf specialChars
-  return $ Str [result]
-
--- parses inline code, between codeStart and codeEnd
-code = try $ do 
-  string "``"
-  result <- manyTill anyChar (try (string "``"))
-  return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
-
-emph = enclosed (char '*') (char '*') inline >>= 
-       return . Emph . normalizeSpaces
-
-strong = enclosed (string "**") (try $ string "**") inline >>= 
-         return . Strong . normalizeSpaces
-
-interpreted role = try $ do
-  optional $ try $ string "\\ "
-  result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
-  nextChar <- lookAhead anyChar
-  try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
-  return [Str result]
-
-superscript = interpreted "sup" >>= (return . Superscript)
-
-subscript = interpreted "sub" >>= (return . Subscript)
-
-whitespace = many1 spaceChar >> return Space <?> "whitespace"
-
-str = notFollowedBy' oneWordReference >> 
-      many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
-
--- an endline character that can be treated as a space, not a structural break
-endline = try $ do
-  newline
-  notFollowedBy blankline
-  -- parse potential list-starts at beginning of line differently in a list:
-  st <- getState
-  if (stateParserContext st) == ListItemState
-     then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
-          notFollowedBy' bulletListStart
-     else return ()
-  return Space
-
---
--- links
---
-
-link = choice [explicitLink, referenceLink, autoLink]  <?> "link"
-
-explicitLink = try $ do
-  char '`'
-  notFollowedBy (char '`') -- `` is marks start of inline code
-  label <- manyTill inline (try (do {spaces; char '<'}))
-  src <- manyTill (noneOf ">\n ") (char '>')
-  skipSpaces
-  string "`_"
-  return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "")
-
-reference = try $ do
-  char '`'
-  notFollowedBy (char '`')
-  label <- many1Till inline (char '`') 
-  char '_'
-  return label
-
-oneWordReference = do
-  raw <- many1 alphaNum
-  char '_'
-  notFollowedBy alphaNum    -- because this_is_not a link
-  return [Str raw]
-
-referenceLink = try $ do
-  label <- reference <|> oneWordReference
-  key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link
-  state <- getState
-  let keyTable = stateKeys state
-  src <- case lookupKeySrc keyTable key of
-           Nothing     -> fail "no corresponding key"
-           Just target -> return target
-  -- if anonymous link, remove first anon key so it won't be used again
-  let keyTable' = if (key == [Str "_"]) -- anonymous link? 
-                    then delete ([Str "_"], src) keyTable -- remove first anon key 
-                    else keyTable                    
-  setState $ state { stateKeys = keyTable' }
-  return $ Link (normalizeSpaces label) src 
-
-uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", 
-                           "mailto:", "news:", "telnet:" ]
-
-uri = try $ do
-  scheme <- uriScheme
-  identifier <- many1 (noneOf " \t\n")
-  return $ scheme ++ identifier
-
-autoURI = do
-  src <- uri
-  return $ Link [Str src] (src, "")
-
-emailChar = alphaNum <|> oneOf "-+_."
-
-emailAddress = try $ do
-  firstLetter <- alphaNum
-  restAddr <- many emailChar
-  let addr = firstLetter:restAddr
-  char '@'
-  dom <- domain
-  return $ addr ++ '@':dom
-
-domainChar = alphaNum <|> char '-'
-
-domain = do
-  first <- many1 domainChar
-  dom <- many1 (try (do{ char '.'; many1 domainChar }))
-  return $ joinWithSep "." (first:dom)
-
-autoEmail = do
-  src <- emailAddress
-  return $ Link [Str src] ("mailto:" ++ src, "")
-
-autoLink = autoURI <|> autoEmail
-
--- For now, we assume that all substitution references are for images.
-image = try $ do
-  char '|'
-  ref <- manyTill inline (char '|')
-  state <- getState
-  let keyTable = stateKeys state
-  src <- case lookupKeySrc keyTable ref of
-           Nothing     -> fail "no corresponding key"
-           Just target -> return target
-  return $ Image (normalizeSpaces ref) src
-
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-11-28 21:01:17 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2007-11-28 21:01:17 +0000
commit	47a4a3ab897ab748a4b7eab2ccb95cd9cb0e3864 (patch)
tree	ead5f33d20aa3a88c78f11bb8e598a4f741247f1 /Text/Pandoc/Readers/RST.hs
parent	3dfe4cb708ac14369aab19383c1008b2cf4adbc4 (diff)
download	pandoc-47a4a3ab897ab748a4b7eab2ccb95cd9cb0e3864.tar.gz