aboutsummaryrefslogtreecommitdiff
path: root/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2009-01-24 20:00:26 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2009-01-24 20:00:26 +0000
commit42aca57dee8d88afa5fac512aeb1198102908865 (patch)
tree1c6a98bd226f4fffde6768010715bc1d80e5d168 /Text/Pandoc/Readers
parent39e8d8486693029abfef84c45e85416f7c775280 (diff)
downloadpandoc-42aca57dee8d88afa5fac512aeb1198102908865.tar.gz
Moved all haskell source to src subdirectory.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1528 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Text/Pandoc/Readers')
-rw-r--r--Text/Pandoc/Readers/HTML.hs675
-rw-r--r--Text/Pandoc/Readers/LaTeX.hs774
-rw-r--r--Text/Pandoc/Readers/Markdown.hs1243
-rw-r--r--Text/Pandoc/Readers/RST.hs707
-rw-r--r--Text/Pandoc/Readers/TeXMath.hs233
5 files changed, 0 insertions, 3632 deletions
diff --git a/Text/Pandoc/Readers/HTML.hs b/Text/Pandoc/Readers/HTML.hs
deleted file mode 100644
index 65e512b5e..000000000
--- a/Text/Pandoc/Readers/HTML.hs
+++ /dev/null
@@ -1,675 +0,0 @@
-{-
-Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.HTML
- Copyright : Copyright (C) 2006-8 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of HTML to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.HTML (
- readHtml,
- rawHtmlInline,
- rawHtmlBlock,
- anyHtmlBlockTag,
- anyHtmlInlineTag,
- anyHtmlTag,
- anyHtmlEndTag,
- htmlEndTag,
- extractTagType,
- htmlBlockElement,
- unsanitaryURI
- ) where
-
-import Text.ParserCombinators.Parsec
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
-import Data.Maybe ( fromMaybe )
-import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf, intercalate )
-import Data.Char ( toLower, isAlphaNum )
-import Network.URI ( parseURIReference, URI (..) )
-
--- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: ParserState -- ^ Parser state
- -> String -- ^ String to parse
- -> Pandoc
-readHtml = readWith parseHtml
-
---
--- Constants
---
-
-eitherBlockOrInline :: [[Char]]
-eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
- "map", "area", "object"]
-
-{-
-inlineHtmlTags :: [[Char]]
-inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
- "br", "cite", "code", "dfn", "em", "font", "i", "img",
- "input", "kbd", "label", "q", "s", "samp", "select",
- "small", "span", "strike", "strong", "sub", "sup",
- "textarea", "tt", "u", "var"] ++ eitherBlockOrInline
--}
-
-blockHtmlTags :: [[Char]]
-blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
- "dl", "fieldset", "form", "h1", "h2", "h3", "h4",
- "h5", "h6", "hr", "html", "isindex", "menu", "noframes",
- "noscript", "ol", "p", "pre", "table", "ul", "dd",
- "dt", "frameset", "li", "tbody", "td", "tfoot",
- "th", "thead", "tr", "script"] ++ eitherBlockOrInline
-
-sanitaryTags :: [[Char]]
-sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big",
- "blockquote", "br", "button", "caption", "center",
- "cite", "code", "col", "colgroup", "dd", "del", "dfn",
- "dir", "div", "dl", "dt", "em", "fieldset", "font",
- "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
- "i", "img", "input", "ins", "kbd", "label", "legend",
- "li", "map", "menu", "ol", "optgroup", "option", "p",
- "pre", "q", "s", "samp", "select", "small", "span",
- "strike", "strong", "sub", "sup", "table", "tbody",
- "td", "textarea", "tfoot", "th", "thead", "tr", "tt",
- "u", "ul", "var"]
-
-sanitaryAttributes :: [[Char]]
-sanitaryAttributes = ["abbr", "accept", "accept-charset",
- "accesskey", "action", "align", "alt", "axis",
- "border", "cellpadding", "cellspacing", "char",
- "charoff", "charset", "checked", "cite", "class",
- "clear", "cols", "colspan", "color", "compact",
- "coords", "datetime", "dir", "disabled",
- "enctype", "for", "frame", "headers", "height",
- "href", "hreflang", "hspace", "id", "ismap",
- "label", "lang", "longdesc", "maxlength", "media",
- "method", "multiple", "name", "nohref", "noshade",
- "nowrap", "prompt", "readonly", "rel", "rev",
- "rows", "rowspan", "rules", "scope", "selected",
- "shape", "size", "span", "src", "start",
- "summary", "tabindex", "target", "title", "type",
- "usemap", "valign", "value", "vspace", "width"]
-
---
--- HTML utility functions
---
-
--- | Returns @True@ if sanitization is specified and the specified tag is
--- not on the sanitized tag list.
-unsanitaryTag :: [Char]
- -> GenParser tok ParserState Bool
-unsanitaryTag tag = do
- st <- getState
- return $ stateSanitizeHTML st && tag `notElem` sanitaryTags
-
--- | returns @True@ if sanitization is specified and the specified attribute
--- is not on the sanitized attribute list.
-unsanitaryAttribute :: ([Char], String, t)
- -> GenParser tok ParserState Bool
-unsanitaryAttribute (attr, val, _) = do
- st <- getState
- return $ stateSanitizeHTML st &&
- (attr `notElem` sanitaryAttributes ||
- (attr `elem` ["href","src"] && unsanitaryURI val))
-
--- | Returns @True@ if the specified URI is potentially a security risk.
-unsanitaryURI :: String -> Bool
-unsanitaryURI u =
- let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:",
- "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:",
- "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:",
- "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:",
- "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:",
- "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:",
- "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:",
- "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
- "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
- "snews:", "webcal:", "ymsgr:"]
- in case parseURIReference u of
- Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes
- Nothing -> True
-
--- | Read blocks until end tag.
-blocksTilEnd :: String -> GenParser Char ParserState [Block]
-blocksTilEnd tag = do
- blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
- return $ filter (/= Null) blocks
-
--- | Read inlines until end tag.
-inlinesTilEnd :: String -> GenParser Char ParserState [Inline]
-inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-
--- | Parse blocks between open and close tag.
-blocksIn :: String -> GenParser Char ParserState [Block]
-blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
-
--- | Parse inlines between open and close tag.
-inlinesIn :: String -> GenParser Char ParserState [Inline]
-inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
-
--- | Extract type from a tag: e.g. @br@ from @\<br\>@
-extractTagType :: String -> String
-extractTagType ('<':rest) =
- let isSpaceOrSlash c = c `elem` "/ \n\t" in
- map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
-extractTagType _ = ""
-
--- | Parse any HTML tag (opening or self-closing) and return text of tag
-anyHtmlTag :: GenParser Char ParserState [Char]
-anyHtmlTag = try $ do
- char '<'
- spaces
- tag <- many1 alphaNum
- attribs <- many htmlAttribute
- spaces
- ender <- option "" (string "/")
- let ender' = if null ender then "" else " /"
- spaces
- char '>'
- let result = "<" ++ tag ++
- concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
- unsanitary <- unsanitaryTag tag
- if unsanitary
- then return $ "<!-- unsafe HTML removed -->"
- else return result
-
-anyHtmlEndTag :: GenParser Char ParserState [Char]
-anyHtmlEndTag = try $ do
- char '<'
- spaces
- char '/'
- spaces
- tag <- many1 alphaNum
- spaces
- char '>'
- let result = "</" ++ tag ++ ">"
- unsanitary <- unsanitaryTag tag
- if unsanitary
- then return $ "<!-- unsafe HTML removed -->"
- else return result
-
-htmlTag :: String -> GenParser Char ParserState (String, [(String, String)])
-htmlTag tag = try $ do
- char '<'
- spaces
- stringAnyCase tag
- attribs <- many htmlAttribute
- spaces
- optional (string "/")
- spaces
- char '>'
- return (tag, (map (\(name, content, _) -> (name, content)) attribs))
-
--- parses a quoted html attribute value
-quoted :: Char -> GenParser Char st (String, String)
-quoted quoteChar = do
- result <- between (char quoteChar) (char quoteChar)
- (many (noneOf [quoteChar]))
- return (result, [quoteChar])
-
-nullAttribute :: ([Char], [Char], [Char])
-nullAttribute = ("", "", "")
-
-htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char])
-htmlAttribute = do
- attr <- htmlRegularAttribute <|> htmlMinimizedAttribute
- unsanitary <- unsanitaryAttribute attr
- if unsanitary
- then return nullAttribute
- else return attr
-
--- minimized boolean attribute
-htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char])
-htmlMinimizedAttribute = try $ do
- many1 space
- name <- many1 (choice [letter, oneOf ".-_:"])
- return (name, name, name)
-
-htmlRegularAttribute :: GenParser Char st ([Char], [Char], [Char])
-htmlRegularAttribute = try $ do
- many1 space
- name <- many1 (choice [letter, oneOf ".-_:"])
- spaces
- char '='
- spaces
- (content, quoteStr) <- choice [ (quoted '\''),
- (quoted '"'),
- (do
- a <- many (alphaNum <|> (oneOf "-._:"))
- return (a,"")) ]
- return (name, content,
- (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
-
--- | Parse an end tag of type 'tag'
-htmlEndTag :: [Char] -> GenParser Char st [Char]
-htmlEndTag tag = try $ do
- char '<'
- spaces
- char '/'
- spaces
- stringAnyCase tag
- spaces
- char '>'
- return $ "</" ++ tag ++ ">"
-
-{-
--- | Returns @True@ if the tag is (or can be) an inline tag.
-isInline :: String -> Bool
-isInline tag = (extractTagType tag) `elem` inlineHtmlTags
--}
-
--- | Returns @True@ if the tag is (or can be) a block tag.
-isBlock :: String -> Bool
-isBlock tag = (extractTagType tag) `elem` blockHtmlTags
-
-anyHtmlBlockTag :: GenParser Char ParserState [Char]
-anyHtmlBlockTag = try $ do
- tag <- anyHtmlTag <|> anyHtmlEndTag
- if isBlock tag then return tag else fail "not a block tag"
-
-anyHtmlInlineTag :: GenParser Char ParserState [Char]
-anyHtmlInlineTag = try $ do
- tag <- anyHtmlTag <|> anyHtmlEndTag
- if not (isBlock tag) then return tag else fail "not an inline tag"
-
--- | Parses material between script tags.
--- Scripts must be treated differently, because they can contain '<>' etc.
-htmlScript :: GenParser Char ParserState [Char]
-htmlScript = try $ do
- open <- string "<script"
- rest <- manyTill anyChar (htmlEndTag "script")
- st <- getState
- if stateSanitizeHTML st && not ("script" `elem` sanitaryTags)
- then return "<!-- unsafe HTML removed -->"
- else return $ open ++ rest ++ "</script>"
-
--- | Parses material between style tags.
--- Style tags must be treated differently, because they can contain CSS
-htmlStyle :: GenParser Char ParserState [Char]
-htmlStyle = try $ do
- open <- string "<style"
- rest <- manyTill anyChar (htmlEndTag "style")
- st <- getState
- if stateSanitizeHTML st && not ("style" `elem` sanitaryTags)
- then return "<!-- unsafe HTML removed -->"
- else return $ open ++ rest ++ "</style>"
-
-htmlBlockElement :: GenParser Char ParserState [Char]
-htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ]
-
-rawHtmlBlock :: GenParser Char ParserState Block
-rawHtmlBlock = try $ do
- body <- htmlBlockElement <|> rawVerbatimBlock <|> anyHtmlBlockTag
- state <- getState
- if stateParseRaw state then return (RawHtml body) else return Null
-
--- This is a block whose contents should be passed through verbatim, not interpreted.
-rawVerbatimBlock :: GenParser Char ParserState [Char]
-rawVerbatimBlock = try $ do
- start <- anyHtmlBlockTag
- let tagtype = extractTagType start
- if tagtype `elem` ["pre"]
- then do
- contents <- many (notFollowedBy' (htmlEndTag tagtype) >> anyChar)
- end <- htmlEndTag tagtype
- return $ start ++ contents ++ end
- else fail "Not a verbatim block"
-
--- We don't want to parse </body> or </html> as raw HTML, since these
--- are handled in parseHtml.
-rawHtmlBlock' :: GenParser Char ParserState Block
-rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
- rawHtmlBlock
-
--- | Parses an HTML comment.
-htmlComment :: GenParser Char st [Char]
-htmlComment = try $ do
- string "<!--"
- comment <- manyTill anyChar (try (string "-->"))
- return $ "<!--" ++ comment ++ "-->"
-
---
--- parsing documents
---
-
-xmlDec :: GenParser Char st [Char]
-xmlDec = try $ do
- string "<?"
- rest <- manyTill anyChar (char '>')
- return $ "<?" ++ rest ++ ">"
-
-definition :: GenParser Char st [Char]
-definition = try $ do
- string "<!"
- rest <- manyTill anyChar (char '>')
- return $ "<!" ++ rest ++ ">"
-
-nonTitleNonHead :: GenParser Char ParserState Char
-nonTitleNonHead = try $ do
- notFollowedBy $ (htmlTag "title" >> return ' ') <|>
- (htmlEndTag "head" >> return ' ')
- (rawHtmlBlock >> return ' ') <|> anyChar
-
-parseTitle :: GenParser Char ParserState [Inline]
-parseTitle = try $ do
- (tag, _) <- htmlTag "title"
- contents <- inlinesTilEnd tag
- spaces
- return contents
-
--- parse header and return meta-information (for now, just title)
-parseHead :: GenParser Char ParserState ([Inline], [a], [Char])
-parseHead = try $ do
- htmlTag "head"
- spaces
- skipMany nonTitleNonHead
- contents <- option [] parseTitle
- skipMany nonTitleNonHead
- htmlEndTag "head"
- return (contents, [], "")
-
-skipHtmlTag :: String -> GenParser Char ParserState ()
-skipHtmlTag tag = optional (htmlTag tag)
-
--- h1 class="title" representation of title in body
-bodyTitle :: GenParser Char ParserState [Inline]
-bodyTitle = try $ do
- (_, attribs) <- htmlTag "h1"
- case (extractAttribute "class" attribs) of
- Just "title" -> return ""
- _ -> fail "not title"
- inlinesTilEnd "h1"
-
-parseHtml :: GenParser Char ParserState Pandoc
-parseHtml = do
- sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
- skipHtmlTag "html"
- spaces
- (title, authors, date) <- option ([], [], "") parseHead
- spaces
- skipHtmlTag "body"
- spaces
- optional bodyTitle -- skip title in body, because it's represented in meta
- blocks <- parseBlocks
- spaces
- optional (htmlEndTag "body")
- spaces
- optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html>
- eof
- return $ Pandoc (Meta title authors date) blocks
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
-
-block :: GenParser Char ParserState Block
-block = choice [ codeBlock
- , header
- , hrule
- , list
- , blockQuote
- , para
- , plain
- , rawHtmlBlock'
- ] <?> "block"
-
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
-
-headerLevel :: Int -> GenParser Char ParserState Block
-headerLevel n = try $ do
- let level = "h" ++ show n
- htmlTag level
- contents <- inlinesTilEnd level
- return $ Header n (normalizeSpaces contents)
-
---
--- hrule block
---
-
-hrule :: GenParser Char ParserState Block
-hrule = try $ do
- (_, attribs) <- htmlTag "hr"
- state <- getState
- if not (null attribs) && stateParseRaw state
- then unexpected "attributes in hr" -- parse as raw in this case
- else return HorizontalRule
-
---
--- code blocks
---
-
--- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
--- skipped, because they are not portable to output formats other than HTML.
-codeBlock :: GenParser Char ParserState Block
-codeBlock = try $ do
- htmlTag "pre"
- result <- manyTill
- (many1 (satisfy (/= '<')) <|>
- ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
- (htmlEndTag "pre")
- let result' = concat result
- -- drop leading newline if any
- let result'' = if "\n" `isPrefixOf` result'
- then drop 1 result'
- else result'
- -- drop trailing newline if any
- let result''' = if "\n" `isSuffixOf` result''
- then init result''
- else result''
- return $ CodeBlock ("",[],[]) $ decodeCharacterReferences result'''
-
---
--- block quotes
---
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = try $ htmlTag "blockquote" >> spaces >>
- blocksTilEnd "blockquote" >>= (return . BlockQuote)
-
---
--- list blocks
---
-
-list :: GenParser Char ParserState Block
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
- (_, attribs) <- htmlTag "ol"
- (start, style) <- option (1, DefaultStyle) $
- do failIfStrict
- let sta = fromMaybe "1" $
- lookup "start" attribs
- let sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- let sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ -> DefaultStyle
- return (read sta, sty')
- spaces
- items <- sepEndBy1 (blocksIn "li") spaces
- htmlEndTag "ol"
- return $ OrderedList (start, style, DefaultDelim) items
-
-bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
- htmlTag "ul"
- spaces
- items <- sepEndBy1 (blocksIn "li") spaces
- htmlEndTag "ul"
- return $ BulletList items
-
-definitionList :: GenParser Char ParserState Block
-definitionList = try $ do
- failIfStrict -- def lists not part of standard markdown
- htmlTag "dl"
- spaces
- items <- sepEndBy1 definitionListItem spaces
- htmlEndTag "dl"
- return $ DefinitionList items
-
-definitionListItem :: GenParser Char ParserState ([Inline], [Block])
-definitionListItem = try $ do
- terms <- sepEndBy1 (inlinesIn "dt") spaces
- defs <- sepEndBy1 (blocksIn "dd") spaces
- let term = intercalate [LineBreak] terms
- return (term, concat defs)
-
---
--- paragraph block
---
-
-para :: GenParser Char ParserState Block
-para = try $ htmlTag "p" >> inlinesTilEnd "p" >>=
- return . Para . normalizeSpaces
-
---
--- plain block
---
-
-plain :: GenParser Char ParserState Block
-plain = many1 inline >>= return . Plain . normalizeSpaces
-
---
--- inline
---
-
-inline :: GenParser Char ParserState Inline
-inline = choice [ charRef
- , strong
- , emph
- , superscript
- , subscript
- , strikeout
- , spanStrikeout
- , code
- , str
- , linebreak
- , whitespace
- , link
- , image
- , rawHtmlInline
- ] <?> "inline"
-
-code :: GenParser Char ParserState Inline
-code = try $ do
- htmlTag "code"
- result <- manyTill anyChar (htmlEndTag "code")
- -- remove internal line breaks, leading and trailing space,
- -- and decode character references
- return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
- intercalate " " $ lines result
-
-rawHtmlInline :: GenParser Char ParserState Inline
-rawHtmlInline = do
- result <- htmlScript <|> htmlStyle <|> htmlComment <|> anyHtmlInlineTag
- state <- getState
- if stateParseRaw state then return (HtmlInline result) else return (Str "")
-
-betweenTags :: [Char] -> GenParser Char ParserState [Inline]
-betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
- return . normalizeSpaces
-
-emph :: GenParser Char ParserState Inline
-emph = (betweenTags "em" <|> betweenTags "i") >>= return . Emph
-
-strong :: GenParser Char ParserState Inline
-strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
-
-superscript :: GenParser Char ParserState Inline
-superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
-
-subscript :: GenParser Char ParserState Inline
-subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
-
-strikeout :: GenParser Char ParserState Inline
-strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
- return . Strikeout
-
-spanStrikeout :: GenParser Char ParserState Inline
-spanStrikeout = try $ do
- failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
- (_, attributes) <- htmlTag "span"
- result <- case (extractAttribute "class" attributes) of
- Just "strikeout" -> inlinesTilEnd "span"
- _ -> fail "not a strikeout"
- return $ Strikeout result
-
-whitespace :: GenParser Char st Inline
-whitespace = many1 space >> return Space
-
--- hard line break
-linebreak :: GenParser Char ParserState Inline
-linebreak = htmlTag "br" >> optional newline >> return LineBreak
-
-str :: GenParser Char st Inline
-str = many1 (noneOf "<& \t\n") >>= return . Str
-
---
--- links and images
---
-
--- extract contents of attribute (attribute names are case-insensitive)
-extractAttribute :: [Char] -> [([Char], String)] -> Maybe String
-extractAttribute _ [] = Nothing
-extractAttribute name ((attrName, contents):rest) =
- let name' = map toLower name
- attrName' = map toLower attrName
- in if attrName' == name'
- then Just (decodeCharacterReferences contents)
- else extractAttribute name rest
-
-link :: GenParser Char ParserState Inline
-link = try $ do
- (_, attributes) <- htmlTag "a"
- url <- case (extractAttribute "href" attributes) of
- Just url -> return url
- Nothing -> fail "no href"
- let title = fromMaybe "" $ extractAttribute "title" attributes
- lab <- inlinesTilEnd "a"
- return $ Link (normalizeSpaces lab) (url, title)
-
-image :: GenParser Char ParserState Inline
-image = try $ do
- (_, attributes) <- htmlTag "img"
- url <- case (extractAttribute "src" attributes) of
- Just url -> return url
- Nothing -> fail "no src"
- let title = fromMaybe "" $ extractAttribute "title" attributes
- let alt = fromMaybe "" (extractAttribute "alt" attributes)
- return $ Image [Str alt] (url, title)
-
diff --git a/Text/Pandoc/Readers/LaTeX.hs b/Text/Pandoc/Readers/LaTeX.hs
deleted file mode 100644
index 9ba5bf372..000000000
--- a/Text/Pandoc/Readers/LaTeX.hs
+++ /dev/null
@@ -1,774 +0,0 @@
-{-
-Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.LaTeX
- Copyright : Copyright (C) 2006-8 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of LaTeX to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.LaTeX (
- readLaTeX,
- rawLaTeXInline,
- rawLaTeXEnvironment'
- ) where
-
-import Text.ParserCombinators.Parsec
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Data.Maybe ( fromMaybe )
-import Data.Char ( chr )
-import Data.List ( isPrefixOf, isSuffixOf )
-
--- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: ParserState -- ^ Parser state, including options for parser
- -> String -- ^ String to parse
- -> Pandoc
-readLaTeX = readWith parseLaTeX
-
--- characters with special meaning
-specialChars :: [Char]
-specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"
-
---
--- utility functions
---
-
--- | Returns text between brackets and its matching pair.
-bracketedText :: Char -> Char -> GenParser Char st [Char]
-bracketedText openB closeB = do
- result <- charsInBalanced' openB closeB
- return $ [openB] ++ result ++ [closeB]
-
--- | Returns an option or argument of a LaTeX command.
-optOrArg :: GenParser Char st [Char]
-optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
-
--- | True if the string begins with '{'.
-isArg :: [Char] -> Bool
-isArg ('{':_) = True
-isArg _ = False
-
--- | Returns list of options and arguments of a LaTeX command.
-commandArgs :: GenParser Char st [[Char]]
-commandArgs = many optOrArg
-
--- | Parses LaTeX command, returns (name, star, list of options or arguments).
-command :: GenParser Char st ([Char], [Char], [[Char]])
-command = do
- char '\\'
- name <- many1 letter
- star <- option "" (string "*") -- some commands have starred versions
- args <- commandArgs
- return (name, star, args)
-
-begin :: [Char] -> GenParser Char st [Char]
-begin name = try $ do
- string $ "\\begin{" ++ name ++ "}"
- optional commandArgs
- spaces
- return name
-
-end :: [Char] -> GenParser Char st [Char]
-end name = try $ do
- string $ "\\end{" ++ name ++ "}"
- return name
-
--- | Returns a list of block elements containing the contents of an
--- environment.
-environment :: [Char] -> GenParser Char ParserState [Block]
-environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces
-
-anyEnvironment :: GenParser Char ParserState Block
-anyEnvironment = try $ do
- string "\\begin{"
- name <- many letter
- star <- option "" (string "*") -- some environments have starred variants
- char '}'
- optional commandArgs
- spaces
- contents <- manyTill block (end (name ++ star))
- spaces
- return $ BlockQuote contents
-
---
--- parsing documents
---
-
--- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble :: GenParser Char ParserState ()
-processLaTeXPreamble = try $ manyTill
- (choice [bibliographic, comment, unknownCommand, nullBlock])
- (try (string "\\begin{document}")) >>
- spaces
-
--- | Parse LaTeX and return 'Pandoc'.
-parseLaTeX :: GenParser Char ParserState Pandoc
-parseLaTeX = do
- optional processLaTeXPreamble -- preamble might not be present (fragment)
- spaces
- blocks <- parseBlocks
- spaces
- optional $ try (string "\\end{document}" >> many anyChar)
- -- might not be present (fragment)
- spaces
- eof
- state <- getState
- let blocks' = filter (/= Null) blocks
- let title' = stateTitle state
- let authors' = stateAuthors state
- let date' = stateDate state
- return $ Pandoc (Meta title' authors' date') blocks'
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = spaces >> many block
-
-block :: GenParser Char ParserState Block
-block = choice [ hrule
- , codeBlock
- , header
- , list
- , blockQuote
- , comment
- , bibliographic
- , para
- , itemBlock
- , unknownEnvironment
- , ignore
- , unknownCommand ] <?> "block"
-
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = try $ do
- char '\\'
- subs <- many (try (string "sub"))
- string "section"
- optional (char '*')
- char '{'
- title' <- manyTill inline (char '}')
- spaces
- return $ Header (length subs + 1) (normalizeSpaces title')
-
---
--- hrule block
---
-
-hrule :: GenParser Char st Block
-hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
- "\\newpage" ] >> spaces >> return HorizontalRule
-
---
--- code blocks
---
-
-codeBlock :: GenParser Char ParserState Block
-codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> lhsCodeBlock
--- Note: Verbatim is from fancyvrb.
-
-codeBlockWith :: String -> GenParser Char st Block
-codeBlockWith env = try $ do
- string ("\\begin{" ++ env ++ "}") -- don't use begin function because it
- -- gobbles whitespace
- optional blanklines -- we want to gobble blank lines, but not
- -- leading space
- contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}"))
- spaces
- let classes = if env == "code" then ["haskell"] else []
- return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents)
-
-lhsCodeBlock :: GenParser Char ParserState Block
-lhsCodeBlock = do
- failUnlessLHS
- (CodeBlock (_,_,_) cont) <- codeBlockWith "code"
- return $ CodeBlock ("", ["sourceCode","haskell"], []) cont
-
---
--- block quotes
---
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
- return . BlockQuote
-
---
--- list blocks
---
-
-list :: GenParser Char ParserState Block
-list = bulletList <|> orderedList <|> definitionList <?> "list"
-
-listItem :: GenParser Char ParserState ([Inline], [Block])
-listItem = try $ do
- ("item", _, args) <- command
- spaces
- state <- getState
- let oldParserContext = stateParserContext state
- updateState (\s -> s {stateParserContext = ListItemState})
- blocks <- many block
- updateState (\s -> s {stateParserContext = oldParserContext})
- opt <- case args of
- ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
- parseFromString (many inline) $ tail $ init x
- _ -> return []
- return (opt, blocks)
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
- string "\\begin{enumerate}"
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ do failIfStrict
- char '['
- res <- anyOrderedListMarker
- char ']'
- return res
- spaces
- option "" $ try $ do string "\\setlength{\\itemindent}"
- char '{'
- manyTill anyChar (char '}')
- spaces
- start <- option 1 $ try $ do failIfStrict
- string "\\setcounter{enum"
- many1 (oneOf "iv")
- string "}{"
- num <- many1 digit
- char '}'
- spaces
- return $ (read num) + 1
- items <- many listItem
- end "enumerate"
- spaces
- return $ OrderedList (start, style, delim) $ map snd items
-
-bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
- begin "itemize"
- spaces
- items <- many listItem
- end "itemize"
- spaces
- return (BulletList $ map snd items)
-
-definitionList :: GenParser Char ParserState Block
-definitionList = try $ do
- begin "description"
- spaces
- items <- many listItem
- end "description"
- spaces
- return (DefinitionList items)
-
---
--- paragraph block
---
-
-para :: GenParser Char ParserState Block
-para = do
- res <- many1 inline
- spaces
- return $ if null (filter (`notElem` [Str "", Space]) res)
- then Null
- else Para $ normalizeSpaces res
-
---
--- title authors date
---
-
-bibliographic :: GenParser Char ParserState Block
-bibliographic = choice [ maketitle, title, authors, date ]
-
-maketitle :: GenParser Char st Block
-maketitle = try (string "\\maketitle") >> spaces >> return Null
-
-title :: GenParser Char ParserState Block
-title = try $ do
- string "\\title{"
- tit <- manyTill inline (char '}')
- spaces
- updateState (\state -> state { stateTitle = tit })
- return Null
-
-authors :: GenParser Char ParserState Block
-authors = try $ do
- string "\\author{"
- authors' <- manyTill anyChar (char '}')
- spaces
- let authors'' = map removeLeadingTrailingSpace $ lines $
- substitute "\\\\" "\n" authors'
- updateState (\s -> s { stateAuthors = authors'' })
- return Null
-
-date :: GenParser Char ParserState Block
-date = try $ do
- string "\\date{"
- date' <- manyTill anyChar (char '}')
- spaces
- updateState (\state -> state { stateDate = date' })
- return Null
-
---
--- item block
--- for use in unknown environments that aren't being parsed as raw latex
---
-
--- this forces items to be parsed in different blocks
-itemBlock :: GenParser Char ParserState Block
-itemBlock = try $ do
- ("item", _, args) <- command
- state <- getState
- if stateParserContext state == ListItemState
- then fail "item should be handled by list block"
- else if null args
- then return Null
- else return $ Plain [Str (stripFirstAndLast (head args))]
-
---
--- raw LaTeX
---
-
--- | Parse any LaTeX environment and return a Para block containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment :: GenParser Char st Block
-rawLaTeXEnvironment = do
- contents <- rawLaTeXEnvironment'
- spaces
- return $ Para [TeX contents]
-
--- | Parse any LaTeX environment and return a string containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment' :: GenParser Char st String
-rawLaTeXEnvironment' = try $ do
- string "\\begin{"
- name <- many1 letter
- star <- option "" (string "*") -- for starred variants
- let name' = name ++ star
- char '}'
- args <- option [] commandArgs
- let argStr = concat args
- contents <- manyTill (choice [ (many1 (noneOf "\\")),
- rawLaTeXEnvironment',
- string "\\" ])
- (end name')
- return $ "\\begin{" ++ name' ++ "}" ++ argStr ++
- concat contents ++ "\\end{" ++ name' ++ "}"
-
-unknownEnvironment :: GenParser Char ParserState Block
-unknownEnvironment = try $ do
- state <- getState
- result <- if stateParseRaw state -- check whether we should include raw TeX
- then rawLaTeXEnvironment -- if so, get whole raw environment
- else anyEnvironment -- otherwise just the contents
- return result
-
--- \ignore{} is used conventionally in literate haskell for definitions
--- that are to be processed by the compiler but not printed.
-ignore :: GenParser Char ParserState Block
-ignore = try $ do
- ("ignore", _, _) <- command
- spaces
- return Null
-
-unknownCommand :: GenParser Char ParserState Block
-unknownCommand = try $ do
- notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
- "document"]
- state <- getState
- if stateParserContext state == ListItemState
- then notFollowedBy' $ string "\\item"
- else return ()
- if stateParseRaw state
- then do
- (name, star, args) <- command
- spaces
- return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)]
- else do -- skip unknown command, leaving arguments to be parsed
- char '\\'
- letter
- many (letter <|> digit)
- optional (try $ string "{}")
- spaces
- return Null
-
--- latex comment
-comment :: GenParser Char st Block
-comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
-
---
--- inline
---
-
-inline :: GenParser Char ParserState Inline
-inline = choice [ str
- , endline
- , whitespace
- , quoted
- , apostrophe
- , spacer
- , strong
- , math
- , ellipses
- , emDash
- , enDash
- , hyphen
- , emph
- , strikeout
- , superscript
- , subscript
- , ref
- , lab
- , code
- , url
- , link
- , image
- , footnote
- , linebreak
- , accentedChar
- , specialChar
- , rawLaTeXInline
- , escapedChar
- , unescapedChar
- ] <?> "inline"
-
-accentedChar :: GenParser Char st Inline
-accentedChar = normalAccentedChar <|> specialAccentedChar
-
-normalAccentedChar :: GenParser Char st Inline
-normalAccentedChar = try $ do
- char '\\'
- accent <- oneOf "'`^\"~"
- character <- (try $ char '{' >> letter >>~ char '}') <|> letter
- let table = fromMaybe [] $ lookup character accentTable
- let result = case lookup accent table of
- Just num -> chr num
- Nothing -> '?'
- return $ Str [result]
-
--- an association list of letters and association list of accents
--- and decimal character numbers.
-accentTable :: [(Char, [(Char, Int)])]
-accentTable =
- [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
- ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
- ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
- ('N', [('~', 209)]),
- ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
- ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
- ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
- ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
- ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
- ('n', [('~', 241)]),
- ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
- ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
-
-specialAccentedChar :: GenParser Char st Inline
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
- oslash, pound, euro, copyright, sect ]
-
-ccedil :: GenParser Char st Inline
-ccedil = try $ do
- char '\\'
- letter' <- oneOfStrings ["cc", "cC"]
- let num = if letter' == "cc" then 231 else 199
- return $ Str [chr num]
-
-aring :: GenParser Char st Inline
-aring = try $ do
- char '\\'
- letter' <- oneOfStrings ["aa", "AA"]
- let num = if letter' == "aa" then 229 else 197
- return $ Str [chr num]
-
-iuml :: GenParser Char st Inline
-iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
- return (Str [chr 239])
-
-szlig :: GenParser Char st Inline
-szlig = try (string "\\ss") >> return (Str [chr 223])
-
-oslash :: GenParser Char st Inline
-oslash = try $ do
- char '\\'
- letter' <- choice [char 'o', char 'O']
- let num = if letter' == 'o' then 248 else 216
- return $ Str [chr num]
-
-aelig :: GenParser Char st Inline
-aelig = try $ do
- char '\\'
- letter' <- oneOfStrings ["ae", "AE"]
- let num = if letter' == "ae" then 230 else 198
- return $ Str [chr num]
-
-pound :: GenParser Char st Inline
-pound = try (string "\\pounds") >> return (Str [chr 163])
-
-euro :: GenParser Char st Inline
-euro = try (string "\\euro") >> return (Str [chr 8364])
-
-copyright :: GenParser Char st Inline
-copyright = try (string "\\copyright") >> return (Str [chr 169])
-
-sect :: GenParser Char st Inline
-sect = try (string "\\S") >> return (Str [chr 167])
-
-escapedChar :: GenParser Char st Inline
-escapedChar = do
- result <- escaped (oneOf " $%&_#{}\n")
- return $ if result == Str "\n" then Str " " else result
-
--- nonescaped special characters
-unescapedChar :: GenParser Char st Inline
-unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c])
-
-specialChar :: GenParser Char st Inline
-specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
-
-backslash :: GenParser Char st Inline
-backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\")
-
-tilde :: GenParser Char st Inline
-tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
-
-caret :: GenParser Char st Inline
-caret = try (string "\\^{}") >> return (Str "^")
-
-bar :: GenParser Char st Inline
-bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\")
-
-lt :: GenParser Char st Inline
-lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<")
-
-gt :: GenParser Char st Inline
-gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">")
-
-doubleQuote :: GenParser Char st Inline
-doubleQuote = char '"' >> return (Str "\"")
-
-code :: GenParser Char ParserState Inline
-code = code1 <|> code2 <|> lhsInlineCode
-
-code1 :: GenParser Char st Inline
-code1 = try $ do
- string "\\verb"
- marker <- anyChar
- result <- manyTill anyChar (char marker)
- return $ Code $ removeLeadingTrailingSpace result
-
-code2 :: GenParser Char st Inline
-code2 = try $ do
- string "\\texttt{"
- result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
- return $ Code result
-
-lhsInlineCode :: GenParser Char ParserState Inline
-lhsInlineCode = try $ do
- failUnlessLHS
- char '|'
- result <- manyTill (noneOf "|\n") (char '|')
- return $ Code result
-
-emph :: GenParser Char ParserState Inline
-emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
- manyTill inline (char '}') >>= return . Emph
-
-strikeout :: GenParser Char ParserState Inline
-strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
- return . Strikeout
-
-superscript :: GenParser Char ParserState Inline
-superscript = try $ string "\\textsuperscript{" >>
- manyTill inline (char '}') >>= return . Superscript
-
--- note: \textsubscript isn't a standard latex command, but we use
--- a defined version in pandoc.
-subscript :: GenParser Char ParserState Inline
-subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
- return . Subscript
-
-apostrophe :: GenParser Char ParserState Inline
-apostrophe = char '\'' >> return Apostrophe
-
-quoted :: GenParser Char ParserState Inline
-quoted = doubleQuoted <|> singleQuoted
-
-singleQuoted :: GenParser Char ParserState Inline
-singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
- return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted :: GenParser Char ParserState Inline
-doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
- return . Quoted DoubleQuote . normalizeSpaces
-
-singleQuoteStart :: GenParser Char st Char
-singleQuoteStart = char '`'
-
-singleQuoteEnd :: GenParser Char st ()
-singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
-
-doubleQuoteStart :: CharParser st String
-doubleQuoteStart = string "``"
-
-doubleQuoteEnd :: CharParser st String
-doubleQuoteEnd = string "\"" <|> try (string "''")
-
-ellipses :: GenParser Char st Inline
-ellipses = try $ string "\\ldots" >> optional (try $ string "{}") >>
- return Ellipses
-
-enDash :: GenParser Char st Inline
-enDash = try (string "--") >> return EnDash
-
-emDash :: GenParser Char st Inline
-emDash = try (string "---") >> return EmDash
-
-hyphen :: GenParser Char st Inline
-hyphen = char '-' >> return (Str "-")
-
-lab :: GenParser Char st Inline
-lab = try $ do
- string "\\label{"
- result <- manyTill anyChar (char '}')
- return $ Str $ "(" ++ result ++ ")"
-
-ref :: GenParser Char st Inline
-ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
-
-strong :: GenParser Char ParserState Inline
-strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
- return . Strong
-
-whitespace :: GenParser Char st Inline
-whitespace = many1 (oneOf "~ \t") >> return Space
-
--- hard line break
-linebreak :: GenParser Char st Inline
-linebreak = try (string "\\\\") >> return LineBreak
-
-spacer :: GenParser Char st Inline
-spacer = try (string "\\,") >> return (Str "")
-
-str :: GenParser Char st Inline
-str = many1 (noneOf specialChars) >>= return . Str
-
--- endline internal to paragraph
-endline :: GenParser Char st Inline
-endline = try $ newline >> notFollowedBy blankline >> return Space
-
--- math
-math :: GenParser Char st Inline
-math = (math3 >>= return . Math DisplayMath)
- <|> (math1 >>= return . Math InlineMath)
- <|> (math2 >>= return . Math InlineMath)
- <|> (math4 >>= return . Math DisplayMath)
- <|> (math5 >>= return . Math DisplayMath)
- <|> (math6 >>= return . Math DisplayMath)
- <?> "math"
-
-math1 :: GenParser Char st String
-math1 = try $ char '$' >> manyTill anyChar (char '$')
-
-math2 :: GenParser Char st String
-math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)")
-
-math3 :: GenParser Char st String
-math3 = try $ char '$' >> math1 >>~ char '$'
-
-math4 :: GenParser Char st String
-math4 = try $ do
- name <- begin "equation" <|> begin "equation*" <|> begin "displaymath" <|> begin "displaymath*"
- spaces
- manyTill anyChar (end name)
-
-math5 :: GenParser Char st String
-math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]")
-
-math6 :: GenParser Char st String
-math6 = try $ do
- name <- begin "eqnarray" <|> begin "eqnarray*"
- spaces
- res <- manyTill anyChar (end name)
- return $ filter (/= '&') res -- remove eqnarray alignment codes
-
---
--- links and images
---
-
-url :: GenParser Char ParserState Inline
-url = try $ do
- string "\\url"
- url' <- charsInBalanced '{' '}'
- return $ Link [Code url'] (url', "")
-
-link :: GenParser Char ParserState Inline
-link = try $ do
- string "\\href{"
- url' <- manyTill anyChar (char '}')
- char '{'
- label' <- manyTill inline (char '}')
- return $ Link (normalizeSpaces label') (url', "")
-
-image :: GenParser Char ParserState Inline
-image = try $ do
- ("includegraphics", _, args) <- command
- let args' = filter isArg args -- filter out options
- let src = if null args' then
- ("", "")
- else
- (stripFirstAndLast (head args'), "")
- return $ Image [Str "image"] src
-
-footnote :: GenParser Char ParserState Inline
-footnote = try $ do
- (name, _, (contents:[])) <- command
- if ((name == "footnote") || (name == "thanks"))
- then string ""
- else fail "not a footnote or thanks command"
- let contents' = stripFirstAndLast contents
- -- parse the extracted block, which may contain various block elements:
- rest <- getInput
- setInput $ contents'
- blocks <- parseBlocks
- setInput rest
- return $ Note blocks
-
--- | Parse any LaTeX command and return it in a raw TeX inline element.
-rawLaTeXInline :: GenParser Char ParserState Inline
-rawLaTeXInline = try $ do
- notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"]
- state <- getState
- if stateParseRaw state
- then do
- (name, star, args) <- command
- return $ TeX ("\\" ++ name ++ star ++ concat args)
- else do -- skip unknown command, leaving arguments to be parsed
- char '\\'
- letter
- many (letter <|> digit)
- optional (try $ string "{}")
- return $ Str ""
diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs
deleted file mode 100644
index 896f5832e..000000000
--- a/Text/Pandoc/Readers/Markdown.hs
+++ /dev/null
@@ -1,1243 +0,0 @@
-{-# LANGUAGE CPP #-}
-{-
-Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Markdown
- Copyright : Copyright (C) 2006-8 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of markdown-formatted plain text to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.Markdown (
- readMarkdown
- ) where
-
-import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex, intercalate )
-import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit, isUpper )
-import Data.Maybe
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
-import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
- anyHtmlInlineTag, anyHtmlTag,
- anyHtmlEndTag, htmlEndTag, extractTagType,
- htmlBlockElement, unsanitaryURI )
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
-import Text.ParserCombinators.Parsec
-import Control.Monad (when)
-
--- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: ParserState -> String -> Pandoc
-readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n")
-
---
--- Constants and data structure definitions
---
-
-spaceChars :: [Char]
-spaceChars = " \t"
-
-bulletListMarkers :: [Char]
-bulletListMarkers = "*+-"
-
-hruleChars :: [Char]
-hruleChars = "*-_"
-
-setextHChars :: [Char]
-setextHChars = "=-"
-
--- treat these as potentially non-text when parsing inline:
-specialChars :: [Char]
-specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221"
-
---
--- auxiliary functions
---
-
-indentSpaces :: GenParser Char ParserState [Char]
-indentSpaces = try $ do
- state <- getState
- let tabStop = stateTabStop state
- try (count tabStop (char ' ')) <|>
- (many (char ' ') >> string "\t") <?> "indentation"
-
-nonindentSpaces :: GenParser Char ParserState [Char]
-nonindentSpaces = do
- state <- getState
- let tabStop = stateTabStop state
- sps <- many (char ' ')
- if length sps < tabStop
- then return sps
- else unexpected "indented line"
-
--- | Fail unless we're at beginning of a line.
-failUnlessBeginningOfLine :: GenParser tok st ()
-failUnlessBeginningOfLine = do
- pos <- getPosition
- if sourceColumn pos == 1 then return () else fail "not beginning of line"
-
--- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: GenParser tok ParserState ()
-failUnlessSmart = do
- state <- getState
- if stateSmart state then return () else fail "Smart typography feature"
-
--- | Parse a sequence of inline elements between square brackets,
--- including inlines between balanced pairs of square brackets.
-inlinesInBalancedBrackets :: GenParser Char ParserState Inline
- -> GenParser Char ParserState [Inline]
-inlinesInBalancedBrackets parser = try $ do
- char '['
- result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser
- if res == "["
- then return ()
- else pzero
- bal <- inlinesInBalancedBrackets parser
- return $ [Str "["] ++ bal ++ [Str "]"])
- <|> (count 1 parser))
- (char ']')
- return $ concat result
-
---
--- document structure
---
-
-titleLine :: GenParser Char ParserState [Inline]
-titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
-
-authorsLine :: GenParser Char st [String]
-authorsLine = try $ do
- char '%'
- skipSpaces
- authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
- newline
- return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
-
-dateLine :: GenParser Char st String
-dateLine = try $ do
- char '%'
- skipSpaces
- date <- many (noneOf "\n")
- newline
- return $ decodeCharacterReferences $ removeTrailingSpace date
-
-titleBlock :: GenParser Char ParserState ([Inline], [String], [Char])
-titleBlock = try $ do
- failIfStrict
- title <- option [] titleLine
- author <- option [] authorsLine
- date <- option "" dateLine
- optional blanklines
- return (title, author, date)
-
-parseMarkdown :: GenParser Char ParserState Pandoc
-parseMarkdown = do
- -- markdown allows raw HTML
- updateState (\state -> state { stateParseRaw = True })
- startPos <- getPosition
- -- go through once just to get list of reference keys
- -- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
- return . concat
- setInput docMinusKeys
- setPosition startPos
- st <- getState
- -- go through again for notes unless strict...
- if stateStrict st
- then return ()
- else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
- return . concat
- st' <- getState
- let reversedNotes = stateNotes st'
- updateState $ \s -> s { stateNotes = reverse reversedNotes }
- setInput docMinusNotes
- setPosition startPos
- -- now parse it for real...
- (title, author, date) <- option ([],[],"") titleBlock
- blocks <- parseBlocks
- return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
-
---
--- initial pass for references and notes
---
-
-referenceKey :: GenParser Char ParserState [Char]
-referenceKey = try $ do
- startPos <- getPosition
- nonindentSpaces
- lab <- reference
- char ':'
- skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
- let sourceURL excludes = many $
- optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' '))
- src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
- tit <- option "" referenceTitle
- blanklines
- endPos <- getPosition
- let newkey = (lab, (intercalate "%20" $ words $ removeTrailingSpace src, tit))
- st <- getState
- let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = newkey : oldkeys }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
-referenceTitle :: GenParser Char st String
-referenceTitle = try $ do
- skipSpaces >> optional newline >> skipSpaces
- tit <- (charsInBalanced '(' ')' >>= return . unwords . words)
- <|> do delim <- char '\'' <|> char '"'
- manyTill anyChar (try (char delim >> skipSpaces >>
- notFollowedBy (noneOf ")\n")))
- return $ decodeCharacterReferences tit
-
-noteMarker :: GenParser Char st [Char]
-noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']')
-
-rawLine :: GenParser Char ParserState [Char]
-rawLine = do
- notFollowedBy blankline
- notFollowedBy' noteMarker
- contents <- many1 nonEndline
- end <- option "" (newline >> optional indentSpaces >> return "\n")
- return $ contents ++ end
-
-rawLines :: GenParser Char ParserState [Char]
-rawLines = many1 rawLine >>= return . concat
-
-noteBlock :: GenParser Char ParserState [Char]
-noteBlock = try $ do
- startPos <- getPosition
- ref <- noteMarker
- char ':'
- optional blankline
- optional indentSpaces
- raw <- sepBy rawLines (try (blankline >> indentSpaces))
- optional blanklines
- endPos <- getPosition
- -- parse the extracted text, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
- let newnote = (ref, contents)
- st <- getState
- let oldnotes = stateNotes st
- updateState $ \s -> s { stateNotes = newnote : oldnotes }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = manyTill block eof
-
-block :: GenParser Char ParserState Block
-block = do
- st <- getState
- choice (if stateStrict st
- then [ header
- , codeBlockIndented
- , blockQuote
- , hrule
- , bulletList
- , orderedList
- , htmlBlock
- , para
- , plain
- , nullBlock ]
- else [ codeBlockDelimited
- , header
- , table
- , codeBlockIndented
- , lhsCodeBlock
- , blockQuote
- , hrule
- , bulletList
- , orderedList
- , definitionList
- , para
- , rawHtmlBlocks
- , plain
- , nullBlock ]) <?> "block"
-
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = setextHeader <|> atxHeader <?> "header"
-
-atxHeader :: GenParser Char ParserState Block
-atxHeader = try $ do
- level <- many1 (char '#') >>= return . length
- notFollowedBy (char '.' <|> char ')') -- this would be a list
- skipSpaces
- text <- manyTill inline atxClosing >>= return . normalizeSpaces
- return $ Header level text
-
-atxClosing :: GenParser Char st [Char]
-atxClosing = try $ skipMany (char '#') >> blanklines
-
-setextHeader :: GenParser Char ParserState Block
-setextHeader = try $ do
- text <- many1Till inline newline
- underlineChar <- oneOf setextHChars
- many (char underlineChar)
- blanklines
- let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- return $ Header level (normalizeSpaces text)
-
---
--- hrule block
---
-
-hrule :: GenParser Char st Block
-hrule = try $ do
- skipSpaces
- start <- oneOf hruleChars
- count 2 (skipSpaces >> char start)
- skipMany (oneOf spaceChars <|> char start)
- newline
- optional blanklines
- return HorizontalRule
-
---
--- code blocks
---
-
-indentedLine :: GenParser Char ParserState [Char]
-indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
-
-codeBlockDelimiter :: Maybe Int
- -> GenParser Char st (Int, ([Char], [[Char]], [([Char], [Char])]))
-codeBlockDelimiter len = try $ do
- size <- case len of
- Just l -> count l (char '~') >> many (char '~') >> return l
- Nothing -> count 3 (char '~') >> many (char '~') >>=
- return . (+ 3) . length
- many spaceChar
- attr <- option ([],[],[]) attributes
- blankline
- return (size, attr)
-
-attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
-attributes = try $ do
- char '{'
- many spaceChar
- attrs <- many (attribute >>~ many spaceChar)
- char '}'
- let (ids, classes, keyvals) = unzip3 attrs
- let id' = if null ids then "" else head ids
- return (id', concat classes, concat keyvals)
-
-attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
-attribute = identifierAttr <|> classAttr <|> keyValAttr
-
-identifier :: GenParser Char st [Char]
-identifier = do
- first <- letter
- rest <- many alphaNum
- return (first:rest)
-
-identifierAttr :: GenParser Char st ([Char], [a], [a1])
-identifierAttr = try $ do
- char '#'
- result <- identifier
- return (result,[],[])
-
-classAttr :: GenParser Char st ([Char], [[Char]], [a])
-classAttr = try $ do
- char '.'
- result <- identifier
- return ("",[result],[])
-
-keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])])
-keyValAttr = try $ do
- key <- identifier
- char '='
- char '"'
- val <- manyTill (noneOf "\n") (char '"')
- return ("",[],[(key,val)])
-
-codeBlockDelimited :: GenParser Char st Block
-codeBlockDelimited = try $ do
- (size, attr) <- codeBlockDelimiter Nothing
- contents <- manyTill anyLine (codeBlockDelimiter (Just size))
- blanklines
- return $ CodeBlock attr $ intercalate "\n" contents
-
-codeBlockIndented :: GenParser Char ParserState Block
-codeBlockIndented = do
- contents <- many1 (indentedLine <|>
- try (do b <- blanklines
- l <- indentedLine
- return $ b ++ l))
- optional blanklines
- return $ CodeBlock ("",[],[]) $ stripTrailingNewlines $ concat contents
-
-lhsCodeBlock :: GenParser Char ParserState Block
-lhsCodeBlock = do
- failUnlessLHS
- contents <- lhsCodeBlockBird <|> lhsCodeBlockLaTeX
- return $ CodeBlock ("",["sourceCode","haskell"],[]) contents
-
-lhsCodeBlockLaTeX :: GenParser Char ParserState String
-lhsCodeBlockLaTeX = try $ do
- string "\\begin{code}"
- manyTill spaceChar newline
- contents <- many1Till anyChar (try $ string "\\end{code}")
- blanklines
- return $ stripTrailingNewlines contents
-
-lhsCodeBlockBird :: GenParser Char ParserState String
-lhsCodeBlockBird = try $ do
- pos <- getPosition
- when (sourceColumn pos /= 1) $ fail "Not in first column"
- lns <- many1 birdTrackLine
- -- if (as is normal) there is always a space after >, drop it
- let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
- then map (drop 1) lns
- else lns
- blanklines
- return $ intercalate "\n" lns'
-
-birdTrackLine :: GenParser Char st [Char]
-birdTrackLine = do
- char '>'
- manyTill anyChar newline
-
-
---
--- block quotes
---
-
-emailBlockQuoteStart :: GenParser Char ParserState Char
-emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
-
-emailBlockQuote :: GenParser Char ParserState [[Char]]
-emailBlockQuote = try $ do
- emailBlockQuoteStart
- raw <- sepBy (many (nonEndline <|>
- (try (endline >> notFollowedBy emailBlockQuoteStart >>
- return '\n'))))
- (try (newline >> emailBlockQuoteStart))
- newline <|> (eof >> return '\n')
- optional blanklines
- return raw
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = do
- raw <- emailBlockQuote
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
- return $ BlockQuote contents
-
---
--- list blocks
---
-
-bulletListStart :: GenParser Char ParserState ()
-bulletListStart = try $ do
- optional newline -- if preceded by a Plain block in a list context
- nonindentSpaces
- notFollowedBy' hrule -- because hrules start out just like lists
- oneOf bulletListMarkers
- spaceChar
- skipSpaces
-
-anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim)
-anyOrderedListStart = try $ do
- optional newline -- if preceded by a Plain block in a list context
- nonindentSpaces
- notFollowedBy $ string "p." >> spaceChar >> digit -- page number
- state <- getState
- if stateStrict state
- then do many1 digit
- char '.'
- spaceChar
- return (1, DefaultStyle, DefaultDelim)
- else do (num, style, delim) <- anyOrderedListMarker
- -- if it could be an abbreviated first name, insist on more than one space
- if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
- num `elem` [1, 5, 10, 50, 100, 500, 1000]))
- then char '\t' <|> (char ' ' >>~ notFollowedBy (satisfy isUpper))
- else spaceChar
- skipSpaces
- return (num, style, delim)
-
-listStart :: GenParser Char ParserState ()
-listStart = bulletListStart <|> (anyOrderedListStart >> return ())
-
--- parse a line of a list item (start = parser for beginning of list item)
-listLine :: GenParser Char ParserState [Char]
-listLine = try $ do
- notFollowedBy' listStart
- notFollowedBy blankline
- notFollowedBy' (do indentSpaces
- many (spaceChar)
- listStart)
- line <- manyTill anyChar newline
- return $ line ++ "\n"
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: GenParser Char ParserState [Char]
-rawListItem = try $ do
- listStart
- result <- many1 listLine
- blanks <- many blankline
- return $ concat result ++ blanks
-
--- continuation of a list item - indented and separated by blankline
--- or (in compact lists) endline.
--- note: nested lists are parsed as continuations
-listContinuation :: GenParser Char ParserState [Char]
-listContinuation = try $ do
- lookAhead indentSpaces
- result <- many1 listContinuationLine
- blanks <- many blankline
- return $ concat result ++ blanks
-
-listContinuationLine :: GenParser Char ParserState [Char]
-listContinuationLine = try $ do
- notFollowedBy blankline
- notFollowedBy' listStart
- optional indentSpaces
- result <- manyTill anyChar newline
- return $ result ++ "\n"
-
-listItem :: GenParser Char ParserState [Block]
-listItem = try $ do
- first <- rawListItem
- continuations <- many listContinuation
- -- parsing with ListItemState forces markers at beginning of lines to
- -- count as list item markers, even if not separated by blank space.
- -- see definition of "endline"
- state <- getState
- let oldContext = stateParserContext state
- setState $ state {stateParserContext = ListItemState}
- -- parse the extracted block, which may contain various block elements:
- let raw = concat (first:continuations)
- contents <- parseFromString parseBlocks raw
- updateState (\st -> st {stateParserContext = oldContext})
- return contents
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
- (start, style, delim) <- lookAhead anyOrderedListStart
- items <- many1 listItem
- return $ OrderedList (start, style, delim) $ compactify items
-
-bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
- lookAhead bulletListStart
- many1 listItem >>= return . BulletList . compactify
-
--- definition lists
-
-definitionListItem :: GenParser Char ParserState ([Inline], [Block])
-definitionListItem = try $ do
- notFollowedBy blankline
- notFollowedBy' indentSpaces
- -- first, see if this has any chance of being a definition list:
- lookAhead (anyLine >> char ':')
- term <- manyTill inline newline
- raw <- many1 defRawBlock
- state <- getState
- let oldContext = stateParserContext state
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ concat raw
- updateState (\st -> st {stateParserContext = oldContext})
- return ((normalizeSpaces term), contents)
-
-defRawBlock :: GenParser Char ParserState [Char]
-defRawBlock = try $ do
- char ':'
- state <- getState
- let tabStop = stateTabStop state
- try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
- firstline <- anyLine
- rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
- trailing <- option "" blanklines
- return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
-
-definitionList :: GenParser Char ParserState Block
-definitionList = do
- items <- many1 definitionListItem
- let (terms, defs) = unzip items
- let defs' = compactify defs
- let items' = zip terms defs'
- return $ DefinitionList items'
-
---
--- paragraph block
---
-
-isHtmlOrBlank :: Inline -> Bool
-isHtmlOrBlank (HtmlInline _) = True
-isHtmlOrBlank (Space) = True
-isHtmlOrBlank (LineBreak) = True
-isHtmlOrBlank _ = False
-
-para :: GenParser Char ParserState Block
-para = try $ do
- result <- many1 inline
- if all isHtmlOrBlank result
- then fail "treat as raw HTML"
- else return ()
- newline
- blanklines <|> do st <- getState
- if stateStrict st
- then lookAhead (blockQuote <|> header) >> return ""
- else pzero
- return $ Para $ normalizeSpaces result
-
-plain :: GenParser Char ParserState Block
-plain = many1 inline >>= return . Plain . normalizeSpaces
-
---
--- raw html
---
-
-htmlElement :: GenParser Char ParserState [Char]
-htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
-
-htmlBlock :: GenParser Char ParserState Block
-htmlBlock = try $ do
- failUnlessBeginningOfLine
- first <- htmlElement
- finalSpace <- many (oneOf spaceChars)
- finalNewlines <- many newline
- return $ RawHtml $ first ++ finalSpace ++ finalNewlines
-
--- True if tag is self-closing
-isSelfClosing :: [Char] -> Bool
-isSelfClosing tag =
- isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
-
-strictHtmlBlock :: GenParser Char ParserState [Char]
-strictHtmlBlock = try $ do
- tag <- anyHtmlBlockTag
- let tag' = extractTagType tag
- if isSelfClosing tag || tag' == "hr"
- then return tag
- else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
- (htmlElement <|> (count 1 anyChar)))
- end <- htmlEndTag tag'
- return $ tag ++ concat contents ++ end
-
-rawHtmlBlocks :: GenParser Char ParserState Block
-rawHtmlBlocks = do
- htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock
- sps <- do sp1 <- many spaceChar
- sp2 <- option "" (blankline >> return "\n")
- sp3 <- many spaceChar
- sp4 <- option "" blanklines
- return $ sp1 ++ sp2 ++ sp3 ++ sp4
- -- note: we want raw html to be able to
- -- precede a code block, when separated
- -- by a blank line
- return $ blk ++ sps
- let combined = concat htmlBlocks
- let combined' = if last combined == '\n' then init combined else combined
- return $ RawHtml combined'
-
---
--- Tables
---
-
--- Parse a dashed line with optional trailing spaces; return its length
--- and the length including trailing space.
-dashedLine :: Char
- -> GenParser Char st (Int, Int)
-dashedLine ch = do
- dashes <- many1 (char ch)
- sp <- many spaceChar
- return $ (length dashes, length $ dashes ++ sp)
-
--- Parse a table header with dashed lines of '-' preceded by
--- one line of text.
-simpleTableHeader :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
-simpleTableHeader = try $ do
- rawContent <- anyLine
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines') = unzip dashes
- let indices = scanl (+) (length initSp) lines'
- let rawHeads = tail $ splitByIndices (init indices) rawContent
- let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
- return (rawHeads, aligns, indices)
-
--- Parse a table footer - dashed lines followed by blank line.
-tableFooter :: GenParser Char ParserState [Char]
-tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
-
--- Parse a table separator - dashed line.
-tableSep :: GenParser Char ParserState String
-tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
-
--- Parse a raw line and split it into chunks by indices.
-rawTableLine :: [Int]
- -> GenParser Char ParserState [String]
-rawTableLine indices = do
- notFollowedBy' (blanklines <|> tableFooter)
- line <- many1Till anyChar newline
- return $ map removeLeadingTrailingSpace $ tail $
- splitByIndices (init indices) line
-
--- Parse a table line and return a list of lists of blocks (columns).
-tableLine :: [Int]
- -> GenParser Char ParserState [[Block]]
-tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
-
--- Parse a multiline table row and return a list of blocks (columns).
-multilineRow :: [Int]
- -> GenParser Char ParserState [[Block]]
-multilineRow indices = do
- colLines <- many1 (rawTableLine indices)
- optional blanklines
- let cols = map unlines $ transpose colLines
- mapM (parseFromString (many plain)) cols
-
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths = zipWith (-) indices (0:indices)
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
--- Parses a table caption: inlines beginning with 'Table:'
--- and followed by blank lines.
-tableCaption :: GenParser Char ParserState [Inline]
-tableCaption = try $ do
- nonindentSpaces
- string "Table:"
- result <- many1 inline
- blanklines
- return $ normalizeSpaces result
-
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState end
- -> GenParser Char ParserState Block
-tableWith headerParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines' <- many1Till (lineParser indices) footerParser
- caption <- option [] tableCaption
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table caption aligns widths heads lines'
-
--- Parse a simple table with '---' header and one line per row.
-simpleTable :: GenParser Char ParserState Block
-simpleTable = tableWith simpleTableHeader tableLine blanklines
-
--- Parse a multiline table: starts with row of '-' on top, then header
--- (which may be multiline), then the rows,
--- which may be multiline, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
-multilineTable :: GenParser Char ParserState Block
-multilineTable = tableWith multilineTableHeader multilineRow tableFooter
-
-multilineTableHeader :: GenParser Char ParserState ([String], [Alignment], [Int])
-multilineTableHeader = try $ do
- tableSep
- rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines') = unzip dashes
- let indices = scanl (+) (length initSp) lines'
- let rawHeadsList = transpose $ map
- (\ln -> tail $ splitByIndices (init indices) ln)
- rawContent
- let rawHeads = map (intercalate " ") rawHeadsList
- let aligns = zipWith alignType rawHeadsList lengths
- return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
-
--- Returns an alignment type for a table, based on a list of strings
--- (the rows of the column header) and a number (the length of the
--- dashed line under the rows.
-alignType :: [String]
- -> Int
- -> Alignment
-alignType [] _ = AlignDefault
-alignType strLst len =
- let s = head $ sortBy (comparing length) $
- map removeTrailingSpace strLst
- leftSpace = if null s then False else (s !! 0) `elem` " \t"
- rightSpace = length s < len || (s !! (len - 1)) `elem` " \t"
- in case (leftSpace, rightSpace) of
- (True, False) -> AlignRight
- (False, True) -> AlignLeft
- (True, True) -> AlignCenter
- (False, False) -> AlignDefault
-
-table :: GenParser Char ParserState Block
-table = simpleTable <|> multilineTable <?> "table"
-
---
--- inline
---
-
-inline :: GenParser Char ParserState Inline
-inline = choice inlineParsers <?> "inline"
-
-inlineParsers :: [GenParser Char ParserState Inline]
-inlineParsers = [ abbrev
- , str
- , smartPunctuation
- , whitespace
- , endline
- , code
- , charRef
- , strong
- , emph
- , note
- , inlineNote
- , link
-#ifdef _CITEPROC
- , inlineCitation
-#endif
- , image
- , math
- , strikeout
- , superscript
- , subscript
- , autoLink
- , rawHtmlInline'
- , rawLaTeXInline'
- , escapedChar
- , symbol
- , ltSign ]
-
-inlineNonLink :: GenParser Char ParserState Inline
-inlineNonLink = (choice $
- map (\parser -> try (parser >>= failIfLink)) inlineParsers)
- <?> "inline (non-link)"
-
-failIfLink :: Inline -> GenParser tok st Inline
-failIfLink (Link _ _) = pzero
-failIfLink elt = return elt
-
-escapedChar :: GenParser Char ParserState Inline
-escapedChar = do
- char '\\'
- state <- getState
- result <- option '\\' $ if stateStrict state
- then oneOf "\\`*_{}[]()>#+-.!~"
- else satisfy (not . isAlphaNum)
- let result' = if result == ' '
- then '\160' -- '\ ' is a nonbreaking space
- else result
- return $ Str [result']
-
-ltSign :: GenParser Char ParserState Inline
-ltSign = do
- st <- getState
- if stateStrict st
- then char '<'
- else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
- return $ Str ['<']
-
-specialCharsMinusLt :: [Char]
-specialCharsMinusLt = filter (/= '<') specialChars
-
-symbol :: GenParser Char ParserState Inline
-symbol = do
- result <- oneOf specialCharsMinusLt
- return $ Str [result]
-
--- parses inline code, between n `s and n `s
-code :: GenParser Char ParserState Inline
-code = try $ do
- starts <- many1 (char '`')
- skipSpaces
- result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
- (char '\n' >> return " "))
- (try (skipSpaces >> count (length starts) (char '`') >>
- notFollowedBy (char '`')))
- return $ Code $ removeLeadingTrailingSpace $ concat result
-
-mathWord :: GenParser Char st [Char]
-mathWord = many1 ((noneOf " \t\n\\$") <|>
- (try (char '\\') >>~ notFollowedBy (char '$')))
-
-math :: GenParser Char ParserState Inline
-math = (mathDisplay >>= return . Math DisplayMath)
- <|> (mathInline >>= return . Math InlineMath)
-
-mathDisplay :: GenParser Char ParserState String
-mathDisplay = try $ do
- failIfStrict
- string "$$"
- many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$")
-
-mathInline :: GenParser Char ParserState String
-mathInline = try $ do
- failIfStrict
- char '$'
- notFollowedBy space
- words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline)))
- char '$'
- notFollowedBy digit
- return $ intercalate " " words'
-
-emph :: GenParser Char ParserState Inline
-emph = ((enclosed (char '*') (notFollowedBy' strong >> char '*') inline) <|>
- (enclosed (char '_') (notFollowedBy' strong >> char '_' >>
- notFollowedBy alphaNum) inline)) >>=
- return . Emph . normalizeSpaces
-
-strong :: GenParser Char ParserState Inline
-strong = ((enclosed (string "**") (try $ string "**") inline) <|>
- (enclosed (string "__") (try $ string "__") inline)) >>=
- return . Strong . normalizeSpaces
-
-strikeout :: GenParser Char ParserState Inline
-strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
- return . Strikeout . normalizeSpaces
-
-superscript :: GenParser Char ParserState Inline
-superscript = failIfStrict >> enclosed (char '^') (char '^')
- (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
- return . Superscript
-
-subscript :: GenParser Char ParserState Inline
-subscript = failIfStrict >> enclosed (char '~') (char '~')
- (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
- return . Subscript
-
-abbrev :: GenParser Char ParserState Inline
-abbrev = failUnlessSmart >>
- (assumedAbbrev <|> knownAbbrev) >>= return . Str . (++ ".\160")
-
--- an string of letters followed by a period that does not end a sentence
--- is assumed to be an abbreviation. It is assumed that sentences don't
--- start with lowercase letters or numerals.
-assumedAbbrev :: GenParser Char ParserState [Char]
-assumedAbbrev = try $ do
- result <- many1 $ satisfy isAlpha
- string ". "
- lookAhead $ satisfy (\x -> isLower x || isDigit x)
- return result
-
--- these strings are treated as abbreviations even if they are followed
--- by a capital letter (such as a name).
-knownAbbrev :: GenParser Char ParserState [Char]
-knownAbbrev = try $ do
- result <- oneOfStrings [ "Mr", "Mrs", "Ms", "Capt", "Dr", "Prof", "Gen",
- "Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs",
- "Sen", "Rep", "Pres", "Hon", "Rev" ]
- string ". "
- return result
-
-smartPunctuation :: GenParser Char ParserState Inline
-smartPunctuation = failUnlessSmart >>
- choice [ quoted, apostrophe, dash, ellipses ]
-
-apostrophe :: GenParser Char ParserState Inline
-apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
-
-quoted :: GenParser Char ParserState Inline
-quoted = doubleQuoted <|> singleQuoted
-
-withQuoteContext :: QuoteContext
- -> (GenParser Char ParserState Inline)
- -> GenParser Char ParserState Inline
-withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-
-singleQuoted :: GenParser Char ParserState Inline
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
- return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted :: GenParser Char ParserState Inline
-doubleQuoted = try $ do
- doubleQuoteStart
- withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
- return . Quoted DoubleQuote . normalizeSpaces
-
-failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
-failIfInQuoteContext context = do
- st <- getState
- if stateQuoteContext st == context
- then fail "already inside quotes"
- else return ()
-
-singleQuoteStart :: GenParser Char ParserState Char
-singleQuoteStart = do
- failIfInQuoteContext InSingleQuote
- char '\8216' <|>
- (try $ do char '\''
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
- -- possess/contraction
- return '\'')
-
-singleQuoteEnd :: GenParser Char st Char
-singleQuoteEnd = try $ do
- char '\8217' <|> char '\''
- notFollowedBy alphaNum
- return '\''
-
-doubleQuoteStart :: GenParser Char ParserState Char
-doubleQuoteStart = do
- failIfInQuoteContext InDoubleQuote
- char '\8220' <|>
- (try $ do char '"'
- notFollowedBy (oneOf " \t\n")
- return '"')
-
-doubleQuoteEnd :: GenParser Char st Char
-doubleQuoteEnd = char '\8221' <|> char '"'
-
-ellipses :: GenParser Char st Inline
-ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
-
-dash :: GenParser Char st Inline
-dash = enDash <|> emDash
-
-enDash :: GenParser Char st Inline
-enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
-
-emDash :: GenParser Char st Inline
-emDash = oneOfStrings ["---", "--"] >> return EmDash
-
-whitespace :: GenParser Char ParserState Inline
-whitespace = do
- sps <- many1 (oneOf spaceChars)
- if length sps >= 2
- then option Space (endline >> return LineBreak)
- else return Space <?> "whitespace"
-
-nonEndline :: GenParser Char st Char
-nonEndline = satisfy (/='\n')
-
-strChar :: GenParser Char st Char
-strChar = noneOf (specialChars ++ spaceChars ++ "\n")
-
-str :: GenParser Char st Inline
-str = many1 strChar >>= return . Str
-
--- an endline character that can be treated as a space, not a structural break
-endline :: GenParser Char ParserState Inline
-endline = try $ do
- newline
- notFollowedBy blankline
- st <- getState
- if stateStrict st
- then do notFollowedBy emailBlockQuoteStart
- notFollowedBy (char '#') -- atx header
- else return ()
- -- parse potential list-starts differently if in a list:
- if stateParserContext st == ListItemState
- then notFollowedBy' (bulletListStart <|>
- (anyOrderedListStart >> return ()))
- else return ()
- return Space
-
---
--- links
---
-
--- a reference label for a link
-reference :: GenParser Char ParserState [Inline]
-reference = do notFollowedBy' (string "[^") -- footnote reference
- result <- inlinesInBalancedBrackets inlineNonLink
- return $ normalizeSpaces result
-
--- source for a link, with optional title
-source :: GenParser Char st (String, [Char])
-source =
- (try $ charsInBalanced '(' ')' >>= parseFromString source') <|>
- -- the following is needed for cases like: [ref](/url(a).
- (enclosed (char '(') (char ')') anyChar >>=
- parseFromString source')
-
--- auxiliary function for source
-source' :: GenParser Char st (String, [Char])
-source' = do
- skipSpaces
- let sourceURL excludes = many $
- optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' '))
- src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
- tit <- option "" linkTitle
- skipSpaces
- eof
- return (intercalate "%20" $ words $ removeTrailingSpace src, tit)
-
-linkTitle :: GenParser Char st String
-linkTitle = try $ do
- (many1 spaceChar >> option '\n' newline) <|> newline
- skipSpaces
- delim <- oneOf "'\""
- tit <- manyTill (optional (char '\\') >> anyChar)
- (try (char delim >> skipSpaces >> eof))
- return $ decodeCharacterReferences tit
-
-link :: GenParser Char ParserState Inline
-link = try $ do
- lab <- reference
- src <- source <|> referenceLink lab
- sanitize <- getState >>= return . stateSanitizeHTML
- if sanitize && unsanitaryURI (fst src)
- then fail "Unsanitary URI"
- else return $ Link lab src
-
--- a link like [this][ref] or [this][] or [this]
-referenceLink :: [Inline]
- -> GenParser Char ParserState (String, [Char])
-referenceLink lab = do
- ref <- option [] (try (optional (char ' ') >>
- optional (newline >> skipSpaces) >> reference))
- let ref' = if null ref then lab else ref
- state <- getState
- case lookupKeySrc (stateKeys state) ref' of
- Nothing -> fail "no corresponding key"
- Just target -> return target
-
-autoLink :: GenParser Char ParserState Inline
-autoLink = try $ do
- char '<'
- src <- uri <|> (emailAddress >>= (return . ("mailto:" ++)))
- char '>'
- let src' = if "mailto:" `isPrefixOf` src
- then drop 7 src
- else src
- st <- getState
- let sanitize = stateSanitizeHTML st
- if sanitize && unsanitaryURI src
- then fail "Unsanitary URI"
- else return $ if stateStrict st
- then Link [Str src'] (src, "")
- else Link [Code src'] (src, "")
-
-image :: GenParser Char ParserState Inline
-image = try $ do
- char '!'
- (Link lab src) <- link
- return $ Image lab src
-
-note :: GenParser Char ParserState Inline
-note = try $ do
- failIfStrict
- ref <- noteMarker
- state <- getState
- let notes = stateNotes state
- case lookup ref notes of
- Nothing -> fail "note not found"
- Just contents -> return $ Note contents
-
-inlineNote :: GenParser Char ParserState Inline
-inlineNote = try $ do
- failIfStrict
- char '^'
- contents <- inlinesInBalancedBrackets inline
- return $ Note [Para contents]
-
-rawLaTeXInline' :: GenParser Char ParserState Inline
-rawLaTeXInline' = do
- failIfStrict
- (rawConTeXtEnvironment' >>= return . TeX)
- <|> (rawLaTeXEnvironment' >>= return . TeX)
- <|> rawLaTeXInline
-
-rawConTeXtEnvironment' :: GenParser Char st String
-rawConTeXtEnvironment' = try $ do
- string "\\start"
- completion <- inBrackets (letter <|> digit <|> spaceChar)
- <|> (many1 letter)
- contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar))
- (try $ string "\\stop" >> string completion)
- return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
-
-inBrackets :: (GenParser Char st Char) -> GenParser Char st String
-inBrackets parser = do
- char '['
- contents <- many parser
- char ']'
- return $ "[" ++ contents ++ "]"
-
-rawHtmlInline' :: GenParser Char ParserState Inline
-rawHtmlInline' = do
- st <- getState
- result <- if stateStrict st
- then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
- else anyHtmlInlineTag
- return $ HtmlInline result
-
-#ifdef _CITEPROC
-inlineCitation :: GenParser Char ParserState Inline
-inlineCitation = try $ do
- failIfStrict
- cit <- citeMarker
- let citations = readWith parseCitation defaultParserState cit
- mr <- mapM chkCit citations
- if catMaybes mr /= []
- then return $ Cite citations []
- else fail "no citation found"
-
-chkCit :: Target -> GenParser Char ParserState (Maybe Target)
-chkCit t = do
- st <- getState
- case lookupKeySrc (stateKeys st) [Str $ fst t] of
- Just _ -> fail "This is a link"
- Nothing -> if elem (fst t) $ stateCitations st
- then return $ Just t
- else return $ Nothing
-
-citeMarker :: GenParser Char ParserState String
-citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']')
-
-parseCitation :: GenParser Char ParserState [(String,String)]
-parseCitation = try $ sepBy (parseLabel) (oneOf ";")
-
-parseLabel :: GenParser Char ParserState (String,String)
-parseLabel = try $ do
- res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@")
- case res of
- [lab,loc] -> return (lab, loc)
- [lab] -> return (lab, "" )
- _ -> return ("" , "" )
-
-#endif
diff --git a/Text/Pandoc/Readers/RST.hs b/Text/Pandoc/Readers/RST.hs
deleted file mode 100644
index 255054c10..000000000
--- a/Text/Pandoc/Readers/RST.hs
+++ /dev/null
@@ -1,707 +0,0 @@
-{-
-Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.RST
- Copyright : Copyright (C) 2006-8 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion from reStructuredText to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.RST (
- readRST
- ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.ParserCombinators.Parsec
-import Control.Monad ( when )
-import Data.List ( findIndex, delete, intercalate )
-
--- | Parse reStructuredText string and return Pandoc document.
-readRST :: ParserState -> String -> Pandoc
-readRST state s = (readWith parseRST) state (s ++ "\n\n")
-
---
--- Constants and data structure definitions
----
-
-bulletListMarkers :: [Char]
-bulletListMarkers = "*+-"
-
-underlineChars :: [Char]
-underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
-
--- treat these as potentially non-text when parsing inline:
-specialChars :: [Char]
-specialChars = "\\`|*_<>$:[-"
-
---
--- parsing documents
---
-
-isHeader :: Int -> Block -> Bool
-isHeader n (Header x _) = x == n
-isHeader _ _ = False
-
--- | Promote all headers in a list of blocks. (Part of
--- title transformation for RST.)
-promoteHeaders :: Int -> [Block] -> [Block]
-promoteHeaders num ((Header level text):rest) =
- (Header (level - num) text):(promoteHeaders num rest)
-promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
-promoteHeaders _ [] = []
-
--- | If list of blocks starts with a header (or a header and subheader)
--- of level that are not found elsewhere, return it as a title and
--- promote all the other headers.
-titleTransform :: [Block] -- ^ list of blocks
- -> ([Block], [Inline]) -- ^ modified list of blocks, title
-titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
- if (any (isHeader 1) rest) || (any (isHeader 2) rest)
- then ((Header 1 head1):(Header 2 head2):rest, [])
- else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
-titleTransform ((Header 1 head1):rest) = -- title, no subtitle
- if (any (isHeader 1) rest)
- then ((Header 1 head1):rest, [])
- else ((promoteHeaders 1 rest), head1)
-titleTransform blocks = (blocks, [])
-
-parseRST :: GenParser Char ParserState Pandoc
-parseRST = do
- startPos <- getPosition
- -- go through once just to get list of reference keys
- -- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
- setInput docMinusKeys
- setPosition startPos
- st <- getState
- let reversedKeys = stateKeys st
- updateState $ \s -> s { stateKeys = reverse reversedKeys }
- -- now parse it for real...
- blocks <- parseBlocks
- let blocks' = filter (/= Null) blocks
- state <- getState
- let (blocks'', title) = if stateStandalone state
- then titleTransform blocks'
- else (blocks', [])
- let authors = stateAuthors state
- let date = stateDate state
- let title' = if (null title) then (stateTitle state) else title
- return $ Pandoc (Meta title' authors date) blocks''
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = manyTill block eof
-
-block :: GenParser Char ParserState Block
-block = choice [ codeBlock
- , rawHtmlBlock
- , rawLaTeXBlock
- , fieldList
- , blockQuote
- , imageBlock
- , unknownDirective
- , header
- , hrule
- , list
- , lineBlock
- , lhsCodeBlock
- , para
- , plain
- , nullBlock ] <?> "block"
-
---
--- field list
---
-
-fieldListItem :: String -> GenParser Char st ([Char], [Char])
-fieldListItem indent = try $ do
- string indent
- char ':'
- name <- many1 alphaNum
- string ": "
- skipSpaces
- first <- manyTill anyChar newline
- rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >>
- indentedBlock
- return (name, intercalate " " (first:(lines rest)))
-
-fieldList :: GenParser Char ParserState Block
-fieldList = try $ do
- indent <- lookAhead $ many (oneOf " \t")
- items <- many1 $ fieldListItem indent
- blanklines
- let authors = case lookup "Authors" items of
- Just auth -> [auth]
- Nothing -> map snd (filter (\(x,_) -> x == "Author") items)
- if null authors
- then return ()
- else updateState $ \st -> st {stateAuthors = authors}
- case (lookup "Date" items) of
- Just dat -> updateState $ \st -> st {stateDate = dat}
- Nothing -> return ()
- case (lookup "Title" items) of
- Just tit -> parseFromString (many inline) tit >>=
- \t -> updateState $ \st -> st {stateTitle = t}
- Nothing -> return ()
- let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") &&
- (x /= "Date") && (x /= "Title")) items
- if null remaining
- then return Null
- else do terms <- mapM (return . (:[]) . Str . fst) remaining
- defs <- mapM (parseFromString (many block) . snd)
- remaining
- return $ DefinitionList $ zip terms defs
-
---
--- line block
---
-
-lineBlockLine :: GenParser Char ParserState [Inline]
-lineBlockLine = try $ do
- string "| "
- white <- many (oneOf " \t")
- line <- manyTill inline newline
- return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
-
-lineBlock :: GenParser Char ParserState Block
-lineBlock = try $ do
- lines' <- many1 lineBlockLine
- blanklines
- return $ Para (concat lines')
-
---
--- paragraph block
---
-
-para :: GenParser Char ParserState Block
-para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
-
-codeBlockStart :: GenParser Char st Char
-codeBlockStart = string "::" >> blankline >> blankline
-
--- paragraph that ends in a :: starting a code block
-paraBeforeCodeBlock :: GenParser Char ParserState Block
-paraBeforeCodeBlock = try $ do
- result <- many1 (notFollowedBy' codeBlockStart >> inline)
- lookAhead (string "::")
- return $ Para $ if last result == Space
- then normalizeSpaces result
- else (normalizeSpaces result) ++ [Str ":"]
-
--- regular paragraph
-paraNormal :: GenParser Char ParserState Block
-paraNormal = try $ do
- result <- many1 inline
- newline
- blanklines
- return $ Para $ normalizeSpaces result
-
-plain :: GenParser Char ParserState Block
-plain = many1 inline >>= return . Plain . normalizeSpaces
-
---
--- image block
---
-
-imageBlock :: GenParser Char st Block
-imageBlock = try $ do
- string ".. image:: "
- src <- manyTill anyChar newline
- fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
- many1 $ fieldListItem indent
- optional blanklines
- case lookup "alt" fields of
- Just alt -> return $ Plain [Image [Str alt] (src, alt)]
- Nothing -> return $ Plain [Image [Str "image"] (src, "")]
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = doubleHeader <|> singleHeader <?> "header"
-
--- a header with lines on top and bottom
-doubleHeader :: GenParser Char ParserState Block
-doubleHeader = try $ do
- c <- oneOf underlineChars
- rest <- many (char c) -- the top line
- let lenTop = length (c:rest)
- skipSpaces
- newline
- txt <- many1 (notFollowedBy blankline >> inline)
- pos <- getPosition
- let len = (sourceColumn pos) - 1
- if (len > lenTop) then fail "title longer than border" else return ()
- blankline -- spaces and newline
- count lenTop (char c) -- the bottom line
- blanklines
- -- check to see if we've had this kind of header before.
- -- if so, get appropriate level. if not, add to list.
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- return $ Header level (normalizeSpaces txt)
-
--- a header with line on the bottom only
-singleHeader :: GenParser Char ParserState Block
-singleHeader = try $ do
- notFollowedBy' whitespace
- txt <- many1 (do {notFollowedBy blankline; inline})
- pos <- getPosition
- let len = (sourceColumn pos) - 1
- blankline
- c <- oneOf underlineChars
- count (len - 1) (char c)
- many (char c)
- blanklines
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- return $ Header level (normalizeSpaces txt)
-
---
--- hrule block
---
-
-hrule :: GenParser Char st Block
-hrule = try $ do
- chr <- oneOf underlineChars
- count 3 (char chr)
- skipMany (char chr)
- blankline
- blanklines
- return HorizontalRule
-
---
--- code blocks
---
-
--- read a line indented by a given string
-indentedLine :: String -> GenParser Char st [Char]
-indentedLine indents = try $ do
- string indents
- result <- manyTill anyChar newline
- return $ result ++ "\n"
-
--- two or more indented lines, possibly separated by blank lines.
--- any amount of indentation will work.
-indentedBlock :: GenParser Char st [Char]
-indentedBlock = do
- indents <- lookAhead $ many1 (oneOf " \t")
- lns <- many $ choice $ [ indentedLine indents,
- try $ do b <- blanklines
- l <- indentedLine indents
- return (b ++ l) ]
- optional blanklines
- return $ concat lns
-
-codeBlock :: GenParser Char st Block
-codeBlock = try $ do
- codeBlockStart
- result <- indentedBlock
- return $ CodeBlock ("",[],[]) $ stripTrailingNewlines result
-
-lhsCodeBlock :: GenParser Char ParserState Block
-lhsCodeBlock = try $ do
- failUnlessLHS
- pos <- getPosition
- when (sourceColumn pos /= 1) $ fail "Not in first column"
- lns <- many1 birdTrackLine
- -- if (as is normal) there is always a space after >, drop it
- let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
- then map (drop 1) lns
- else lns
- blanklines
- return $ CodeBlock ("", ["sourceCode", "haskell"], []) $ intercalate "\n" lns'
-
-birdTrackLine :: GenParser Char st [Char]
-birdTrackLine = do
- char '>'
- manyTill anyChar newline
-
---
--- raw html
---
-
-rawHtmlBlock :: GenParser Char st Block
-rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
- indentedBlock >>= return . RawHtml
-
---
--- raw latex
---
-
-rawLaTeXBlock :: GenParser Char st Block
-rawLaTeXBlock = try $ do
- string ".. raw:: latex"
- blanklines
- result <- indentedBlock
- return $ Para [(TeX result)]
-
---
--- block quotes
---
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = do
- raw <- indentedBlock
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n\n"
- return $ BlockQuote contents
-
---
--- list blocks
---
-
-list :: GenParser Char ParserState Block
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-definitionListItem :: GenParser Char ParserState ([Inline], [Block])
-definitionListItem = try $ do
- -- avoid capturing a directive or comment
- notFollowedBy (try $ char '.' >> char '.')
- term <- many1Till inline endline
- raw <- indentedBlock
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n\n"
- return (normalizeSpaces term, contents)
-
-definitionList :: GenParser Char ParserState Block
-definitionList = many1 definitionListItem >>= return . DefinitionList
-
--- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: GenParser Char st Int
-bulletListStart = try $ do
- notFollowedBy' hrule -- because hrules start out just like lists
- marker <- oneOf bulletListMarkers
- white <- many1 spaceChar
- return $ length (marker:white)
-
--- parses ordered list start and returns its length (inc following whitespace)
-orderedListStart :: ListNumberStyle
- -> ListNumberDelim
- -> GenParser Char st Int
-orderedListStart style delim = try $ do
- (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
- white <- many1 spaceChar
- return $ markerLen + length white
-
--- parse a line of a list item
-listLine :: Int -> GenParser Char ParserState [Char]
-listLine markerLength = try $ do
- notFollowedBy blankline
- indentWith markerLength
- line <- manyTill anyChar newline
- return $ line ++ "\n"
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Int -> GenParser Char ParserState [Char]
-indentWith num = do
- state <- getState
- let tabStop = stateTabStop state
- if (num < tabStop)
- then count num (char ' ')
- else choice [ try (count num (char ' ')),
- (try (char '\t' >> count (num - tabStop) (char ' '))) ]
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: GenParser Char ParserState Int
- -> GenParser Char ParserState (Int, [Char])
-rawListItem start = try $ do
- markerLength <- start
- firstLine <- manyTill anyChar newline
- restLines <- many (listLine markerLength)
- return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
-
--- continuation of a list item - indented and separated by blankline or
--- (in compact lists) endline.
--- Note: nested lists are parsed as continuations.
-listContinuation :: Int -> GenParser Char ParserState [Char]
-listContinuation markerLength = try $ do
- blanks <- many1 blankline
- result <- many1 (listLine markerLength)
- return $ blanks ++ concat result
-
-listItem :: GenParser Char ParserState Int
- -> GenParser Char ParserState [Block]
-listItem start = try $ do
- (markerLength, first) <- rawListItem start
- rest <- many (listContinuation markerLength)
- blanks <- choice [ try (many blankline >>~ lookAhead start),
- many1 blankline ] -- whole list must end with blank.
- -- parsing with ListItemState forces markers at beginning of lines to
- -- count as list item markers, even if not separated by blank space.
- -- see definition of "endline"
- state <- getState
- let oldContext = stateParserContext state
- setState $ state {stateParserContext = ListItemState}
- -- parse the extracted block, which may itself contain block elements
- parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
- updateState (\st -> st {stateParserContext = oldContext})
- return parsed
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
- (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
- items <- many1 (listItem (orderedListStart style delim))
- let items' = compactify items
- return $ OrderedList (start, style, delim) items'
-
-bulletList :: GenParser Char ParserState Block
-bulletList = many1 (listItem bulletListStart) >>=
- return . BulletList . compactify
-
---
--- unknown directive (e.g. comment)
---
-
-unknownDirective :: GenParser Char st Block
-unknownDirective = try $ do
- string ".."
- notFollowedBy (noneOf " \t\n")
- manyTill anyChar newline
- many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline)
- return Null
-
---
--- reference key
---
-
-quotedReferenceName :: GenParser Char ParserState [Inline]
-quotedReferenceName = try $ do
- char '`' >> notFollowedBy (char '`') -- `` means inline code!
- label' <- many1Till inline (char '`')
- return label'
-
-unquotedReferenceName :: GenParser Char ParserState [Inline]
-unquotedReferenceName = try $ do
- label' <- many1Till inline (lookAhead $ char ':')
- return label'
-
-isolated :: Char -> GenParser Char st Char
-isolated ch = try $ char ch >>~ notFollowedBy (char ch)
-
-simpleReferenceName :: GenParser Char st [Inline]
-simpleReferenceName = do
- raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|>
- (try $ char '_' >>~ lookAhead alphaNum))
- return [Str raw]
-
-referenceName :: GenParser Char ParserState [Inline]
-referenceName = quotedReferenceName <|>
- (try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
- unquotedReferenceName
-
-referenceKey :: GenParser Char ParserState [Char]
-referenceKey = do
- startPos <- getPosition
- key <- choice [imageKey, anonymousKey, regularKey]
- st <- getState
- let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = key : oldkeys }
- optional blanklines
- endPos <- getPosition
- -- return enough blanks to replace key
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
-targetURI :: GenParser Char st [Char]
-targetURI = do
- skipSpaces
- optional newline
- contents <- many1 (try (many spaceChar >> newline >>
- many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
- blanklines
- return contents
-
-imageKey :: GenParser Char ParserState ([Inline], (String, [Char]))
-imageKey = try $ do
- string ".. |"
- ref <- manyTill inline (char '|')
- skipSpaces
- string "image::"
- src <- targetURI
- return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
-anonymousKey :: GenParser Char st ([Inline], (String, [Char]))
-anonymousKey = try $ do
- oneOfStrings [".. __:", "__"]
- src <- targetURI
- return ([Str "_"], (removeLeadingTrailingSpace src, ""))
-
-regularKey :: GenParser Char ParserState ([Inline], (String, [Char]))
-regularKey = try $ do
- string ".. _"
- ref <- referenceName
- char ':'
- src <- targetURI
- return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
-
- --
- -- inline
- --
-
-inline :: GenParser Char ParserState Inline
-inline = choice [ link
- , str
- , whitespace
- , endline
- , strong
- , emph
- , code
- , image
- , hyphens
- , superscript
- , subscript
- , escapedChar
- , symbol ] <?> "inline"
-
-hyphens :: GenParser Char ParserState Inline
-hyphens = do
- result <- many1 (char '-')
- option Space endline
- -- don't want to treat endline after hyphen or dash as a space
- return $ Str result
-
-escapedChar :: GenParser Char st Inline
-escapedChar = escaped anyChar
-
-symbol :: GenParser Char ParserState Inline
-symbol = do
- result <- oneOf specialChars
- return $ Str [result]
-
--- parses inline code, between codeStart and codeEnd
-code :: GenParser Char ParserState Inline
-code = try $ do
- string "``"
- result <- manyTill anyChar (try (string "``"))
- return $ Code $ removeLeadingTrailingSpace $ intercalate " " $ lines result
-
-emph :: GenParser Char ParserState Inline
-emph = enclosed (char '*') (char '*') inline >>=
- return . Emph . normalizeSpaces
-
-strong :: GenParser Char ParserState Inline
-strong = enclosed (string "**") (try $ string "**") inline >>=
- return . Strong . normalizeSpaces
-
-interpreted :: [Char] -> GenParser Char st [Inline]
-interpreted role = try $ do
- optional $ try $ string "\\ "
- result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
- try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
- return [Str result]
-
-superscript :: GenParser Char ParserState Inline
-superscript = interpreted "sup" >>= (return . Superscript)
-
-subscript :: GenParser Char ParserState Inline
-subscript = interpreted "sub" >>= (return . Subscript)
-
-whitespace :: GenParser Char ParserState Inline
-whitespace = many1 spaceChar >> return Space <?> "whitespace"
-
-str :: GenParser Char ParserState Inline
-str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
-
--- an endline character that can be treated as a space, not a structural break
-endline :: GenParser Char ParserState Inline
-endline = try $ do
- newline
- notFollowedBy blankline
- -- parse potential list-starts at beginning of line differently in a list:
- st <- getState
- if (stateParserContext st) == ListItemState
- then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
- notFollowedBy' bulletListStart
- else return ()
- return Space
-
---
--- links
---
-
-link :: GenParser Char ParserState Inline
-link = choice [explicitLink, referenceLink, autoLink] <?> "link"
-
-explicitLink :: GenParser Char ParserState Inline
-explicitLink = try $ do
- char '`'
- notFollowedBy (char '`') -- `` marks start of inline code
- label' <- manyTill (notFollowedBy (char '`') >> inline)
- (try (spaces >> char '<'))
- src <- manyTill (noneOf ">\n ") (char '>')
- skipSpaces
- string "`_"
- return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "")
-
-referenceLink :: GenParser Char ParserState Inline
-referenceLink = try $ do
- label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
- key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link
- state <- getState
- let keyTable = stateKeys state
- src <- case lookupKeySrc keyTable key of
- Nothing -> fail "no corresponding key"
- Just target -> return target
- -- if anonymous link, remove first anon key so it won't be used again
- let keyTable' = if (key == [Str "_"]) -- anonymous link?
- then delete ([Str "_"], src) keyTable -- remove first anon key
- else keyTable
- setState $ state { stateKeys = keyTable' }
- return $ Link (normalizeSpaces label') src
-
-autoURI :: GenParser Char ParserState Inline
-autoURI = do
- src <- uri
- return $ Link [Str src] (src, "")
-
-autoEmail :: GenParser Char ParserState Inline
-autoEmail = do
- src <- emailAddress
- return $ Link [Str src] ("mailto:" ++ src, "")
-
-autoLink :: GenParser Char ParserState Inline
-autoLink = autoURI <|> autoEmail
-
--- For now, we assume that all substitution references are for images.
-image :: GenParser Char ParserState Inline
-image = try $ do
- char '|'
- ref <- manyTill inline (char '|')
- state <- getState
- let keyTable = stateKeys state
- src <- case lookupKeySrc keyTable ref of
- Nothing -> fail "no corresponding key"
- Just target -> return target
- return $ Image (normalizeSpaces ref) src
-
diff --git a/Text/Pandoc/Readers/TeXMath.hs b/Text/Pandoc/Readers/TeXMath.hs
deleted file mode 100644
index 04b0f3b8f..000000000
--- a/Text/Pandoc/Readers/TeXMath.hs
+++ /dev/null
@@ -1,233 +0,0 @@
-{-
-Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.TeXMath
- Copyright : Copyright (C) 2007 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of TeX math to a list of 'Pandoc' inline elements.
--}
-module Text.Pandoc.Readers.TeXMath (
- readTeXMath
- ) where
-
-import Text.ParserCombinators.Parsec
-import Text.Pandoc.Definition
-
--- | Converts a string of raw TeX math to a list of 'Pandoc' inlines.
-readTeXMath :: String -> [Inline]
-readTeXMath inp = case parse teXMath ("formula: " ++ inp) inp of
- Left _ -> [Str inp] -- if unparseable, just include original
- Right res -> res
-
-teXMath :: GenParser Char st [Inline]
-teXMath = manyTill mathPart eof >>= return . concat
-
-mathPart :: GenParser Char st [Inline]
-mathPart = whitespace <|> superscript <|> subscript <|> symbol <|>
- argument <|> digits <|> letters <|> misc
-
-whitespace :: GenParser Char st [Inline]
-whitespace = many1 space >> return []
-
-symbol :: GenParser Char st [Inline]
-symbol = try $ do
- char '\\'
- res <- many1 letter
- case lookup res teXsymbols of
- Just m -> return [Str m]
- Nothing -> return [Str $ "\\" ++ res]
-
-argument :: GenParser Char st [Inline]
-argument = try $ do
- char '{'
- res <- many mathPart
- char '}'
- return $ if null res
- then [Str " "]
- else [Str "{"] ++ concat res ++ [Str "}"]
-
-digits :: GenParser Char st [Inline]
-digits = do
- res <- many1 digit
- return [Str res]
-
-letters :: GenParser Char st [Inline]
-letters = do
- res <- many1 letter
- return [Emph [Str res]]
-
-misc :: GenParser Char st [Inline]
-misc = do
- res <- noneOf "}"
- return [Str [res]]
-
-scriptArg :: GenParser Char st [Inline]
-scriptArg = try $ do
- (try (do{char '{'; r <- many mathPart; char '}'; return $ concat r}))
- <|> symbol
- <|> (do{c <- (letter <|> digit); return [Str [c]]})
-
-superscript :: GenParser Char st [Inline]
-superscript = try $ do
- char '^'
- arg <- scriptArg
- return [Superscript arg]
-
-subscript :: GenParser Char st [Inline]
-subscript = try $ do
- char '_'
- arg <- scriptArg
- return [Subscript arg]
-
-withThinSpace :: String -> String
-withThinSpace str = "\x2009" ++ str ++ "\x2009"
-
-teXsymbols :: [(String, String)]
-teXsymbols =
- [("alpha","\x3B1")
- ,("beta", "\x3B2")
- ,("chi", "\x3C7")
- ,("delta", "\x3B4")
- ,("Delta", "\x394")
- ,("epsilon", "\x3B5")
- ,("varepsilon", "\x25B")
- ,("eta", "\x3B7")
- ,("gamma", "\x3B3")
- ,("Gamma", "\x393")
- ,("iota", "\x3B9")
- ,("kappa", "\x3BA")
- ,("lambda", "\x3BB")
- ,("Lambda", "\x39B")
- ,("mu", "\x3BC")
- ,("nu", "\x3BD")
- ,("omega", "\x3C9")
- ,("Omega", "\x3A9")
- ,("phi", "\x3C6")
- ,("varphi", "\x3D5")
- ,("Phi", "\x3A6")
- ,("pi", "\x3C0")
- ,("Pi", "\x3A0")
- ,("psi", "\x3C8")
- ,("Psi", "\x3A8")
- ,("rho", "\x3C1")
- ,("sigma", "\x3C3")
- ,("Sigma", "\x3A3")
- ,("tau", "\x3C4")
- ,("theta", "\x3B8")
- ,("vartheta", "\x3D1")
- ,("Theta", "\x398")
- ,("upsilon", "\x3C5")
- ,("xi", "\x3BE")
- ,("Xi", "\x39E")
- ,("zeta", "\x3B6")
- ,("ne", "\x2260")
- ,("lt", withThinSpace "<")
- ,("le", withThinSpace "\x2264")
- ,("leq", withThinSpace "\x2264")
- ,("ge", withThinSpace "\x2265")
- ,("geq", withThinSpace "\x2265")
- ,("prec", withThinSpace "\x227A")
- ,("succ", withThinSpace "\x227B")
- ,("preceq", withThinSpace "\x2AAF")
- ,("succeq", withThinSpace "\x2AB0")
- ,("in", withThinSpace "\x2208")
- ,("notin", withThinSpace "\x2209")
- ,("subset", withThinSpace "\x2282")
- ,("supset", withThinSpace "\x2283")
- ,("subseteq", withThinSpace "\x2286")
- ,("supseteq", withThinSpace "\x2287")
- ,("equiv", withThinSpace "\x2261")
- ,("cong", withThinSpace "\x2245")
- ,("approx", withThinSpace "\x2248")
- ,("propto", withThinSpace "\x221D")
- ,("cdot", withThinSpace "\x22C5")
- ,("star", withThinSpace "\x22C6")
- ,("backslash", "\\")
- ,("times", withThinSpace "\x00D7")
- ,("divide", withThinSpace "\x00F7")
- ,("circ", withThinSpace "\x2218")
- ,("oplus", withThinSpace "\x2295")
- ,("otimes", withThinSpace "\x2297")
- ,("odot", withThinSpace "\x2299")
- ,("sum", "\x2211")
- ,("prod", "\x220F")
- ,("wedge", withThinSpace "\x2227")
- ,("bigwedge", withThinSpace "\x22C0")
- ,("vee", withThinSpace "\x2228")
- ,("bigvee", withThinSpace "\x22C1")
- ,("cap", withThinSpace "\x2229")
- ,("bigcap", withThinSpace "\x22C2")
- ,("cup", withThinSpace "\x222A")
- ,("bigcup", withThinSpace "\x22C3")
- ,("neg", "\x00AC")
- ,("implies", withThinSpace "\x21D2")
- ,("iff", withThinSpace "\x21D4")
- ,("forall", "\x2200")
- ,("exists", "\x2203")
- ,("bot", "\x22A5")
- ,("top", "\x22A4")
- ,("vdash", "\x22A2")
- ,("models", withThinSpace "\x22A8")
- ,("uparrow", "\x2191")
- ,("downarrow", "\x2193")
- ,("rightarrow", withThinSpace "\x2192")
- ,("to", withThinSpace "\x2192")
- ,("rightarrowtail", "\x21A3")
- ,("twoheadrightarrow", withThinSpace "\x21A0")
- ,("twoheadrightarrowtail", withThinSpace "\x2916")
- ,("mapsto", withThinSpace "\x21A6")
- ,("leftarrow", withThinSpace "\x2190")
- ,("leftrightarrow", withThinSpace "\x2194")
- ,("Rightarrow", withThinSpace "\x21D2")
- ,("Leftarrow", withThinSpace "\x21D0")
- ,("Leftrightarrow", withThinSpace "\x21D4")
- ,("partial", "\x2202")
- ,("nabla", "\x2207")
- ,("pm", "\x00B1")
- ,("emptyset", "\x2205")
- ,("infty", "\x221E")
- ,("aleph", "\x2135")
- ,("ldots", "...")
- ,("therefore", "\x2234")
- ,("angle", "\x2220")
- ,("quad", "\x00A0\x00A0")
- ,("cdots", "\x22EF")
- ,("vdots", "\x22EE")
- ,("ddots", "\x22F1")
- ,("diamond", "\x22C4")
- ,("Box", "\x25A1")
- ,("lfloor", "\x230A")
- ,("rfloor", "\x230B")
- ,("lceiling", "\x2308")
- ,("rceiling", "\x2309")
- ,("langle", "\x2329")
- ,("rangle", "\x232A")
- ,("{", "{")
- ,("}", "}")
- ,("[", "[")
- ,("]", "]")
- ,("|", "|")
- ,("||", "||")
- ]
-