diff options
| author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-11-03 22:14:03 +0000 | 
|---|---|---|
| committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-11-03 22:14:03 +0000 | 
| commit | 4a841bfc5464907adea4cdd655485565565b40ae (patch) | |
| tree | 36c0a21e3639614c8d25b5fb1909c32d0ab11dcd /src/Text | |
| parent | 3116d30133196e1bb258f7e74e03d4a85f3b21ae (diff) | |
| download | pandoc-4a841bfc5464907adea4cdd655485565565b40ae.tar.gz | |
Use template haskell to avoid the need for templates:
+ Added library Text.Pandoc.Include, with a template haskell
  function $(includeStrFrom fname) to include a file as a string
  constant at compile time.
+ This removes the need for the 'templates' directory or Makefile
  target.  These have been removed.
+ The base source directory has been changed from src to .
+ A new 'data' directory has been added, containing the ASCIIMathML.js
  script, writer headers, and S5 files.
+ The src/wrappers directory has been moved to 'wrappers'.
+ The Text.Pandoc.ASCIIMathML library is no longer needed, since
  Text.Pandoc.Writers.HTML can use includeStrFrom to include the
  ASCIIMathML.js code directly.  It has been removed.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1063 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc.hs | 110 | ||||
| -rw-r--r-- | src/Text/Pandoc/Blocks.hs | 145 | ||||
| -rw-r--r-- | src/Text/Pandoc/CharacterReferences.hs | 327 | ||||
| -rw-r--r-- | src/Text/Pandoc/Definition.hs | 116 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 496 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 651 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 909 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 640 | ||||
| -rw-r--r-- | src/Text/Pandoc/Shared.hs | 792 | ||||
| -rw-r--r-- | src/Text/Pandoc/UTF8.hs | 45 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/ConTeXt.hs | 248 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 299 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 458 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 310 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Man.hs | 293 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 373 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 325 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/RTF.hs | 286 | 
18 files changed, 0 insertions, 6823 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs deleted file mode 100644 index 7633bf7ef..000000000 --- a/src/Text/Pandoc.hs +++ /dev/null @@ -1,110 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha  -   Portability : portable - -This helper module exports the main writers, readers, and data -structure definitions from the Pandoc libraries. - -A typical application will chain together a reader and a writer -to convert strings from one format to another.  For example, the -following simple program will act as a filter converting markdown -fragments to reStructuredText, using reference-style links instead of -inline links: - -> module Main where -> import Text.Pandoc ->  -> markdownToRST :: String -> String -> markdownToRST = toUTF8 . ->   (writeRST defaultWriterOptions {writerReferenceLinks = True}) . ->   (readMarkdown defaultParserState) .  fromUTF8 ->  -> main = interact markdownToRST - --} - -module Text.Pandoc -               (  -               -- * Definitions -               module Text.Pandoc.Definition -               -- * Readers: converting /to/ Pandoc format -               , readMarkdown -               , readRST -               , readLaTeX -               , readHtml -               -- * Parser state used in readers -               , ParserState (..) -               , defaultParserState -               , ParserContext (..) -               , QuoteContext (..) -               , KeyTable -               , NoteTable -               , HeaderType (..) -               -- * Writers: converting /from/ Pandoc format -               , writeMarkdown -               , writeRST -               , writeLaTeX -               , writeConTeXt -               , writeHtml -               , writeHtmlString -               , writeS5 -               , writeS5String -               , writeDocbook -               , writeMan -               , writeRTF -               , prettyPandoc -               -- * Writer options used in writers  -               , WriterOptions (..) -               , defaultWriterOptions -               -- * Default headers for various output formats -               , module Text.Pandoc.Writers.DefaultHeaders -               -- * Functions for converting to and from UTF-8 -               , module Text.Pandoc.UTF8 -               -- * Version -               , pandocVersion -             ) where - -import Text.Pandoc.Definition -import Text.Pandoc.Readers.Markdown -import Text.Pandoc.Readers.RST -import Text.Pandoc.Readers.LaTeX -import Text.Pandoc.Readers.HTML -import Text.Pandoc.Writers.Markdown -import Text.Pandoc.Writers.RST  -import Text.Pandoc.Writers.LaTeX -import Text.Pandoc.Writers.ConTeXt -import Text.Pandoc.Writers.HTML -import Text.Pandoc.Writers.S5 -import Text.Pandoc.Writers.Docbook -import Text.Pandoc.Writers.Man -import Text.Pandoc.Writers.RTF  -import Text.Pandoc.Writers.DefaultHeaders -import Text.Pandoc.UTF8 -import Text.Pandoc.Shared - --- | Version number of pandoc library. -pandocVersion :: String -pandocVersion = "0.45" diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs deleted file mode 100644 index cfc22cb3e..000000000 --- a/src/Text/Pandoc/Blocks.hs +++ /dev/null @@ -1,145 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Blocks -   Copyright   : Copyright (C) 2007 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Functions for the manipulation of fixed-width blocks of text. -These are used in the construction of plain-text tables. --} - -module Text.Pandoc.Blocks -               (  -                TextBlock (..), -                docToBlock, -                blockToDoc, -                widthOfBlock, -                heightOfBlock, -                hcatBlocks, -                hsepBlocks, -                centerAlignBlock, -                leftAlignBlock, -                rightAlignBlock -               ) -where -import Text.PrettyPrint -import Data.List ( intersperse ) - --- | A fixed-width block of text.  Parameters are width of block, --- height of block, and list of lines. -data TextBlock = TextBlock Int Int [String] -instance Show TextBlock where -  show x = show $ blockToDoc x - --- | Break lines in a list of lines so that none are greater than --- a given width. -breakLines :: Int      -- ^ Maximum length of lines. -           -> [String] -- ^ List of lines. -           -> [String] -breakLines width [] = [] -breakLines width (l:ls) = -  if length l > width -     then (take width l):(breakLines width ((drop width l):ls)) -     else l:(breakLines width ls) - --- | Convert a @Doc@ element into a @TextBlock@ with a specified width.  -docToBlock :: Int  -- ^ Width of text block. -           -> Doc  -- ^ @Doc@ to convert. -           -> TextBlock -docToBlock width doc = -  let rendered    = renderStyle (style {lineLength = width,  -                                        ribbonsPerLine = 1}) doc -      lns         = breakLines width $ lines rendered -  in  TextBlock width (length lns) lns - --- | Convert a @TextBlock@ to a @Doc@ element. -blockToDoc :: TextBlock -> Doc -blockToDoc (TextBlock _ _ lns) =  -  if null lns -     then empty -     else vcat $ map text lns - --- | Returns width of a @TextBlock@ (number of columns). -widthOfBlock :: TextBlock -> Int -widthOfBlock (TextBlock width _ _) = width - --- | Returns height of a @TextBlock@ (number of rows). -heightOfBlock :: TextBlock -> Int -heightOfBlock (TextBlock _ height _) = height - --- | Pads a string out to a given width using spaces. -hPad :: Int     -- ^ Desired width. -     -> String  -- ^ String to pad. -     -> String -hPad width line =  -  let lineLength = length line -  in  if lineLength <= width  -         then line ++ replicate (width - lineLength) ' ' -         else take width line - --- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in --- which they appear side by side. -hcatBlocks :: [TextBlock] -> TextBlock -hcatBlocks [] = TextBlock 0 0 [] -hcatBlocks [x] = x -- This is not redundant!  We don't want last item hPad'd. -hcatBlocks ((TextBlock width1 height1 lns1):xs) =  -  let (TextBlock width2 height2 lns2) = hcatBlocks xs -      height = max height1 height2 -      width  = width1 + width2 -      lns1'  = map (hPad width1) $ lns1 ++ replicate (height - height1) "" -      lns2'  = lns2 ++ replicate (height - height2) "" -      lns    = zipWith (++) lns1' lns2' -   in TextBlock width height lns  - --- | Like @hcatBlocks@, but inserts space between the @TextBlock@s. -hsepBlocks :: [TextBlock] -> TextBlock -hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "])) - -isWhitespace x = x `elem` " \t" - --- | Left-aligns the contents of a @TextBlock@ within the block. -leftAlignBlock :: TextBlock -> TextBlock -leftAlignBlock (TextBlock width height lns) = -  TextBlock width height $ map (dropWhile isWhitespace) lns - --- | Right-aligns the contents of a @TextBlock@ within the block. -rightAlignBlock :: TextBlock -> TextBlock -rightAlignBlock (TextBlock width height lns) = -  let rightAlignLine ln =  -        let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln -        in  reverse (rest ++ spaces) -  in  TextBlock width height $ map rightAlignLine lns - --- | Centers the contents of a @TextBlock@ within the block. -centerAlignBlock :: TextBlock -> TextBlock -centerAlignBlock (TextBlock width height lns) =  -  let centerAlignLine ln = -        let ln' = hPad width ln -            (startSpaces, rest) = span isWhitespace ln' -            endSpaces = takeWhile isWhitespace (reverse ln') -            numSpaces = length (startSpaces ++ endSpaces) -            startSpaces' = replicate (quot numSpaces 2) ' ' -        in  startSpaces' ++ rest  -  in  TextBlock width height $ map centerAlignLine lns - diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs deleted file mode 100644 index 466f5d8f4..000000000 --- a/src/Text/Pandoc/CharacterReferences.hs +++ /dev/null @@ -1,327 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.CharacterReferences -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Functions for parsing character references. --} -module Text.Pandoc.CharacterReferences ( -                     characterReference, -                     decodeCharacterReferences, -                    ) where -import Data.Char ( chr ) -import Text.ParserCombinators.Parsec -import qualified Data.Map as Map - --- | Parse character entity. -characterReference :: GenParser Char st Char -characterReference = try $ do -  st <- char '&' -  character <- numRef <|> entity -  end <- char ';' -  return character   - -numRef :: GenParser Char st Char -numRef = do -  char '#' -  num <- hexNum <|> decNum -  return $ chr $ num  - -hexNum :: GenParser Char st Int  -hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . ("0x" ++) - -decNum :: GenParser Char st Int  -decNum = many1 digit >>= return . read - -entity :: GenParser Char st Char -entity = do -  body <- many1 alphaNum -  return $ Map.findWithDefault '?' body entityTable - --- | Convert entities in a string to characters. -decodeCharacterReferences :: String -> String -decodeCharacterReferences str =  -  case parse (many (characterReference <|> anyChar)) str str of -	Left err        -> error $ "\nError: " ++ show err -	Right result    -> result - -entityTable :: Map.Map String Char -entityTable = Map.fromList entityTableList - -entityTableList :: [(String, Char)] -entityTableList =  [ -	("quot", chr 34), -	("amp", chr 38), -	("lt", chr 60), -	("gt", chr 62), -	("nbsp", chr 160), -	("iexcl", chr 161), -	("cent", chr 162), -	("pound", chr 163), -	("curren", chr 164), -	("yen", chr 165), -	("brvbar", chr 166), -	("sect", chr 167), -	("uml", chr 168), -	("copy", chr 169), -	("ordf", chr 170), -	("laquo", chr 171), -	("not", chr 172), -	("shy", chr 173), -	("reg", chr 174), -	("macr", chr 175), -	("deg", chr 176), -	("plusmn", chr 177), -	("sup2", chr 178), -	("sup3", chr 179), -	("acute", chr 180), -	("micro", chr 181), -	("para", chr 182), -	("middot", chr 183), -	("cedil", chr 184), -	("sup1", chr 185), -	("ordm", chr 186), -	("raquo", chr 187), -	("frac14", chr 188), -	("frac12", chr 189), -	("frac34", chr 190), -	("iquest", chr 191), -	("Agrave", chr 192), -	("Aacute", chr 193), -	("Acirc", chr 194), -	("Atilde", chr 195), -	("Auml", chr 196), -	("Aring", chr 197), -	("AElig", chr 198), -	("Ccedil", chr 199), -	("Egrave", chr 200), -	("Eacute", chr 201), -	("Ecirc", chr 202), -	("Euml", chr 203), -	("Igrave", chr 204), -	("Iacute", chr 205), -	("Icirc", chr 206), -	("Iuml", chr 207), -	("ETH", chr 208), -	("Ntilde", chr 209), -	("Ograve", chr 210), -	("Oacute", chr 211), -	("Ocirc", chr 212), -	("Otilde", chr 213), -	("Ouml", chr 214), -	("times", chr 215), -	("Oslash", chr 216), -	("Ugrave", chr 217), -	("Uacute", chr 218), -	("Ucirc", chr 219), -	("Uuml", chr 220), -	("Yacute", chr 221), -	("THORN", chr 222), -	("szlig", chr 223), -	("agrave", chr 224), -	("aacute", chr 225), -	("acirc", chr 226), -	("atilde", chr 227), -	("auml", chr 228), -	("aring", chr 229), -	("aelig", chr 230), -	("ccedil", chr 231), -	("egrave", chr 232), -	("eacute", chr 233), -	("ecirc", chr 234), -	("euml", chr 235), -	("igrave", chr 236), -	("iacute", chr 237), -	("icirc", chr 238), -	("iuml", chr 239), -	("eth", chr 240), -	("ntilde", chr 241), -	("ograve", chr 242), -	("oacute", chr 243), -	("ocirc", chr 244), -	("otilde", chr 245), -	("ouml", chr 246), -	("divide", chr 247), -	("oslash", chr 248), -	("ugrave", chr 249), -	("uacute", chr 250), -	("ucirc", chr 251), -	("uuml", chr 252), -	("yacute", chr 253), -	("thorn", chr 254), -	("yuml", chr 255), -	("OElig", chr 338), -	("oelig", chr 339), -	("Scaron", chr 352), -	("scaron", chr 353), -	("Yuml", chr 376), -	("fnof", chr 402), -	("circ", chr 710), -	("tilde", chr 732), -	("Alpha", chr 913), -	("Beta", chr 914), -	("Gamma", chr 915), -	("Delta", chr 916), -	("Epsilon", chr 917), -	("Zeta", chr 918), -	("Eta", chr 919), -	("Theta", chr 920), -	("Iota", chr 921), -	("Kappa", chr 922), -	("Lambda", chr 923), -	("Mu", chr 924), -	("Nu", chr 925), -	("Xi", chr 926), -	("Omicron", chr 927), -	("Pi", chr 928), -	("Rho", chr 929), -	("Sigma", chr 931), -	("Tau", chr 932), -	("Upsilon", chr 933), -	("Phi", chr 934), -	("Chi", chr 935), -	("Psi", chr 936), -	("Omega", chr 937), -	("alpha", chr 945), -	("beta", chr 946), -	("gamma", chr 947), -	("delta", chr 948), -	("epsilon", chr 949), -	("zeta", chr 950), -	("eta", chr 951), -	("theta", chr 952), -	("iota", chr 953), -	("kappa", chr 954), -	("lambda", chr 955), -	("mu", chr 956), -	("nu", chr 957), -	("xi", chr 958), -	("omicron", chr 959), -	("pi", chr 960), -	("rho", chr 961), -	("sigmaf", chr 962), -	("sigma", chr 963), -	("tau", chr 964), -	("upsilon", chr 965), -	("phi", chr 966), -	("chi", chr 967), -	("psi", chr 968), -	("omega", chr 969), -	("thetasym", chr 977), -	("upsih", chr 978), -	("piv", chr 982), -	("ensp", chr 8194), -	("emsp", chr 8195), -	("thinsp", chr 8201), -	("zwnj", chr 8204), -	("zwj", chr 8205), -	("lrm", chr 8206), -	("rlm", chr 8207), -	("ndash", chr 8211), -	("mdash", chr 8212), -	("lsquo", chr 8216), -	("rsquo", chr 8217), -	("sbquo", chr 8218), -	("ldquo", chr 8220), -	("rdquo", chr 8221), -	("bdquo", chr 8222), -	("dagger", chr 8224), -	("Dagger", chr 8225), -	("bull", chr 8226), -	("hellip", chr 8230), -	("permil", chr 8240), -	("prime", chr 8242), -	("Prime", chr 8243), -	("lsaquo", chr 8249), -	("rsaquo", chr 8250), -	("oline", chr 8254), -	("frasl", chr 8260), -	("euro", chr 8364), -	("image", chr 8465), -	("weierp", chr 8472), -	("real", chr 8476), -	("trade", chr 8482), -	("alefsym", chr 8501), -	("larr", chr 8592), -	("uarr", chr 8593), -	("rarr", chr 8594), -	("darr", chr 8595), -	("harr", chr 8596), -	("crarr", chr 8629), -	("lArr", chr 8656), -	("uArr", chr 8657), -	("rArr", chr 8658), -	("dArr", chr 8659), -	("hArr", chr 8660), -	("forall", chr 8704), -	("part", chr 8706), -	("exist", chr 8707), -	("empty", chr 8709), -	("nabla", chr 8711), -	("isin", chr 8712), -	("notin", chr 8713), -	("ni", chr 8715), -	("prod", chr 8719), -	("sum", chr 8721), -	("minus", chr 8722), -	("lowast", chr 8727), -	("radic", chr 8730), -	("prop", chr 8733), -	("infin", chr 8734), -	("ang", chr 8736), -	("and", chr 8743), -	("or", chr 8744), -	("cap", chr 8745), -	("cup", chr 8746), -	("int", chr 8747), -	("there4", chr 8756), -	("sim", chr 8764), -	("cong", chr 8773), -	("asymp", chr 8776), -	("ne", chr 8800), -	("equiv", chr 8801), -	("le", chr 8804), -	("ge", chr 8805), -	("sub", chr 8834), -	("sup", chr 8835), -	("nsub", chr 8836), -	("sube", chr 8838), -	("supe", chr 8839), -	("oplus", chr 8853), -	("otimes", chr 8855), -	("perp", chr 8869), -	("sdot", chr 8901), -	("lceil", chr 8968), -	("rceil", chr 8969), -	("lfloor", chr 8970), -	("rfloor", chr 8971), -	("lang", chr 9001), -	("rang", chr 9002), -	("loz", chr 9674), -	("spades", chr 9824), -	("clubs", chr 9827), -	("hearts", chr 9829), -	("diams", chr 9830) -	] diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs deleted file mode 100644 index 7d1125c5a..000000000 --- a/src/Text/Pandoc/Definition.hs +++ /dev/null @@ -1,116 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Definition -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Definition of 'Pandoc' data structure for format-neutral representation -of documents. --} -module Text.Pandoc.Definition where - -data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show) - --- | Bibliographic information for the document:  title (list of 'Inline'), --- authors (list of strings), date (string). -data Meta = Meta [Inline] -- title -                 [String] -- authors -                 String   -- date -            deriving (Eq, Show, Read) - --- | Alignment of a table column. -data Alignment = AlignLeft  -               | AlignRight  -               | AlignCenter  -               | AlignDefault deriving (Eq, Show, Read) - --- | List attributes. -type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) - --- | Style of list numbers. -data ListNumberStyle = DefaultStyle -                     | Decimal  -                     | LowerRoman  -                     | UpperRoman -                     | LowerAlpha  -                     | UpperAlpha deriving (Eq, Show, Read) - --- | Delimiter of list numbers. -data ListNumberDelim = DefaultDelim -                     | Period -                     | OneParen  -                     | TwoParens deriving (Eq, Show, Read) -                    --- | Block element. -data Block   -    = Plain [Inline]        -- ^ Plain text, not a paragraph -    | Para [Inline]         -- ^ Paragraph -    | CodeBlock String      -- ^ Code block (literal) -    | RawHtml String        -- ^ Raw HTML block (literal) -    | BlockQuote [Block]    -- ^ Block quote (list of blocks) -    | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes -                            -- and a list of items, each a list of blocks) -    | BulletList [[Block]]  -- ^ Bullet list (list of items, each -                            -- a list of blocks) -    | DefinitionList [([Inline],[Block])]  -- ^ Definition list  -                            -- (list of items, each a pair of an inline list, -                            -- the term, and a block list) -    | Header Int [Inline]   -- ^ Header - level (integer) and text (inlines)  -    | HorizontalRule        -- ^ Horizontal rule -    | Table [Inline] [Alignment] [Float] [[Block]] [[[Block]]]  -- ^ Table, -                            -- with caption, column alignments, -                            -- relative column widths, column headers -                            -- (each a list of blocks), and rows -                            -- (each a list of lists of blocks) -    | Null                  -- ^ Nothing -    deriving (Eq, Read, Show) - --- | Type of quotation marks to use in Quoted inline. -data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read) - -type Target = (String, String)  -- ^ Link target (URL, title) - --- | Inline elements. -data Inline  -    = Str String            -- ^ Text (string) -    | Emph [Inline]         -- ^ Emphasized text (list of inlines) -    | Strong [Inline]       -- ^ Strongly emphasized text (list of inlines) -    | Strikeout [Inline]    -- ^ Strikeout text (list of inlines) -    | Superscript [Inline]  -- ^ Superscripted text (list of inlines) -    | Subscript [Inline]    -- ^ Subscripted text (list of inlines) -    | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines) -    | Code String           -- ^ Inline code (literal) -    | Space                 -- ^ Inter-word space -    | EmDash                -- ^ Em dash -    | EnDash                -- ^ En dash -    | Apostrophe            -- ^ Apostrophe -    | Ellipses              -- ^ Ellipses -    | LineBreak             -- ^ Hard line break -    | TeX String            -- ^ LaTeX code (literal) -    | HtmlInline String     -- ^ HTML code (literal) -    | Link [Inline] Target  -- ^ Hyperlink: text (list of inlines), target -    | Image [Inline] Target -- ^ Image:  alt text (list of inlines), target -                            -- and target -    | Note [Block]          -- ^ Footnote or endnote  -    deriving (Show, Eq, Read) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs deleted file mode 100644 index 70a071152..000000000 --- a/src/Text/Pandoc/Readers/HTML.hs +++ /dev/null @@ -1,496 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Readers.HTML -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha  -   Portability : portable - -Conversion of HTML to 'Pandoc' document. --} -module Text.Pandoc.Readers.HTML (  -                                 readHtml,  -                                 rawHtmlInline,  -                                 rawHtmlBlock,  -                                 anyHtmlBlockTag,  -                                 anyHtmlInlineTag,   -                                 anyHtmlTag, -                                 anyHtmlEndTag, -                                 htmlEndTag, -                                 extractTagType, -                                 htmlBlockElement  -                                ) where - -import Text.ParserCombinators.Parsec -import Text.Pandoc.Definition -import Text.Pandoc.Shared  -import Text.Pandoc.CharacterReferences ( characterReference,  -                                         decodeCharacterReferences ) -import Data.Maybe ( fromMaybe ) -import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf ) -import Data.Char ( toUpper, toLower, isAlphaNum ) - --- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: ParserState   -- ^ Parser state -         -> String        -- ^ String to parse -         -> Pandoc -readHtml = readWith parseHtml - --- --- Constants --- - -eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", -                  "map", "area", "object", "script"] - -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", -                  "br", "cite", "code", "dfn", "em", "font", "i", "img", -                  "input", "kbd", "label", "q", "s", "samp", "select", -                  "small", "span", "strike", "strong", "sub", "sup", -                  "textarea", "tt", "u", "var"] ++ eitherBlockOrInline - -blockHtmlTags = ["address", "blockquote", "center", "dir", "div", -                 "dl", "fieldset", "form", "h1", "h2", "h3", "h4", -                 "h5", "h6", "hr", "isindex", "menu", "noframes", -                 "noscript", "ol", "p", "pre", "table", "ul", "dd", -                 "dt", "frameset", "li", "tbody", "td", "tfoot", -                 "th", "thead", "tr"] ++ eitherBlockOrInline - --- --- HTML utility functions --- - --- | Read blocks until end tag. -blocksTilEnd tag = do -  blocks <- manyTill (block >>~ spaces) (htmlEndTag tag) -  return $ filter (/= Null) blocks - --- | Read inlines until end tag. -inlinesTilEnd tag = manyTill inline (htmlEndTag tag) - --- | Parse blocks between open and close tag. -blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag - --- | Parse inlines between open and close tag. -inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag - --- | Extract type from a tag:  e.g. @br@ from @\<br\>@ -extractTagType :: String -> String -extractTagType ('<':rest) =  -  let isSpaceOrSlash c = c `elem` "/ \n\t" in -  map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest -extractTagType _ = "" - --- | Parse any HTML tag (opening or self-closing) and return text of tag -anyHtmlTag = try $ do -  char '<' -  spaces -  tag <- many1 alphaNum -  attribs <- many htmlAttribute -  spaces -  ender <- option "" (string "/") -  let ender' = if null ender then "" else " /" -  spaces -  char '>' -  return $ "<" ++ tag ++  -           concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">" - -anyHtmlEndTag = try $ do -  char '<'    -  spaces -  char '/' -  spaces -  tagType <- many1 alphaNum -  spaces -  char '>' -  return $ "</" ++ tagType ++ ">" - -htmlTag :: String -> GenParser Char st (String, [(String, String)]) -htmlTag tag = try $ do -  char '<' -  spaces -  stringAnyCase tag -  attribs <- many htmlAttribute -  spaces -  optional (string "/") -  spaces -  char '>' -  return (tag, (map (\(name, content, raw) -> (name, content)) attribs)) - --- parses a quoted html attribute value -quoted quoteChar = do -  result <- between (char quoteChar) (char quoteChar)  -                    (many (noneOf [quoteChar])) -  return (result, [quoteChar]) - -htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute - --- minimized boolean attribute -htmlMinimizedAttribute = try $ do -  many1 space -  name <- many1 (choice [letter, oneOf ".-_:"]) -  return (name, name, name) - -htmlRegularAttribute = try $ do -  many1 space -  name <- many1 (choice [letter, oneOf ".-_:"]) -  spaces -  char '=' -  spaces -  (content, quoteStr) <- choice [ (quoted '\''),  -                                  (quoted '"'),  -                                  (do -                                     a <- many (alphaNum <|> (oneOf "-._:")) -                                     return (a,"")) ] -  return (name, content, -          (name ++ "=" ++ quoteStr ++ content ++ quoteStr)) - --- | Parse an end tag of type 'tag' -htmlEndTag tag = try $ do -  char '<'    -  spaces -  char '/' -  spaces -  stringAnyCase tag -  spaces -  char '>' -  return $ "</" ++ tag ++ ">" - --- | Returns @True@ if the tag is (or can be) an inline tag. -isInline tag = (extractTagType tag) `elem` inlineHtmlTags - --- | Returns @True@ if the tag is (or can be) a block tag. -isBlock tag = (extractTagType tag) `elem` blockHtmlTags  - -anyHtmlBlockTag = try $ do -  tag <- anyHtmlTag <|> anyHtmlEndTag -  if isBlock tag then return tag else fail "inline tag" - -anyHtmlInlineTag = try $ do -  tag <- anyHtmlTag <|> anyHtmlEndTag -  if isInline tag then return tag else fail "not an inline tag" - --- | Parses material between script tags. --- Scripts must be treated differently, because they can contain '<>' etc. -htmlScript = try $ do -  open <- string "<script" -  rest <- manyTill anyChar (htmlEndTag "script") -  return $ open ++ rest ++ "</script>" - -htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ] - -rawHtmlBlock = try $ do -  notFollowedBy' (htmlTag "/body" <|> htmlTag "/html") -  body <- htmlBlockElement <|> anyHtmlTag <|> anyHtmlEndTag -  sp <- many space -  state <- getState -  if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null - --- | Parses an HTML comment. -htmlComment = try $ do -  string "<!--" -  comment <- manyTill anyChar (try (string "-->")) -  return $ "<!--" ++ comment ++ "-->" - --- --- parsing documents --- - -xmlDec = try $ do -  string "<?" -  rest <- manyTill anyChar (char '>') -  return $ "<?" ++ rest ++ ">" - -definition = try $ do -  string "<!" -  rest <- manyTill anyChar (char '>') -  return $ "<!" ++ rest ++ ">" - -nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >> -  ((rawHtmlBlock >> return ' ') <|> anyChar) - -parseTitle = try $ do -  (tag, _) <- htmlTag "title" -  contents <- inlinesTilEnd tag -  spaces -  return contents - --- parse header and return meta-information (for now, just title) -parseHead = try $ do -  htmlTag "head" -  spaces -  skipMany nonTitleNonHead -  contents <- option [] parseTitle -  skipMany nonTitleNonHead -  htmlTag "/head" -  return (contents, [], "") - -skipHtmlTag tag = optional (htmlTag tag) - --- h1 class="title" representation of title in body -bodyTitle = try $ do -  (tag, attribs) <- htmlTag "h1"   -  cl <- case (extractAttribute "class" attribs) of -          Just "title" -> return "" -          otherwise    -> fail "not title" -  inlinesTilEnd "h1" - -parseHtml = do -  sepEndBy (choice [xmlDec, definition, htmlComment]) spaces -  skipHtmlTag "html" -  spaces -  (title, authors, date) <- option ([], [], "") parseHead  -  spaces -  skipHtmlTag "body" -  spaces -  optional bodyTitle  -- skip title in body, because it's represented in meta -  blocks <- parseBlocks -  spaces -  optional (htmlEndTag "body") -  spaces -  optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html> -  eof -  return $ Pandoc (Meta title authors date) blocks - --- --- parsing blocks --- - -parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null)) - -block = choice [ codeBlock -               , header -               , hrule -               , list -               , blockQuote -               , para -               , plain -               , rawHtmlBlock ] <?> "block" - --- --- header blocks --- - -header = choice (map headerLevel (enumFromTo 1 5)) <?> "header" - -headerLevel n = try $ do -    let level = "h" ++ show n -    (tag, attribs) <- htmlTag level -    contents <- inlinesTilEnd level -    return $ Header n (normalizeSpaces contents) - --- --- hrule block --- - -hrule = try  $ do -  (tag, attribs) <- htmlTag "hr" -  state <- getState -  if not (null attribs) && stateParseRaw state -     then unexpected "attributes in hr" -- parse as raw in this case -     else return HorizontalRule - --- --- code blocks --- - --- Note:  HTML tags in code blocks (e.g. for syntax highlighting) are  --- skipped, because they are not portable to output formats other than HTML. -codeBlock = try $ do -    htmlTag "pre"  -    result <- manyTill  -              (many1 (satisfy (/= '<')) <|>  -               ((anyHtmlTag <|> anyHtmlEndTag) >> return "")) -              (htmlEndTag "pre") -    let result' = concat result -    -- drop leading newline if any -    let result'' = if "\n" `isPrefixOf` result' -                      then drop 1 result' -                      else result' -    -- drop trailing newline if any -    let result''' = if "\n" `isSuffixOf` result'' -                       then init result'' -                       else result'' -    return $ CodeBlock $ decodeCharacterReferences result''' - --- --- block quotes --- - -blockQuote = try $ htmlTag "blockquote" >> spaces >>  -                   blocksTilEnd "blockquote" >>= (return . BlockQuote) - --- --- list blocks --- - -list = choice [ bulletList, orderedList, definitionList ] <?> "list" - -orderedList = try $ do -  (_, attribs) <- htmlTag "ol" -  (start, style) <- option (1, DefaultStyle) $ -                           do failIfStrict -                              let sta = fromMaybe "1" $  -                                        lookup "start" attribs -                              let sty = fromMaybe (fromMaybe "" $ -                                        lookup "style" attribs) $ -                                        lookup "class" attribs -                              let sty' = case sty of -                                          "lower-roman"  -> LowerRoman -                                          "upper-roman"  -> UpperRoman -                                          "lower-alpha"  -> LowerAlpha -                                          "upper-alpha"  -> UpperAlpha -                                          "decimal"      -> Decimal -                                          _              -> DefaultStyle -                              return (read sta, sty') -  spaces -  items <- sepEndBy1 (blocksIn "li") spaces -  htmlEndTag "ol" -  return $ OrderedList (start, style, DefaultDelim) items - -bulletList = try $ do -  htmlTag "ul" -  spaces -  items <- sepEndBy1 (blocksIn "li") spaces -  htmlEndTag "ul" -  return $ BulletList items - -definitionList = try $ do -  failIfStrict  -- def lists not part of standard markdown -  tag <- htmlTag "dl" -  spaces -  items <- sepEndBy1 definitionListItem spaces -  htmlEndTag "dl" -  return $ DefinitionList items - -definitionListItem = try $ do -  terms <- sepEndBy1 (inlinesIn "dt") spaces -  defs <- sepEndBy1 (blocksIn "dd") spaces -  let term = joinWithSep [LineBreak] terms -  return (term, concat defs) - --- --- paragraph block --- - -para = try $ htmlTag "p" >> inlinesTilEnd "p" >>=  -             return . Para . normalizeSpaces - ---  --- plain block --- - -plain = many1 inline >>= return . Plain . normalizeSpaces - ---  --- inline --- - -inline = choice [ charRef -                , strong -                , emph -                , superscript -                , subscript -                , strikeout -                , spanStrikeout -                , code -                , str -                , linebreak -                , whitespace -                , link -                , image -                , rawHtmlInline -                ] <?> "inline" - -code = try $ do  -  htmlTag "code" -  result <- manyTill anyChar (htmlEndTag "code") -  -- remove internal line breaks, leading and trailing space, -  -- and decode character references -  return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $  -                  joinWithSep " " $ lines result  - -rawHtmlInline = do -  result <- htmlScript <|> htmlComment <|> anyHtmlInlineTag -  state <- getState -  if stateParseRaw state then return (HtmlInline result) else return (Str "") - -betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=  -                        return . normalizeSpaces - -emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph - -strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong - -superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript - -subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript - -strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= -            return . Strikeout - -spanStrikeout = try $ do -  failIfStrict -- strict markdown has no strikeout, so treat as raw HTML -  (tag, attributes) <- htmlTag "span"  -  result <- case (extractAttribute "class" attributes) of -              Just "strikeout" -> inlinesTilEnd "span" -              _                -> fail "not a strikeout" -  return $ Strikeout result - -whitespace = many1 space >> return Space - --- hard line break -linebreak = htmlTag "br" >> optional newline >> return LineBreak - -str = many1 (noneOf "<& \t\n") >>= return . Str - --- --- links and images --- - --- extract contents of attribute (attribute names are case-insensitive) -extractAttribute name [] = Nothing -extractAttribute name ((attrName, contents):rest) =  -  let name'     = map toLower name  -      attrName' = map toLower attrName -  in  if attrName' == name' -         then Just (decodeCharacterReferences contents) -         else extractAttribute name rest - -link = try $ do -  (tag, attributes) <- htmlTag "a"   -  url <- case (extractAttribute "href" attributes) of -           Just url -> return url -           Nothing  -> fail "no href" -  let title = fromMaybe "" $ extractAttribute "title" attributes -  label <- inlinesTilEnd "a" -  return $ Link (normalizeSpaces label) (url, title) - -image = try $ do -  (tag, attributes) <- htmlTag "img"  -  url <- case (extractAttribute "src" attributes) of -           Just url -> return url -           Nothing  -> fail "no src" -  let title = fromMaybe "" $ extractAttribute "title" attributes -  let alt = fromMaybe "" (extractAttribute "alt" attributes) -  return $ Image [Str alt] (url, title) - diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs deleted file mode 100644 index 37cc2bfe4..000000000 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ /dev/null @@ -1,651 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Readers.LaTeX -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Conversion of LaTeX to 'Pandoc' document. --} -module Text.Pandoc.Readers.LaTeX (  -                                  readLaTeX, -                                  rawLaTeXInline, -                                  rawLaTeXEnvironment -                                 ) where - -import Text.ParserCombinators.Parsec -import Text.Pandoc.Definition -import Text.Pandoc.Shared  -import Data.Maybe ( fromMaybe ) -import Data.Char ( chr ) -import Data.List ( isPrefixOf, isSuffixOf ) - --- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: ParserState   -- ^ Parser state, including options for parser -          -> String        -- ^ String to parse -          -> Pandoc -readLaTeX = readWith parseLaTeX - --- characters with special meaning -specialChars = "\\`$%^&_~#{}\n \t|<>'\"-" - --- --- utility functions --- - --- | Returns text between brackets and its matching pair. -bracketedText openB closeB = do -  result <- charsInBalanced' openB closeB -  return $ [openB] ++ result ++ [closeB] - --- | Returns an option or argument of a LaTeX command. -optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']' - --- | True if the string begins with '{'. -isArg ('{':rest) = True -isArg other = False - --- | Returns list of options and arguments of a LaTeX command. -commandArgs = many optOrArg - --- | Parses LaTeX command, returns (name, star, list of options or arguments). -command = do -  char '\\' -  name <- many1 letter -  star <- option "" (string "*")  -- some commands have starred versions -  args <- commandArgs -  return (name, star, args) - -begin name = try $ do -  string $ "\\begin{" ++ name ++ "}" -  optional commandArgs -  spaces -  return name - -end name = try $ do -  string $ "\\end{" ++ name ++ "}" -  spaces -  return name - --- | Returns a list of block elements containing the contents of an --- environment. -environment name = try $ begin name >> spaces >> manyTill block (end name) - -anyEnvironment =  try $ do -  string "\\begin{" -  name <- many letter -  star <- option "" (string "*") -- some environments have starred variants -  char '}' -  optional commandArgs -  spaces -  contents <- manyTill block (end (name ++ star)) -  return $ BlockQuote contents - --- --- parsing documents --- - --- | Process LaTeX preamble, extracting metadata. -processLaTeXPreamble = try $ manyTill  -  (choice [bibliographic, comment, unknownCommand, nullBlock])  -  (try (string "\\begin{document}")) >>  -  spaces - --- | Parse LaTeX and return 'Pandoc'. -parseLaTeX = do -  optional processLaTeXPreamble -- preamble might not be present (fragment) -  spaces -  blocks <- parseBlocks -  spaces -  optional $ try (string "\\end{document}" >> many anyChar)  -  -- might not be present (fragment) -  spaces -  eof -  state <- getState -  let blocks' = filter (/= Null) blocks -  let title' = stateTitle state -  let authors' = stateAuthors state -  let date' = stateDate state -  return $ Pandoc (Meta title' authors' date')  blocks' - --- --- parsing blocks --- - -parseBlocks = spaces >> many block - -block = choice [ hrule -               , codeBlock -               , header -               , list -               , blockQuote -               , mathBlock -               , comment -               , bibliographic -               , para -               , specialEnvironment -               , itemBlock -               , unknownEnvironment -               , unknownCommand ] <?> "block" - --- --- header blocks --- - -header = try $ do -  char '\\' -  subs <- many (try (string "sub")) -  string "section" -  optional (char '*') -  char '{' -  title <- manyTill inline (char '}') -  spaces -  return $ Header (length subs + 1) (normalizeSpaces title) - --- --- hrule block --- - -hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",  -                       "\\newpage" ] >> spaces >> return HorizontalRule - --- --- code blocks --- - -codeBlock = codeBlock1 <|> codeBlock2 - -codeBlock1 = try $ do -  string "\\begin{verbatim}"  -- don't use begin function because it  -                              -- gobbles whitespace -  optional blanklines         -- we want to gobble blank lines, but not  -                              -- leading space -  contents <- manyTill anyChar (try (string "\\end{verbatim}")) -  spaces -  return $ CodeBlock (stripTrailingNewlines contents) - -codeBlock2 = try $ do -  string "\\begin{Verbatim}"  -- used by fancyvrb package -  option "" blanklines -  contents <- manyTill anyChar (try (string "\\end{Verbatim}")) -  spaces -  return $ CodeBlock (stripTrailingNewlines contents) - --- --- block quotes --- - -blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=  -             return . BlockQuote - --- --- math block --- - -mathBlock = mathBlockWith (begin "equation") (end "equation") <|>  -            mathBlockWith (begin "displaymath") (end "displaymath") <|> -            mathBlockWith (string "\\[") (string "\\]") <?> "math block" - -mathBlockWith start end = try $ do -  start -  spaces -  result <- manyTill anyChar end -  spaces -  return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]] - --- --- list blocks --- - -list = bulletList <|> orderedList <|> definitionList <?> "list" - -listItem = try $ do -  ("item", _, args) <- command -  spaces -  state <- getState -  let oldParserContext = stateParserContext state -  updateState (\state -> state {stateParserContext = ListItemState}) -  blocks <- many block -  updateState (\state -> state {stateParserContext = oldParserContext}) -  opt <- case args of -           ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->  -                       parseFromString (many inline) $ tail $ init x -           _        -> return [] -  return (opt, blocks) - -orderedList = try $ do -  string "\\begin{enumerate}" -  (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ -                              try $ do failIfStrict -                                       char '[' -                                       res <- anyOrderedListMarker -                                       char ']' -                                       return res -  spaces -  option "" $ try $ do string "\\setlength{\\itemindent}" -                       char '{' -                       manyTill anyChar (char '}') -  spaces -  start <- option 1 $ try $ do failIfStrict -                               string "\\setcounter{enum" -                               many1 (oneOf "iv") -                               string "}{" -                               num <- many1 digit -                               char '}'  -                               spaces -                               return $ (read num) + 1 -  items <- many listItem -  end "enumerate" -  spaces -  return $ OrderedList (start, style, delim) $ map snd items - -bulletList = try $ do -  begin "itemize" -  spaces -  items <- many listItem -  end "itemize" -  spaces -  return (BulletList $ map snd items) - -definitionList = try $ do -  begin "description" -  spaces -  items <- many listItem -  end "description" -  spaces -  return (DefinitionList items) - --- --- paragraph block --- - -para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces - --- --- title authors date --- - -bibliographic = choice [ maketitle, title, authors, date ] - -maketitle = try (string "\\maketitle") >> spaces >> return Null - -title = try $ do -  string "\\title{" -  tit <- manyTill inline (char '}') -  spaces -  updateState (\state -> state { stateTitle = tit }) -  return Null - -authors = try $ do -  string "\\author{" -  authors <- manyTill anyChar (char '}') -  spaces -  let authors' = map removeLeadingTrailingSpace $ lines $ -                 substitute "\\\\" "\n" authors -  updateState (\state -> state { stateAuthors = authors' }) -  return Null - -date = try $ do -  string "\\date{" -  date' <- manyTill anyChar (char '}') -  spaces -  updateState (\state -> state { stateDate = date' }) -  return Null - --- --- item block --- for use in unknown environments that aren't being parsed as raw latex --- - --- this forces items to be parsed in different blocks -itemBlock = try $ do -  ("item", _, args) <- command -  state <- getState -  if (stateParserContext state == ListItemState) -     then fail "item should be handled by list block" -     else if null args  -             then return Null -             else return $ Plain [Str (stripFirstAndLast (head args))] - --- --- raw LaTeX  --- - -specialEnvironment = do  -- these are always parsed as raw -  lookAhead (choice (map (\name -> begin name)  ["tabular", "figure", -              "tabbing", "eqnarry", "picture", "table", "verse", "theorem"])) -  rawLaTeXEnvironment - --- | Parse any LaTeX environment and return a Para block containing --- the whole literal environment as raw TeX. -rawLaTeXEnvironment :: GenParser Char st Block -rawLaTeXEnvironment = try $ do -  string "\\begin{" -  name <- many1 letter -  star <- option "" (string "*") -- for starred variants -  let name' = name ++ star -  char '}' -  args <- option [] commandArgs -  let argStr = concat args -  contents <- manyTill (choice [ (many1 (noneOf "\\")),  -                                 (do  -                                    (Para [TeX str]) <- rawLaTeXEnvironment -                                    return str), -                                 string "\\" ])  -                       (end name') -  spaces -  return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++  -                 concat contents ++ "\\end{" ++ name' ++ "}"] - -unknownEnvironment = try $ do -  state <- getState -  result <- if stateParseRaw state -- check whether we should include raw TeX  -               then rawLaTeXEnvironment -- if so, get whole raw environment -               else anyEnvironment      -- otherwise just the contents -  return result - -unknownCommand = try $ do -  notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",  -                                     "document"] -  (name, star, args) <- command -  spaces -  let argStr = concat args -  state <- getState -  if name == "item" && (stateParserContext state) == ListItemState -     then fail "should not be parsed as raw" -     else string "" -  if stateParseRaw state -     then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)] -     else return $ Plain [Str (joinWithSep " " args)] - --- latex comment -comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null - ---  --- inline --- - -inline =  choice [ str -                 , endline -                 , whitespace -                 , quoted -                 , apostrophe -                 , spacer -                 , strong -                 , math -                 , ellipses -                 , emDash -                 , enDash -                 , hyphen -                 , emph -                 , strikeout -                 , superscript -                 , subscript -                 , ref -                 , lab -                 , code -                 , url -                 , link -                 , image -                 , footnote -                 , linebreak -                 , accentedChar -                 , specialChar -                 , rawLaTeXInline -                 , escapedChar -                 , unescapedChar -                 ] <?> "inline" - -accentedChar = normalAccentedChar <|> specialAccentedChar - -normalAccentedChar = try $ do -  char '\\' -  accent <- oneOf "'`^\"~" -  character <- (try $ char '{' >> letter >>~ char '}') <|> letter -  let table = fromMaybe [] $ lookup character accentTable  -  let result = case lookup accent table of -                 Just num  -> chr num -                 Nothing   -> '?' -  return $ Str [result] - --- an association list of letters and association list of accents --- and decimal character numbers. -accentTable =  -  [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]), -    ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]), -    ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]), -    ('N', [('~', 209)]), -    ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]), -    ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]), -    ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]), -    ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]), -    ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]), -    ('n', [('~', 241)]), -    ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]), -    ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] - -specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, -                               oslash, pound, euro, copyright, sect ] - -ccedil = try $ do -  char '\\' -  letter <- oneOfStrings ["cc", "cC"] -  let num = if letter == "cc" then 231 else 199 -  return $ Str [chr num] - -aring = try $ do -  char '\\' -  letter <- oneOfStrings ["aa", "AA"] -  let num = if letter == "aa" then 229 else 197 -  return $ Str [chr num] - -iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>  -       return (Str [chr 239]) - -icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >> -        return (Str [chr 238]) - -szlig = try (string "\\ss") >> return (Str [chr 223]) - -oslash = try $ do -  char '\\' -  letter <- choice [char 'o', char 'O'] -  let num = if letter == 'o' then 248 else 216 -  return $ Str [chr num] - -aelig = try $ do -  char '\\' -  letter <- oneOfStrings ["ae", "AE"] -  let num = if letter == "ae" then 230 else 198 -  return $ Str [chr num] - -pound = try (string "\\pounds") >> return (Str [chr 163]) - -euro = try (string "\\euro") >> return (Str [chr 8364]) - -copyright = try (string "\\copyright") >> return (Str [chr 169]) - -sect = try (string "\\S") >> return (Str [chr 167]) - -escapedChar = do -  result <- escaped (oneOf " $%&_#{}\n") -  return $ if result == Str "\n" then Str " " else result - --- ignore standalone, nonescaped special characters -unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "") - -specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ] - -backslash = try (string "\\textbackslash") >> return (Str "\\") - -tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~") - -caret = try (string "\\^{}") >> return (Str "^") - -bar = try (string "\\textbar") >> return (Str "\\") - -lt = try (string "\\textless") >> return (Str "<") - -gt = try (string "\\textgreater") >> return (Str ">") - -doubleQuote = char '"' >> return (Str "\"") - -code = code1 <|> code2 - -code1 = try $ do  -  string "\\verb" -  marker <- anyChar -  result <- manyTill anyChar (char marker) -  return $ Code $ removeLeadingTrailingSpace result - -code2 = try $ do -  string "\\texttt{" -  result <- manyTill (noneOf "\\\n~$%^&{}") (char '}') -  return $ Code result - -emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >> -             manyTill inline (char '}') >>= return . Emph - -strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>= -                  return . Strikeout - -superscript = try $ string "\\textsuperscript{" >>  -                    manyTill inline (char '}') >>= return . Superscript - --- note: \textsubscript isn't a standard latex command, but we use --- a defined version in pandoc. -subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>= -                  return . Subscript - -apostrophe = char '\'' >> return Apostrophe - -quoted = doubleQuoted <|> singleQuoted - -singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>= -               return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>= -               return . Quoted DoubleQuote . normalizeSpaces - -singleQuoteStart = char '`' - -singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum - -doubleQuoteStart = string "``" - -doubleQuoteEnd = try $ string "''" - -ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >> -                 return Ellipses - -enDash = try (string "--") >> return EnDash - -emDash = try (string "---") >> return EmDash - -hyphen = char '-' >> return (Str "-") - -lab = try $ do -  string "\\label{" -  result <- manyTill anyChar (char '}') -  return $ Str $ "(" ++ result ++ ")" - -ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str - -strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= -         return . Strong - -whitespace = many1 (oneOf "~ \t") >> return Space - --- hard line break -linebreak = try (string "\\\\") >> return LineBreak - -spacer = try (string "\\,") >> return (Str "") - -str = many1 (noneOf specialChars) >>= return . Str - --- endline internal to paragraph -endline = try $ newline >> notFollowedBy blankline >> return Space - --- math -math = math1 <|> math2 <?> "math" - -math1 = try $ do -  char '$' -  result <- many (noneOf "$") -  char '$' -  return $ TeX ("$" ++ result ++ "$") - -math2 = try $ do -  string "\\(" -  result <- many (noneOf "$") -  string "\\)" -  return $ TeX ("$" ++ result ++ "$") - --- --- links and images --- - -url = try $ do -  string "\\url" -  url <- charsInBalanced '{' '}' -  return $ Link [Code url] (url, "") - -link = try $ do -  string "\\href{" -  url <- manyTill anyChar (char '}') -  char '{' -  label <- manyTill inline (char '}')  -  return $ Link (normalizeSpaces label) (url, "") - -image = try $ do -  ("includegraphics", _, args) <- command -  let args' = filter isArg args -- filter out options -  let src = if null args' then -              ("", "") -            else -              (stripFirstAndLast (head args'), "") -  return $ Image [Str "image"] src - -footnote = try $ do -  (name, _, (contents:[])) <- command -  if ((name == "footnote") || (name == "thanks")) -     then string "" -     else fail "not a footnote or thanks command" -  let contents' = stripFirstAndLast contents -  -- parse the extracted block, which may contain various block elements: -  rest <- getInput -  setInput $ contents' -  blocks <- parseBlocks -  setInput rest -  return $ Note blocks - --- | Parse any LaTeX command and return it in a raw TeX inline element. -rawLaTeXInline :: GenParser Char ParserState Inline -rawLaTeXInline = try $ do -  (name, star, args) <- command -  state <- getState -  if ((name == "begin") || (name == "end") || (name == "item")) -     then fail "not an inline command"  -     else string "" -  return $ TeX ("\\" ++ name ++ star ++ concat args) - diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs deleted file mode 100644 index df84c0ac7..000000000 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ /dev/null @@ -1,909 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Readers.Markdown -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Conversion of markdown-formatted plain text to 'Pandoc' document. --} -module Text.Pandoc.Readers.Markdown (  -                                     readMarkdown  -                                    ) where - -import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy ) -import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) -import Network.URI ( isURI ) -import Text.Pandoc.Definition -import Text.Pandoc.Shared  -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) -import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,  -                                  anyHtmlInlineTag, anyHtmlTag, -                                  anyHtmlEndTag, htmlEndTag, extractTagType, -                                  htmlBlockElement ) -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Text.ParserCombinators.Parsec - --- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ParserState -> String -> Pandoc -readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n") - --- --- Constants and data structure definitions --- - -spaceChars = " \t" -bulletListMarkers = "*+-" -hruleChars = "*-_" -setextHChars = "=-" - --- treat these as potentially non-text when parsing inline: -specialChars = "\\[]*_~`<>$!^-.&'\"" - --- --- auxiliary functions --- - -indentSpaces = try $ do -  state <- getState -  let tabStop = stateTabStop state -  try (count tabStop (char ' ')) <|>  -    (many (char ' ') >> string "\t") <?> "indentation" - -nonindentSpaces = do -  state <- getState -  let tabStop = stateTabStop state -  sps <- many (char ' ') -  if length sps < tabStop  -     then return sps -     else unexpected "indented line" - --- | Fail unless we're at beginning of a line. -failUnlessBeginningOfLine = do -  pos <- getPosition -  if sourceColumn pos == 1 then return () else fail "not beginning of line" - --- | Fail unless we're in "smart typography" mode. -failUnlessSmart = do -  state <- getState -  if stateSmart state then return () else fail "Smart typography feature" - --- | Parse an inline Str element with a given content. -inlineString str = try $ do  -  (Str res) <- inline  -  if res == str then return res else fail $ "unexpected Str content" - --- | Parse a sequence of inline elements between a string ---  @opener@ and a string @closer@, including inlines ---  between balanced pairs of @opener@ and a @closer@. -inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline] -inlinesInBalanced opener closer = try $ do -  string opener -  result <- manyTill ( (do lookAhead (inlineString opener) -                           -- because it might be a link... -                           bal <- inlinesInBalanced opener closer  -                           return $ [Str opener] ++ bal ++ [Str closer]) -                       <|> (count 1 inline))  -                     (try (string closer)) -  return $ concat result - --- --- document structure --- - -titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline - -authorsLine = try $ do  -  char '%' -  skipSpaces -  authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") -  newline -  return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors - -dateLine = try $ do -  char '%' -  skipSpaces -  date <- many (noneOf "\n") -  newline -  return $ decodeCharacterReferences $ removeTrailingSpace date - -titleBlock = try $ do -  failIfStrict -  title <- option [] titleLine -  author <- option [] authorsLine -  date <- option "" dateLine -  optional blanklines -  return (title, author, date) - -parseMarkdown = do -  -- markdown allows raw HTML -  updateState (\state -> state { stateParseRaw = True }) -  startPos <- getPosition -  -- go through once just to get list of reference keys -  -- docMinusKeys is the raw document with blanks where the keys were... -  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=  -                  return . concat -  setInput docMinusKeys -  setPosition startPos -  st <- getState -  -- go through again for notes unless strict... -  if stateStrict st -     then return () -     else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=  -                              return . concat -             st <- getState -             let reversedNotes = stateNotes st -             updateState $ \st -> st { stateNotes = reverse reversedNotes } -             setInput docMinusNotes -             setPosition startPos -  -- now parse it for real... -  (title, author, date) <- option ([],[],"") titleBlock -  blocks <- parseBlocks  -  return $ Pandoc (Meta title author date) $ filter (/= Null) blocks - ---  --- initial pass for references and notes --- - -referenceKey = try $ do -  startPos <- getPosition -  nonindentSpaces -  label <- reference -  char ':' -  skipSpaces -  optional (char '<') -  src <- many (noneOf "> \n\t") -  optional (char '>') -  tit <- option "" referenceTitle -  blanklines -  endPos <- getPosition -  let newkey = (label, (removeTrailingSpace src,  tit)) -  st <- getState -  let oldkeys = stateKeys st -  updateState $ \st -> st { stateKeys = newkey : oldkeys } -  -- return blanks so line count isn't affected -  return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -referenceTitle = try $ do  -  (many1 spaceChar >> option '\n' newline) <|> newline -  skipSpaces -  tit <-    (charsInBalanced '(' ')' >>= return . unwords . words) -        <|> do delim <- char '\'' <|> char '"' -               manyTill anyChar (try (char delim >> skipSpaces >> -                                      notFollowedBy (noneOf ")\n"))) -  return $ decodeCharacterReferences tit - -noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']') - -rawLine = do -  notFollowedBy blankline -  notFollowedBy' noteMarker -  contents <- many1 nonEndline -  end <- option "" (newline >> optional indentSpaces >> return "\n")  -  return $ contents ++ end - -rawLines = many1 rawLine >>= return . concat - -noteBlock = try $ do -  startPos <- getPosition -  ref <- noteMarker -  char ':' -  optional blankline -  optional indentSpaces -  raw <- sepBy rawLines (try (blankline >> indentSpaces)) -  optional blanklines -  endPos <- getPosition -  -- parse the extracted text, which may contain various block elements: -  contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" -  let newnote = (ref, contents) -  st <- getState -  let oldnotes = stateNotes st -  updateState $ \st -> st { stateNotes = newnote : oldnotes } -  -- return blanks so line count isn't affected -  return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - --- --- parsing blocks --- - -parseBlocks = manyTill block eof - -block = choice [ header  -               , table -               , codeBlock -               , hrule -               , list -               , blockQuote -               , htmlBlock -               , rawLaTeXEnvironment' -               , para -               , plain -               , nullBlock ] <?> "block" - --- --- header blocks --- - -header = atxHeader <|> setextHeader <?> "header" - -atxHeader = try $ do -  level <- many1 (char '#') >>= return . length -  notFollowedBy (char '.' <|> char ')') -- this would be a list -  skipSpaces -  text <- manyTill inline atxClosing >>= return . normalizeSpaces -  return $ Header level text - -atxClosing = try $ skipMany (char '#') >> blanklines - -setextHeader = try $ do -  -- first, see if this block has any chance of being a setextHeader: -  lookAhead (anyLine >> oneOf setextHChars) -  text <- many1Till inline newline >>= return . normalizeSpaces -  level <- choice $ zipWith  -           (\ch lev -> try (many1 $ char ch) >> blanklines >> return lev) -           setextHChars [1..(length setextHChars)] -  return $ Header level text - --- --- hrule block --- - -hrule = try $ do -  skipSpaces -  start <- oneOf hruleChars -  count 2 (skipSpaces >> char start) -  skipMany (skipSpaces >> char start) -  newline -  optional blanklines -  return HorizontalRule - --- --- code blocks --- - -indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") - -codeBlock = do -  contents <- many1 (indentedLine <|>  -                     try (do b <- blanklines -                             l <- indentedLine -                             return $ b ++ l)) -  optional blanklines -  return $ CodeBlock $ stripTrailingNewlines $ concat contents - --- --- block quotes --- - -emacsBoxQuote = try $ do -  failIfStrict -  string ",----" -  manyTill anyChar newline -  raw <- manyTill  -         (try (char '|' >> optional (char ' ') >> manyTill anyChar newline)) -         (try (string "`----")) -  blanklines -  return raw - -emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') - -emailBlockQuote = try $ do -  emailBlockQuoteStart -  raw <- sepBy (many (nonEndline <|>  -                      (try (endline >> notFollowedBy emailBlockQuoteStart >> -                       return '\n')))) -               (try (newline >> emailBlockQuoteStart)) -  newline <|> (eof >> return '\n') -  optional blanklines -  return raw - -blockQuote = do  -  raw <- emailBlockQuote <|> emacsBoxQuote -  -- parse the extracted block, which may contain various block elements: -  contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" -  return $ BlockQuote contents -  --- --- list blocks --- - -list = choice [ bulletList, orderedList, definitionList ] <?> "list" - -bulletListStart = try $ do -  optional newline -- if preceded by a Plain block in a list context -  nonindentSpaces -  notFollowedBy' hrule     -- because hrules start out just like lists -  oneOf bulletListMarkers -  spaceChar -  skipSpaces - -anyOrderedListStart = try $ do -  optional newline -- if preceded by a Plain block in a list context -  nonindentSpaces -  notFollowedBy $ string "p." >> spaceChar >> digit  -- page number -  state <- getState -  if stateStrict state -     then do many1 digit -             char '.' -             spaceChar -             return (1, DefaultStyle, DefaultDelim) -     else anyOrderedListMarker >>~ spaceChar - -orderedListStart style delim = try $ do -  optional newline -- if preceded by a Plain block in a list context -  nonindentSpaces -  state <- getState -  num <- if stateStrict state -            then do many1 digit -                    char '.' -                    return 1 -            else orderedListMarker style delim  -  if delim == Period && (style == UpperAlpha || (style == UpperRoman && -     num `elem` [1, 5, 10, 50, 100, 500, 1000])) -     then char '\t' <|> (spaceChar >> spaceChar) -     else spaceChar -  skipSpaces - --- parse a line of a list item (start = parser for beginning of list item) -listLine start = try $ do -  notFollowedBy' start -  notFollowedBy blankline -  notFollowedBy' (do indentSpaces -                     many (spaceChar) -                     bulletListStart <|> (anyOrderedListStart >> return ())) -  line <- manyTill anyChar newline -  return $ line ++ "\n" - --- parse raw text for one list item, excluding start marker and continuations -rawListItem start = try $ do -  start -  result <- many1 (listLine start) -  blanks <- many blankline -  return $ concat result ++ blanks - --- continuation of a list item - indented and separated by blankline  --- or (in compact lists) endline. --- note: nested lists are parsed as continuations -listContinuation start = try $ do -  lookAhead indentSpaces -  result <- many1 (listContinuationLine start) -  blanks <- many blankline -  return $ concat result ++ blanks - -listContinuationLine start = try $ do -  notFollowedBy blankline -  notFollowedBy' start -  optional indentSpaces -  result <- manyTill anyChar newline -  return $ result ++ "\n" - -listItem start = try $ do  -  first <- rawListItem start -  continuations <- many (listContinuation start) -  -- parsing with ListItemState forces markers at beginning of lines to -  -- count as list item markers, even if not separated by blank space. -  -- see definition of "endline" -  state <- getState -  let oldContext = stateParserContext state -  setState $ state {stateParserContext = ListItemState} -  -- parse the extracted block, which may contain various block elements: -  let raw = concat (first:continuations) -  contents <- parseFromString parseBlocks raw -  updateState (\st -> st {stateParserContext = oldContext}) -  return contents - -orderedList = try $ do -  (start, style, delim) <- lookAhead anyOrderedListStart -  items <- many1 (listItem (orderedListStart style delim)) -  return $ OrderedList (start, style, delim) $ compactify items - -bulletList = many1 (listItem bulletListStart) >>=  -             return . BulletList . compactify - --- definition lists - -definitionListItem = try $ do -  notFollowedBy blankline -  notFollowedBy' indentSpaces -  -- first, see if this has any chance of being a definition list: -  lookAhead (anyLine >> char ':') -  term <- manyTill inline newline -  raw <- many1 defRawBlock -  state <- getState -  let oldContext = stateParserContext state -  -- parse the extracted block, which may contain various block elements: -  contents <- parseFromString parseBlocks $ concat raw -  updateState (\st -> st {stateParserContext = oldContext}) -  return ((normalizeSpaces term), contents) - -defRawBlock = try $ do -  char ':' -  state <- getState -  let tabStop = stateTabStop state -  try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t") -  firstline <- anyLine -  rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine) -  trailing <- option "" blanklines -  return $ firstline ++ "\n" ++ unlines rawlines ++ trailing - -definitionList = do -  failIfStrict -  items <- many1 definitionListItem -  let (terms, defs) = unzip items -  let defs' = compactify defs -  let items' = zip terms defs' -  return $ DefinitionList items' - --- --- paragraph block --- - -para = try $ do  -  result <- many1 inline -  newline -  blanklines <|> do st <- getState -                    if stateStrict st -                       then lookAhead (blockQuote <|> header) >> return "" -                       else lookAhead emacsBoxQuote >> return "" -  return $ Para $ normalizeSpaces result - -plain = many1 inline >>= return . Plain . normalizeSpaces  - ---  --- raw html --- - -htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element" - -htmlBlock = do -  st <- getState -  if stateStrict st -    then try $ do failUnlessBeginningOfLine -                  first <- htmlElement -                  finalSpace <- many (oneOf spaceChars) -                  finalNewlines <- many newline -                  return $ RawHtml $ first ++ finalSpace ++ finalNewlines -    else rawHtmlBlocks - --- True if tag is self-closing -isSelfClosing tag =  -  isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag - -strictHtmlBlock = try $ do -  tag <- anyHtmlBlockTag  -  let tag' = extractTagType tag -  if isSelfClosing tag || tag' == "hr"  -     then return tag -     else do contents <- many (notFollowedBy' (htmlEndTag tag') >>  -                               (htmlElement <|> (count 1 anyChar))) -             end <- htmlEndTag tag' -             return $ tag ++ concat contents ++ end - -rawHtmlBlocks = do -  htmlBlocks <- many1 rawHtmlBlock     -  let combined = concatMap (\(RawHtml str) -> str) htmlBlocks -  let combined' = if not (null combined) && last combined == '\n' -                     then init combined  -- strip extra newline  -                     else combined  -  return $ RawHtml combined' - --- --- LaTeX --- - -rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment - --- --- Tables ---  - --- Parse a dashed line with optional trailing spaces; return its length --- and the length including trailing space. -dashedLine ch = do -  dashes <- many1 (char ch) -  sp     <- many spaceChar -  return $ (length dashes, length $ dashes ++ sp) - --- Parse a table header with dashed lines of '-' preceded by  --- one line of text. -simpleTableHeader = try $ do -  rawContent  <- anyLine -  initSp      <- nonindentSpaces -  dashes      <- many1 (dashedLine '-') -  newline -  let (lengths, lines) = unzip dashes -  let indices  = scanl (+) (length initSp) lines -  let rawHeads = tail $ splitByIndices (init indices) rawContent -  let aligns   = zipWith alignType (map (\a -> [a]) rawHeads) lengths -  return (rawHeads, aligns, indices) - --- Parse a table footer - dashed lines followed by blank line. -tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines - --- Parse a table separator - dashed line. -tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" - --- Parse a raw line and split it into chunks by indices. -rawTableLine indices = do -  notFollowedBy' (blanklines <|> tableFooter) -  line <- many1Till anyChar newline -  return $ map removeLeadingTrailingSpace $ tail $  -           splitByIndices (init indices) line - --- Parse a table line and return a list of lists of blocks (columns). -tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) - --- Parse a multiline table row and return a list of blocks (columns). -multilineRow indices = do -  colLines <- many1 (rawTableLine indices) -  optional blanklines -  let cols = map unlines $ transpose colLines -  mapM (parseFromString (many plain)) cols - --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int     -- Number of columns on terminal -                  -> [Int]   -- Indices -                  -> [Float] -- Fractional relative sizes of columns -widthsFromIndices _ [] = []   -widthsFromIndices numColumns indices =  -  let lengths = zipWith (-) indices (0:indices) -      totLength = sum lengths -      quotient = if totLength > numColumns -                   then fromIntegral totLength -                   else fromIntegral numColumns -      fracs = map (\l -> (fromIntegral l) / quotient) lengths in -  tail fracs - --- Parses a table caption:  inlines beginning with 'Table:' --- and followed by blank lines. -tableCaption = try $ do -  nonindentSpaces -  string "Table:" -  result <- many1 inline -  blanklines -  return $ normalizeSpaces result - --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith headerParser lineParser footerParser = try $ do -    (rawHeads, aligns, indices) <- headerParser -    lines <- many1Till (lineParser indices) footerParser -    caption <- option [] tableCaption -    heads <- mapM (parseFromString (many plain)) rawHeads -    state <- getState -    let numColumns = stateColumns state -    let widths = widthsFromIndices numColumns indices -    return $ Table caption aligns widths heads lines - --- Parse a simple table with '---' header and one line per row. -simpleTable = tableWith simpleTableHeader tableLine blanklines - --- Parse a multiline table:  starts with row of '-' on top, then header --- (which may be multiline), then the rows, --- which may be multiline, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). -multilineTable = tableWith multilineTableHeader multilineRow tableFooter - -multilineTableHeader = try $ do -  tableSep  -  rawContent  <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) -  initSp      <- nonindentSpaces -  dashes      <- many1 (dashedLine '-') -  newline -  let (lengths, lines) = unzip dashes -  let indices  = scanl (+) (length initSp) lines -  let rawHeadsList = transpose $ map  -                     (\ln -> tail $ splitByIndices (init indices) ln) -                     rawContent -  let rawHeads = map (joinWithSep " ") rawHeadsList -  let aligns   = zipWith alignType rawHeadsList lengths -  return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) - --- Returns an alignment type for a table, based on a list of strings --- (the rows of the column header) and a number (the length of the --- dashed line under the rows. -alignType :: [String] -> Int -> Alignment -alignType []  len = AlignDefault -alignType strLst len = -  let str        = head $ sortBy (comparing length) $  -                          map removeTrailingSpace strLst -      leftSpace  = if null str then False else (str !! 0) `elem` " \t" -      rightSpace = length str < len || (str !! (len - 1)) `elem` " \t" -  in  case (leftSpace, rightSpace) of -        (True,  False)   -> AlignRight -        (False, True)    -> AlignLeft -        (True,  True)    -> AlignCenter -        (False, False)   -> AlignDefault - -table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table" - ---  --- inline --- - -inline = choice [ str -                , smartPunctuation -                , whitespace -                , endline -                , code -                , charRef -                , strong -                , emph -                , note -                , inlineNote -                , link -                , image -                , math -                , strikeout -                , superscript -                , subscript -                , autoLink -                , rawHtmlInline' -                , rawLaTeXInline' -                , escapedChar -                , symbol -                , ltSign ] <?> "inline" - -escapedChar = do -  char '\\' -  state <- getState -  result <- option '\\' $ if stateStrict state  -                             then oneOf "\\`*_{}[]()>#+-.!~" -                             else satisfy (not . isAlphaNum) -  return $ Str [result] - -ltSign = do -  st <- getState -  if stateStrict st -     then char '<' -     else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html -  return $ Str ['<'] - -specialCharsMinusLt = filter (/= '<') specialChars - -symbol = do  -  result <- oneOf specialCharsMinusLt -  return $ Str [result] - --- parses inline code, between n `s and n `s -code = try $ do  -  starts <- many1 (char '`') -  skipSpaces -  result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> -                       (char '\n' >> return " "))  -                      (try (skipSpaces >> count (length starts) (char '`') >>  -                      notFollowedBy (char '`'))) -  return $ Code $ removeLeadingTrailingSpace $ concat result - -mathWord = many1 ((noneOf " \t\n\\$") <|> -                  (try (char '\\') >>~ notFollowedBy (char '$'))) - -math = try $ do -  failIfStrict -  char '$' -  notFollowedBy space -  words <- sepBy1 mathWord (many1 space) -  char '$' -  return $ TeX ("$" ++ (joinWithSep " " words) ++ "$") - -emph = ((enclosed (char '*') (char '*') inline) <|> -        (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>=  -        return . Emph . normalizeSpaces - -strong = ((enclosed (string "**") (try $ string "**") inline) <|>  -          (enclosed (string "__") (try $ string "__") inline)) >>= -         return . Strong . normalizeSpaces - -strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= -            return . Strikeout . normalizeSpaces - -superscript = failIfStrict >> enclosed (char '^') (char '^')  -              (notFollowedBy' whitespace >> inline) >>= -- may not contain Space -              return . Superscript - -subscript = failIfStrict >> enclosed (char '~') (char '~') -            (notFollowedBy' whitespace >> inline) >>=  -- may not contain Space -            return . Subscript  - -smartPunctuation = failUnlessSmart >>  -                   choice [ quoted, apostrophe, dash, ellipses ] - -apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe - -quoted = doubleQuoted <|> singleQuoted  - -withQuoteContext context parser = do -  oldState <- getState -  let oldQuoteContext = stateQuoteContext oldState -  setState oldState { stateQuoteContext = context } -  result <- parser -  newState <- getState -  setState newState { stateQuoteContext = oldQuoteContext } -  return result - -singleQuoted = try $ do -  singleQuoteStart -  withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= -    return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted = try $ do  -  doubleQuoteStart -  withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= -    return . Quoted DoubleQuote . normalizeSpaces - -failIfInQuoteContext context = do -  st <- getState -  if stateQuoteContext st == context -     then fail "already inside quotes" -     else return () - -singleQuoteStart = do  -  failIfInQuoteContext InSingleQuote -  char '\8216' <|>  -     do char '\''   -        notFollowedBy (oneOf ")!],.;:-? \t\n") -        notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> -                            satisfy (not . isAlphaNum))) -- possess/contraction -        return '\'' - -singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum - -doubleQuoteStart = failIfInQuoteContext InDoubleQuote >> -                   (char '"' <|> char '\8220') >> -                   notFollowedBy (oneOf " \t\n") - -doubleQuoteEnd = char '"' <|> char '\8221' - -ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses - -dash = enDash <|> emDash - -enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash - -emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >> -               skipSpaces >> return EmDash - -whitespace = do -  sps <- many1 (oneOf spaceChars) -  if length sps >= 2 -     then option Space (endline >> return LineBreak) -     else return Space <?> "whitespace" - -nonEndline = satisfy (/='\n') - -strChar = noneOf (specialChars ++ spaceChars ++ "\n") - -str = many1 strChar >>= return . Str - --- an endline character that can be treated as a space, not a structural break -endline = try $ do -  newline -  notFollowedBy blankline -  st <- getState -  if stateStrict st  -    then do notFollowedBy emailBlockQuoteStart -            notFollowedBy (char '#')  -- atx header -    else return ()  -  -- parse potential list-starts differently if in a list: -  if stateParserContext st == ListItemState -     then notFollowedBy' (bulletListStart <|>  -                          (anyOrderedListStart >> return ())) -     else return () -  return Space - --- --- links --- - --- a reference label for a link -reference = notFollowedBy' (string "[^") >>  -- footnote reference -            inlinesInBalanced "[" "]" >>= (return . normalizeSpaces) - --- source for a link, with optional title -source = try $ do  -  char '(' -  optional (char '<') -  src <- many (noneOf ")> \t\n") -  optional (char '>') -  tit <- option "" linkTitle -  skipSpaces -  char ')' -  return (removeTrailingSpace src, tit) - -linkTitle = try $ do  -  (many1 spaceChar >> option '\n' newline) <|> newline -  skipSpaces -  delim <- char '\'' <|> char '"' -  tit <-   manyTill anyChar (try (char delim >> skipSpaces >> -                                  notFollowedBy (noneOf ")\n"))) -  return $ decodeCharacterReferences tit - -link = try $ do -  label <- reference -  src <- source <|> referenceLink label -  return $ Link label src - --- a link like [this][ref] or [this][] or [this] -referenceLink label = do -  ref <- option [] (try (optional (char ' ') >>  -                         optional (newline >> skipSpaces) >> reference)) -  let ref' = if null ref then label else ref -  state <- getState -  case lookupKeySrc (stateKeys state) ref' of -     Nothing     -> fail "no corresponding key"  -     Just target -> return target  - -emailAddress = try $ do -  name <- many1 (alphaNum <|> char '+') -  char '@' -  first <- many1 alphaNum -  rest <- many1 (char '.' >> many1 alphaNum) -  return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest) - -uri = try $ do -  str <- many1 (noneOf "\n\t >") -  if isURI str -     then return str -     else fail "not a URI" - -autoLink = try $ do -  char '<' -  src <- uri <|> emailAddress -  char '>' -  let src' = if "mailto:" `isPrefixOf` src -                then drop 7 src -                else src  -  st <- getState -  return $ if stateStrict st -              then Link [Str src'] (src, "") -              else Link [Code src'] (src, "") - -image = try $ do -  char '!' -  (Link label src) <- link -  return $ Image label src - -note = try $ do -  failIfStrict -  ref <- noteMarker -  state <- getState -  let notes = stateNotes state -  case lookup ref notes of -    Nothing       -> fail "note not found" -    Just contents -> return $ Note contents - -inlineNote = try $ do -  failIfStrict -  char '^' -  contents <- inlinesInBalanced "[" "]" -  return $ Note [Para contents] - -rawLaTeXInline' = failIfStrict >> rawLaTeXInline - -rawHtmlInline' = do -  st <- getState -  result <- choice $ if stateStrict st -                        then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]  -                        else [htmlBlockElement, anyHtmlInlineTag] -  return $ HtmlInline result - diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs deleted file mode 100644 index 1239eb688..000000000 --- a/src/Text/Pandoc/Readers/RST.hs +++ /dev/null @@ -1,640 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Readers.RST  -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Conversion from reStructuredText to 'Pandoc' document. --} -module Text.Pandoc.Readers.RST (  -                                readRST -                               ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared  -import Text.ParserCombinators.Parsec -import Data.List ( findIndex, delete ) - --- | Parse reStructuredText string and return Pandoc document. -readRST :: ParserState -> String -> Pandoc -readRST state str = (readWith parseRST) state (str ++ "\n\n") - --- --- Constants and data structure definitions ---- - -bulletListMarkers = "*+-" -underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~" - --- treat these as potentially non-text when parsing inline: -specialChars = "\\`|*_<>$:[-" - --- --- parsing documents --- - -isAnonKey (ref, src) = ref == [Str "_"] - -isHeader :: Int -> Block -> Bool -isHeader n (Header x _) = x == n -isHeader _ _ = False - --- | Promote all headers in a list of blocks.  (Part of --- title transformation for RST.) -promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level text):rest) =  -    (Header (level - num) text):(promoteHeaders num rest) -promoteHeaders num (other:rest) = other:(promoteHeaders num rest) -promoteHeaders num [] = [] - --- | If list of blocks starts with a header (or a header and subheader) --- of level that are not found elsewhere, return it as a title and --- promote all the other headers.  -titleTransform :: [Block]              -- ^ list of blocks -               -> ([Block], [Inline])  -- ^ modified list of blocks, title -titleTransform ((Header 1 head1):(Header 2 head2):rest) =  -- title subtitle -    if (any (isHeader 1) rest) || (any (isHeader 2) rest) -       then ((Header 1 head1):(Header 2 head2):rest, []) -       else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) -titleTransform ((Header 1 head1):rest) =       -- title, no subtitle -    if (any (isHeader 1) rest) -       then ((Header 1 head1):rest, []) -       else ((promoteHeaders 1 rest), head1) -titleTransform blocks = (blocks, []) - -parseRST = do -  startPos <- getPosition -  -- go through once just to get list of reference keys -  -- docMinusKeys is the raw document with blanks where the keys were... -  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat -  setInput docMinusKeys -  setPosition startPos -  st <- getState -  let reversedKeys = stateKeys st -  updateState $ \st -> st { stateKeys = reverse reversedKeys } -  -- now parse it for real... -  blocks <- parseBlocks  -  let blocks' = filter (/= Null) blocks -  state <- getState -  let (blocks'', title) = if stateStandalone state -                              then titleTransform blocks' -                              else (blocks', []) -  let authors = stateAuthors state -  let date = stateDate state -  let title' = if (null title) then (stateTitle state) else title -  return $ Pandoc (Meta title' authors date) blocks'' - --- --- parsing blocks --- - -parseBlocks = manyTill block eof - -block = choice [ codeBlock -               , rawHtmlBlock -               , rawLaTeXBlock -               , fieldList -               , blockQuote -               , imageBlock -               , unknownDirective -               , header -               , hrule -               , list -               , lineBlock -               , para -               , plain -               , nullBlock ] <?> "block" - --- --- field list --- - -fieldListItem indent = try $ do -  string indent -  char ':' -  name <- many1 alphaNum -  string ": " -  skipSpaces -  first <- manyTill anyChar newline -  rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >>  -                            indentedBlock -  return (name, joinWithSep " " (first:(lines rest))) - -fieldList = try $ do -  indent <- lookAhead $ many (oneOf " \t") -  items <- many1 $ fieldListItem indent -  blanklines -  let authors = case lookup "Authors" items of -                  Just auth -> [auth] -                  Nothing  -> map snd (filter (\(x,y) -> x == "Author") items) -  if null authors  -     then return ()  -     else updateState $ \st -> st {stateAuthors = authors} -  case (lookup "Date" items) of -           Just dat -> updateState $ \st -> st {stateDate = dat} -           Nothing  -> return () -  case (lookup "Title" items) of -           Just tit -> parseFromString (many inline) tit >>= -                       \t -> updateState $ \st -> st {stateTitle = t} -           Nothing  -> return () -  let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") &&  -                  (x /= "Date") && (x /= "Title")) items -  if null remaining -              then return Null -              else do terms <- mapM (return . (:[]) . Str . fst) remaining -                      defs  <- mapM (parseFromString (many block) . snd)  -                                    remaining -                      return $ DefinitionList $ zip terms defs - --- --- line block --- - -lineBlockLine = try $ do -  string "| " -  white <- many (oneOf " \t") -  line <- manyTill inline newline -  return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak] - -lineBlock = try $ do -  lines <- many1 lineBlockLine -  blanklines -  return $ Para (concat lines) - --- --- paragraph block --- - -para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" - -codeBlockStart = string "::" >> blankline >> blankline - --- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock = try $ do -  result <- many1 (notFollowedBy' codeBlockStart >> inline) -  lookAhead (string "::") -  return $ Para $ if last result == Space -                     then normalizeSpaces result -                     else (normalizeSpaces result) ++ [Str ":"] - --- regular paragraph -paraNormal = try $ do  -  result <- many1 inline -  newline -  blanklines -  return $ Para $ normalizeSpaces result - -plain = many1 inline >>= return . Plain . normalizeSpaces  - --- --- image block --- - -imageBlock = try $ do -  string ".. image:: " -  src <- manyTill anyChar newline -  fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t") -                           many1 $ fieldListItem indent -  optional blanklines -  case lookup "alt" fields of -        Just alt -> return $ Plain [Image [Str alt] (src, alt)] -        Nothing  -> return $ Plain [Image [Str "image"] (src, "")] --- --- header blocks --- - -header = doubleHeader <|> singleHeader <?> "header" - --- a header with lines on top and bottom -doubleHeader = try $ do -  c <- oneOf underlineChars -  rest <- many (char c)  -- the top line -  let lenTop = length (c:rest) -  skipSpaces -  newline -  txt <- many1 (notFollowedBy blankline >> inline) -  pos <- getPosition -  let len = (sourceColumn pos) - 1 -  if (len > lenTop) then fail "title longer than border" else return () -  blankline              -- spaces and newline -  count lenTop (char c)  -- the bottom line -  blanklines -  -- check to see if we've had this kind of header before.   -  -- if so, get appropriate level.  if not, add to list. -  state <- getState -  let headerTable = stateHeaderTable state -  let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of -        Just ind -> (headerTable, ind + 1) -        Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) -  setState (state { stateHeaderTable = headerTable' }) -  return $ Header level (normalizeSpaces txt) - --- a header with line on the bottom only -singleHeader = try $ do  -  notFollowedBy' whitespace -  txt <- many1 (do {notFollowedBy blankline; inline}) -  pos <- getPosition -  let len = (sourceColumn pos) - 1 -  blankline -  c <- oneOf underlineChars -  rest <- count (len - 1) (char c) -  many (char c) -  blanklines -  state <- getState -  let headerTable = stateHeaderTable state -  let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of -        Just ind -> (headerTable, ind + 1) -        Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) -  setState (state { stateHeaderTable = headerTable' }) -  return $ Header level (normalizeSpaces txt) - --- --- hrule block --- - -hrule = try $ do -  chr <- oneOf underlineChars -  count 3 (char chr) -  skipMany (char chr) -  blankline -  blanklines -  return HorizontalRule - --- --- code blocks --- - --- read a line indented by a given string -indentedLine indents = try $ do -  string indents -  result <- manyTill anyChar newline -  return $ result ++ "\n" - --- two or more indented lines, possibly separated by blank lines. --- any amount of indentation will work. -indentedBlock = do  -  indents <- lookAhead $ many1 (oneOf " \t") -  lns <- many $ choice $ [ indentedLine indents, -                           try $ do b <- blanklines -                                    l <- indentedLine indents -                                    return (b ++ l) ] -  optional blanklines  -  return $ concat lns - -codeBlock = try $ do -  codeBlockStart -  result <- indentedBlock -  return $ CodeBlock $ stripTrailingNewlines result - --- --- raw html --- - -rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> -                     indentedBlock >>= return . RawHtml - --- --- raw latex --- - -rawLaTeXBlock = try $ do -  string ".. raw:: latex" -  blanklines -  result <- indentedBlock -  return $ Para [(TeX result)] - --- --- block quotes --- - -blockQuote = do -  raw <- indentedBlock -  -- parse the extracted block, which may contain various block elements: -  contents <- parseFromString parseBlocks $ raw ++ "\n\n" -  return $ BlockQuote contents - --- --- list blocks --- - -list = choice [ bulletList, orderedList, definitionList ] <?> "list" - -definitionListItem = try $ do -  term <- many1Till inline endline -  raw <- indentedBlock -  -- parse the extracted block, which may contain various block elements: -  contents <- parseFromString parseBlocks $ raw ++ "\n\n" -  return (normalizeSpaces term, contents) - -definitionList = many1 definitionListItem >>= return . DefinitionList - --- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart = try $ do -  notFollowedBy' hrule  -- because hrules start out just like lists -  marker <- oneOf bulletListMarkers -  white <- many1 spaceChar -  return $ length (marker:white) - --- parses ordered list start and returns its length (inc following whitespace) -orderedListStart style delim = try $ do -  (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) -  white <- many1 spaceChar -  return $ markerLen + length white - --- parse a line of a list item -listLine markerLength = try $ do -  notFollowedBy blankline -  indentWith markerLength -  line <- manyTill anyChar newline -  return $ line ++ "\n" - --- indent by specified number of spaces (or equiv. tabs) -indentWith num = do -  state <- getState -  let tabStop = stateTabStop state -  if (num < tabStop) -     then count num  (char ' ') -     else choice [ try (count num (char ' ')),  -                   (try (char '\t' >> count (num - tabStop) (char ' '))) ]  - --- parse raw text for one list item, excluding start marker and continuations -rawListItem start = do -  markerLength <- start -  firstLine <- manyTill anyChar newline -  restLines <- many (listLine markerLength) -  return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) - --- continuation of a list item - indented and separated by blankline or  --- (in compact lists) endline.   --- Note: nested lists are parsed as continuations. -listContinuation markerLength = try $ do -  blanks <- many1 blankline -  result <- many1 (listLine markerLength) -  return $ blanks ++ concat result - -listItem start = try $ do  -  (markerLength, first) <- rawListItem start -  rest <- many (listContinuation markerLength) -  blanks <- choice [ try (many blankline >>~ lookAhead start), -                     many1 blankline ]  -- whole list must end with blank. -  -- parsing with ListItemState forces markers at beginning of lines to -  -- count as list item markers, even if not separated by blank space. -  -- see definition of "endline" -  state <- getState -  let oldContext = stateParserContext state -  setState $ state {stateParserContext = ListItemState} -  -- parse the extracted block, which may itself contain block elements -  parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks -  updateState (\st -> st {stateParserContext = oldContext}) -  return parsed - -orderedList = do -  (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) -  items <- many1 (listItem (orderedListStart style delim)) -  let items' = compactify items -  return $ OrderedList (start, style, delim) items' - -bulletList = many1 (listItem bulletListStart) >>=  -             return . BulletList . compactify - --- --- unknown directive (e.g. comment) --- - -unknownDirective = try $ do -  string ".. " -  manyTill anyChar newline -  many (string "   :" >> many1 (noneOf "\n:") >> char ':' >> -        many1 (noneOf "\n") >> newline) -  optional blanklines -  return Null - ---  --- reference key --- - -referenceKey = do -  startPos <- getPosition -  key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] -  st <- getState -  let oldkeys = stateKeys st -  updateState $ \st -> st { stateKeys = key : oldkeys } -  optional blanklines -  endPos <- getPosition -  -- return enough blanks to replace key -  return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -targetURI = do -  skipSpaces -  optional newline -  contents <- many1 (try (many spaceChar >> newline >>  -                          many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") -  blanklines -  return contents - -imageKey = try $ do -  string ".. |" -  ref <- manyTill inline (char '|') -  skipSpaces -  string "image::" -  src <- targetURI -  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - -anonymousKey = try $ do -  oneOfStrings [".. __:", "__"] -  src <- targetURI -  state <- getState -  return ([Str "_"], (removeLeadingTrailingSpace src, "")) - -regularKeyQuoted = try $ do -  string ".. _`" -  ref <- manyTill inline (char '`') -  char ':' -  src <- targetURI -  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - -regularKey = try $ do -  string ".. _" -  ref <- manyTill inline (char ':') -  src <- targetURI -  return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - - --  - -- inline - -- - -inline = choice [ link -                , str -                , whitespace -                , endline -                , strong -                , emph -                , code -                , image -                , hyphens -                , superscript -                , subscript -                , escapedChar -                , symbol ] <?> "inline" - -hyphens = do -  result <- many1 (char '-') -  option Space endline  -  -- don't want to treat endline after hyphen or dash as a space -  return $ Str result - -escapedChar = escaped anyChar - -symbol = do  -  result <- oneOf specialChars -  return $ Str [result] - --- parses inline code, between codeStart and codeEnd -code = try $ do  -  string "``" -  result <- manyTill anyChar (try (string "``")) -  return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result - -emph = enclosed (char '*') (char '*') inline >>=  -       return . Emph . normalizeSpaces - -strong = enclosed (string "**") (try $ string "**") inline >>=  -         return . Strong . normalizeSpaces - -interpreted role = try $ do -  optional $ try $ string "\\ " -  result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar -  nextChar <- lookAhead anyChar -  try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") -  return [Str result] - -superscript = interpreted "sup" >>= (return . Superscript) - -subscript = interpreted "sub" >>= (return . Subscript) - -whitespace = many1 spaceChar >> return Space <?> "whitespace" - -str = notFollowedBy' oneWordReference >>  -      many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str - --- an endline character that can be treated as a space, not a structural break -endline = try $ do -  newline -  notFollowedBy blankline -  -- parse potential list-starts at beginning of line differently in a list: -  st <- getState -  if (stateParserContext st) == ListItemState -     then notFollowedBy (anyOrderedListMarker >> spaceChar) >> -          notFollowedBy' bulletListStart -     else return () -  return Space - --- --- links --- - -link = choice [explicitLink, referenceLink, autoLink]  <?> "link" - -explicitLink = try $ do -  char '`' -  notFollowedBy (char '`') -- `` is marks start of inline code -  label <- manyTill inline (try (do {spaces; char '<'})) -  src <- manyTill (noneOf ">\n ") (char '>') -  skipSpaces -  string "`_" -  return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "") - -reference = try $ do -  char '`' -  notFollowedBy (char '`') -  label <- many1Till inline (char '`')  -  char '_' -  return label - -oneWordReference = do -  raw <- many1 alphaNum -  char '_' -  notFollowedBy alphaNum    -- because this_is_not a link -  return [Str raw] - -referenceLink = try $ do -  label <- reference <|> oneWordReference -  key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link -  state <- getState -  let keyTable = stateKeys state -  src <- case lookupKeySrc keyTable key of -           Nothing     -> fail "no corresponding key" -           Just target -> return target -  -- if anonymous link, remove first anon key so it won't be used again -  let keyTable' = if (key == [Str "_"]) -- anonymous link?  -                    then delete ([Str "_"], src) keyTable -- remove first anon key  -                    else keyTable                     -  setState $ state { stateKeys = keyTable' } -  return $ Link (normalizeSpaces label) src  - -uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",  -                           "mailto:", "news:", "telnet:" ] - -uri = try $ do -  scheme <- uriScheme -  identifier <- many1 (noneOf " \t\n") -  return $ scheme ++ identifier - -autoURI = do -  src <- uri -  return $ Link [Str src] (src, "") - -emailChar = alphaNum <|> oneOf "-+_." - -emailAddress = try $ do -  firstLetter <- alphaNum -  restAddr <- many emailChar -  let addr = firstLetter:restAddr -  char '@' -  dom <- domain -  return $ addr ++ '@':dom - -domainChar = alphaNum <|> char '-' - -domain = do -  first <- many1 domainChar -  dom <- many1 (try (do{ char '.'; many1 domainChar })) -  return $ joinWithSep "." (first:dom) - -autoEmail = do -  src <- emailAddress -  return $ Link [Str src] ("mailto:" ++ src, "") - -autoLink = autoURI <|> autoEmail - --- For now, we assume that all substitution references are for images. -image = try $ do -  char '|' -  ref <- manyTill inline (char '|') -  state <- getState -  let keyTable = stateKeys state -  src <- case lookupKeySrc keyTable ref of -           Nothing     -> fail "no corresponding key" -           Just target -> return target -  return $ Image (normalizeSpaces ref) src - diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs deleted file mode 100644 index f27c3ae75..000000000 --- a/src/Text/Pandoc/Shared.hs +++ /dev/null @@ -1,792 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Shared -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Utility functions and definitions used by the various Pandoc modules. --} -module Text.Pandoc.Shared (  -                     -- * List processing -                     splitBy, -                     splitByIndices, -                     substitute, -                     joinWithSep, -                     -- * Text processing -                     backslashEscapes, -                     escapeStringUsing, -                     stripTrailingNewlines, -                     removeLeadingTrailingSpace, -                     removeLeadingSpace, -                     removeTrailingSpace, -                     stripFirstAndLast, -                     camelCaseToHyphenated, -                     toRomanNumeral, -                     wrapped, -                     wrapIfNeeded, -                     -- * Parsing -                     (>>~), -                     anyLine, -                     many1Till, -                     notFollowedBy', -                     oneOfStrings, -                     spaceChar, -                     skipSpaces, -                     blankline, -                     blanklines, -                     enclosed, -                     stringAnyCase, -                     parseFromString, -                     lineClump, -                     charsInBalanced, -                     charsInBalanced', -                     romanNumeral, -                     withHorizDisplacement, -                     nullBlock, -                     failIfStrict, -                     escaped, -                     anyOrderedListMarker, -                     orderedListMarker, -                     charRef, -                     readWith, -                     testStringWith, -                     ParserState (..), -                     defaultParserState, -                     HeaderType (..), -                     ParserContext (..), -                     QuoteContext (..), -                     NoteTable, -                     KeyTable, -                     lookupKeySrc, -                     refsMatch, -                     -- * Native format prettyprinting -                     prettyPandoc, -                     -- * Pandoc block and inline list processing -                     orderedListMarkers, -                     normalizeSpaces, -                     compactify, -                     Element (..), -                     hierarchicalize, -                     isHeaderBlock, -                     -- * Writer options -                     WriterOptions (..), -                     defaultWriterOptions -                    ) where - -import Text.Pandoc.Definition -import Text.ParserCombinators.Parsec -import Text.PrettyPrint.HughesPJ ( Doc, fsep ) -import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) -import Data.List ( find, isPrefixOf ) -import Control.Monad ( join ) - --- --- List processing --- - --- | Split list by groups of one or more sep. -splitBy :: (Eq a) => a -> [a] -> [[a]] -splitBy _ [] = [] -splitBy sep lst =  -  let (first, rest) = break (== sep) lst -      rest'         = dropWhile (== sep) rest -  in  first:(splitBy sep rest') - --- | Split list into chunks divided at specified indices. -splitByIndices :: [Int] -> [a] -> [[a]] -splitByIndices [] lst = [lst] -splitByIndices (x:xs) lst = -    let (first, rest) = splitAt x lst in -    first:(splitByIndices (map (\y -> y - x)  xs) rest) - --- | Replace each occurrence of one sublist in a list with another. -substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] -substitute _ _ [] = [] -substitute [] _ lst = lst -substitute target replacement lst =  -    if target `isPrefixOf` lst -       then replacement ++ (substitute target replacement $ drop (length target) lst) -       else (head lst):(substitute target replacement $ tail lst) - --- | Joins a list of lists, separated by another list. -joinWithSep :: [a]    -- ^ List to use as separator -            -> [[a]]  -- ^ Lists to join -            -> [a] -joinWithSep _ [] = [] -joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst - --- --- Text processing --- - --- | Returns an association list of backslash escapes for the --- designated characters. -backslashEscapes :: [Char]    -- ^ list of special characters to escape -                 -> [(Char, String)] -backslashEscapes = map (\ch -> (ch, ['\\',ch])) - --- | Escape a string of characters, using an association list of --- characters and strings. -escapeStringUsing :: [(Char, String)] -> String -> String -escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) =  -  case (lookup x escapeTable) of -       Just str  -> str ++ rest -       Nothing   -> x:rest -  where rest = escapeStringUsing escapeTable xs - --- | Strip trailing newlines from string. -stripTrailingNewlines :: String -> String -stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse - --- | Remove leading and trailing space (including newlines) from string. -removeLeadingTrailingSpace :: String -> String -removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace - --- | Remove leading space (including newlines) from string. -removeLeadingSpace :: String -> String -removeLeadingSpace = dropWhile (`elem` " \n\t") - --- | Remove trailing space (including newlines) from string. -removeTrailingSpace :: String -> String -removeTrailingSpace = reverse . removeLeadingSpace . reverse - --- | Strip leading and trailing characters from string -stripFirstAndLast :: String -> String -stripFirstAndLast str = -  drop 1 $ take ((length str) - 1) str - --- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).  -camelCaseToHyphenated :: String -> String -camelCaseToHyphenated [] = "" -camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = -  a:'-':(toLower b):(camelCaseToHyphenated rest) -camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) - --- | Convert number < 4000 to uppercase roman numeral. -toRomanNumeral :: Int -> String -toRomanNumeral x = -  if x >= 4000 || x < 0 -     then "?" -     else case x of -              _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000) -              _ | x >= 900  -> "CM" ++ toRomanNumeral (x - 900) -              _ | x >= 500  -> "D" ++ toRomanNumeral (x - 500) -              _ | x >= 400  -> "CD" ++ toRomanNumeral (x - 400) -              _ | x >= 100  -> "C" ++ toRomanNumeral (x - 100) -              _ | x >= 90   -> "XC" ++ toRomanNumeral (x - 90) -              _ | x >= 50   -> "L"  ++ toRomanNumeral (x - 50) -              _ | x >= 40   -> "XL" ++ toRomanNumeral (x - 40) -              _ | x >= 10   -> "X" ++ toRomanNumeral (x - 10) -              _ | x >= 9    -> "IX" ++ toRomanNumeral (x - 5) -              _ | x >= 5    -> "V" ++ toRomanNumeral (x - 5) -              _ | x >= 4    -> "IV" ++ toRomanNumeral (x - 4) -              _ | x >= 1    -> "I" ++ toRomanNumeral (x - 1) -              _             -> "" - --- | Wrap inlines to line length. -wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc -wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=  -                          return . fsep - -wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->  -                           [Inline] -> m Doc -wrapIfNeeded opts = if writerWrapText opts -                       then wrapped  -                       else ($) - --- --- Parsing --- - --- | Like >>, but returns the operation on the left. --- (Suggested by Tillmann Rendel on Haskell-cafe list.) -(>>~) :: (Monad m) => m a -> m b -> m a -a >>~ b = a >>= \x -> b >> return x - --- | Parse any line of text -anyLine :: GenParser Char st [Char] -anyLine = manyTill anyChar newline - --- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a -	     -> GenParser tok st end -	     -> GenParser tok st [a] -many1Till p end = do -         first <- p -         rest <- manyTill p end -         return (first:rest) - --- | A more general form of @notFollowedBy@.  This one allows any  --- type of parser to be specified, and succeeds only if that parser fails. --- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () -notFollowedBy' p  = try $ join $  do  a <- try p -                                      return (unexpected (show a)) -                                  <|> -                                  return (return ()) --- (This version due to Andrew Pimlott on the Haskell mailing list.) - --- | Parses one of a list of strings (tried in order).   -oneOfStrings :: [String] -> GenParser Char st String -oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings - --- | Parses a space or tab. -spaceChar :: CharParser st Char -spaceChar = char ' ' <|> char '\t' - --- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () -skipSpaces = skipMany spaceChar - --- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char -blankline = try $ skipSpaces >> newline - --- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] -blanklines = many1 blankline - --- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t   -- ^ start parser -	    -> GenParser Char st end  -- ^ end parser -	    -> GenParser Char st a    -- ^ content parser (to be used repeatedly) -	    -> GenParser Char st [a] -enclosed start end parser = try $  -  start >> notFollowedBy space >> many1Till parser end - --- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String -stringAnyCase [] = string "" -stringAnyCase (x:xs) = do -  firstChar <- char (toUpper x) <|> char (toLower x) -  rest <- stringAnyCase xs -  return (firstChar:rest) - --- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a -parseFromString parser str = do -  oldPos <- getPosition -  oldInput <- getInput -  setInput str -  result <- parser -  setInput oldInput -  setPosition oldPos -  return result - --- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String -lineClump = blanklines  -          <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) - --- | Parse a string of characters between an open character --- and a close character, including text between balanced --- pairs of open and close, which must be different. For example, --- @charsInBalanced '(' ')'@ will parse "(hello (there))" --- and return "hello (there)".  Stop if a blank line is --- encountered. -charsInBalanced :: Char -> Char -> GenParser Char st String -charsInBalanced open close = try $ do -  char open -  raw <- many $     (many1 (noneOf [open, close, '\n'])) -                <|> (do res <- charsInBalanced open close -                        return $ [open] ++ res ++ [close]) -                <|> try (string "\n" >>~ notFollowedBy' blanklines) -  char close -  return $ concat raw - --- | Like @charsInBalanced@, but allow blank lines in the content. -charsInBalanced' :: Char -> Char -> GenParser Char st String -charsInBalanced' open close = try $ do -  char open -  raw <- many $       (many1 (noneOf [open, close])) -                  <|> (do res <- charsInBalanced' open close -                          return $ [open] ++ res ++ [close]) -  char close -  return $ concat raw - --- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Bool                  -- ^ Uppercase if true -             -> GenParser Char st Int -romanNumeral upperCase = do -    let charAnyCase c = char (if upperCase then toUpper c else c) -    let one = charAnyCase 'i' -    let five = charAnyCase 'v' -    let ten = charAnyCase 'x' -    let fifty = charAnyCase 'l' -    let hundred = charAnyCase 'c' -    let fivehundred = charAnyCase 'd' -    let thousand = charAnyCase 'm' -    thousands <- many thousand >>= (return . (1000 *) . length) -    ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 -    fivehundreds <- many fivehundred >>= (return . (500 *) . length) -    fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 -    hundreds <- many hundred >>= (return . (100 *) . length) -    nineties <- option 0 $ try $ ten >> hundred >> return 90 -    fifties <- many fifty >>= (return . (50 *) . length) -    forties <- option 0 $ try $ ten >> fifty >> return 40 -    tens <- many ten >>= (return . (10 *) . length) -    nines <- option 0 $ try $ one >> ten >> return 9 -    fives <- many five >>= (return . (5 *) . length) -    fours <- option 0 $ try $ one >> five >> return 4 -    ones <- many one >>= (return . length) -    let total = thousands + ninehundreds + fivehundreds + fourhundreds + -                hundreds + nineties + fifties + forties + tens + nines + -                fives + fours + ones -    if total == 0 -       then fail "not a roman numeral" -       else return total - --- | Applies a parser, returns tuple of its results and its horizontal --- displacement (the difference between the source column at the end --- and the source column at the beginning). Vertical displacement --- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a  -- ^ Parser to apply -                      -> GenParser Char st (a, Int) -- ^ (result, displacement) -withHorizDisplacement parser = do -  pos1 <- getPosition -  result <- parser -  pos2 <- getPosition -  return (result, sourceColumn pos2 - sourceColumn pos1) - --- | Parses a character and returns 'Null' (so that the parser can move on --- if it gets stuck). -nullBlock :: GenParser Char st Block -nullBlock = anyChar >> return Null - --- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser Char ParserState () -failIfStrict = do -  state <- getState -  if stateStrict state then fail "strict mode" else return () - --- | Parses backslash, then applies character parser. -escaped :: GenParser Char st Char  -- ^ Parser for character to escape -        -> GenParser Char st Inline -escaped parser = try $ do -  char '\\' -  result <- parser -  return (Str [result]) - --- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: GenParser Char st (ListNumberStyle, Int) -upperRoman = do -  num <- romanNumeral True -  return (UpperRoman, num) - --- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: GenParser Char st (ListNumberStyle, Int) -lowerRoman = do -  num <- romanNumeral False -  return (LowerRoman, num) - --- | Parses a decimal numeral and returns (Decimal, number). -decimal :: GenParser Char st (ListNumberStyle, Int) -decimal = do -  num <- many1 digit -  return (Decimal, read num) - --- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: GenParser Char st (ListNumberStyle, Int) -defaultNum = do -  char '#' -  return (DefaultStyle, 1) - --- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: GenParser Char st (ListNumberStyle, Int) -lowerAlpha = do -  ch <- oneOf ['a'..'z'] -  return (LowerAlpha, ord ch - ord 'a' + 1) - --- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: GenParser Char st (ListNumberStyle, Int) -upperAlpha = do -  ch <- oneOf ['A'..'Z'] -  return (UpperAlpha, ord ch - ord 'A' + 1) - --- | Parses a roman numeral i or I -romanOne :: GenParser Char st (ListNumberStyle, Int) -romanOne = (char 'i' >> return (LowerRoman, 1)) <|> -           (char 'I' >> return (UpperRoman, 1)) - --- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: GenParser Char st ListAttributes  -anyOrderedListMarker = choice $  -  [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], -                           numParser <- [decimal, defaultNum, romanOne, -                           lowerAlpha, lowerRoman, upperAlpha, upperRoman]] - --- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: GenParser Char st (ListNumberStyle, Int) -         -> GenParser Char st ListAttributes  -inPeriod num = try $ do -  (style, start) <- num -  char '.' -  let delim = if style == DefaultStyle -                 then DefaultDelim -                 else Period -  return (start, style, delim) -  --- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: GenParser Char st (ListNumberStyle, Int) -           -> GenParser Char st ListAttributes  -inOneParen num = try $ do -  (style, start) <- num -  char ')' -  return (start, style, OneParen) - --- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: GenParser Char st (ListNumberStyle, Int) -            -> GenParser Char st ListAttributes  -inTwoParens num = try $ do -  char '(' -  (style, start) <- num -  char ')' -  return (start, style, TwoParens) - --- | Parses an ordered list marker with a given style and delimiter, --- returns number. -orderedListMarker :: ListNumberStyle  -                  -> ListNumberDelim  -                  -> GenParser Char st Int -orderedListMarker style delim = do -  let num = case style of -               DefaultStyle -> decimal <|> defaultNum -               Decimal      -> decimal -               UpperRoman   -> upperRoman -               LowerRoman   -> lowerRoman -               UpperAlpha   -> upperAlpha -               LowerAlpha   -> lowerAlpha -  let context = case delim of -               DefaultDelim -> inPeriod -               Period       -> inPeriod -               OneParen     -> inOneParen -               TwoParens    -> inTwoParens -  (start, _, _) <- context num -  return start - --- | Parses a character reference and returns a Str element. -charRef :: GenParser Char st Inline -charRef = do -  c <- characterReference -  return $ Str [c] - --- | Parse a string with a given parser and state. -readWith :: GenParser Char ParserState a      -- ^ parser -         -> ParserState                    -- ^ initial state -         -> String                         -- ^ input string -         -> a -readWith parser state input =  -    case runParser parser state "source" input of -      Left err     -> error $ "\nError:\n" ++ show err -      Right result -> result - --- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => GenParser Char ParserState a -               -> String -               -> IO () -testStringWith parser str = putStrLn $ show $  -                            readWith parser defaultParserState str - --- | Parsing options. -data ParserState = ParserState -    { stateParseRaw        :: Bool,          -- ^ Parse raw HTML and LaTeX? -      stateParserContext   :: ParserContext, -- ^ Inside list? -      stateQuoteContext    :: QuoteContext,  -- ^ Inside quoted environment? -      stateKeys            :: KeyTable,      -- ^ List of reference keys -      stateNotes           :: NoteTable,     -- ^ List of notes -      stateTabStop         :: Int,           -- ^ Tab stop -      stateStandalone      :: Bool,          -- ^ Parse bibliographic info? -      stateTitle           :: [Inline],      -- ^ Title of document -      stateAuthors         :: [String],      -- ^ Authors of document -      stateDate            :: String,        -- ^ Date of document -      stateStrict          :: Bool,          -- ^ Use strict markdown syntax? -      stateSmart           :: Bool,          -- ^ Use smart typography? -      stateColumns         :: Int,           -- ^ Number of columns in terminal -      stateHeaderTable     :: [HeaderType]   -- ^ Ordered list of header types used -    } -    deriving Show - -defaultParserState :: ParserState -defaultParserState =  -    ParserState { stateParseRaw        = False, -                  stateParserContext   = NullState, -                  stateQuoteContext    = NoQuote, -                  stateKeys            = [], -                  stateNotes           = [], -                  stateTabStop         = 4, -                  stateStandalone      = False, -                  stateTitle           = [], -                  stateAuthors         = [], -                  stateDate            = [], -                  stateStrict          = False, -                  stateSmart           = False, -                  stateColumns         = 80, -                  stateHeaderTable     = [] } - -data HeaderType  -    = SingleHeader Char  -- ^ Single line of characters underneath -    | DoubleHeader Char  -- ^ Lines of characters above and below -    deriving (Eq, Show) - -data ParserContext  -    = ListItemState   -- ^ Used when running parser on list item contents -    | NullState       -- ^ Default state -    deriving (Eq, Show) - -data QuoteContext -    = InSingleQuote   -- ^ Used when parsing inside single quotes -    | InDoubleQuote   -- ^ Used when parsing inside double quotes -    | NoQuote         -- ^ Used when not parsing inside quotes -    deriving (Eq, Show) - -type NoteTable = [(String, [Block])] - -type KeyTable = [([Inline], Target)] - --- | Look up key in key table and return target object. -lookupKeySrc :: KeyTable  -- ^ Key table -             -> [Inline]  -- ^ Key -             -> Maybe Target -lookupKeySrc table key = case find (refsMatch key . fst) table of -                           Nothing       -> Nothing -                           Just (_, src) -> Just src - --- | Returns @True@ if keys match (case insensitive). -refsMatch :: [Inline] -> [Inline] -> Bool -refsMatch ((Str x):restx) ((Str y):resty) =  -    ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Emph x):restx) ((Emph y):resty) =  -    refsMatch x y && refsMatch restx resty -refsMatch ((Strong x):restx) ((Strong y):resty) =  -    refsMatch x y && refsMatch restx resty -refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =  -    refsMatch x y && refsMatch restx resty -refsMatch ((Superscript x):restx) ((Superscript y):resty) =  -    refsMatch x y && refsMatch restx resty -refsMatch ((Subscript x):restx) ((Subscript y):resty) =  -    refsMatch x y && refsMatch restx resty -refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =  -    t == u && refsMatch x y && refsMatch restx resty -refsMatch ((Code x):restx) ((Code y):resty) =  -    ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((TeX x):restx) ((TeX y):resty) =  -    ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =  -    ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty -refsMatch [] x = null x -refsMatch x [] = null x - --- --- Native format prettyprinting --- -  --- | Indent string as a block. -indentBy :: Int    -- ^ Number of spaces to indent the block  -         -> Int    -- ^ Number of spaces (rel to block) to indent first line -         -> String -- ^ Contents of block to indent -         -> String -indentBy _ _ [] = "" -indentBy num first str =  -  let (firstLine:restLines) = lines str  -      firstLineIndent = num + first -  in  (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++  -      (joinWithSep "\n" $ map ((replicate num ' ') ++ ) restLines) - --- | Prettyprint list of Pandoc blocks elements. -prettyBlockList :: Int       -- ^ Number of spaces to indent list of blocks -                -> [Block]   -- ^ List of blocks -                -> String -prettyBlockList indent [] = indentBy indent 0 "[]" -prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++  -  (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" - --- | Prettyprint Pandoc block element. -prettyBlock :: Block -> String -prettyBlock (BlockQuote blocks) = "BlockQuote\n  " ++  -                                  (prettyBlockList 2 blocks)  -prettyBlock (OrderedList attribs blockLists) =  -  "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++  -  (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks)  -  blockLists)) ++ " ]" -prettyBlock (BulletList blockLists) = "BulletList\n" ++  -  indentBy 2 0 ("[ " ++ (joinWithSep ", "  -  (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"  -prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++  -  indentBy 2 0 ("[" ++ (joinWithSep ",\n"  -  (map (\(term, blocks) -> "  (" ++ show term ++ ",\n" ++  -  indentBy 1 2 (prettyBlockList 2 blocks) ++ "  )") blockLists))) ++ " ]"  -prettyBlock (Table caption aligns widths header rows) =  -  "Table " ++ show caption ++ " " ++ show aligns ++ " " ++  -  show widths ++ "\n" ++ prettyRow header ++ " [\n" ++   -  (joinWithSep ",\n" (map prettyRow rows)) ++ " ]" -  where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", " -                         (map (\blocks -> prettyBlockList 2 blocks)  -                         cols))) ++ " ]" -prettyBlock block = show block - --- | Prettyprint Pandoc document. -prettyPandoc :: Pandoc -> String -prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++  -  ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" - --- --- Pandoc block and inline list processing --- - --- | Generate infinite lazy list of markers for an ordered list, --- depending on list attributes. -orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] -orderedListMarkers (start, numstyle, numdelim) =  -  let singleton c = [c] -      nums = case numstyle of -                     DefaultStyle -> map show [start..] -                     Decimal      -> map show [start..] -                     UpperAlpha   -> drop (start - 1) $ cycle $  -                                     map singleton ['A'..'Z'] -                     LowerAlpha   -> drop (start - 1) $ cycle $ -                                     map singleton ['a'..'z'] -                     UpperRoman   -> map toRomanNumeral [start..] -                     LowerRoman   -> map (map toLower . toRomanNumeral) [start..] -      inDelim str = case numdelim of -                            DefaultDelim -> str ++ "." -                            Period       -> str ++ "." -                            OneParen     -> str ++ ")" -                            TwoParens    -> "(" ++ str ++ ")" -  in  map inDelim nums - --- | Normalize a list of inline elements: remove leading and trailing --- @Space@ elements, collapse double @Space@s into singles, and --- remove empty Str elements. -normalizeSpaces :: [Inline] -> [Inline] -normalizeSpaces [] = [] -normalizeSpaces list =  -    let removeDoubles [] = [] -        removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) -        removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest) -        removeDoubles ((Str ""):rest) = removeDoubles rest  -        removeDoubles (x:rest) = x:(removeDoubles rest) -        removeLeading (Space:xs) = removeLeading xs -        removeLeading x = x -        removeTrailing [] = [] -        removeTrailing lst = if (last lst == Space) -                                then init lst -                                else lst -    in  removeLeading $ removeTrailing $ removeDoubles list - --- | Change final list item from @Para@ to @Plain@ if the list should  --- be compact. -compactify :: [[Block]]  -- ^ List of list items (each a list of blocks) -           -> [[Block]] -compactify [] = [] -compactify items = -    let final  = last items -        others = init items -    in  case final of -          [Para a]  -> if any containsPara others -                          then items -                          else others ++ [[Plain a]] -          _         -> items - -containsPara :: [Block] -> Bool -containsPara [] = False -containsPara ((Para _):_) = True -containsPara ((BulletList items):rest) =  any containsPara items || -                                          containsPara rest -containsPara ((OrderedList _ items):rest) = any containsPara items || -                                            containsPara rest -containsPara ((DefinitionList items):rest) = any containsPara (map snd items) || -                                             containsPara rest -containsPara (_:rest) = containsPara rest - --- | Data structure for defining hierarchical Pandoc documents -data Element = Blk Block  -             | Sec [Inline] [Element] deriving (Eq, Read, Show) - --- | Returns @True@ on Header block with at least the specified level -headerAtLeast :: Int -> Block -> Bool -headerAtLeast level (Header x _) = x <= level -headerAtLeast _ _ = False - --- | Convert list of Pandoc blocks into (hierarchical) list of Elements -hierarchicalize :: [Block] -> [Element] -hierarchicalize [] = [] -hierarchicalize (block:rest) =  -  case block of -    (Header level title) ->  -         let (thisSection, rest') = break (headerAtLeast level) rest -         in  (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')  -    x -> (Blk x):(hierarchicalize rest) - --- | True if block is a Header block. -isHeaderBlock :: Block -> Bool -isHeaderBlock (Header _ _) = True -isHeaderBlock _ = False - --- --- Writer options --- - --- | Options for writers -data WriterOptions = WriterOptions -  { writerStandalone      :: Bool   -- ^ Include header and footer -  , writerHeader          :: String -- ^ Header for the document -  , writerTitlePrefix     :: String -- ^ Prefix for HTML titles -  , writerTabStop         :: Int    -- ^ Tabstop for conversion btw spaces and tabs -  , writerTableOfContents :: Bool   -- ^ Include table of contents -  , writerS5              :: Bool   -- ^ We're writing S5  -  , writerUseASCIIMathML  :: Bool   -- ^ Use ASCIIMathML -  , writerASCIIMathMLURL  :: Maybe String -- ^ URL to asciiMathML.js  -  , writerIgnoreNotes     :: Bool   -- ^ Ignore footnotes (used in making toc) -  , writerIncremental     :: Bool   -- ^ Incremental S5 lists -  , writerNumberSections  :: Bool   -- ^ Number sections in LaTeX -  , writerIncludeBefore   :: String -- ^ String to include before the body -  , writerIncludeAfter    :: String -- ^ String to include after the body -  , writerStrictMarkdown  :: Bool   -- ^ Use strict markdown syntax -  , writerReferenceLinks  :: Bool   -- ^ Use reference links in writing markdown, rst -  , writerWrapText        :: Bool   -- ^ Wrap text to line length -  } deriving Show - --- | Default writer options. -defaultWriterOptions :: WriterOptions -defaultWriterOptions =  -  WriterOptions { writerStandalone      = False -                , writerHeader          = "" -                , writerTitlePrefix     = "" -                , writerTabStop         = 4 -                , writerTableOfContents = False -                , writerS5              = False -                , writerUseASCIIMathML  = False -                , writerASCIIMathMLURL  = Nothing -                , writerIgnoreNotes     = False -                , writerIncremental     = False -                , writerNumberSections  = False -                , writerIncludeBefore   = "" -                , writerIncludeAfter    = "" -                , writerStrictMarkdown  = False -                , writerReferenceLinks  = False -                , writerWrapText        = True -                } diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs deleted file mode 100644 index 16bdb9218..000000000 --- a/src/Text/Pandoc/UTF8.hs +++ /dev/null @@ -1,45 +0,0 @@ --- | Functions for converting Unicode strings to UTF-8 and vice versa. ---  --- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>. --- (c) 2003, OGI School of Science & Engineering, Oregon Health and --- Science University.   --- --- Modified by Martin Norbaeck --- to pass illegal UTF-8 sequences through unchanged. -module Text.Pandoc.UTF8 (  -             fromUTF8,  -             toUTF8  -            ) where - --- From the Char module supplied with HBC. - --- | Take a UTF-8 string and decode it into a Unicode string. -fromUTF8 :: String -> String -fromUTF8 "" = "" -fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker) -fromUTF8 (c:c':cs) | '\xc0' <= c  && c  <= '\xdf' &&  -		             '\x80' <= c' && c' <= '\xbf' = -	toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs -fromUTF8 (c:c':c'':cs) | '\xe0' <= c   && c   <= '\xef' &&  -		          '\x80' <= c'  && c'  <= '\xbf' && -		          '\x80' <= c'' && c'' <= '\xbf' = -	toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs -fromUTF8 (c:cs) = c : fromUTF8 cs - --- | Take a Unicode string and encode it as a UTF-8 string. -toUTF8 :: String -> String -toUTF8 "" = "" -toUTF8 (c:cs) = -	if c > '\x0000' && c < '\x0080' then -	    c : toUTF8 cs -	else if c < toEnum 0x0800 then -	    let i = fromEnum c -	    in  toEnum (0xc0 + i `div` 0x40) :  -	        toEnum (0x80 + i `mod` 0x40) :  -		toUTF8 cs -	else -	    let i = fromEnum c -	    in  toEnum (0xe0 + i `div` 0x1000) :  -	        toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) :  -		toEnum (0x80 + i `mod` 0x40) :  -		toUTF8 cs diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs deleted file mode 100644 index 13912a9f3..000000000 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ /dev/null @@ -1,248 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Writers.ConTeXt -   Copyright   : Copyright (C) 2007 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha  -   Portability : portable - -Conversion of 'Pandoc' format into ConTeXt. --} -module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Printf ( printf ) -import Data.List ( (\\), intersperse ) -import Control.Monad.State - -type WriterState = Int -- number of next URL reference  - --- | Convert Pandoc to ConTeXt. -writeConTeXt :: WriterOptions -> Pandoc -> String -writeConTeXt options document = evalState (pandocToConTeXt options document) 1  - -pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String -pandocToConTeXt options (Pandoc meta blocks) = do -  main    <- blockListToConTeXt blocks  -  let body = writerIncludeBefore options ++ main ++ writerIncludeAfter options -  head    <- if writerStandalone options -                then contextHeader options meta -                else return "" -  let toc  = if writerTableOfContents options -                then "\\placecontent\n\n" -                else ""  -  let foot = if writerStandalone options -                then "\n\\stoptext\n" -                else "" -  return $ head ++ toc ++ body ++ foot - --- | Insert bibliographic information into ConTeXt header. -contextHeader :: WriterOptions -- ^ Options, including ConTeXt header -              -> Meta          -- ^ Meta with bibliographic information -              -> State WriterState String -contextHeader options (Meta title authors date) = do -  titletext    <- if null title -                     then return ""  -                     else inlineListToConTeXt title -  let authorstext = if null authors -                       then "" -                       else if length authors == 1 -                            then stringToConTeXt $ head authors -                            else stringToConTeXt $ (joinWithSep ", " $ -                                 init authors) ++ " & " ++ last authors -  let datetext   = if date == "" -                       then ""  -                       else stringToConTeXt date -  let titleblock = "\\doctitle{" ++ titletext ++ "}\n\ -                   \ \\author{" ++ authorstext ++ "}\n\ -                   \ \\date{" ++ datetext ++ "}\n\n" -  let setupheads = if (writerNumberSections options) -                      then "\\setupheads[sectionnumber=yes, style=\\bf]\n"  -                      else "\\setupheads[sectionnumber=no, style=\\bf]\n" -  let header     = writerHeader options -  return $ header ++ setupheads ++ titleblock ++ "\\starttext\n\\maketitle\n\n" - --- escape things as needed for ConTeXt - -escapeCharForConTeXt :: Char -> String -escapeCharForConTeXt ch = - case ch of -    '{'  -> "\\letteropenbrace{}" -    '}'  -> "\\letterclosebrace{}" -    '\\' -> "\\letterbackslash{}" -    '$'  -> "\\$" -    '|'  -> "\\letterbar{}" -    '^'  -> "\\letterhat{}" -    '%'  -> "\\%" -    '~'  -> "\\lettertilde{}" -    '&'  -> "\\&" -    '#'  -> "\\#" -    '<'  -> "\\letterless{}" -    '>'  -> "\\lettermore{}" -    '_'  -> "\\letterunderscore{}" -    x    -> [x] - --- | Escape string for ConTeXt -stringToConTeXt :: String -> String -stringToConTeXt = concatMap escapeCharForConTeXt - --- | Convert Pandoc block element to ConTeXt. -blockToConTeXt :: Block -> State WriterState String  -blockToConTeXt Null = return "" -blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n") -blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n") -blockToConTeXt (BlockQuote lst) = do -  contents <- blockListToConTeXt lst -  return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n" -blockToConTeXt (CodeBlock str) =  -  return $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n" -blockToConTeXt (RawHtml str) = return "" -blockToConTeXt (BulletList lst) = do  -  contents <- mapM listItemToConTeXt lst -  return $ "\\startltxitem\n" ++ concat contents ++ "\\stopltxitem\n" -blockToConTeXt (OrderedList attribs lst) = case attribs of -  (1, DefaultStyle, DefaultDelim) -> do -    contents <- mapM listItemToConTeXt lst -    return $  "\\startltxenum\n" ++ concat contents ++ "\\stopltxenum\n" -  _ -> do -    let markers = take (length lst) $ orderedListMarkers attribs -    contents <- zipWithM orderedListItemToConTeXt markers lst -    let markerWidth = maximum $ map length markers  -    let markerWidth' = if markerWidth < 3 -                          then "" -                          else "[width=" ++  -                               show ((markerWidth + 2) `div` 2)  ++ "em]" -    return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++  -             "\\stopitemize\n" -blockToConTeXt (DefinitionList lst) = -  mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat -blockToConTeXt HorizontalRule = return "\\thinrule\n\n" -blockToConTeXt (Header level lst) = do -  contents <- inlineListToConTeXt lst -  return $ if level > 0 && level <= 3 -              then "\\" ++ concat (replicate (level - 1) "sub") ++  -                   "section{" ++ contents ++ "}\n\n" -              else contents ++ "\n\n" -blockToConTeXt (Table caption aligns widths heads rows) = do -    let colWidths = map printDecimal widths -    let colDescriptor colWidth alignment = (case alignment of -                                               AlignLeft    -> 'l'  -                                               AlignRight   -> 'r' -                                               AlignCenter  -> 'c' -                                               AlignDefault -> 'l'): -                                           "p(" ++ colWidth ++ "\\textwidth)|" -    let colDescriptors = "|" ++ (concat $  -                                 zipWith colDescriptor colWidths aligns) -    headers <- tableRowToConTeXt heads  -    captionText <- inlineListToConTeXt caption  -    let captionText' = if null caption then "none" else captionText -    rows' <- mapM tableRowToConTeXt rows  -    return $ "\\placetable[here]{" ++ captionText' ++ "}\n\\starttable[" ++  -             colDescriptors ++ "]\n" ++ "\\HL\n" ++ headers ++ "\\HL\n" ++  -             concat rows' ++ "\\HL\n\\stoptable\n\n"  - -printDecimal :: Float -> String -printDecimal = printf "%.2f"  - -tableRowToConTeXt cols = do -  cols' <- mapM blockListToConTeXt cols -  return $ "\\NC " ++ (concat $ intersperse "\\NC " cols') ++ "\\NC\\AR\n" - -listItemToConTeXt list = do -  contents <- blockListToConTeXt list -  return $ "\\item " ++ contents - -orderedListItemToConTeXt marker list = do -  contents <- blockListToConTeXt list -  return $ "\\sym{" ++ marker ++ "} " ++ contents - -defListItemToConTeXt (term, def) = do -  term' <- inlineListToConTeXt term -  def'  <- blockListToConTeXt def -  return $ "\\startdescr{" ++ term' ++ "}\n" ++ -           def' ++ "\n\\stopdescr\n" - --- | Convert list of block elements to ConTeXt. -blockListToConTeXt :: [Block] -> State WriterState String -blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat - --- | Convert list of inline elements to ConTeXt. -inlineListToConTeXt :: [Inline]  -- ^ Inlines to convert -                    -> State WriterState String -inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat - -isQuoted :: Inline -> Bool -isQuoted (Quoted _ _) = True -isQuoted Apostrophe = True -isQuoted _ = False - --- | Convert inline element to ConTeXt -inlineToConTeXt :: Inline    -- ^ Inline to convert -                -> State WriterState String -inlineToConTeXt (Emph lst) = do -  contents <- inlineListToConTeXt lst -  return $ "{\\em " ++ contents ++ "}"  -inlineToConTeXt (Strong lst) = do -  contents <- inlineListToConTeXt lst -  return $ "{\\bf " ++ contents ++ "}"  -inlineToConTeXt (Strikeout lst) = do -  contents <- inlineListToConTeXt lst -  return $ "\\overstrikes{" ++ contents ++ "}"  -inlineToConTeXt (Superscript lst) = do -  contents <- inlineListToConTeXt lst -  return $ "\\high{" ++ contents ++ "}"  -inlineToConTeXt (Subscript lst) = do -  contents <- inlineListToConTeXt lst -  return $ "\\low{" ++ contents ++ "}"  -inlineToConTeXt (Code str) = return $ "\\type{" ++ str ++ "}" -inlineToConTeXt (Quoted SingleQuote lst) = do -  contents <- inlineListToConTeXt lst -  return $ "\\quote{" ++ contents ++ "}" -inlineToConTeXt (Quoted DoubleQuote lst) = do -  contents <- inlineListToConTeXt lst -  return $ "\\quotation{" ++ contents ++ "}" -inlineToConTeXt Apostrophe = return "'" -inlineToConTeXt EmDash = return "---" -inlineToConTeXt EnDash = return "--" -inlineToConTeXt Ellipses = return "\\ldots{}" -inlineToConTeXt (Str str) = return $ stringToConTeXt str -inlineToConTeXt (TeX str) = return str -inlineToConTeXt (HtmlInline str) = return "" -inlineToConTeXt (LineBreak) = return "\\crlf\n" -inlineToConTeXt Space = return " " -inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own  -  inlineToConTeXt (Link [Str str] (src, tit))  -- way of printing links...  -inlineToConTeXt (Link text (src, _)) = do -  next <- get -  put (next + 1) -  let ref = show next -  label <- inlineListToConTeXt text -  return $ "\\useurl[" ++ ref ++ "][" ++ src ++ "][][" ++ label ++  -           "]\\from[" ++ ref ++ "]"  -inlineToConTeXt (Image alternate (src, tit)) = do -  alt <- inlineListToConTeXt alternate -  return $ "\\placefigure\n[]\n[fig:" ++ alt ++ "]\n{" ++ -           tit ++ "}\n{\\externalfigure[" ++ src ++ "]}"  -inlineToConTeXt (Note contents) = do -  contents' <- blockListToConTeXt contents -  return $ "\\footnote{" ++ contents' ++ "}" - diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs deleted file mode 100644 index 13dc8585d..000000000 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ /dev/null @@ -1,299 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Writers.Docbook -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Conversion of 'Pandoc' documents to Docbook XML. --} -module Text.Pandoc.Writers.Docbook ( writeDocbook) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Data.List ( isPrefixOf, drop ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) - --- --- code to format XML --- - --- | Escape one character as needed for XML. -escapeCharForXML :: Char -> String -escapeCharForXML x = case x of -                       '&'  -> "&" -                       '<'  -> "<" -                       '>'  -> ">" -                       '"'  -> """ -                       '\160' -> " " -                       c    -> [c]  - --- | True if the character needs to be escaped. -needsEscaping :: Char -> Bool -needsEscaping c = c `elem` "&<>\"\160" - --- | Escape string as needed for XML.  Entity references are not preserved. -escapeStringForXML :: String -> String -escapeStringForXML ""  = "" -escapeStringForXML str =  -  case break needsEscaping str of -    (okay, "")     -> okay -    (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs  - --- | Return a text object with a string of formatted XML attributes.  -attributeList :: [(String, String)] -> Doc -attributeList = text .  concatMap  -  (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++  -  escapeStringForXML b ++ "\"")  - --- | Put the supplied contents between start and end tags of tagType, ---   with specified attributes and (if specified) indentation. -inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc -inTags isIndented tagType attribs contents =  -  let openTag = char '<' <> text tagType <> attributeList attribs <>  -                char '>' -      closeTag  = text "</" <> text tagType <> char '>' -  in  if isIndented -         then openTag $$ nest 2 contents $$ closeTag -         else openTag <> contents <> closeTag - --- | Return a self-closing tag of tagType with specified attributes -selfClosingTag :: String -> [(String, String)] -> Doc -selfClosingTag tagType attribs =  -  char '<' <> text tagType <> attributeList attribs <> text " />"  -  --- | Put the supplied contents between start and end tags of tagType. -inTagsSimple :: String -> Doc -> Doc -inTagsSimple tagType = inTags False tagType [] - --- | Put the supplied contents in indented block btw start and end tags. -inTagsIndented :: String -> Doc -> Doc -inTagsIndented tagType = inTags True tagType [] - --- --- Docbook writer --- - --- | Convert list of authors to a docbook <author> section -authorToDocbook :: [Char] -> Doc -authorToDocbook name = inTagsIndented "author" $  -  if ',' `elem` name -    then -- last name first -         let (lastname, rest) = break (==',') name  -             firstname = removeLeadingSpace rest in -         inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>  -         inTagsSimple "surname" (text $ escapeStringForXML lastname)  -    else -- last name last -         let namewords = words name -             lengthname = length namewords  -             (firstname, lastname) = case lengthname of -               0  -> ("","")  -               1  -> ("", name) -               n  -> (joinWithSep " " (take (n-1) namewords), last namewords) -          in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$  -             inTagsSimple "surname" (text $ escapeStringForXML lastname)  - --- | Convert Pandoc document to string in Docbook format. -writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook opts (Pandoc (Meta title authors date) blocks) =  -  let head     = if writerStandalone opts -                    then text (writerHeader opts) -                    else empty -      meta     = if writerStandalone opts -                    then inTagsIndented "articleinfo" $ -                         (inTagsSimple "title" (wrap opts title)) $$  -                         (vcat (map authorToDocbook authors)) $$  -                         (inTagsSimple "date" (text $ escapeStringForXML date))  -                    else empty -      elements = hierarchicalize blocks -      before   = writerIncludeBefore opts -      after    = writerIncludeAfter opts -      body     = (if null before then empty else text before) $$ -                 vcat (map (elementToDocbook opts) elements) $$ -                 (if null after then empty else text after) -      body'    = if writerStandalone opts -                   then inTagsIndented "article" (meta $$ body) -                   else body  -  in  render $ head $$ body' $$ text "" - --- | Convert an Element to Docbook. -elementToDocbook :: WriterOptions -> Element -> Doc -elementToDocbook opts (Blk block) = blockToDocbook opts block  -elementToDocbook opts (Sec title elements) = -  -- Docbook doesn't allow sections with no content, so insert some if needed -  let elements' = if null elements -                    then [Blk (Para [])] -                    else elements -  in  inTagsIndented "section" $ -      inTagsSimple "title" (wrap opts title) $$ -      vcat (map (elementToDocbook opts) elements')  - --- | Convert a list of Pandoc blocks to Docbook. -blocksToDocbook :: WriterOptions -> [Block] -> Doc -blocksToDocbook opts = vcat . map (blockToDocbook opts) - --- | Auxiliary function to convert Plain block to Para. -plainToPara (Plain x) = Para x -plainToPara x = x - --- | Convert a list of pairs of terms and definitions into a list of  --- Docbook varlistentrys. -deflistItemsToDocbook :: WriterOptions -> [([Inline],[Block])] -> Doc -deflistItemsToDocbook opts items =  -  vcat $ map (\(term, def) -> deflistItemToDocbook opts term def) items - --- | Convert a term and a list of blocks into a Docbook varlistentry. -deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc -deflistItemToDocbook opts term def = -  let def' = map plainToPara def -  in  inTagsIndented "varlistentry" $ -      inTagsIndented "term" (inlinesToDocbook opts term) $$ -      inTagsIndented "listitem" (blocksToDocbook opts def') - --- | Convert a list of lists of blocks to a list of Docbook list items. -listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc -listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items - --- | Convert a list of blocks into a Docbook list item. -listItemToDocbook :: WriterOptions -> [Block] -> Doc -listItemToDocbook opts item = -  inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item - --- | Convert a Pandoc block element to Docbook. -blockToDocbook :: WriterOptions -> Block -> Doc -blockToDocbook opts Null = empty -blockToDocbook opts (Plain lst) = wrap opts lst -blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst -blockToDocbook opts (BlockQuote blocks) = -  inTagsIndented "blockquote" $ blocksToDocbook opts blocks -blockToDocbook opts (CodeBlock str) =  -  text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>" -blockToDocbook opts (BulletList lst) =  -  inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst  -blockToDocbook opts (OrderedList _ []) = empty  -blockToDocbook opts (OrderedList (start, numstyle, numdelim) (first:rest)) = -  let attribs  = case numstyle of -                       DefaultStyle -> [] -                       Decimal      -> [("numeration", "arabic")] -                       UpperAlpha   -> [("numeration", "upperalpha")] -                       LowerAlpha   -> [("numeration", "loweralpha")] -                       UpperRoman   -> [("numeration", "upperroman")] -                       LowerRoman   -> [("numeration", "lowerroman")] -      items    = if start == 1 -                    then listItemsToDocbook opts (first:rest) -                    else (inTags True "listitem" [("override",show start)] -                         (blocksToDocbook opts $ map plainToPara first)) $$  -                         listItemsToDocbook opts rest  -  in  inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) =  -  inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst  -blockToDocbook opts (RawHtml str) = text str -- raw XML block  -blockToDocbook opts HorizontalRule = empty -- not semantic -blockToDocbook opts (Table caption aligns widths headers rows) = -  let alignStrings = map alignmentToString aligns -      captionDoc   = if null caption -                      then empty -                      else inTagsIndented "caption"  -                           (inlinesToDocbook opts caption) -      tableType    = if isEmpty captionDoc then "informaltable" else "table" -  in  inTagsIndented tableType $ captionDoc $$ -     (colHeadsToDocbook opts alignStrings widths headers) $$  -     (vcat $ map (tableRowToDocbook opts alignStrings) rows) - -colHeadsToDocbook opts alignStrings widths headers = -  let heads = zipWith3 (\align width item ->  -              tableItemToDocbook opts "th" align width item)  -              alignStrings widths headers -  in  inTagsIndented "tr" $ vcat heads - -alignmentToString alignment = case alignment of -                                 AlignLeft -> "left" -                                 AlignRight -> "right" -                                 AlignCenter -> "center" -                                 AlignDefault -> "left" - -tableRowToDocbook opts aligns cols = inTagsIndented "tr" $  -  vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols - -tableItemToDocbook opts tag align width item = -  let attrib = [("align", align)] ++  -               if width /= 0 -                  then [("style", "{width: " ++  -                        show (truncate (100*width)) ++ "%;}")] -                  else []  -  in  inTags True tag attrib $ vcat $ map (blockToDocbook opts) item - --- | Take list of inline elements and return wrapped doc. -wrap :: WriterOptions -> [Inline] -> Doc -wrap opts lst = if writerWrapText opts -                   then fsep $ map (inlinesToDocbook opts) (splitBy Space lst) -                   else inlinesToDocbook opts lst - --- | Convert a list of inline elements to Docbook. -inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst - --- | Convert an inline element to Docbook. -inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook opts (Str str) = text $ escapeStringForXML str  -inlineToDocbook opts (Emph lst) =  -  inTagsSimple "emphasis" $ inlinesToDocbook opts lst -inlineToDocbook opts (Strong lst) =  -  inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst -inlineToDocbook opts (Strikeout lst) =  -  inTags False "emphasis" [("role", "strikethrough")] $ -  inlinesToDocbook opts lst -inlineToDocbook opts (Superscript lst) =  -  inTagsSimple "superscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (Subscript lst) =  -  inTagsSimple "subscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (Quoted _ lst) =  -  inTagsSimple "quote" $ inlinesToDocbook opts lst -inlineToDocbook opts Apostrophe = char '\'' -inlineToDocbook opts Ellipses = text "…" -inlineToDocbook opts EmDash = text "—"  -inlineToDocbook opts EnDash = text "–"  -inlineToDocbook opts (Code str) =  -  inTagsSimple "literal" $ text (escapeStringForXML str) -inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str) -inlineToDocbook opts (HtmlInline str) = empty -inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>"  -inlineToDocbook opts Space = char ' ' -inlineToDocbook opts (Link txt (src, tit)) = -  if isPrefixOf "mailto:" src -     then let src' = drop 7 src -              emailLink = inTagsSimple "email" $ text $  -                          escapeStringForXML $ src' -          in  if txt == [Code src'] -                 then emailLink -                 else inlinesToDocbook opts txt <+> char '(' <> emailLink <>  -                      char ')' -     else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt -inlineToDocbook opts (Image alt (src, tit)) =  -  let titleDoc = if null tit -                   then empty -                   else inTagsIndented "objectinfo" $ -                        inTagsIndented "title" (text $ escapeStringForXML tit) -  in  inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ -      titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]  -inlineToDocbook opts (Note contents) =  -  inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs deleted file mode 100644 index 7ec95d8ef..000000000 --- a/src/Text/Pandoc/Writers/HTML.hs +++ /dev/null @@ -1,458 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Writers.HTML  -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Conversion of 'Pandoc' documents to HTML. --} -module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where -import Text.Pandoc.Definition -import Text.Pandoc.ASCIIMathML -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Text.Pandoc.Shared -import Text.Regex ( mkRegex, matchRegex ) -import Numeric ( showHex ) -import Data.Char ( ord, toLower ) -import Data.List ( isPrefixOf, intersperse ) -import qualified Data.Set as S -import Control.Monad.State -import Text.XHtml.Transitional - -data WriterState = WriterState -    { stNotes            :: [Html]       -- ^ List of notes -    , stIds              :: [String]     -- ^ List of header identifiers -    , stMath             :: Bool         -- ^ Math is used in document -    , stCSS              :: S.Set String -- ^ CSS to include in header -    } deriving Show - -defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stIds = [],  -                                  stMath = False, stCSS = S.empty} - --- Helpers to render HTML with the appropriate function. -render opts    = if writerWrapText opts then renderHtml else showHtml -renderFragment opts = if writerWrapText opts -                         then renderHtmlFragment -                         else showHtmlFragment - --- | Convert Pandoc document to Html string. -writeHtmlString :: WriterOptions -> Pandoc -> String -writeHtmlString opts =  -  if writerStandalone opts -     then render opts . writeHtml opts -     else renderFragment opts . writeHtml opts - --- | Convert Pandoc document to Html structure. -writeHtml :: WriterOptions -> Pandoc -> Html -writeHtml opts (Pandoc (Meta tit authors date) blocks) =  -  let titlePrefix = writerTitlePrefix opts -      topTitle    = evalState (inlineListToHtml opts tit) defaultWriterState -      topTitle'   = if null titlePrefix -                        then topTitle -                        else titlePrefix +++ " - " +++ topTitle -      metadata    = thetitle topTitle' +++  -                    meta ! [httpequiv "Content-Type",  -                            content "text/html; charset=UTF-8"] +++ -                    meta ! [name "generator", content "pandoc"] +++ -                    (toHtmlFromList $  -                    map (\a -> meta ! [name "author", content a]) authors) +++ -                    (if null date -                       then noHtml -                       else meta ! [name "date", content date]) -      titleHeader = if writerStandalone opts && not (null tit) &&  -                    not (writerS5 opts) -                        then h1 ! [theclass "title"] $ topTitle -                        else noHtml -      headerBlocks = filter isHeaderBlock blocks -      ids          = uniqueIdentifiers $  -                     map (\(Header _ lst) -> lst) headerBlocks -      toc          = if writerTableOfContents opts  -                        then tableOfContents opts headerBlocks ids -                        else noHtml -      (blocks', newstate) =  -                     runState (blockListToHtml opts blocks) -                     (defaultWriterState {stIds = ids}) -      cssLines     = stCSS newstate -      css          = if S.null cssLines -                        then noHtml -                        else style ! [thetype "text/css"] $ primHtml $ -                             '\n':(unlines $ S.toList cssLines) -      math         = if stMath newstate -                        then case writerASCIIMathMLURL opts of -                                   Just path -> script !  [src path,  -                                                thetype "text/javascript"] $ -                                                noHtml -                                   Nothing   -> primHtml asciiMathMLScript -                        else noHtml -      head         = header $ metadata +++ math +++ css +++  -                              primHtml (writerHeader opts) -      notes        = reverse (stNotes newstate) -      before       = primHtml $ writerIncludeBefore opts -      after        = primHtml $ writerIncludeAfter opts -      thebody      = before +++ titleHeader +++ toc +++ blocks' +++ -                     footnoteSection opts notes +++ after -  in  if writerStandalone opts -         then head +++ body thebody -         else thebody - --- | Construct table of contents from list of header blocks and identifiers. --- Assumes there are as many identifiers as header blocks. -tableOfContents :: WriterOptions -> [Block] -> [String] -> Html -tableOfContents _ [] _ = noHtml -tableOfContents opts headers ids = -  let opts'        = opts { writerIgnoreNotes = True } -      contentsTree = hierarchicalize headers -      contents     = evalState (mapM (elementToListItem opts') contentsTree)  -                     (defaultWriterState {stIds = ids}) -  in  thediv ! [identifier "toc"] $ unordList contents - --- | Converts an Element to a list item for a table of contents, --- retrieving the appropriate identifier from state. -elementToListItem :: WriterOptions -> Element -> State WriterState Html -elementToListItem opts (Blk _) = return noHtml -elementToListItem opts (Sec headerText subsecs) = do -  st <- get -  let ids = stIds st -  let (id, rest) = if null ids -                      then ("", []) -                      else (head ids, tail ids) -  put $ st {stIds = rest} -  txt <- inlineListToHtml opts headerText -  subHeads <- mapM (elementToListItem opts) subsecs -  let subList = if null subHeads -                   then noHtml -                   else unordList subHeads  -  return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++  -           subList - --- | Convert list of Note blocks to a footnote <div>. --- Assumes notes are sorted. -footnoteSection :: WriterOptions -> [Html] -> Html -footnoteSection opts notes = -  if null notes  -     then noHtml -     else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes) - --- | Obfuscate a "mailto:" link using Javascript. -obfuscateLink :: WriterOptions -> String -> String -> Html -obfuscateLink opts text src = -  let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$" -      src'       = map toLower src -  in  case (matchRegex emailRegex src') of -        (Just [name, domain]) -> -          let domain'  = substitute "." " dot " domain -              at'      = obfuscateChar '@' -              (linkText, altText) =  -                 if text == drop 7 src' -- autolink -                    then ("'<code>'+e+'</code>'", name ++ " at " ++ domain') -                    else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++  -                          domain' ++ ")") -          in  if writerStrictMarkdown opts -                then -- need to use primHtml or &'s are escaped to & in URL -                     primHtml $ "<a href=\"" ++ (obfuscateString src') -                     ++ "\">" ++ (obfuscateString text) ++ "</a>" -                else (script ! [thetype "text/javascript"] $ -                     primHtml ("\n<!--\nh='" ++  -                     obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++  -                     obfuscateString name ++ "';e=n+a+h;\n" ++ -                     "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++  -                     linkText  ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++   -                     noscript (primHtml $ obfuscateString altText) -        _ -> anchor ! [href src] $ primHtml text  -- malformed email - --- | Obfuscate character as entity. -obfuscateChar :: Char -> String -obfuscateChar char =  -  let num    = ord char -      numstr = if even num then show num else "x" ++ showHex num "" -  in  "&#" ++ numstr ++ ";" - --- | Obfuscate string using entities. -obfuscateString :: String -> String -obfuscateString = concatMap obfuscateChar . decodeCharacterReferences - --- | True if character is a punctuation character (unicode). -isPunctuation :: Char -> Bool -isPunctuation c = -  let c' = ord c -  in  if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || -         c' >= 0xE000 && c' <= 0xE0FF -         then True -         else False - --- | Add CSS for document header. -addToCSS :: String -> State WriterState () -addToCSS item = do -  st <- get -  let current = stCSS st -  put $ st {stCSS = S.insert item current} - --- | Convert Pandoc inline list to plain text identifier. -inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier [] = "" -inlineListToIdentifier (x:xs) =  -  xAsText ++ inlineListToIdentifier xs -  where xAsText = case x of -          Str s          -> filter (\c -> c == '-' || not (isPunctuation c)) $ -                            concat $ intersperse "-" $ words $ map toLower s -          Emph lst       -> inlineListToIdentifier lst -          Strikeout lst  -> inlineListToIdentifier lst -          Superscript lst -> inlineListToIdentifier lst -          Subscript lst  -> inlineListToIdentifier lst -          Strong lst     -> inlineListToIdentifier lst -          Quoted _ lst   -> inlineListToIdentifier lst -          Code s         -> s -          Space          -> "-" -          EmDash         -> "-" -          EnDash         -> "-" -          Apostrophe     -> "" -          Ellipses       -> "" -          LineBreak      -> "-" -          TeX _          -> "" -          HtmlInline _   -> "" -          Link lst _     -> inlineListToIdentifier lst -          Image lst _    -> inlineListToIdentifier lst -          Note _         -> "" - --- | Return unique identifiers for list of inline lists. -uniqueIdentifiers :: [[Inline]] -> [String] -uniqueIdentifiers ls = -  let addIdentifier (nonuniqueIds, uniqueIds) l = -        let new = inlineListToIdentifier l -            matches = length $ filter (== new) nonuniqueIds -            new' = new ++ if matches > 0 then ("-" ++ show matches) else "" -        in  (new:nonuniqueIds, new':uniqueIds) -  in  reverse $ snd $ foldl addIdentifier ([],[]) ls - --- | Convert Pandoc block element to HTML. -blockToHtml :: WriterOptions -> Block -> State WriterState Html -blockToHtml opts Null = return $ noHtml  -blockToHtml opts (Plain lst) = inlineListToHtml opts lst -blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) -blockToHtml opts (RawHtml str) = return $ primHtml str -blockToHtml opts (HorizontalRule) = return $ hr -blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n")  -                       -- the final \n for consistency with Markdown.pl -blockToHtml opts (BlockQuote blocks) = -  -- in S5, treat list in blockquote specially -  -- if default is incremental, make it nonincremental;  -  -- otherwise incremental -  if writerS5 opts -     then let inc = not (writerIncremental opts) in -          case blocks of  -             [BulletList lst]  -> blockToHtml (opts {writerIncremental = inc}) -                                  (BulletList lst) -             [OrderedList attribs lst] ->  -                                  blockToHtml (opts {writerIncremental = inc}) -                                  (OrderedList attribs lst) -             otherwise         -> blockListToHtml opts blocks >>=  -                                  (return . blockquote) -     else blockListToHtml opts blocks >>= (return . blockquote) -blockToHtml opts (Header level lst) = do  -  contents <- inlineListToHtml opts lst -  st <- get -  let ids = stIds st -  let (id, rest) = if null ids -                      then ("", []) -                      else (head ids, tail ids) -  put $ st {stIds = rest} -  let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts) -                   then [] -                   else [identifier id] -  let contents'  = if writerTableOfContents opts -                      then anchor ! [href ("#TOC-" ++ id)] $ contents -                      else contents -  return $ case level of -              1 -> h1 contents' ! attribs -              2 -> h2 contents' ! attribs -              3 -> h3 contents' ! attribs -              4 -> h4 contents' ! attribs -              5 -> h5 contents' ! attribs -              6 -> h6 contents' ! attribs -              _ -> paragraph contents' ! attribs -blockToHtml opts (BulletList lst) = do -  contents <- mapM (blockListToHtml opts) lst -  let attribs = if writerIncremental opts -                   then [theclass "incremental"] -                   else [] -  return $ unordList ! attribs $ contents -blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do -  contents <- mapM (blockListToHtml opts) lst -  let numstyle' = camelCaseToHyphenated $ show numstyle -  let attribs = (if writerIncremental opts -                   then [theclass "incremental"] -                   else []) ++ -                (if startnum /= 1 -                   then [start startnum] -                   else []) ++ -                (if numstyle /= DefaultStyle -                   then [theclass numstyle'] -                   else []) -  if numstyle /= DefaultStyle -     then addToCSS $ "ol." ++ numstyle' ++  -                     " { list-style-type: " ++  -                     numstyle' ++ "; }" -     else return () -  return $ ordList ! attribs $ contents -blockToHtml opts (DefinitionList lst) = do -  contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term -                                       def' <- blockListToHtml opts def -                                       return $ (term', def')) lst -  let attribs = if writerIncremental opts -                   then [theclass "incremental"] -                   else [] -  return $ defList ! attribs $ contents -blockToHtml opts (Table capt aligns widths headers rows) = do -  let alignStrings = map alignmentToString aligns -  captionDoc <- if null capt -                   then return noHtml -                   else inlineListToHtml opts capt >>= return . caption -  colHeads <- colHeadsToHtml opts alignStrings  -                             widths headers -  rows' <- mapM (tableRowToHtml opts alignStrings) rows -  return $ table $ captionDoc +++ colHeads +++ rows' - -colHeadsToHtml opts alignStrings widths headers = do -  heads <- sequence $ zipWith3  -           (\align width item -> tableItemToHtml opts th align width item)  -           alignStrings widths headers -  return $ tr $ toHtmlFromList heads - -alignmentToString alignment = case alignment of -                                 AlignLeft    -> "left" -                                 AlignRight   -> "right" -                                 AlignCenter  -> "center" -                                 AlignDefault -> "left" - -tableRowToHtml opts aligns cols =  -  (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>= -  return . tr . toHtmlFromList - -tableItemToHtml opts tag align' width item = do -  contents <- blockListToHtml opts item -  let attrib = [align align'] ++  -               if width /= 0  -                  then [thestyle ("width: " ++ show (truncate (100*width)) ++  -                        "%;")] -                  else []  -  return $ tag ! attrib $ contents - -blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html -blockListToHtml opts lst =  -  mapM (blockToHtml opts) lst >>= return . toHtmlFromList - --- | Convert list of Pandoc inline elements to HTML. -inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html -inlineListToHtml opts lst =  -  mapM (inlineToHtml opts) lst >>= return . toHtmlFromList - --- | Convert Pandoc inline element to HTML. -inlineToHtml :: WriterOptions -> Inline -> State WriterState Html -inlineToHtml opts inline = -  case inline of   -    (Str str)        -> return $ stringToHtml str -    (Space)          -> return $ stringToHtml " " -    (LineBreak)      -> return $ br -    (EmDash)         -> return $ primHtmlChar "mdash" -    (EnDash)         -> return $ primHtmlChar "ndash" -    (Ellipses)       -> return $ primHtmlChar "hellip" -    (Apostrophe)     -> return $ primHtmlChar "rsquo" -    (Emph lst)       -> inlineListToHtml opts lst >>= return . emphasize -    (Strong lst)     -> inlineListToHtml opts lst >>= return . strong -    (Code str)       -> return $ thecode << str -    (Strikeout lst)  -> addToCSS  -                        ".strikeout { text-decoration: line-through; }" >>  -                        inlineListToHtml opts lst >>= -                        return . (thespan ! [theclass "strikeout"]) -    (Superscript lst) -> inlineListToHtml opts lst >>= return . sup -    (Subscript lst)   -> inlineListToHtml opts lst >>= return . sub -    (Quoted quoteType lst) -> -                        let (leftQuote, rightQuote) = case quoteType of -                              SingleQuote -> (primHtmlChar "lsquo",  -                                              primHtmlChar "rsquo") -                              DoubleQuote -> (primHtmlChar "ldquo",  -                                              primHtmlChar "rdquo") -                        in  do contents <- inlineListToHtml opts lst -                               return $ leftQuote +++ contents +++ rightQuote -    (TeX str)        -> (if writerUseASCIIMathML opts -                            then modify (\st -> st {stMath = True}) -                            else return ()) >> return (stringToHtml str) -    (HtmlInline str) -> return $ primHtml str  -    (Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src -> -                        return $ obfuscateLink opts str src -    (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do -                        linkText <- inlineListToHtml opts txt   -                        return $ obfuscateLink opts (show linkText) src -    (Link txt (src,tit)) -> do -                        linkText <- inlineListToHtml opts txt -                        return $ anchor ! ([href src] ++  -                                 if null tit then [] else [title tit]) $  -                                 linkText -    (Image txt (source,tit)) -> do -                        alternate <- inlineListToHtml opts txt -                        let alternate' = renderFragment opts alternate -                        let attributes = [src source] ++ -                                         (if null tit  -                                            then []  -                                            else [title tit]) ++  -                                         if null txt  -                                            then []  -                                            else [alt alternate'] -                        return $ image ! attributes  -                        -- note:  null title included, as in Markdown.pl  -    (Note contents)          -> do  -                        st <- get -                        let notes = stNotes st -                        let number = (length notes) + 1 -                        let ref = show number -                        htmlContents <- blockListToNote opts ref contents  -                        -- push contents onto front of notes -                        put $ st {stNotes = (htmlContents:notes)}  -                        return $ anchor ! [href ("#fn" ++ ref), -                                          theclass "footnoteRef", -                                          identifier ("fnref" ++ ref)] <<  -                                          sup << ref - -blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html -blockListToNote opts ref blocks = -  -- If last block is Para or Plain, include the backlink at the end of -  -- that block. Otherwise, insert a new Plain block with the backlink. -  let backlink = [HtmlInline $ " <a href=\"#fnref" ++ ref ++  -                 "\" class=\"footnoteBackLink\"" ++ -                 " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] -      blocks'  = if null blocks -                    then [] -                    else let lastBlock   = last blocks -                             otherBlocks = init blocks -                         in  case lastBlock of -                                  (Para lst)  -> otherBlocks ++ -                                                 [Para (lst ++ backlink)] -                                  (Plain lst) -> otherBlocks ++ -                                                 [Plain (lst ++ backlink)] -                                  _           -> otherBlocks ++ [lastBlock, -                                                 Plain backlink] -  in  do contents <- blockListToHtml opts blocks' -         return $ li ! [identifier ("fn" ++ ref)] $ contents - diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs deleted file mode 100644 index f64e06e24..000000000 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ /dev/null @@ -1,310 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Writers.LaTeX -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha  -   Portability : portable - -Conversion of 'Pandoc' format into LaTeX. --} -module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Printf ( printf ) -import Data.List ( (\\), isInfixOf, isSuffixOf, intersperse ) -import Data.Char ( toLower ) -import qualified Data.Set as S -import Control.Monad.State -import Text.PrettyPrint.HughesPJ hiding ( Str ) - -data WriterState =  -  WriterState { stIncludes :: S.Set String -- strings to include in header -              , stInNote   :: Bool         -- @True@ if we're in a note -              , stOLLevel  :: Int }        -- level of ordered list nesting  - --- | Add line to header. -addToHeader :: String -> State WriterState () -addToHeader str = do -  st <- get -  let includes = stIncludes st -  put st {stIncludes = S.insert str includes} - --- | Convert Pandoc to LaTeX. -writeLaTeX :: WriterOptions -> Pandoc -> String -writeLaTeX options document =  -  render $ evalState (pandocToLaTeX options document) $  -  WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1 }  - -pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToLaTeX options (Pandoc meta blocks) = do -  main     <- blockListToLaTeX blocks -  head     <- if writerStandalone options -                 then latexHeader options meta -                 else return empty -  let before = if null (writerIncludeBefore options) -                  then empty -                  else text (writerIncludeBefore options) -  let after  = if null (writerIncludeAfter options) -                  then empty -                  else text (writerIncludeAfter options) -  let body = before $$ main $$ after -  let toc  =  if writerTableOfContents options -                 then text "\\tableofcontents\n" -                 else empty  -  let foot = if writerStandalone options -                then text "\\end{document}" -                else empty  -  return $ head $$ toc $$ body $$ foot - --- | Insert bibliographic information into LaTeX header. -latexHeader :: WriterOptions -- ^ Options, including LaTeX header -            -> Meta          -- ^ Meta with bibliographic information -            -> State WriterState Doc -latexHeader options (Meta title authors date) = do -  titletext <- if null title -                  then return empty -                  else inlineListToLaTeX title >>= return . inCmd "title" -  headerIncludes <- get >>= return . S.toList . stIncludes -  let extras = text $ unlines headerIncludes -  let verbatim  = if "\\usepackage{fancyvrb}" `elem` headerIncludes -                     then text "\\VerbatimFootnotes % allows verbatim text in footnotes" -                     else empty -  let authorstext = text $ "\\author{" ++  -                    joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}" -  let datetext  = if date == "" -                     then empty  -                     else text $ "\\date{" ++ stringToLaTeX date ++ "}" -  let maketitle = if null title then empty else text "\\maketitle" -  let secnumline = if (writerNumberSections options) -                      then empty  -                      else text "\\setcounter{secnumdepth}{0}" -  let baseHeader = text $ writerHeader options -  let header     = baseHeader $$ extras -  return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$ -           datetext $$ text "\\begin{document}" $$ maketitle $$ text "" - --- escape things as needed for LaTeX - -stringToLaTeX :: String -> String -stringToLaTeX = escapeStringUsing latexEscapes -  where latexEscapes = backslashEscapes "{}$%&_#" ++  -                       [ ('^', "\\^{}") -                       , ('\\', "\\textbackslash{}") -                       , ('~', "\\ensuremath{\\sim}") -                       , ('|', "\\textbar{}") -                       , ('<', "\\textless{}") -                       , ('>', "\\textgreater{}") -                       ] - --- | Puts contents into LaTeX command. -inCmd :: String -> Doc -> Doc -inCmd cmd contents = char '\\' <> text cmd <> braces contents - --- | Remove all code elements from list of inline elements --- (because it's illegal to have verbatim inside some command arguments) -deVerb :: [Inline] -> [Inline] -deVerb [] = [] -deVerb ((Code str):rest) =  -  (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest) -deVerb (other:rest) = other:(deVerb rest) - --- | Convert Pandoc block element to LaTeX. -blockToLaTeX :: Block     -- ^ Block to convert -             -> State WriterState Doc -blockToLaTeX Null = return empty -blockToLaTeX (Plain lst) = wrapped inlineListToLaTeX lst >>= return  -blockToLaTeX (Para lst) =  -  wrapped inlineListToLaTeX lst >>= return . (<> char '\n') -blockToLaTeX (BlockQuote lst) = do -  contents <- blockListToLaTeX lst -  return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}" -blockToLaTeX (CodeBlock str) = do -  st <- get -  env <- if stInNote st -            then do addToHeader "\\usepackage{fancyvrb}" -                    return "Verbatim" -            else return "verbatim" -  return $ text ("\\begin{" ++ env ++ "}\n") <> text str <>  -           text ("\n\\end{" ++ env ++ "}") -blockToLaTeX (RawHtml str) = return empty -blockToLaTeX (BulletList lst) = do -  items <- mapM listItemToLaTeX lst -  return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}" -blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do -  st <- get -  let oldlevel = stOLLevel st -  put $ st {stOLLevel = oldlevel + 1} -  items <- mapM listItemToLaTeX lst -  modify (\st -> st {stOLLevel = oldlevel}) -  exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim -                 then do addToHeader "\\usepackage{enumerate}" -                         return $ char '[' <>  -                                  text (head (orderedListMarkers (1, numstyle, -                                  numdelim))) <> char ']' -                 else return empty -  let resetcounter = if start /= 1 && oldlevel <= 4 -                        then text $ "\\setcounter{enum" ++  -                             map toLower (toRomanNumeral oldlevel) ++ -                             "}{" ++ show (start - 1) ++ "}" -                        else empty  -  return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$ -           vcat items $$ text "\\end{enumerate}" -blockToLaTeX (DefinitionList lst) = do -  items <- mapM defListItemToLaTeX lst -  return $ text "\\begin{description}" $$ vcat items $$ -           text "\\end{description}" -blockToLaTeX HorizontalRule = return $ text $ -    "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n" -blockToLaTeX (Header level lst) = do -  txt <- inlineListToLaTeX (deVerb lst) -  return $ if (level > 0) && (level <= 3) -              then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++  -                   "section{") <> txt <> text "}\n" -              else txt <> char '\n' -blockToLaTeX (Table caption aligns widths heads rows) = do -  headers <- tableRowToLaTeX heads -  captionText <- inlineListToLaTeX caption -  rows' <- mapM tableRowToLaTeX rows -  let colWidths = map (printf "%.2f") widths -  let colDescriptors = concat $ zipWith -                                (\width align -> ">{\\PBS" ++  -                                (case align of  -                                       AlignLeft -> "\\raggedright" -                                       AlignRight -> "\\raggedleft" -                                       AlignCenter -> "\\centering" -                                       AlignDefault -> "\\raggedright") ++ -                                "\\hspace{0pt}}p{" ++ width ++  -                                "\\columnwidth}") -                                colWidths aligns -  let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$ -                  headers $$ text "\\hline" $$ vcat rows' $$  -                  text "\\end{tabular}"  -  let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}" -  addToHeader "\\usepackage{array}\n\ -    \% This is needed because raggedright in table elements redefines \\\\:\n\ -    \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\ -    \\\let\\PBS=\\PreserveBackslash" -  return $ if isEmpty captionText -              then centered tableBody <> char '\n' -              else text "\\begin{table}[h]" $$ centered tableBody $$  -                   inCmd "caption" captionText $$ text "\\end{table}\n"  - -blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat - -tableRowToLaTeX cols = mapM blockListToLaTeX cols >>=  -  return . ($$ text "\\\\") . foldl (\row item -> row $$ -  (if isEmpty row then empty else text " & ") <> item) empty - -listItemToLaTeX lst = blockListToLaTeX lst >>= return .  (text "\\item " $$) . -                      (nest 2) - -defListItemToLaTeX (term, def) = do -    term' <- inlineListToLaTeX $ deVerb term -    def'  <- blockListToLaTeX def -    return $ text "\\item[" <> term' <> text "]" $$ def' - --- | Convert list of inline elements to LaTeX. -inlineListToLaTeX :: [Inline]  -- ^ Inlines to convert -                  -> State WriterState Doc -inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat - -isQuoted :: Inline -> Bool -isQuoted (Quoted _ _) = True -isQuoted Apostrophe = True -isQuoted _ = False - --- | Convert inline element to LaTeX -inlineToLaTeX :: Inline    -- ^ Inline to convert -              -> State WriterState Doc -inlineToLaTeX (Emph lst) = -  inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph" -inlineToLaTeX (Strong lst) =  -  inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf"  -inlineToLaTeX (Strikeout lst) = do -  contents <- inlineListToLaTeX $ deVerb lst -  addToHeader "\\usepackage[normalem]{ulem}" -  return $ inCmd "sout" contents -inlineToLaTeX (Superscript lst) =  -  inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript" -inlineToLaTeX (Subscript lst) = do -  contents <- inlineListToLaTeX $ deVerb lst -  -- oddly, latex includes \textsuperscript but not \textsubscript -  -- so we have to define it: -  addToHeader "\\newcommand{\\textsubscript}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}" -  return $ inCmd "textsubscript" contents -inlineToLaTeX (Code str) = do -  st <- get -  if stInNote st -     then do addToHeader "\\usepackage{fancyvrb}" -     else return () -  let chr = ((enumFromTo '!' '~') \\ str) !! 0 -  return $ text $ "\\verb" ++ [chr] ++ str ++ [chr] -inlineToLaTeX (Quoted SingleQuote lst) = do -  contents <- inlineListToLaTeX lst -  let s1 = if (not (null lst)) && (isQuoted (head lst)) -              then text "\\," -              else empty  -  let s2 = if (not (null lst)) && (isQuoted (last lst)) -              then text "\\," -              else empty -  return $ char '`' <> s1 <> contents <> s2 <> char '\'' -inlineToLaTeX (Quoted DoubleQuote lst) = do -  contents <- inlineListToLaTeX lst -  let s1 = if (not (null lst)) && (isQuoted (head lst)) -              then text "\\," -              else empty  -  let s2 = if (not (null lst)) && (isQuoted (last lst)) -              then text "\\," -              else empty -  return $ text "``" <> s1 <> contents <> s2 <> text "''" -inlineToLaTeX Apostrophe = return $ char '\'' -inlineToLaTeX EmDash = return $ text "---" -inlineToLaTeX EnDash = return $ text "--" -inlineToLaTeX Ellipses = return $ text "\\ldots{}" -inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str -inlineToLaTeX (TeX str) = return $ text str -inlineToLaTeX (HtmlInline str) = return empty -inlineToLaTeX (LineBreak) = return $ text "\\\\"  -inlineToLaTeX Space = return $ char ' ' -inlineToLaTeX (Link txt (src, _)) = do -  addToHeader "\\usepackage[breaklinks=true]{hyperref}" -  case txt of -        [Code x] | x == src ->  -- autolink -             do addToHeader "\\usepackage{url}"  -                return $ text $ "\\url{" ++ x ++ "}" -        _ -> do contents <- inlineListToLaTeX $ deVerb txt -                return $ text ("\\href{" ++ src ++ "}{") <> contents <>  -                         char '}' -inlineToLaTeX (Image alternate (source, tit)) = do -  addToHeader "\\usepackage{graphicx}" -  return $ text $ "\\includegraphics{" ++ source ++ "}"  -inlineToLaTeX (Note contents) = do -  st <- get -  put (st {stInNote = True}) -  contents' <- blockListToLaTeX contents -  modify (\st -> st {stInNote = False}) -  let rawnote = stripTrailingNewlines $ render contents' -  -- note: a \n before } is needed when note ends with a Verbatim environment -  let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote -  return $ text "%\n\\footnote{" <>  -           text rawnote <> (if optNewline then char '\n' else empty) <> char '}' diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs deleted file mode 100644 index 8e14c2bf0..000000000 --- a/src/Text/Pandoc/Writers/Man.hs +++ /dev/null @@ -1,293 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Writers.Man  -   Copyright   : Copyright (C) 2007 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Conversion of 'Pandoc' documents to groff man page format. - --} -module Text.Pandoc.Writers.Man ( writeMan) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared  -import Text.Printf ( printf ) -import Data.List ( isPrefixOf, drop, nub, intersperse ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Control.Monad.State - -type Notes = [[Block]] -type Preprocessors = [String] -- e.g. "t" for tbl -type WriterState = (Notes, Preprocessors) - --- | Convert Pandoc to Man. -writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = render $ evalState (pandocToMan opts document) ([],[])  - --- | Return groff man representation of document. -pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToMan opts (Pandoc meta blocks) = do -  let before  = writerIncludeBefore opts -  let after   = writerIncludeAfter opts -  let before' = if null before then empty else text before -  let after'  = if null after then empty else text after -  (head, foot) <- metaToMan opts meta -  body <- blockListToMan opts blocks -  (notes, preprocessors) <- get -  let preamble = if null preprocessors || not (writerStandalone opts) -                    then empty -                    else text $ ".\\\" " ++ concat (nub preprocessors) -  notes' <- notesToMan opts (reverse notes) -  return $ preamble $$ head $$ before' $$ body $$ notes' $$ foot $$ after' - --- | Insert bibliographic information into Man header and footer. -metaToMan :: WriterOptions -- ^ Options, including Man header -          -> Meta          -- ^ Meta with bibliographic information -          -> State WriterState (Doc, Doc) -metaToMan options (Meta title authors date) = do -  titleText <- inlineListToMan options title -  let (cmdName, rest) = break (== ' ') $ render titleText -  let (title', section) = case reverse cmdName of -                            (')':d:'(':xs) | d `elem` ['0'..'9'] ->  -                                  (text (reverse xs), char d) -                            xs -> (text (reverse xs), doubleQuotes empty)                     -  let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $ -               splitBy '|' rest -  let head = (text ".TH") <+> title' <+> section <+>  -             doubleQuotes (text date) <+> hsep extras -  let foot = case length authors of -                0 -> empty -                1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors) -                2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors) -  return $ if writerStandalone options -              then (head, foot) -              else (empty, empty) - --- | Return man representation of notes. -notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToMan opts notes = -  if null notes -     then return empty -     else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=  -          return . (text ".SH NOTES" $$) . vcat - --- | Return man representation of a note. -noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToMan opts num note = do -  contents <- blockListToMan opts note -  let marker = text "\n.SS [" <> text (show num) <> char ']' -  return $ marker $$ contents  - --- | Association list of characters to escape. -manEscapes :: [(Char, String)] -manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\" - --- | Escape special characters for Man. -escapeString :: String -> String -escapeString = escapeStringUsing manEscapes - --- | Escape a literal (code) section for Man. -escapeCode :: String -> String -escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ") - --- | Convert Pandoc block element to man. -blockToMan :: WriterOptions -- ^ Options -                -> Block         -- ^ Block element -                -> State WriterState Doc  -blockToMan opts Null = return empty -blockToMan opts (Plain inlines) =  -  wrapIfNeeded opts (inlineListToMan opts) inlines -blockToMan opts (Para inlines) = do -  contents <- wrapIfNeeded opts (inlineListToMan opts) inlines -  return $ text ".PP" $$ contents  -blockToMan opts (RawHtml str) = return $ text str -blockToMan opts HorizontalRule = return $ text $ ".PP\n   *   *   *   *   *" -blockToMan opts (Header level inlines) = do -  contents <- inlineListToMan opts inlines -  let heading = case level of -                  1 -> ".SH " -                  _ -> ".SS " -  return $ text heading <> contents  -blockToMan opts (CodeBlock str) = return $ -  text ".PP" $$ text "\\f[CR]" $$  -  text ((unlines . map ("      " ++) . lines) (escapeCode str)) <> text "\\f[]" -blockToMan opts (BlockQuote blocks) = do   -  contents <- blockListToMan opts blocks -  return $ text ".RS" $$ contents $$ text ".RE" -blockToMan opts (Table caption alignments widths headers rows) =  -  let aligncode AlignLeft    = "l" -      aligncode AlignRight   = "r" -      aligncode AlignCenter  = "c" -      aligncode AlignDefault = "l" -  in do -  caption' <- inlineListToMan opts caption -  modify (\(notes, preprocessors) -> (notes, "t":preprocessors)) -  let iwidths = map (printf "w(%0.2fn)" . (70 *)) widths  -  -- 78n default width - 8n indent = 70n -  let coldescriptions = text $ joinWithSep " "  -                        (zipWith (\align width -> aligncode align ++ width)  -                        alignments iwidths) ++ "." -  colheadings <- mapM (blockListToMan opts) headers -  let makeRow cols = text "T{" $$  -                     (vcat $ intersperse (text "T}@T{") cols) $$  -                     text "T}" -  let colheadings' = makeRow colheadings -  body <- mapM (\row -> do  -                         cols <- mapM (blockListToMan opts) row -                         return $ makeRow cols) rows -  return $ text ".PP" $$ caption' $$  -           text ".TS" $$ text "tab(@);" $$ coldescriptions $$  -           colheadings' $$ char '_' $$ vcat body $$ text ".TE" - -blockToMan opts (BulletList items) = do -  contents <- mapM (bulletListItemToMan opts) items -  return (vcat contents)  -blockToMan opts (OrderedList attribs items) = do -  let markers = take (length items) $ orderedListMarkers attribs   -  let indent = 1 + (maximum $ map length markers) -  contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ -              zip markers items   -  return (vcat contents) -blockToMan opts (DefinitionList items) = do   -  contents <- mapM (definitionListItemToMan opts) items -  return (vcat contents) - --- | Convert bullet list item (list of blocks) to man. -bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc -bulletListItemToMan opts [] = return empty -bulletListItemToMan opts ((Para first):rest) =  -  bulletListItemToMan opts ((Plain first):rest) -bulletListItemToMan opts ((Plain first):rest) = do -  first' <- blockToMan opts (Plain first)  -  rest' <- blockListToMan opts rest -  let first'' = text ".IP \\[bu] 2" $$ first' -  let rest''  = if null rest -                   then empty -                   else text ".RS 2" $$ rest' $$ text ".RE" -  return (first'' $$ rest'')  -bulletListItemToMan opts (first:rest) = do -  first' <- blockToMan opts first -  rest' <- blockListToMan opts rest -  return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" -  --- | Convert ordered list item (a list of blocks) to man. -orderedListItemToMan :: WriterOptions -- ^ options -                          -> String   -- ^ order marker for list item -                          -> Int      -- ^ number of spaces to indent -                          -> [Block]  -- ^ list item (list of blocks) -                          -> State WriterState Doc -orderedListItemToMan _ _ _ [] = return empty -orderedListItemToMan opts num indent ((Para first):rest) =  -  orderedListItemToMan opts num indent ((Plain first):rest) -orderedListItemToMan opts num indent (first:rest) = do -  first' <- blockToMan opts first -  rest' <- blockListToMan opts rest -  let num' = printf ("%" ++ show (indent - 1) ++ "s") num -  let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' -  let rest''  = if null rest -                   then empty -                   else text ".RS 4" $$ rest' $$ text ".RE" -  return $ first'' $$ rest''  - --- | Convert definition list item (label, list of blocks) to man. -definitionListItemToMan :: WriterOptions -                             -> ([Inline],[Block])  -                             -> State WriterState Doc -definitionListItemToMan opts (label, items) = do -  labelText <- inlineListToMan opts label -  contents <- if null items -                 then return empty -                 else do  -                        let (first, rest) = case items of -                             ((Para x):y) -> (Plain x,y) -                             (x:y)        -> (x,y) -                        rest' <- mapM (\item -> blockToMan opts item) -                                 rest >>= (return . vcat) -                        first' <- blockToMan opts first -                        return $ first' $$ text ".RS" $$ rest' $$ text ".RE" -  return $ text ".TP\n.B " <> labelText $+$ contents - --- | Convert list of Pandoc block elements to man. -blockListToMan :: WriterOptions -- ^ Options -                    -> [Block]       -- ^ List of block elements -                    -> State WriterState Doc  -blockListToMan opts blocks = -  mapM (blockToMan opts) blocks >>= (return . vcat) - --- | Convert list of Pandoc inline elements to man. -inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) - --- | Convert Pandoc inline element to man. -inlineToMan :: WriterOptions -> Inline -> State WriterState Doc -inlineToMan opts (Emph lst) = do  -  contents <- inlineListToMan opts lst -  return $ text "\\f[I]" <> contents <> text "\\f[]" -inlineToMan opts (Strong lst) = do -  contents <- inlineListToMan opts lst -  return $ text "\\f[B]" <> contents <> text "\\f[]" -inlineToMan opts (Strikeout lst) = do -  contents <- inlineListToMan opts lst -  return $ text "[STRIKEOUT:" <> contents <> char ']' -inlineToMan opts (Superscript lst) = do -  contents <- inlineListToMan opts lst -  return $ char '^' <> contents <> char '^' -inlineToMan opts (Subscript lst) = do -  contents <- inlineListToMan opts lst -  return $ char '~' <> contents <> char '~' -inlineToMan opts (Quoted SingleQuote lst) = do -  contents <- inlineListToMan opts lst -  return $ char '`' <> contents <> char '\'' -inlineToMan opts (Quoted DoubleQuote lst) = do -  contents <- inlineListToMan opts lst -  return $ text "\\[lq]" <> contents <> text "\\[rq]" -inlineToMan opts EmDash = return $ text "\\[em]" -inlineToMan opts EnDash = return $ text "\\[en]" -inlineToMan opts Apostrophe = return $ char '\'' -inlineToMan opts Ellipses = return $ text "\\&..." -inlineToMan opts (Code str) = -  return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]" -inlineToMan opts (Str str) = return $ text $ escapeString str -inlineToMan opts (TeX str) = return $ text $ escapeCode str -inlineToMan opts (HtmlInline str) = return $ text $ escapeCode str  -inlineToMan opts (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n" -inlineToMan opts Space = return $ char ' ' -inlineToMan opts (Link txt (src, _)) = do -  linktext <- inlineListToMan opts txt -  let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src -  return $ if txt == [Code srcSuffix] -              then char '<' <> text srcSuffix <> char '>'  -              else linktext <> text " (" <> text src <> char ')'  -inlineToMan opts (Image alternate (source, tit)) = do -  let txt = if (null alternate) || (alternate == [Str ""]) ||  -               (alternate == [Str source]) -- to prevent autolinks -               then [Str "image"] -               else alternate -  linkPart <- inlineToMan opts (Link txt (source, tit))  -  return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' -inlineToMan opts (Note contents) = do  -  modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state -  (notes, _) <- get -  let ref = show $ (length notes) -  return $ char '[' <> text ref <> char ']' - diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs deleted file mode 100644 index 4cecaae5d..000000000 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ /dev/null @@ -1,373 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Writers.Markdown  -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Conversion of 'Pandoc' documents to markdown-formatted plain text. - -Markdown:  <http://daringfireball.net/projects/markdown/> --} -module Text.Pandoc.Writers.Markdown ( writeMarkdown) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared  -import Text.Pandoc.Blocks -import Text.ParserCombinators.Parsec ( parse, (<|>), GenParser ) -import Data.List ( group, isPrefixOf, drop, find, intersperse ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Control.Monad.State - -type Notes = [[Block]] -type Refs = KeyTable -type WriterState = (Notes, Refs) - --- | Convert Pandoc to Markdown. -writeMarkdown :: WriterOptions -> Pandoc -> String -writeMarkdown opts document =  -  render $ evalState (pandocToMarkdown opts document) ([],[])  - --- | Return markdown representation of document. -pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToMarkdown opts (Pandoc meta blocks) = do -  let before  = writerIncludeBefore opts -  let after   = writerIncludeAfter opts -  let before' = if null before then empty else text before -  let after'  = if null after then empty else text after -  metaBlock <- metaToMarkdown opts meta -  let head = if writerStandalone opts -                then metaBlock $+$ text (writerHeader opts) -                else empty -  let headerBlocks = filter isHeaderBlock blocks -  let toc = if writerTableOfContents opts  -               then tableOfContents opts headerBlocks -               else empty -  body <- blockListToMarkdown opts blocks -  (notes, _) <- get -  notes' <- notesToMarkdown opts (reverse notes) -  (_, refs) <- get  -- note that the notes may contain refs -  refs' <- keyTableToMarkdown opts (reverse refs) -  return $ head $+$ before' $+$ toc $+$ body $+$ text "" $+$  -           notes' $+$ text "" $+$ refs' $+$ after' - --- | Return markdown representation of reference key table. -keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc -keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat -  --- | Return markdown representation of a reference key.  -keyToMarkdown :: WriterOptions  -              -> ([Inline], (String, String))  -              -> State WriterState Doc -keyToMarkdown opts (label, (src, tit)) = do -  label' <- inlineListToMarkdown opts label -  let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\"" -  return $ text "  " <> char '[' <> label' <> char ']' <> text ": " <> -           text src <> tit'  - --- | Return markdown representation of notes. -notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToMarkdown opts notes =  -  mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=  -  return . vcat - --- | Return markdown representation of a note. -noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToMarkdown opts num blocks = do -  contents  <- blockListToMarkdown opts blocks -  let marker = text "[^" <> text (show num) <> text "]:" -  return $ hang marker (writerTabStop opts) contents  - --- | Escape special characters for Markdown. -escapeString :: String -> String -escapeString = escapeStringUsing markdownEscapes -  where markdownEscapes = ('\160', " "):(backslashEscapes "`<\\*_^~") - --- | Convert bibliographic information into Markdown header. -metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc -metaToMarkdown opts (Meta title authors date) = do -  title'   <- titleToMarkdown opts title -  authors' <- authorsToMarkdown authors -  date'    <- dateToMarkdown date -  return $ title' $+$ authors' $+$ date' - -titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -titleToMarkdown opts [] = return empty -titleToMarkdown opts lst = do -  contents <- inlineListToMarkdown opts lst -  return $ text "% " <> contents  - -authorsToMarkdown :: [String] -> State WriterState Doc -authorsToMarkdown [] = return empty -authorsToMarkdown lst = return $  -  text "% " <> text (joinWithSep ", " (map escapeString lst)) - -dateToMarkdown :: String -> State WriterState Doc -dateToMarkdown [] = return empty -dateToMarkdown str = return $ text "% " <> text (escapeString str) - --- | Construct table of contents from list of header blocks. -tableOfContents :: WriterOptions -> [Block] -> Doc  -tableOfContents opts headers = -  let opts' = opts { writerIgnoreNotes = True } -      contents = BulletList $ map elementToListItem $ hierarchicalize headers -  in  evalState (blockToMarkdown opts' contents) ([],[]) - --- | Converts an Element to a list item for a table of contents, -elementToListItem :: Element -> [Block] -elementToListItem (Blk _) = [] -elementToListItem (Sec headerText subsecs) = [Plain headerText] ++  -  if null subsecs -     then [] -     else [BulletList $ map elementToListItem subsecs] - --- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char st Char -olMarker = do (start, style, delim) <- anyOrderedListMarker -              if delim == Period &&  -                          (style == UpperAlpha || (style == UpperRoman && -                          start `elem` [1, 5, 10, 50, 100, 500, 1000])) -                          then spaceChar >> spaceChar -                          else spaceChar - --- | True if string begins with an ordered list marker -beginsWithOrderedListMarker :: String -> Bool -beginsWithOrderedListMarker str =  -  case parse olMarker "para start" str of -         Left  _  -> False  -         Right _  -> True - -wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -wrappedMarkdown opts inlines = do -  let chunks  = splitBy LineBreak inlines -  let chunks' = if null chunks -                   then [] -                   else (map (++ [Str "  "]) $ init chunks) ++ [last chunks] -  lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks' -  return $ vcat lns - --- | Convert Pandoc block element to markdown. -blockToMarkdown :: WriterOptions -- ^ Options -                -> Block         -- ^ Block element -                -> State WriterState Doc  -blockToMarkdown opts Null = return empty -blockToMarkdown opts (Plain inlines) =  -  wrappedMarkdown opts inlines -blockToMarkdown opts (Para inlines) = do -  contents <- wrappedMarkdown opts inlines -  -- escape if para starts with ordered list marker -  let esc = if (not (writerStrictMarkdown opts)) &&  -               beginsWithOrderedListMarker (render contents) -               then char '\\' -               else empty  -  return $ esc <> contents <> text "\n" -blockToMarkdown opts (RawHtml str) = return $ text str -blockToMarkdown opts HorizontalRule = return $ text "\n* * * * *\n" -blockToMarkdown opts (Header level inlines) = do -  contents <- inlineListToMarkdown opts inlines -  return $ text ((replicate level '#') ++ " ") <> contents <> text "\n" -blockToMarkdown opts (CodeBlock str) = return $ -  (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" -blockToMarkdown opts (BlockQuote blocks) = do -  contents <- blockListToMarkdown opts blocks -  return $ (vcat $ map (text . ("> " ++)) $ lines $ render contents) <>  -           text "\n" -blockToMarkdown opts (Table caption aligns widths headers rows) =  do -  caption' <- inlineListToMarkdown opts caption -  let caption'' = if null caption -                     then empty -                     else text "" $+$ (text "Table: " <> caption') -  headers' <- mapM (blockListToMarkdown opts) headers -  let widthsInChars = map (floor . (78 *)) widths -  let alignHeader alignment = case alignment of -                                AlignLeft    -> leftAlignBlock -                                AlignCenter  -> centerAlignBlock -                                AlignRight   -> rightAlignBlock -                                AlignDefault -> leftAlignBlock   -  let makeRow = hsepBlocks . (zipWith alignHeader aligns) .  -                (zipWith docToBlock widthsInChars) -  let head = makeRow headers' -  rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row -                            return $ makeRow cols) rows -  let tableWidth = sum widthsInChars -  let maxRowHeight = maximum $ map heightOfBlock (head:rows') -  let isMultilineTable = maxRowHeight > 1 -  let underline = hsep $  -                  map (\width -> text $ replicate width '-') widthsInChars -  let border = if isMultilineTable -                  then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-' -                  else empty -  let spacer = if isMultilineTable -                  then text "" -                  else empty -  let body = vcat $ intersperse spacer $ map blockToDoc rows' -  return $ (nest 2 $ border $+$ (blockToDoc head) $+$ underline $+$ body $+$  -                     border $+$ caption'') <> text "\n" -blockToMarkdown opts (BulletList items) = do -  contents <- mapM (bulletListItemToMarkdown opts) items -  return $ (vcat contents) <> text "\n" -blockToMarkdown opts (OrderedList attribs items) = do -  let markers  = orderedListMarkers attribs -  let markers' = map (\m -> if length m < 3 -                               then m ++ replicate (3 - length m) ' ' -                               else m) markers  -  contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ -              zip markers' items   -  return $ (vcat contents) <> text "\n" -blockToMarkdown opts (DefinitionList items) = do -  contents <- mapM (definitionListItemToMarkdown opts) items -  return $ (vcat contents) <> text "\n" - --- | Convert bullet list item (list of blocks) to markdown. -bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc -bulletListItemToMarkdown opts items = do -  contents <- blockListToMarkdown opts items -  return $ hang (text "-  ") (writerTabStop opts) contents - --- | Convert ordered list item (a list of blocks) to markdown. -orderedListItemToMarkdown :: WriterOptions -- ^ options -                          -> String        -- ^ list item marker -                          -> [Block]       -- ^ list item (list of blocks) -                          -> State WriterState Doc -orderedListItemToMarkdown opts marker items = do -  contents <- blockListToMarkdown opts items -  -- The complexities here are needed to ensure that if the list -  -- marker is 4 characters or longer, the second and following -  -- lines are indented 4 spaces but the list item begins after the marker. -  return $ sep [nest (min (3 - length marker) 0) (text marker),  -                nest (writerTabStop opts) contents] - --- | Convert definition list item (label, list of blocks) to markdown. -definitionListItemToMarkdown :: WriterOptions -                             -> ([Inline],[Block])  -                             -> State WriterState Doc -definitionListItemToMarkdown opts (label, items) = do -  labelText <- inlineListToMarkdown opts label -  let tabStop = writerTabStop opts -  let leader  = char ':' -  contents <- mapM (\item -> blockToMarkdown opts item >>=  -                   (\txt -> return (leader $$ nest tabStop txt))) -                   items >>= return . vcat -  return $ labelText $+$ contents - --- | Convert list of Pandoc block elements to markdown. -blockListToMarkdown :: WriterOptions -- ^ Options -                    -> [Block]       -- ^ List of block elements -                    -> State WriterState Doc  -blockListToMarkdown opts blocks = -  mapM (blockToMarkdown opts) blocks >>= return . vcat - --- | Get reference for target; if none exists, create unique one and return. ---   Prefer label if possible; otherwise, generate a unique key. -getReference :: [Inline] -> Target -> State WriterState [Inline] -getReference label (src, tit) = do -  (_,refs) <- get -  case find ((== (src, tit)) . snd) refs of -    Just (ref, _) -> return ref -    Nothing       -> do -      let label' = case find ((== label) . fst) refs of -                      Just _ -> -- label is used; generate numerical label -                                 case find (\n -> not (any (== [Str (show n)]) -                                           (map fst refs))) [1..10000] of -                                      Just x  -> [Str (show x)] -                                      Nothing -> error "no unique label" -                      Nothing -> label -      modify (\(notes, refs) -> (notes, (label', (src,tit)):refs)) -      return label' - --- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMarkdown opts lst = -  mapM (inlineToMarkdown opts) lst >>= return . hcat - --- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc -inlineToMarkdown opts (Emph lst) = do  -  contents <- inlineListToMarkdown opts lst -  return $ char '*' <> contents <> char '*' -inlineToMarkdown opts (Strong lst) = do -  contents <- inlineListToMarkdown opts lst -  return $ text "**" <> contents <> text "**" -inlineToMarkdown opts (Strikeout lst) = do -  contents <- inlineListToMarkdown opts lst -  return $ text "~~" <> contents <> text "~~" -inlineToMarkdown opts (Superscript lst) = do -  contents <- inlineListToMarkdown opts lst -  let contents' = text $ substitute " " "\\ " $ render contents -  return $ char '^' <> contents' <> char '^' -inlineToMarkdown opts (Subscript lst) = do -  contents <- inlineListToMarkdown opts lst -  let contents' = text $ substitute " " "\\ " $ render contents -  return $ char '~' <> contents' <> char '~' -inlineToMarkdown opts (Quoted SingleQuote lst) = do -  contents <- inlineListToMarkdown opts lst -  return $ char '\'' <> contents <> char '\'' -inlineToMarkdown opts (Quoted DoubleQuote lst) = do -  contents <- inlineListToMarkdown opts lst -  return $ char '"' <> contents <> char '"' -inlineToMarkdown opts EmDash = return $ text "--" -inlineToMarkdown opts EnDash = return $ char '-' -inlineToMarkdown opts Apostrophe = return $ char '\'' -inlineToMarkdown opts Ellipses = return $ text "..." -inlineToMarkdown opts (Code str) = -  let tickGroups = filter (\s -> '`' `elem` s) $ group str  -      longest    = if null tickGroups -                     then 0 -                     else maximum $ map length tickGroups  -      marker     = replicate (longest + 1) '`'  -      spacer     = if (longest == 0) then "" else " " in -  return $ text (marker ++ spacer ++ str ++ spacer ++ marker) -inlineToMarkdown opts (Str str) = return $ text $ escapeString str -inlineToMarkdown opts (TeX str) = return $ text str -inlineToMarkdown opts (HtmlInline str) = return $ text str  -inlineToMarkdown opts (LineBreak) = return $ text "  \n" -inlineToMarkdown opts Space = return $ char ' ' -inlineToMarkdown opts (Link txt (src, tit)) = do -  linktext <- inlineListToMarkdown opts txt -  let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" -  let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src -  let useRefLinks = writerReferenceLinks opts -  let useAuto = null tit && txt == [Code srcSuffix] -  ref <- if useRefLinks then getReference txt (src, tit) else return [] -  reftext <- inlineListToMarkdown opts ref -  return $ if useAuto -              then char '<' <> text srcSuffix <> char '>'  -              else if useRefLinks -                      then let first  = char '[' <> linktext <> char ']' -                               second = if txt == ref -                                           then text "[]" -                                           else char '[' <> reftext <> char ']' -                           in  first <> second -                      else char '[' <> linktext <> char ']' <>  -                           char '(' <> text src <> linktitle <> char ')'  -inlineToMarkdown opts (Image alternate (source, tit)) = do -  let txt = if (null alternate) || (alternate == [Str ""]) ||  -               (alternate == [Str source]) -- to prevent autolinks -               then [Str "image"] -               else alternate -  linkPart <- inlineToMarkdown opts (Link txt (source, tit))  -  return $ char '!' <> linkPart -inlineToMarkdown opts (Note contents) = do  -  modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state -  (notes, _) <- get -  let ref = show $ (length notes) -  return $ text "[^" <> text ref <> char ']' diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs deleted file mode 100644 index ddcbf95c0..000000000 --- a/src/Text/Pandoc/Writers/RST.hs +++ /dev/null @@ -1,325 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Writers.RST  -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha -   Portability : portable - -Conversion of 'Pandoc' documents to reStructuredText. - -reStructuredText:  <http://docutils.sourceforge.net/rst.html> --} -module Text.Pandoc.Writers.RST ( writeRST) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared  -import Text.Pandoc.Blocks -import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Control.Monad.State - -type Notes = [[Block]] -type Refs = KeyTable -type WriterState = (Notes, Refs, Refs) -- first Refs is links, second pictures - --- | Convert Pandoc to RST. -writeRST :: WriterOptions -> Pandoc -> String -writeRST opts document =  -  render $ evalState (pandocToRST opts document) ([],[],[])  - --- | Return RST representation of document. -pandocToRST :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToRST opts (Pandoc meta blocks) = do -  let before  = writerIncludeBefore opts -  let after   = writerIncludeAfter opts -      before' = if null before then empty else text before -      after'  = if null after then empty else text after -  metaBlock <- metaToRST opts meta -  let head = if (writerStandalone opts) -                then metaBlock $+$ text (writerHeader opts) -                else empty -  body <- blockListToRST opts blocks -  (notes, _, _) <- get -  notes' <- notesToRST opts (reverse notes) -  (_, refs, pics) <- get  -- note that the notes may contain refs -  refs' <- keyTableToRST opts (reverse refs) -  pics' <- pictTableToRST opts (reverse pics) -  return $ head $+$ before' $+$ body $+$ notes' $+$ text "" $+$ refs' $+$  -           pics' $+$ after' - --- | Return RST representation of reference key table. -keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc -keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat -  --- | Return RST representation of a reference key.  -keyToRST :: WriterOptions  -         -> ([Inline], (String, String))  -         -> State WriterState Doc -keyToRST opts (label, (src, tit)) = do -  label' <- inlineListToRST opts label -  let label'' = if ':' `elem` (render label') -                   then char '`' <> label' <> char '`' -                   else label' -  return $ text ".. _" <> label'' <> text ": " <> text src - --- | Return RST representation of notes. -notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToRST opts notes =  -  mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>=  -  return . vcat - --- | Return RST representation of a note. -noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToRST opts num note = do -  contents <- blockListToRST opts note -  let marker = text ".. [" <> text (show num) <> text "] " -  return $ hang marker 3 contents  - --- | Return RST representation of picture reference table. -pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc -pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat -  --- | Return RST representation of a picture substitution reference.  -pictToRST :: WriterOptions  -         -> ([Inline], (String, String))  -         -> State WriterState Doc -pictToRST opts (label, (src, _)) = do -  label' <- inlineListToRST opts label -  return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <> -           text src - --- | Take list of inline elements and return wrapped doc. -wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc -wrappedRST opts inlines = mapM (wrapIfNeeded opts (inlineListToRST opts)) -                          (splitBy LineBreak inlines) >>= return . vcat - --- | Escape special characters for RST. -escapeString :: String -> String -escapeString = escapeStringUsing (backslashEscapes "`\\|*_") - --- | Convert bibliographic information into RST header. -metaToRST :: WriterOptions -> Meta -> State WriterState Doc -metaToRST opts (Meta title authors date) = do -  title'   <- titleToRST opts title -  authors' <- authorsToRST authors -  date'    <- dateToRST date -  let toc  =  if writerTableOfContents opts -                 then text "" $+$ text ".. contents::" -                 else empty -  return $ title' $+$ authors' $+$ date' $+$ toc - -titleToRST :: WriterOptions -> [Inline] -> State WriterState Doc -titleToRST opts [] = return empty -titleToRST opts lst = do -  contents <- inlineListToRST opts lst -  let titleLength = length $ render contents -  let border = text (replicate titleLength '=') -  return $ border $+$ contents $+$ border <> text "\n" - -authorsToRST :: [String] -> State WriterState Doc -authorsToRST [] = return empty -authorsToRST (first:rest) = do -  rest' <- authorsToRST rest -  return $ (text ":Author: " <> text first) $+$ rest' - -dateToRST :: String -> State WriterState Doc -dateToRST [] = return empty -dateToRST str = return $ text ":Date: " <> text (escapeString str) - --- | Convert Pandoc block element to RST.  -blockToRST :: WriterOptions -- ^ Options -                -> Block         -- ^ Block element -                -> State WriterState Doc  -blockToRST opts Null = return empty -blockToRST opts (Plain inlines) = wrappedRST opts inlines -blockToRST opts (Para [TeX str]) = -  let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in -  return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str') -blockToRST opts (Para inlines) = do -  contents <- wrappedRST opts inlines -  return $ contents <> text "\n" -blockToRST opts (RawHtml str) =  -  let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in -  return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str') -blockToRST opts HorizontalRule = return $ text "--------------\n" -blockToRST opts (Header level inlines) = do -  contents <- inlineListToRST opts inlines -  let headerLength = length $ render contents -  let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) -  let border = text $ replicate headerLength headerChar -  return $ contents $+$ border <> text "\n" -blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$  -  (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" -blockToRST opts (BlockQuote blocks) = do -  contents <- blockListToRST opts blocks  -  return $ (nest (writerTabStop opts) contents) <> text "\n" -blockToRST opts (Table caption aligns widths headers rows) =  do -  caption' <- inlineListToRST opts caption -  let caption'' = if null caption -                     then empty -                     else text "" $+$ (text "Table: " <> caption') -  headers' <- mapM (blockListToRST opts) headers -  let widthsInChars = map (floor . (78 *)) widths -  let alignHeader alignment = case alignment of -                                AlignLeft    -> leftAlignBlock -                                AlignCenter  -> centerAlignBlock -                                AlignRight   -> rightAlignBlock -                                AlignDefault -> leftAlignBlock   -  let hpipeBlocks blocks = hcatBlocks [beg, middle, end]  -        where height = maximum (map heightOfBlock blocks) -              sep    = TextBlock 3 height (replicate height " | ") -              beg    = TextBlock 2 height (replicate height "| ") -              end    = TextBlock 2 height (replicate height " |") -              middle = hcatBlocks $ intersperse sep blocks -  let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars -  let head = makeRow headers' -  rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row -                            return $ makeRow cols) rows -  let tableWidth = sum widthsInChars -  let maxRowHeight = maximum $ map heightOfBlock (head:rows') -  let border ch = char '+' <> char ch <> -                  (hcat $ intersperse (char ch <> char '+' <> char ch) $  -                          map (\l -> text $ replicate l ch) widthsInChars) <> -                  char ch <> char '+' -  let body = vcat $ intersperse (border '-') $ map blockToDoc rows' -  return $ border '-' $+$ blockToDoc head $+$ border '=' $+$ body $+$  -           border '-' $$ caption'' $$ text "" -blockToRST opts (BulletList items) = do -  contents <- mapM (bulletListItemToRST opts) items -  -- ensure that sublists have preceding blank line -  return $ text "" $+$ vcat contents <> text "\n" -blockToRST opts (OrderedList (start, style, delim) items) = do -  let markers = if start == 1 && style == DefaultStyle && delim == DefaultDelim  -                   then take (length items) $ repeat "#." -                   else take (length items) $ orderedListMarkers  -                                              (start, style, delim) -  let maxMarkerLength = maximum $ map length markers -  let markers' = map (\m -> let s = maxMarkerLength - length m -                            in  m ++ replicate s ' ') markers -  contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $ -              zip markers' items   -  -- ensure that sublists have preceding blank line -  return $ text "" $+$ vcat contents <> text "\n" -blockToRST opts (DefinitionList items) = do -  contents <- mapM (definitionListItemToRST opts) items -  return $ (vcat contents) <> text "\n" - --- | Convert bullet list item (list of blocks) to RST. -bulletListItemToRST :: WriterOptions -> [Block] -> State WriterState Doc -bulletListItemToRST opts items = do -  contents <- blockListToRST opts items -  return $ hang (text "- ") 3 contents - --- | Convert ordered list item (a list of blocks) to RST. -orderedListItemToRST :: WriterOptions -- ^ options -                          -> String   -- ^ marker for list item -                          -> [Block]  -- ^ list item (list of blocks) -                          -> State WriterState Doc -orderedListItemToRST opts marker items = do -  contents <- blockListToRST opts items -  return $ hang (text marker) (length marker + 1) contents  - --- | Convert defintion list item (label, list of blocks) to RST. -definitionListItemToRST :: WriterOptions -> ([Inline], [Block]) -> State WriterState Doc -definitionListItemToRST opts (label, items) = do -  label <- inlineListToRST opts label -  contents <- blockListToRST opts items -  return $ label $+$ nest (writerTabStop opts) contents - --- | Convert list of Pandoc block elements to RST. -blockListToRST :: WriterOptions -- ^ Options -                    -> [Block]       -- ^ List of block elements -                    -> State WriterState Doc  -blockListToRST opts blocks = -  mapM (blockToRST opts) blocks >>= return . vcat - --- | Convert list of Pandoc inline elements to RST. -inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat - --- | Convert Pandoc inline element to RST. -inlineToRST :: WriterOptions -> Inline -> State WriterState Doc -inlineToRST opts (Emph lst) = do  -  contents <- inlineListToRST opts lst -  return $ char '*' <> contents <> char '*' -inlineToRST opts (Strong lst) = do -  contents <- inlineListToRST opts lst -  return $ text "**" <> contents <> text "**" -inlineToRST opts (Strikeout lst) = do  -  contents <- inlineListToRST opts lst -  return $ text "[STRIKEOUT:" <> contents <> char ']' -inlineToRST opts (Superscript lst) = do  -  contents <- inlineListToRST opts lst -  return $ text "\\ :sup:`" <> contents <> text "`\\ " -inlineToRST opts (Subscript lst) = do  -  contents <- inlineListToRST opts lst -  return $ text "\\ :sub:`" <> contents <> text "`\\ " -inlineToRST opts (Quoted SingleQuote lst) = do -  contents <- inlineListToRST opts lst -  return $ char '\'' <> contents <> char '\'' -inlineToRST opts (Quoted DoubleQuote lst) = do -  contents <- inlineListToRST opts lst -  return $ char '"' <> contents <> char '"' -inlineToRST opts EmDash = return $ text "--" -inlineToRST opts EnDash = return $ char '-' -inlineToRST opts Apostrophe = return $ char '\'' -inlineToRST opts Ellipses = return $ text "..." -inlineToRST opts (Code str) = return $ text $ "``" ++ str ++ "``" -inlineToRST opts (Str str) = return $ text $ escapeString str -inlineToRST opts (TeX str) = return $ text str -inlineToRST opts (HtmlInline str) = return empty -inlineToRST opts (LineBreak) = return $ char ' ' -- RST doesn't have linebreaks  -inlineToRST opts Space = return $ char ' ' -inlineToRST opts (Link [Code str] (src, tit)) | src == str || -                                                src == "mailto:" ++ str = do -  let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src -  return $ text srcSuffix -inlineToRST opts (Link txt (src, tit)) = do -  let useReferenceLinks = writerReferenceLinks opts -  linktext <- inlineListToRST opts $ normalizeSpaces txt -  if useReferenceLinks -    then do (notes, refs, pics) <- get -            let refs' = if (txt, (src, tit)) `elem` refs -                           then refs -                           else (txt, (src, tit)):refs -            put (notes, refs', pics) -            return $ char '`' <> linktext <> text "`_" -    else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_" -inlineToRST opts (Image alternate (source, tit)) = do -  (notes, refs, pics) <- get -  let labelsUsed = map fst pics  -  let txt = if null alternate || alternate == [Str ""] ||  -               alternate `elem` labelsUsed -               then [Str $ "image" ++ show (length refs)] -               else alternate -  let pics' = if (txt, (source, tit)) `elem` pics -                 then pics -                 else (txt, (source, tit)):pics -  put (notes, refs, pics') -  label <- inlineListToRST opts txt -  return $ char '|' <> label <> char '|' -inlineToRST opts (Note contents) = do  -  -- add to notes in state -  modify (\(notes, refs, pics) -> (contents:notes, refs, pics)) -  (notes, _, _) <- get -  let ref = show $ (length notes) -  return $ text " [" <> text ref <> text "]_" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs deleted file mode 100644 index 3bd5c63b2..000000000 --- a/src/Text/Pandoc/Writers/RTF.hs +++ /dev/null @@ -1,286 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA --} - -{- | -   Module      : Text.Pandoc.Writers.RTF -   Copyright   : Copyright (C) 2006-7 John MacFarlane -   License     : GNU GPL, version 2 or above  - -   Maintainer  : John MacFarlane <jgm@berkeley.edu> -   Stability   : alpha  -   Portability : portable - -Conversion of 'Pandoc' documents to RTF (rich text format). --} -module Text.Pandoc.Writers.RTF ( writeRTF ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Regex ( matchRegexAll, mkRegex ) -import Data.List ( isSuffixOf ) -import Data.Char ( ord ) - --- | Convert Pandoc to a string in rich text format. -writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc meta blocks) =  -  let head = if writerStandalone options -                then rtfHeader (writerHeader options) meta  -                else "" -      toc  = if writerTableOfContents options -                then tableOfContents $ filter isHeaderBlock blocks -                else ""  -      foot = if writerStandalone options then "\n}\n" else ""  -      body = writerIncludeBefore options ++  -             concatMap (blockToRTF 0 AlignDefault) blocks ++  -             writerIncludeAfter options -  in  head ++ toc ++ body ++ foot - --- | Construct table of contents from list of header blocks. -tableOfContents :: [Block] -> String  -tableOfContents headers = -  let contentsTree = hierarchicalize headers -  in  concatMap (blockToRTF 0 AlignDefault) $  -      [Header 1 [Str "Contents"],  -       BulletList (map elementToListItem contentsTree)] - -elementToListItem :: Element -> [Block] -elementToListItem (Blk _) = [] -elementToListItem (Sec sectext subsecs) = [Plain sectext] ++ -  if null subsecs -     then [] -     else [BulletList (map elementToListItem subsecs)] - --- | Convert unicode characters (> 127) into rich text format representation. -handleUnicode :: String -> String -handleUnicode [] = [] -handleUnicode (c:cs) = -  if ord c > 127  -     then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs -     else c:(handleUnicode cs) - --- | Escape special characters. -escapeSpecial :: String -> String -escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}")) - --- | Escape strings as needed for rich text format. -stringToRTF :: String -> String -stringToRTF = handleUnicode . escapeSpecial - --- | Escape things as needed for code block in RTF. -codeStringToRTF :: String -> String -codeStringToRTF str = joinWithSep "\\line\n" $ lines (stringToRTF str) - --- | Deal with raw LaTeX. -latexToRTF :: String -> String -latexToRTF str = "{\\cf1 " ++ (stringToRTF str) ++ "\\cf0 } " - --- | Make a paragraph with first-line indent, block indent, and space after. -rtfParSpaced :: Int       -- ^ space after (in twips) -             -> Int       -- ^ block indent (in twips) -             -> Int       -- ^ first line indent (relative to block) (in twips) -             -> Alignment -- ^ alignment -             -> String    -- ^ string with content -             -> String  -rtfParSpaced spaceAfter indent firstLineIndent alignment content =  -  let alignString = case alignment of -                           AlignLeft -> "\\ql " -                           AlignRight -> "\\qr " -                           AlignCenter -> "\\qc " -                           AlignDefault -> "\\ql " -  in  "{\\pard " ++ alignString ++ -      "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++  -      " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" - --- | Default paragraph.  -rtfPar :: Int       -- ^ block indent (in twips) -       -> Int       -- ^ first line indent (relative to block) (in twips) -       -> Alignment -- ^ alignment -       -> String    -- ^ string with content -       -> String  -rtfPar = rtfParSpaced 180  - --- | Compact paragraph (e.g. for compact list items). -rtfCompact ::  Int       -- ^ block indent (in twips) -           ->  Int       -- ^ first line indent (relative to block) (in twips) -           ->  Alignment -- ^ alignment -           ->  String    -- ^ string with content -           ->  String  -rtfCompact = rtfParSpaced 0  - --- number of twips to indent -indentIncrement = 720 -listIncrement = 360 - --- | Returns appropriate bullet list marker for indent level. -bulletMarker :: Int -> String -bulletMarker indent = case indent `mod` 720 of -                             0         -> "\\bullet " -                             otherwise -> "\\endash " - --- | Returns appropriate (list of) ordered list markers for indent level. -orderedMarkers :: Int -> ListAttributes -> [String] -orderedMarkers indent (start, style, delim) =  -  if style == DefaultStyle && delim == DefaultDelim -     then case indent `mod` 720 of -              0         -> orderedListMarkers (start, Decimal, Period) -              otherwise -> orderedListMarkers (start, LowerAlpha, Period) -     else orderedListMarkers (start, style, delim) - --- | Returns RTF header. -rtfHeader :: String    -- ^ header text -          -> Meta      -- ^ bibliographic information -          -> String -rtfHeader headerText (Meta title authors date) = -  let titletext = if null title -                     then ""  -                     else rtfPar 0 0 AlignCenter $ -                          "\\b \\fs36 " ++ inlineListToRTF title -      authorstext = if null authors -                       then ""  -                       else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $ -                                                    map stringToRTF authors)) -      datetext = if date == ""  -                    then "" -                    else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in -  let spacer = if null (titletext ++ authorstext ++ datetext) -                  then "" -                  else rtfPar 0 0 AlignDefault "" in -  headerText ++ titletext ++ authorstext ++ datetext ++ spacer - --- | Convert Pandoc block element to RTF. -blockToRTF :: Int       -- ^ indent level -           -> Alignment -- ^ alignment -           -> Block     -- ^ block to convert -           -> String -blockToRTF _ _ Null = "" -blockToRTF indent alignment (Plain lst) =  -  rtfCompact indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (Para lst) =  -  rtfPar indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (BlockQuote lst) =  -  concatMap (blockToRTF (indent + indentIncrement) alignment) lst  -blockToRTF indent _ (CodeBlock str) = -  rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ (RawHtml str) = "" -blockToRTF indent alignment (BulletList lst) = spaceAtEnd $  -  concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $  -  zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $  -  concatMap (definitionListItemToRTF alignment indent) lst -blockToRTF indent _ HorizontalRule =  -  rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" -blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $ -  "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst -blockToRTF indent alignment (Table caption aligns sizes headers rows) =  -  tableRowToRTF True indent aligns sizes headers ++  -  concatMap (tableRowToRTF False indent aligns sizes) rows ++ -  rtfPar indent 0 alignment (inlineListToRTF caption) - -tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String  -tableRowToRTF header indent aligns sizes cols = -  let columns = concat $ zipWith (tableItemToRTF indent) aligns cols -      totalTwips = 6 * 1440 -- 6 inches -      rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) -                                0 sizes -      cellDefs = map (\edge -> (if header -                                   then "\\clbrdrb\\brdrs" -                                   else "") ++ "\\cellx" ++ show edge) -                     rightEdges -      start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ -              "\\trkeep\\intbl\n{\n" -      end = "}\n\\intbl\\row}\n" -  in  start ++ columns ++ end - -tableItemToRTF :: Int -> Alignment -> [Block] -> String  -tableItemToRTF indent alignment item = -  let contents = concatMap (blockToRTF indent alignment) item -  in  "{\\intbl " ++ contents ++ "\\cell}\n" - --- | Ensure that there's the same amount of space after compact --- lists as after regular lists. -spaceAtEnd :: String -> String -spaceAtEnd str =  -  if isSuffixOf "\\par}\n" str -     then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" -     else str - --- | Convert list item (list of blocks) to RTF. -listItemToRTF :: Alignment  -- ^ alignment -              -> Int        -- ^ indent level -              -> String     -- ^ list start marker -              -> [Block]    -- ^ list item (list of blocks) -              -> [Char] -listItemToRTF alignment indent marker [] =  -  rtfCompact (indent + listIncrement) (0 - listIncrement) alignment  -             (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")  -listItemToRTF alignment indent marker list =  -  let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in -  -- insert the list marker into the (processed) first block -  let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of -                    Just (before, matched, after, _) ->  -                                before ++ "\\fi" ++ show (0 - listIncrement) ++  -                                " " ++ marker ++ "\\tx" ++  -                                show listIncrement ++ "\\tab" ++ after -                    Nothing -> first in -  modFirst ++ concat rest - --- | Convert definition list item (label, list of blocks) to RTF. -definitionListItemToRTF :: Alignment          -- ^ alignment -                        -> Int                -- ^ indent level -                        -> ([Inline],[Block]) -- ^ list item (list of blocks) -                        -> [Char] -definitionListItemToRTF alignment indent (label, items) = -  let labelText = blockToRTF indent alignment (Plain label) -      itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) items -  in  labelText ++ itemsText  - --- | Convert list of inline items to RTF. -inlineListToRTF :: [Inline]   -- ^ list of inlines to convert -                -> String -inlineListToRTF lst = concatMap inlineToRTF lst - --- | Convert inline item to RTF. -inlineToRTF :: Inline         -- ^ inline to convert -            -> String -inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Quoted SingleQuote lst) =  -  "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" -inlineToRTF (Quoted DoubleQuote lst) =  -  "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" -inlineToRTF Apostrophe = "\\u8217'" -inlineToRTF Ellipses = "\\u8230?" -inlineToRTF EmDash = "\\u8212-" -inlineToRTF EnDash = "\\u8211-" -inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} " -inlineToRTF (Str str) = stringToRTF str -inlineToRTF (TeX str) = latexToRTF str -inlineToRTF (HtmlInline str) = "" -inlineToRTF (LineBreak) = "\\line " -inlineToRTF Space = " " -inlineToRTF (Link text (src, tit)) =  -  "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++  -  "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" -inlineToRTF (Image alternate (source, tit)) =  -  "{\\cf1 [image: " ++ source ++ "]\\cf0}"  -inlineToRTF (Note contents) = -  "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++  -  (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"  | 
