aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2006-10-17 14:22:29 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2006-10-17 14:22:29 +0000
commitdf7b68225101966051f8b592a27127bf789eb81e (patch)
treea063e97ed58d0bdb2cbb5a95c3e8c1bcce54aa00 /src/Text/Pandoc
parente7dbfef4d8aa528d9245424e9c372e900a774c90 (diff)
downloadpandoc-df7b68225101966051f8b592a27127bf789eb81e.tar.gz
initial import
git-svn-id: https://pandoc.googlecode.com/svn/trunk@2 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/ASCIIMathML.hs14
-rw-r--r--src/Text/Pandoc/Definition.hs50
-rw-r--r--src/Text/Pandoc/HtmlEntities.hs306
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs434
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs585
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs582
-rw-r--r--src/Text/Pandoc/Readers/RST.hs644
-rw-r--r--src/Text/Pandoc/Shared.hs417
-rw-r--r--src/Text/Pandoc/UTF8.hs43
-rw-r--r--src/Text/Pandoc/Writers/DefaultHeaders.hs27
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs197
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs164
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs149
-rw-r--r--src/Text/Pandoc/Writers/RST.hs188
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs194
-rw-r--r--src/Text/Pandoc/Writers/S5.hs95
16 files changed, 4089 insertions, 0 deletions
diff --git a/src/Text/Pandoc/ASCIIMathML.hs b/src/Text/Pandoc/ASCIIMathML.hs
new file mode 100644
index 000000000..85d7c46b9
--- /dev/null
+++ b/src/Text/Pandoc/ASCIIMathML.hs
@@ -0,0 +1,14 @@
+----------------------------------------------------
+-- Do not edit this file by hand. Edit
+-- 'templates/ASCIIMathML.hs'
+-- and run ./fillTemplates.pl Text/Pandoc/ASCIIMathML.hs
+----------------------------------------------------
+
+-- | Definitions for use of Pandoc.ASCIIMathML in HTML.
+-- (See <http://www1.chapman.edu/~jipsen/mathml/asciimath.html>.)
+module Text.Pandoc.ASCIIMathML ( asciiMathMLScript ) where
+
+-- | String containing Pandoc.ASCIIMathML javascript.
+asciiMathMLScript :: String
+asciiMathMLScript = "<script type=\"text/javascript\">\n/* ASCIIMathML.js - copyright Peter Jipsen, released under the GPL\nSee http://www1.chapman.edu/~jipsen/mathml/asciimath.html/ */\n\nvar checkForMathML = true; // check if browser can display MathML\nvar notifyIfNoMathML = true; // display note if no MathML capability\nvar alertIfNoMathML = false; // show alert box if no MathML capability\nvar mathcolor = \"red\"; // change it to \"\" (to inherit) or any other color\nvar mathfontfamily = \"serif\"; // change to \"\" to inherit (works in IE) \n // or another family (e.g. \"arial\")\nvar displaystyle = true; // puts limits above and below large operators\nvar showasciiformulaonhover = true; // helps students learn ASCIIMath\nvar decimalsign = \".\"; // change to \",\" if you like, beware of `(1,2)`!\nvar AMdelimiter1 = \"`\", AMescape1 = \"\\\\\\\\`\"; // can use other characters\nvar AMdelimiter2 = \"$\", AMescape2 = \"\\\\\\\\\\\\$\", AMdelimiter2regexp = \"\\\\$\";\nvar doubleblankmathdelimiter = false; // if true, x+1 is equal to `x+1`\n // for IE this works only in <!-- -->\n//var separatetokens;// has been removed (email me if this is a problem)\nvar isIE = document.createElementNS==null;\n\nif (document.getElementById==null) \n alert(\"This webpage requires a recent browser such as\\\n\\nMozilla/Netscape 7+ or Internet Explorer 6+MathPlayer\")\n\n// all further global variables start with \"AM\"\n\nfunction AMcreateElementXHTML(t) {\n if (isIE) return document.createElement(t);\n else return document.createElementNS(\"http://www.w3.org/1999/xhtml\",t);\n}\n\nfunction AMnoMathMLNote() {\n var nd = AMcreateElementXHTML(\"h3\");\n nd.setAttribute(\"align\",\"center\")\n nd.appendChild(AMcreateElementXHTML(\"p\"));\n nd.appendChild(document.createTextNode(\"To view the \"));\n var an = AMcreateElementXHTML(\"a\");\n an.appendChild(document.createTextNode(\"ASCIIMathML\"));\n an.setAttribute(\"href\",\"http://www.chapman.edu/~jipsen/asciimath.html\");\n nd.appendChild(an);\n nd.appendChild(document.createTextNode(\" notation use Internet Explorer 6+\")); \n an = AMcreateElementXHTML(\"a\");\n an.appendChild(document.createTextNode(\"MathPlayer\"));\n an.setAttribute(\"href\",\"http://www.dessci.com/en/products/mathplayer/download.htm\");\n nd.appendChild(an);\n nd.appendChild(document.createTextNode(\" or Netscape/Mozilla/Firefox\"));\n nd.appendChild(AMcreateElementXHTML(\"p\"));\n return nd;\n}\n\nfunction AMisMathMLavailable() {\n var regex = /KHTML/; /* ASCIIMathML.js - copyright Peter Jipsen, released under the GPL\nSee http://www1.chapman.edu/~jipsen/mathml/asciimath.html/ */\n if (navigator.appName.slice(0,8)==\"Netscape\")\n if (navigator.appVersion.slice(0,1)>=\"5\" && !regex.test(navigator.userAgent)) return null;\n else return AMnoMathMLNote();\n else if (navigator.appName.slice(0,9)==\"Microsoft\")\n try {\n var ActiveX = new ActiveXObject(\"MathPlayer.Factory.1\");\n return null;\n } catch (e) {\n return AMnoMathMLNote();\n }\n else return AMnoMathMLNote();\n}\n\n// character lists for Mozilla/Netscape fonts\nvar AMcal = [0xEF35,0x212C,0xEF36,0xEF37,0x2130,0x2131,0xEF38,0x210B,0x2110,0xEF39,0xEF3A,0x2112,0x2133,0xEF3B,0xEF3C,0xEF3D,0xEF3E,0x211B,0xEF3F,0xEF40,0xEF41,0xEF42,0xEF43,0xEF44,0xEF45,0xEF46];\nvar AMfrk = [0xEF5D,0xEF5E,0x212D,0xEF5F,0xEF60,0xEF61,0xEF62,0x210C,0x2111,0xEF63,0xEF64,0xEF65,0xEF66,0xEF67,0xEF68,0xEF69,0xEF6A,0x211C,0xEF6B,0xEF6C,0xEF6D,0xEF6E,0xEF6F,0xEF70,0xEF71,0x2128];\nvar AMbbb = [0xEF8C,0xEF8D,0x2102,0xEF8E,0xEF8F,0xEF90,0xEF91,0x210D,0xEF92,0xEF93,0xEF94,0xEF95,0xEF96,0x2115,0xEF97,0x2119,0x211A,0x211D,0xEF98,0xEF99,0xEF9A,0xEF9B,0xEF9C,0xEF9D,0xEF9E,0x2124];\n\nvar CONST = 0, UNARY = 1, BINARY = 2, INFIX = 3, LEFTBRACKET = 4, \n RIGHTBRACKET = 5, SPACE = 6, UNDEROVER = 7, DEFINITION = 8,\n LEFTRIGHT = 9, TEXT = 10; // token types\n\nvar AMsqrt = {input:\"sqrt\", tag:\"msqrt\", output:\"sqrt\", tex:null, ttype:UNARY},\n AMroot = {input:\"root\", tag:\"mroot\", output:\"root\", tex:null, ttype:BINARY},\n AMfrac = {input:\"frac\", tag:\"mfrac\", output:\"/\", tex:null, ttype:BINARY},\n AMdiv = {input:\"/\", tag:\"mfrac\", output:\"/\", tex:null, ttype:INFIX},\n AMover = {input:\"stackrel\", tag:\"mover\", output:\"stackrel\", tex:null, ttype:BINARY},\n AMsub = {input:\"_\", tag:\"msub\", output:\"_\", tex:null, ttype:INFIX},\n AMsup = {input:\"^\", tag:\"msup\", output:\"^\", tex:null, ttype:INFIX},\n AMtext = {input:\"text\", tag:\"mtext\", output:\"text\", tex:null, ttype:TEXT},\n AMmbox = {input:\"mbox\", tag:\"mtext\", output:\"mbox\", tex:null, ttype:TEXT},\n AMquote = {input:\"\\\"\", tag:\"mtext\", output:\"mbox\", tex:null, ttype:TEXT};\n\nvar AMsymbols = [\n//some greek symbols\n{input:\"alpha\", tag:\"mi\", output:\"\\u03B1\", tex:null, ttype:CONST},\n{input:\"beta\", tag:\"mi\", output:\"\\u03B2\", tex:null, ttype:CONST},\n{input:\"chi\", tag:\"mi\", output:\"\\u03C7\", tex:null, ttype:CONST},\n{input:\"delta\", tag:\"mi\", output:\"\\u03B4\", tex:null, ttype:CONST},\n{input:\"Delta\", tag:\"mo\", output:\"\\u0394\", tex:null, ttype:CONST},\n{input:\"epsi\", tag:\"mi\", output:\"\\u03B5\", tex:\"epsilon\", ttype:CONST},\n{input:\"varepsilon\", tag:\"mi\", output:\"\\u025B\", tex:null, ttype:CONST},\n{input:\"eta\", tag:\"mi\", output:\"\\u03B7\", tex:null, ttype:CONST},\n{input:\"gamma\", tag:\"mi\", output:\"\\u03B3\", tex:null, ttype:CONST},\n{input:\"Gamma\", tag:\"mo\", output:\"\\u0393\", tex:null, ttype:CONST},\n{input:\"iota\", tag:\"mi\", output:\"\\u03B9\", tex:null, ttype:CONST},\n{input:\"kappa\", tag:\"mi\", output:\"\\u03BA\", tex:null, ttype:CONST},\n{input:\"lambda\", tag:\"mi\", output:\"\\u03BB\", tex:null, ttype:CONST},\n{input:\"Lambda\", tag:\"mo\", output:\"\\u039B\", tex:null, ttype:CONST},\n{input:\"mu\", tag:\"mi\", output:\"\\u03BC\", tex:null, ttype:CONST},\n{input:\"nu\", tag:\"mi\", output:\"\\u03BD\", tex:null, ttype:CONST},\n{input:\"omega\", tag:\"mi\", output:\"\\u03C9\", tex:null, ttype:CONST},\n{input:\"Omega\", tag:\"mo\", output:\"\\u03A9\", tex:null, ttype:CONST},\n{input:\"phi\", tag:\"mi\", output:\"\\u03C6\", tex:null, ttype:CONST},\n{input:\"varphi\", tag:\"mi\", output:\"\\u03D5\", tex:null, ttype:CONST},\n{input:\"Phi\", tag:\"mo\", output:\"\\u03A6\", tex:null, ttype:CONST},\n{input:\"pi\", tag:\"mi\", output:\"\\u03C0\", tex:null, ttype:CONST},\n{input:\"Pi\", tag:\"mo\", output:\"\\u03A0\", tex:null, ttype:CONST},\n{input:\"psi\", tag:\"mi\", output:\"\\u03C8\", tex:null, ttype:CONST},\n{input:\"Psi\", tag:\"mi\", output:\"\\u03A8\", tex:null, ttype:CONST},\n{input:\"rho\", tag:\"mi\", output:\"\\u03C1\", tex:null, ttype:CONST},\n{input:\"sigma\", tag:\"mi\", output:\"\\u03C3\", tex:null, ttype:CONST},\n{input:\"Sigma\", tag:\"mo\", output:\"\\u03A3\", tex:null, ttype:CONST},\n{input:\"tau\", tag:\"mi\", output:\"\\u03C4\", tex:null, ttype:CONST},\n{input:\"theta\", tag:\"mi\", output:\"\\u03B8\", tex:null, ttype:CONST},\n{input:\"vartheta\", tag:\"mi\", output:\"\\u03D1\", tex:null, ttype:CONST},\n{input:\"Theta\", tag:\"mo\", output:\"\\u0398\", tex:null, ttype:CONST},\n{input:\"upsilon\", tag:\"mi\", output:\"\\u03C5\", tex:null, ttype:CONST},\n{input:\"xi\", tag:\"mi\", output:\"\\u03BE\", tex:null, ttype:CONST},\n{input:\"Xi\", tag:\"mo\", output:\"\\u039E\", tex:null, ttype:CONST},\n{input:\"zeta\", tag:\"mi\", output:\"\\u03B6\", tex:null, ttype:CONST},\n\n//binary operation symbols\n{input:\"*\", tag:\"mo\", output:\"\\u22C5\", tex:\"cdot\", ttype:CONST},\n{input:\"**\", tag:\"mo\", output:\"\\u22C6\", tex:\"star\", ttype:CONST},\n{input:\"//\", tag:\"mo\", output:\"/\", tex:null, ttype:CONST},\n{input:\"\\\\\\\\\", tag:\"mo\", output:\"\\\\\", tex:\"backslash\", ttype:CONST},\n{input:\"setminus\", tag:\"mo\", output:\"\\\\\", tex:null, ttype:CONST},\n{input:\"xx\", tag:\"mo\", output:\"\\u00D7\", tex:\"times\", ttype:CONST},\n{input:\"-:\", tag:\"mo\", output:\"\\u00F7\", tex:\"divide\", ttype:CONST},\n{input:\"@\", tag:\"mo\", output:\"\\u2218\", tex:\"circ\", ttype:CONST},\n{input:\"o+\", tag:\"mo\", output:\"\\u2295\", tex:\"oplus\", ttype:CONST},\n{input:\"ox\", tag:\"mo\", output:\"\\u2297\", tex:\"otimes\", ttype:CONST},\n{input:\"o.\", tag:\"mo\", output:\"\\u2299\", tex:\"odot\", ttype:CONST},\n{input:\"sum\", tag:\"mo\", output:\"\\u2211\", tex:null, ttype:UNDEROVER},\n{input:\"prod\", tag:\"mo\", output:\"\\u220F\", tex:null, ttype:UNDEROVER},\n{input:\"^^\", tag:\"mo\", output:\"\\u2227\", tex:\"wedge\", ttype:CONST},\n{input:\"^^^\", tag:\"mo\", output:\"\\u22C0\", tex:\"bigwedge\", ttype:UNDEROVER},\n{input:\"vv\", tag:\"mo\", output:\"\\u2228\", tex:\"vee\", ttype:CONST},\n{input:\"vvv\", tag:\"mo\", output:\"\\u22C1\", tex:\"bigvee\", ttype:UNDEROVER},\n{input:\"nn\", tag:\"mo\", output:\"\\u2229\", tex:\"cap\", ttype:CONST},\n{input:\"nnn\", tag:\"mo\", output:\"\\u22C2\", tex:\"bigcap\", ttype:UNDEROVER},\n{input:\"uu\", tag:\"mo\", output:\"\\u222A\", tex:\"cup\", ttype:CONST},\n{input:\"uuu\", tag:\"mo\", output:\"\\u22C3\", tex:\"bigcup\", ttype:UNDEROVER},\n\n//binary relation symbols\n{input:\"!=\", tag:\"mo\", output:\"\\u2260\", tex:\"ne\", ttype:CONST},\n{input:\":=\", tag:\"mo\", output:\":=\", tex:null, ttype:CONST},\n{input:\"lt\", tag:\"mo\", output:\"<\", tex:null, ttype:CONST},\n{input:\"<=\", tag:\"mo\", output:\"\\u2264\", tex:\"le\", ttype:CONST},\n{input:\"lt=\", tag:\"mo\", output:\"\\u2264\", tex:\"leq\", ttype:CONST},\n{input:\">=\", tag:\"mo\", output:\"\\u2265\", tex:\"ge\", ttype:CONST},\n{input:\"geq\", tag:\"mo\", output:\"\\u2265\", tex:null, ttype:CONST},\n{input:\"-<\", tag:\"mo\", output:\"\\u227A\", tex:\"prec\", ttype:CONST},\n{input:\"-lt\", tag:\"mo\", output:\"\\u227A\", tex:null, ttype:CONST},\n{input:\">-\", tag:\"mo\", output:\"\\u227B\", tex:\"succ\", ttype:CONST},\n{input:\"-<=\", tag:\"mo\", output:\"\\u2AAF\", tex:\"preceq\", ttype:CONST},\n{input:\">-=\", tag:\"mo\", output:\"\\u2AB0\", tex:\"succeq\", ttype:CONST},\n{input:\"in\", tag:\"mo\", output:\"\\u2208\", tex:null, ttype:CONST},\n{input:\"!in\", tag:\"mo\", output:\"\\u2209\", tex:\"notin\", ttype:CONST},\n{input:\"sub\", tag:\"mo\", output:\"\\u2282\", tex:\"subset\", ttype:CONST},\n{input:\"sup\", tag:\"mo\", output:\"\\u2283\", tex:\"supset\", ttype:CONST},\n{input:\"sube\", tag:\"mo\", output:\"\\u2286\", tex:\"subseteq\", ttype:CONST},\n{input:\"supe\", tag:\"mo\", output:\"\\u2287\", tex:\"supseteq\", ttype:CONST},\n{input:\"-=\", tag:\"mo\", output:\"\\u2261\", tex:\"equiv\", ttype:CONST},\n{input:\"~=\", tag:\"mo\", output:\"\\u2245\", tex:\"cong\", ttype:CONST},\n{input:\"~~\", tag:\"mo\", output:\"\\u2248\", tex:\"approx\", ttype:CONST},\n{input:\"prop\", tag:\"mo\", output:\"\\u221D\", tex:\"propto\", ttype:CONST},\n\n//logical symbols\n{input:\"and\", tag:\"mtext\", output:\"and\", tex:null, ttype:SPACE},\n{input:\"or\", tag:\"mtext\", output:\"or\", tex:null, ttype:SPACE},\n{input:\"not\", tag:\"mo\", output:\"\\u00AC\", tex:\"neg\", ttype:CONST},\n{input:\"=>\", tag:\"mo\", output:\"\\u21D2\", tex:\"implies\", ttype:CONST},\n{input:\"if\", tag:\"mo\", output:\"if\", tex:null, ttype:SPACE},\n{input:\"<=>\", tag:\"mo\", output:\"\\u21D4\", tex:\"iff\", ttype:CONST},\n{input:\"AA\", tag:\"mo\", output:\"\\u2200\", tex:\"forall\", ttype:CONST},\n{input:\"EE\", tag:\"mo\", output:\"\\u2203\", tex:\"exists\", ttype:CONST},\n{input:\"_|_\", tag:\"mo\", output:\"\\u22A5\", tex:\"bot\", ttype:CONST},\n{input:\"TT\", tag:\"mo\", output:\"\\u22A4\", tex:\"top\", ttype:CONST},\n{input:\"|--\", tag:\"mo\", output:\"\\u22A2\", tex:\"vdash\", ttype:CONST},\n{input:\"|==\", tag:\"mo\", output:\"\\u22A8\", tex:\"models\", ttype:CONST},\n\n//grouping brackets\n{input:\"(\", tag:\"mo\", output:\"(\", tex:null, ttype:LEFTBRACKET},\n{input:\")\", tag:\"mo\", output:\")\", tex:null, ttype:RIGHTBRACKET},\n{input:\"[\", tag:\"mo\", output:\"[\", tex:null, ttype:LEFTBRACKET},\n{input:\"]\", tag:\"mo\", output:\"]\", tex:null, ttype:RIGHTBRACKET},\n{input:\"{\", tag:\"mo\", output:\"{\", tex:null, ttype:LEFTBRACKET},\n{input:\"}\", tag:\"mo\", output:\"}\", tex:null, ttype:RIGHTBRACKET},\n{input:\"|\", tag:\"mo\", output:\"|\", tex:null, ttype:LEFTRIGHT},\n//{input:\"||\", tag:\"mo\", output:\"||\", tex:null, ttype:LEFTRIGHT},\n{input:\"(:\", tag:\"mo\", output:\"\\u2329\", tex:\"langle\", ttype:LEFTBRACKET},\n{input:\":)\", tag:\"mo\", output:\"\\u232A\", tex:\"rangle\", ttype:RIGHTBRACKET},\n{input:\"<<\", tag:\"mo\", output:\"\\u2329\", tex:null, ttype:LEFTBRACKET},\n{input:\">>\", tag:\"mo\", output:\"\\u232A\", tex:null, ttype:RIGHTBRACKET},\n{input:\"{:\", tag:\"mo\", output:\"{:\", tex:null, ttype:LEFTBRACKET, invisible:true},\n{input:\":}\", tag:\"mo\", output:\":}\", tex:null, ttype:RIGHTBRACKET, invisible:true},\n\n//miscellaneous symbols\n{input:\"int\", tag:\"mo\", output:\"\\u222B\", tex:null, ttype:CONST},\n{input:\"dx\", tag:\"mi\", output:\"{:d x:}\", tex:null, ttype:DEFINITION},\n{input:\"dy\", tag:\"mi\", output:\"{:d y:}\", tex:null, ttype:DEFINITION},\n{input:\"dz\", tag:\"mi\", output:\"{:d z:}\", tex:null, ttype:DEFINITION},\n{input:\"dt\", tag:\"mi\", output:\"{:d t:}\", tex:null, ttype:DEFINITION},\n{input:\"oint\", tag:\"mo\", output:\"\\u222E\", tex:null, ttype:CONST},\n{input:\"del\", tag:\"mo\", output:\"\\u2202\", tex:\"partial\", ttype:CONST},\n{input:\"grad\", tag:\"mo\", output:\"\\u2207\", tex:\"nabla\", ttype:CONST},\n{input:\"+-\", tag:\"mo\", output:\"\\u00B1\", tex:\"pm\", ttype:CONST},\n{input:\"O/\", tag:\"mo\", output:\"\\u2205\", tex:\"emptyset\", ttype:CONST},\n{input:\"oo\", tag:\"mo\", output:\"\\u221E\", tex:\"infty\", ttype:CONST},\n{input:\"aleph\", tag:\"mo\", output:\"\\u2135\", tex:null, ttype:CONST},\n{input:\"...\", tag:\"mo\", output:\"...\", tex:\"ldots\", ttype:CONST},\n{input:\":.\", tag:\"mo\", output:\"\\u2234\", tex:\"therefore\", ttype:CONST},\n{input:\"/_\", tag:\"mo\", output:\"\\u2220\", tex:\"angle\", ttype:CONST},\n{input:\"\\\\ \", tag:\"mo\", output:\"\\u00A0\", tex:null, ttype:CONST},\n{input:\"quad\", tag:\"mo\", output:\"\\u00A0\\u00A0\", tex:null, ttype:CONST},\n{input:\"qquad\", tag:\"mo\", output:\"\\u00A0\\u00A0\\u00A0\\u00A0\", tex:null, ttype:CONST},\n{input:\"cdots\", tag:\"mo\", output:\"\\u22EF\", tex:null, ttype:CONST},\n{input:\"vdots\", tag:\"mo\", output:\"\\u22EE\", tex:null, ttype:CONST},\n{input:\"ddots\", tag:\"mo\", output:\"\\u22F1\", tex:null, ttype:CONST},\n{input:\"diamond\", tag:\"mo\", output:\"\\u22C4\", tex:null, ttype:CONST},\n{input:\"square\", tag:\"mo\", output:\"\\u25A1\", tex:null, ttype:CONST},\n{input:\"|__\", tag:\"mo\", output:\"\\u230A\", tex:\"lfloor\", ttype:CONST},\n{input:\"__|\", tag:\"mo\", output:\"\\u230B\", tex:\"rfloor\", ttype:CONST},\n{input:\"|~\", tag:\"mo\", output:\"\\u2308\", tex:\"lceiling\", ttype:CONST},\n{input:\"~|\", tag:\"mo\", output:\"\\u2309\", tex:\"rceiling\", ttype:CONST},\n{input:\"CC\", tag:\"mo\", output:\"\\u2102\", tex:null, ttype:CONST},\n{input:\"NN\", tag:\"mo\", output:\"\\u2115\", tex:null, ttype:CONST},\n{input:\"QQ\", tag:\"mo\", output:\"\\u211A\", tex:null, ttype:CONST},\n{input:\"RR\", tag:\"mo\", output:\"\\u211D\", tex:null, ttype:CONST},\n{input:\"ZZ\", tag:\"mo\", output:\"\\u2124\", tex:null, ttype:CONST},\n{input:\"f\", tag:\"mi\", output:\"f\", tex:null, ttype:UNARY, func:true},\n{input:\"g\", tag:\"mi\", output:\"g\", tex:null, ttype:UNARY, func:true},\n\n//standard functions\n{input:\"lim\", tag:\"mo\", output:\"lim\", tex:null, ttype:UNDEROVER},\n{input:\"Lim\", tag:\"mo\", output:\"Lim\", tex:null, ttype:UNDEROVER},\n{input:\"sin\", tag:\"mo\", output:\"sin\", tex:null, ttype:UNARY, func:true},\n{input:\"cos\", tag:\"mo\", output:\"cos\", tex:null, ttype:UNARY, func:true},\n{input:\"tan\", tag:\"mo\", output:\"tan\", tex:null, ttype:UNARY, func:true},\n{input:\"sinh\", tag:\"mo\", output:\"sinh\", tex:null, ttype:UNARY, func:true},\n{input:\"cosh\", tag:\"mo\", output:\"cosh\", tex:null, ttype:UNARY, func:true},\n{input:\"tanh\", tag:\"mo\", output:\"tanh\", tex:null, ttype:UNARY, func:true},\n{input:\"cot\", tag:\"mo\", output:\"cot\", tex:null, ttype:UNARY, func:true},\n{input:\"sec\", tag:\"mo\", output:\"sec\", tex:null, ttype:UNARY, func:true},\n{input:\"csc\", tag:\"mo\", output:\"csc\", tex:null, ttype:UNARY, func:true},\n{input:\"log\", tag:\"mo\", output:\"log\", tex:null, ttype:UNARY, func:true},\n{input:\"ln\", tag:\"mo\", output:\"ln\", tex:null, ttype:UNARY, func:true},\n{input:\"det\", tag:\"mo\", output:\"det\", tex:null, ttype:UNARY, func:true},\n{input:\"dim\", tag:\"mo\", output:\"dim\", tex:null, ttype:CONST},\n{input:\"mod\", tag:\"mo\", output:\"mod\", tex:null, ttype:CONST},\n{input:\"gcd\", tag:\"mo\", output:\"gcd\", tex:null, ttype:UNARY, func:true},\n{input:\"lcm\", tag:\"mo\", output:\"lcm\", tex:null, ttype:UNARY, func:true},\n{input:\"lub\", tag:\"mo\", output:\"lub\", tex:null, ttype:CONST},\n{input:\"glb\", tag:\"mo\", output:\"glb\", tex:null, ttype:CONST},\n{input:\"min\", tag:\"mo\", output:\"min\", tex:null, ttype:UNDEROVER},\n{input:\"max\", tag:\"mo\", output:\"max\", tex:null, ttype:UNDEROVER},\n\n//arrows\n{input:\"uarr\", tag:\"mo\", output:\"\\u2191\", tex:\"uparrow\", ttype:CONST},\n{input:\"darr\", tag:\"mo\", output:\"\\u2193\", tex:\"downarrow\", ttype:CONST},\n{input:\"rarr\", tag:\"mo\", output:\"\\u2192\", tex:\"rightarrow\", ttype:CONST},\n{input:\"->\", tag:\"mo\", output:\"\\u2192\", tex:\"to\", ttype:CONST},\n{input:\"|->\", tag:\"mo\", output:\"\\u21A6\", tex:\"mapsto\", ttype:CONST},\n{input:\"larr\", tag:\"mo\", output:\"\\u2190\", tex:\"leftarrow\", ttype:CONST},\n{input:\"harr\", tag:\"mo\", output:\"\\u2194\", tex:\"leftrightarrow\", ttype:CONST},\n{input:\"rArr\", tag:\"mo\", output:\"\\u21D2\", tex:\"Rightarrow\", ttype:CONST},\n{input:\"lArr\", tag:\"mo\", output:\"\\u21D0\", tex:\"Leftarrow\", ttype:CONST},\n{input:\"hArr\", tag:\"mo\", output:\"\\u21D4\", tex:\"Leftrightarrow\", ttype:CONST},\n\n//commands with argument\nAMsqrt, AMroot, AMfrac, AMdiv, AMover, AMsub, AMsup,\n{input:\"hat\", tag:\"mover\", output:\"\\u005E\", tex:null, ttype:UNARY, acc:true},\n{input:\"bar\", tag:\"mover\", output:\"\\u00AF\", tex:\"overline\", ttype:UNARY, acc:true},\n{input:\"vec\", tag:\"mover\", output:\"\\u2192\", tex:null, ttype:UNARY, acc:true},\n{input:\"dot\", tag:\"mover\", output:\".\", tex:null, ttype:UNARY, acc:true},\n{input:\"ddot\", tag:\"mover\", output:\"..\", tex:null, ttype:UNARY, acc:true},\n{input:\"ul\", tag:\"munder\", output:\"\\u0332\", tex:\"underline\", ttype:UNARY, acc:true},\nAMtext, AMmbox, AMquote,\n{input:\"bb\", tag:\"mstyle\", atname:\"fontweight\", atval:\"bold\", output:\"bb\", tex:null, ttype:UNARY},\n{input:\"mathbf\", tag:\"mstyle\", atname:\"fontweight\", atval:\"bold\", output:\"mathbf\", tex:null, ttype:UNARY},\n{input:\"sf\", tag:\"mstyle\", atname:\"fontfamily\", atval:\"sans-serif\", output:\"sf\", tex:null, ttype:UNARY},\n{input:\"mathsf\", tag:\"mstyle\", atname:\"fontfamily\", atval:\"sans-serif\", output:\"mathsf\", tex:null, ttype:UNARY},\n{input:\"bbb\", tag:\"mstyle\", atname:\"mathvariant\", atval:\"double-struck\", output:\"bbb\", tex:null, ttype:UNARY, codes:AMbbb},\n{input:\"mathbb\", tag:\"mstyle\", atname:\"mathvariant\", atval:\"double-struck\", output:\"mathbb\", tex:null, ttype:UNARY, codes:AMbbb},\n{input:\"cc\", tag:\"mstyle\", atname:\"mathvariant\", atval:\"script\", output:\"cc\", tex:null, ttype:UNARY, codes:AMcal},\n{input:\"mathcal\", tag:\"mstyle\", atname:\"mathvariant\", atval:\"script\", output:\"mathcal\", tex:null, ttype:UNARY, codes:AMcal},\n{input:\"tt\", tag:\"mstyle\", atname:\"fontfamily\", atval:\"monospace\", output:\"tt\", tex:null, ttype:UNARY},\n{input:\"mathtt\", tag:\"mstyle\", atname:\"fontfamily\", atval:\"monospace\", output:\"mathtt\", tex:null, ttype:UNARY},\n{input:\"fr\", tag:\"mstyle\", atname:\"mathvariant\", atval:\"fraktur\", output:\"fr\", tex:null, ttype:UNARY, codes:AMfrk},\n{input:\"mathfrak\", tag:\"mstyle\", atname:\"mathvariant\", atval:\"fraktur\", output:\"mathfrak\", tex:null, ttype:UNARY, codes:AMfrk}\n];\n\nfunction compareNames(s1,s2) {\n if (s1.input > s2.input) return 1\n else return -1;\n}\n\nvar AMnames = []; //list of input symbols\n\nfunction AMinitSymbols() {\n var texsymbols = [], i;\n for (i=0; i<AMsymbols.length; i++)\n if (AMsymbols[i].tex) \n texsymbols[texsymbols.length] = {input:AMsymbols[i].tex, \n tag:AMsymbols[i].tag, output:AMsymbols[i].output, ttype:AMsymbols[i].ttype};\n AMsymbols = AMsymbols.concat(texsymbols);\n AMsymbols.sort(compareNames);\n for (i=0; i<AMsymbols.length; i++) AMnames[i] = AMsymbols[i].input;\n}\n\nvar AMmathml = \"http://www.w3.org/1998/Math/MathML\";\n\nfunction AMcreateElementMathML(t) {\n if (isIE) return document.createElement(\"m:\"+t);\n else return document.createElementNS(AMmathml,t);\n}\n\nfunction AMcreateMmlNode(t,frag) {\n// var node = AMcreateElementMathML(name);\n if (isIE) var node = document.createElement(\"m:\"+t);\n else var node = document.createElementNS(AMmathml,t);\n node.appendChild(frag);\n return node;\n}\n\nfunction newcommand(oldstr,newstr) {\n AMsymbols = AMsymbols.concat([{input:oldstr, tag:\"mo\", output:newstr, \n tex:null, ttype:DEFINITION}]);\n}\n\nfunction AMremoveCharsAndBlanks(str,n) {\n//remove n characters and any following blanks\n var st;\n if (str.charAt(n)==\"\\\\\" && str.charAt(n+1)!=\"\\\\\" && str.charAt(n+1)!=\" \") \n st = str.slice(n+1);\n else st = str.slice(n);\n for (var i=0; i<st.length && st.charCodeAt(i)<=32; i=i+1);\n return st.slice(i);\n}\n\nfunction AMposition(arr, str, n) { \n// return position >=n where str appears or would be inserted\n// assumes arr is sorted\n if (n==0) {\n var h,m;\n n = -1;\n h = arr.length;\n while (n+1<h) {\n m = (n+h) >> 1;\n if (arr[m]<str) n = m; else h = m;\n }\n return h;\n } else\n for (var i=n; i<arr.length && arr[i]<str; i++);\n return i; // i=arr.length || arr[i]>=str\n}\n\nfunction AMgetSymbol(str) {\n//return maximal initial substring of str that appears in names\n//return null if there is none\n var k = 0; //new pos\n var j = 0; //old pos\n var mk; //match pos\n var st;\n var tagst;\n var match = \"\";\n var more = true;\n for (var i=1; i<=str.length && more; i++) {\n st = str.slice(0,i); //initial substring of length i\n j = k;\n k = AMposition(AMnames, st, j);\n if (k<AMnames.length && str.slice(0,AMnames[k].length)==AMnames[k]){\n match = AMnames[k];\n mk = k;\n i = match.length;\n }\n more = k<AMnames.length && str.slice(0,AMnames[k].length)>=AMnames[k];\n }\n AMpreviousSymbol=AMcurrentSymbol;\n if (match!=\"\"){\n AMcurrentSymbol=AMsymbols[mk].ttype;\n return AMsymbols[mk]; \n }\n// if str[0] is a digit or - return maxsubstring of digits.digits\n AMcurrentSymbol=CONST;\n k = 1;\n st = str.slice(0,1);\n var integ = true;\n while (\"0\"<=st && st<=\"9\" && k<=str.length) {\n st = str.slice(k,k+1);\n k++;\n }\n if (st == decimalsign) {\n st = str.slice(k,k+1);\n if (\"0\"<=st && st<=\"9\") {\n integ = false;\n k++;\n while (\"0\"<=st && st<=\"9\" && k<=str.length) {\n st = str.slice(k,k+1);\n k++;\n }\n }\n }\n if ((integ && k>1) || k>2) {\n st = str.slice(0,k-1);\n tagst = \"mn\";\n } else {\n k = 2;\n st = str.slice(0,1); //take 1 character\n tagst = ((\"A\">st || st>\"Z\") && (\"a\">st || st>\"z\")?\"mo\":\"mi\");\n }\n if (st==\"-\" && AMpreviousSymbol==INFIX) {\n AMcurrentSymbol = INFIX; //trick \"/\" into recognizing \"-\" on second parse\n return {input:st, tag:tagst, output:st, ttype:UNARY, func:true};\n }\n return {input:st, tag:tagst, output:st, ttype:CONST};\n}\n\nfunction AMremoveBrackets(node) {\n var st;\n if (node.nodeName==\"mrow\") {\n st = node.firstChild.firstChild.nodeValue;\n if (st==\"(\" || st==\"[\" || st==\"{\") node.removeChild(node.firstChild);\n }\n if (node.nodeName==\"mrow\") {\n st = node.lastChild.firstChild.nodeValue;\n if (st==\")\" || st==\"]\" || st==\"}\") node.removeChild(node.lastChild);\n }\n}\n\n/* ASCIIMathML.js - copyright Peter Jipsen, released under the GPL\nSee http://www1.chapman.edu/~jipsen/mathml/asciimath.html/ */\n\nvar AMnestingDepth,AMpreviousSymbol,AMcurrentSymbol;\n\nfunction AMparseSexpr(str) { //parses str and returns [node,tailstr]\n var symbol, node, result, i, st,// rightvert = false,\n newFrag = document.createDocumentFragment();\n str = AMremoveCharsAndBlanks(str,0);\n symbol = AMgetSymbol(str); //either a token or a bracket or empty\n if (symbol == null || symbol.ttype == RIGHTBRACKET && AMnestingDepth > 0) {\n return [null,str];\n }\n if (symbol.ttype == DEFINITION) {\n str = symbol.output+AMremoveCharsAndBlanks(str,symbol.input.length); \n symbol = AMgetSymbol(str);\n }\n switch (symbol.ttype) {\n case UNDEROVER:\n case CONST:\n str = AMremoveCharsAndBlanks(str,symbol.input.length); \n return [AMcreateMmlNode(symbol.tag, //its a constant\n document.createTextNode(symbol.output)),str];\n case LEFTBRACKET: //read (expr+)\n AMnestingDepth++;\n str = AMremoveCharsAndBlanks(str,symbol.input.length); \n result = AMparseExpr(str,true);\n AMnestingDepth--;\n if (typeof symbol.invisible == \"boolean\" && symbol.invisible) \n node = AMcreateMmlNode(\"mrow\",result[0]);\n else {\n node = AMcreateMmlNode(\"mo\",document.createTextNode(symbol.output));\n node = AMcreateMmlNode(\"mrow\",node);\n node.appendChild(result[0]);\n }\n return [node,result[1]];\n case TEXT:\n if (symbol!=AMquote) str = AMremoveCharsAndBlanks(str,symbol.input.length);\n if (str.charAt(0)==\"{\") i=str.indexOf(\"}\");\n else if (str.charAt(0)==\"(\") i=str.indexOf(\")\");\n else if (str.charAt(0)==\"[\") i=str.indexOf(\"]\");\n else if (symbol==AMquote) i=str.slice(1).indexOf(\"\\\"\")+1;\n else i = 0;\n if (i==-1) i = str.length;\n st = str.slice(1,i);\n if (st.charAt(0) == \" \") {\n node = AMcreateElementMathML(\"mspace\");\n node.setAttribute(\"width\",\"1ex\");\n newFrag.appendChild(node);\n }\n newFrag.appendChild(\n AMcreateMmlNode(symbol.tag,document.createTextNode(st)));\n if (st.charAt(st.length-1) == \" \") {\n node = AMcreateElementMathML(\"mspace\");\n node.setAttribute(\"width\",\"1ex\");\n newFrag.appendChild(node);\n }\n str = AMremoveCharsAndBlanks(str,i+1);\n return [AMcreateMmlNode(\"mrow\",newFrag),str];\n case UNARY:\n str = AMremoveCharsAndBlanks(str,symbol.input.length); \n result = AMparseSexpr(str);\n if (result[0]==null) return [AMcreateMmlNode(symbol.tag,\n document.createTextNode(symbol.output)),str];\n if (typeof symbol.func == \"boolean\" && symbol.func) { // functions hack\n st = str.charAt(0);\n if (st==\"^\" || st==\"_\" || st==\"/\" || st==\"|\" || st==\",\") {\n return [AMcreateMmlNode(symbol.tag,\n document.createTextNode(symbol.output)),str];\n } else {\n node = AMcreateMmlNode(\"mrow\",\n AMcreateMmlNode(symbol.tag,document.createTextNode(symbol.output)));\n node.appendChild(result[0]);\n return [node,result[1]];\n }\n }\n AMremoveBrackets(result[0]);\n if (symbol.input == \"sqrt\") { // sqrt\n return [AMcreateMmlNode(symbol.tag,result[0]),result[1]];\n } else if (typeof symbol.acc == \"boolean\" && symbol.acc) { // accent\n node = AMcreateMmlNode(symbol.tag,result[0]);\n node.appendChild(AMcreateMmlNode(\"mo\",document.createTextNode(symbol.output)));\n return [node,result[1]];\n } else { // font change command\n if (!isIE && typeof symbol.codes != \"undefined\") {\n for (i=0; i<result[0].childNodes.length; i++)\n if (result[0].childNodes[i].nodeName==\"mi\" || result[0].nodeName==\"mi\") {\n st = (result[0].nodeName==\"mi\"?result[0].firstChild.nodeValue:\n result[0].childNodes[i].firstChild.nodeValue);\n var newst = [];\n for (var j=0; j<st.length; j++)\n if (st.charCodeAt(j)>64 && st.charCodeAt(j)<91) newst = newst +\n String.fromCharCode(symbol.codes[st.charCodeAt(j)-65]);\n else newst = newst + st.charAt(j);\n if (result[0].nodeName==\"mi\")\n result[0]=AMcreateElementMathML(\"mo\").\n appendChild(document.createTextNode(newst));\n else result[0].replaceChild(AMcreateElementMathML(\"mo\").\n appendChild(document.createTextNode(newst)),result[0].childNodes[i]);\n }\n }\n node = AMcreateMmlNode(symbol.tag,result[0]);\n node.setAttribute(symbol.atname,symbol.atval);\n return [node,result[1]];\n }\n case BINARY:\n str = AMremoveCharsAndBlanks(str,symbol.input.length); \n result = AMparseSexpr(str);\n if (result[0]==null) return [AMcreateMmlNode(\"mo\",\n document.createTextNode(symbol.input)),str];\n AMremoveBrackets(result[0]);\n var result2 = AMparseSexpr(result[1]);\n if (result2[0]==null) return [AMcreateMmlNode(\"mo\",\n document.createTextNode(symbol.input)),str];\n AMremoveBrackets(result2[0]);\n if (symbol.input==\"root\" || symbol.input==\"stackrel\") \n newFrag.appendChild(result2[0]);\n newFrag.appendChild(result[0]);\n if (symbol.input==\"frac\") newFrag.appendChild(result2[0]);\n return [AMcreateMmlNode(symbol.tag,newFrag),result2[1]];\n case INFIX:\n str = AMremoveCharsAndBlanks(str,symbol.input.length); \n return [AMcreateMmlNode(\"mo\",document.createTextNode(symbol.output)),str];\n case SPACE:\n str = AMremoveCharsAndBlanks(str,symbol.input.length); \n node = AMcreateElementMathML(\"mspace\");\n node.setAttribute(\"width\",\"1ex\");\n newFrag.appendChild(node);\n newFrag.appendChild(\n AMcreateMmlNode(symbol.tag,document.createTextNode(symbol.output)));\n node = AMcreateElementMathML(\"mspace\");\n node.setAttribute(\"width\",\"1ex\");\n newFrag.appendChild(node);\n return [AMcreateMmlNode(\"mrow\",newFrag),str];\n case LEFTRIGHT:\n// if (rightvert) return [null,str]; else rightvert = true;\n AMnestingDepth++;\n str = AMremoveCharsAndBlanks(str,symbol.input.length); \n result = AMparseExpr(str,false);\n AMnestingDepth--;\n var st = \"\";\n if (result[0].lastChild!=null)\n st = result[0].lastChild.firstChild.nodeValue;\n if (st == \"|\") { // its an absolute value subterm\n node = AMcreateMmlNode(\"mo\",document.createTextNode(symbol.output));\n node = AMcreateMmlNode(\"mrow\",node);\n node.appendChild(result[0]);\n return [node,result[1]];\n } else { // the \"|\" is a \\mid\n node = AMcreateMmlNode(\"mo\",document.createTextNode(symbol.output));\n node = AMcreateMmlNode(\"mrow\",node);\n return [node,str];\n }\n default:\n//alert(\"default\");\n str = AMremoveCharsAndBlanks(str,symbol.input.length); \n return [AMcreateMmlNode(symbol.tag, //its a constant\n document.createTextNode(symbol.output)),str];\n }\n}\n\nfunction AMparseIexpr(str) {\n var symbol, sym1, sym2, node, result, underover;\n str = AMremoveCharsAndBlanks(str,0);\n sym1 = AMgetSymbol(str);\n result = AMparseSexpr(str);\n node = result[0];\n str = result[1];\n symbol = AMgetSymbol(str);\n if (symbol.ttype == INFIX && symbol.input != \"/\") {\n str = AMremoveCharsAndBlanks(str,symbol.input.length);\n// if (symbol.input == \"/\") result = AMparseIexpr(str); else ...\n result = AMparseSexpr(str);\n if (result[0] == null) // show box in place of missing argument\n result[0] = AMcreateMmlNode(\"mo\",document.createTextNode(\"\\u25A1\"));\n else AMremoveBrackets(result[0]);\n str = result[1];\n// if (symbol.input == \"/\") AMremoveBrackets(node);\n if (symbol.input == \"_\") {\n sym2 = AMgetSymbol(str);\n underover = (sym1.ttype == UNDEROVER);\n if (sym2.input == \"^\") {\n str = AMremoveCharsAndBlanks(str,sym2.input.length);\n var res2 = AMparseSexpr(str);\n AMremoveBrackets(res2[0]);\n str = res2[1];\n node = AMcreateMmlNode((underover?\"munderover\":\"msubsup\"),node);\n node.appendChild(result[0]);\n node.appendChild(res2[0]);\n node = AMcreateMmlNode(\"mrow\",node); // so sum does not stretch\n } else {\n node = AMcreateMmlNode((underover?\"munder\":\"msub\"),node);\n node.appendChild(result[0]);\n }\n } else {\n node = AMcreateMmlNode(symbol.tag,node);\n node.appendChild(result[0]);\n }\n }\n return [node,str];\n}\n\nfunction AMparseExpr(str,rightbracket) {\n var symbol, node, result, i, nodeList = [],\n newFrag = document.createDocumentFragment();\n do {\n str = AMremoveCharsAndBlanks(str,0);\n result = AMparseIexpr(str);\n node = result[0];\n str = result[1];\n symbol = AMgetSymbol(str);\n if (symbol.ttype == INFIX && symbol.input == \"/\") {\n str = AMremoveCharsAndBlanks(str,symbol.input.length);\n result = AMparseIexpr(str);\n if (result[0] == null) // show box in place of missing argument\n result[0] = AMcreateMmlNode(\"mo\",document.createTextNode(\"\\u25A1\"));\n else AMremoveBrackets(result[0]);\n str = result[1];\n AMremoveBrackets(node);\n node = AMcreateMmlNode(symbol.tag,node);\n node.appendChild(result[0]);\n newFrag.appendChild(node);\n symbol = AMgetSymbol(str);\n } \n else if (node!=undefined) newFrag.appendChild(node);\n } while ((symbol.ttype != RIGHTBRACKET && \n (symbol.ttype != LEFTRIGHT || rightbracket)\n || AMnestingDepth == 0) && symbol!=null && symbol.output!=\"\");\n if (symbol.ttype == RIGHTBRACKET || symbol.ttype == LEFTRIGHT) {\n// if (AMnestingDepth > 0) AMnestingDepth--;\n var len = newFrag.childNodes.length;\n if (len>0 && newFrag.childNodes[len-1].nodeName == \"mrow\" && len>1 &&\n newFrag.childNodes[len-2].nodeName == \"mo\" &&\n newFrag.childNodes[len-2].firstChild.nodeValue == \",\") { //matrix\n var right = newFrag.childNodes[len-1].lastChild.firstChild.nodeValue;\n if (right==\")\" || right==\"]\") {\n var left = newFrag.childNodes[len-1].firstChild.firstChild.nodeValue;\n if (left==\"(\" && right==\")\" && symbol.output != \"}\" || \n left==\"[\" && right==\"]\") {\n var pos = []; // positions of commas\n var matrix = true;\n var m = newFrag.childNodes.length;\n for (i=0; matrix && i<m; i=i+2) {\n pos[i] = [];\n node = newFrag.childNodes[i];\n if (matrix) matrix = node.nodeName==\"mrow\" && \n (i==m-1 || node.nextSibling.nodeName==\"mo\" && \n node.nextSibling.firstChild.nodeValue==\",\")&&\n node.firstChild.firstChild.nodeValue==left &&\n node.lastChild.firstChild.nodeValue==right;\n if (matrix) \n for (var j=0; j<node.childNodes.length; j++)\n if (node.childNodes[j].firstChild.nodeValue==\",\")\n pos[i][pos[i].length]=j;\n if (matrix && i>1) matrix = pos[i].length == pos[i-2].length;\n }\n if (matrix) {\n var row, frag, n, k, table = document.createDocumentFragment();\n for (i=0; i<m; i=i+2) {\n row = document.createDocumentFragment();\n frag = document.createDocumentFragment();\n node = newFrag.firstChild; // <mrow>(-,-,...,-,-)</mrow>\n n = node.childNodes.length;\n k = 0;\n node.removeChild(node.firstChild); //remove (\n for (j=1; j<n-1; j++) {\n if (typeof pos[i][k] != \"undefined\" && j==pos[i][k]){\n node.removeChild(node.firstChild); //remove ,\n row.appendChild(AMcreateMmlNode(\"mtd\",frag));\n k++;\n } else frag.appendChild(node.firstChild);\n }\n row.appendChild(AMcreateMmlNode(\"mtd\",frag));\n if (newFrag.childNodes.length>2) {\n newFrag.removeChild(newFrag.firstChild); //remove <mrow>)</mrow>\n newFrag.removeChild(newFrag.firstChild); //remove <mo>,</mo>\n }\n table.appendChild(AMcreateMmlNode(\"mtr\",row));\n }\n node = AMcreateMmlNode(\"mtable\",table);\n if (typeof symbol.invisible == \"boolean\" && symbol.invisible) node.setAttribute(\"columnalign\",\"left\");\n newFrag.replaceChild(node,newFrag.firstChild);\n }\n }\n }\n }\n str = AMremoveCharsAndBlanks(str,symbol.input.length);\n if (typeof symbol.invisible != \"boolean\" || !symbol.invisible) {\n node = AMcreateMmlNode(\"mo\",document.createTextNode(symbol.output));\n newFrag.appendChild(node);\n }\n }\n return [newFrag,str];\n}\n\nfunction AMparseMath(str) {\n var result, node = AMcreateElementMathML(\"mstyle\");\n if (mathcolor != \"\") node.setAttribute(\"mathcolor\",mathcolor);\n if (displaystyle) node.setAttribute(\"displaystyle\",\"true\");\n if (mathfontfamily != \"\") node.setAttribute(\"fontfamily\",mathfontfamily);\n AMnestingDepth = 0;\n node.appendChild(AMparseExpr(str.replace(/^\\s+/g,\"\"),false)[0]);\n node = AMcreateMmlNode(\"math\",node);\n if (showasciiformulaonhover) //fixed by djhsu so newline\n node.setAttribute(\"title\",str.replace(/\\s+/g,\" \"));//does not show in Gecko\n if (mathfontfamily != \"\" && (isIE || mathfontfamily != \"serif\")) {\n var fnode = AMcreateElementXHTML(\"font\");\n fnode.setAttribute(\"face\",mathfontfamily);\n fnode.appendChild(node);\n return fnode;\n }\n return node;\n}\n\nfunction AMstrarr2docFrag(arr, linebreaks) {\n var newFrag=document.createDocumentFragment();\n var expr = false;\n for (var i=0; i<arr.length; i++) {\n if (expr) newFrag.appendChild(AMparseMath(arr[i]));\n else {\n var arri = (linebreaks ? arr[i].split(\"\\n\\n\") : [arr[i]]);\n newFrag.appendChild(AMcreateElementXHTML(\"span\").\n appendChild(document.createTextNode(arri[0])));\n for (var j=1; j<arri.length; j++) {\n newFrag.appendChild(AMcreateElementXHTML(\"p\"));\n newFrag.appendChild(AMcreateElementXHTML(\"span\").\n appendChild(document.createTextNode(arri[j])));\n }\n }\n expr = !expr;\n }\n return newFrag;\n}\n\nfunction AMprocessNodeR(n, linebreaks) {\n var mtch, str, arr, frg, i;\n if (n.childNodes.length == 0) {\n if ((n.nodeType!=8 || linebreaks) &&\n n.parentNode.nodeName!=\"form\" && n.parentNode.nodeName!=\"FORM\" &&\n n.parentNode.nodeName!=\"textarea\" && n.parentNode.nodeName!=\"TEXTAREA\" &&\n n.parentNode.nodeName!=\"code\" && n.parentNode.nodeName!=\"CODE\" && /* ASCIIMathML.js - copyright Peter Jipsen, released under the GPL\nSee http://www1.chapman.edu/~jipsen/mathml/asciimath.html/ */\n n.parentNode.nodeName!=\"pre\" && n.parentNode.nodeName!=\"PRE\") {\n str = n.nodeValue;\n if (!(str == null)) {\n str = str.replace(/\\r\\n\\r\\n/g,\"\\n\\n\");\n if (doubleblankmathdelimiter) {\n str = str.replace(/\\x20\\x20\\./g,\" \"+AMdelimiter1+\".\");\n str = str.replace(/\\x20\\x20,/g,\" \"+AMdelimiter1+\",\");\n str = str.replace(/\\x20\\x20/g,\" \"+AMdelimiter1+\" \");\n }\n str = str.replace(/\\x20+/g,\" \");\n str = str.replace(/\\s*\\r\\n/g,\" \");\n mtch = false;\n str = str.replace(new RegExp(AMescape2, \"g\"),\n function(st){mtch=true;return \"AMescape2\"});\n str = str.replace(new RegExp(AMescape1, \"g\"),\n function(st){mtch=true;return \"AMescape1\"});\n str = str.replace(new RegExp(AMdelimiter2regexp, \"g\"),AMdelimiter1);\n arr = str.split(AMdelimiter1);\n for (i=0; i<arr.length; i++)\n arr[i]=arr[i].replace(/AMescape2/g,AMdelimiter2).\n replace(/AMescape1/g,AMdelimiter1);\n if (arr.length>1 || mtch) {\n if (checkForMathML) {\n checkForMathML = false;\n var nd = AMisMathMLavailable();\n AMnoMathML = nd != null;\n if (AMnoMathML && notifyIfNoMathML) \n if (alertIfNoMathML)\n alert(\"To view the ASCIIMathML notation use Internet Explorer 6 +\\nMathPlayer (free from www.dessci.com)\\n\\\n or Firefox/Mozilla/Netscape\");\n else AMbody.insertBefore(nd,AMbody.childNodes[0]);\n }\n if (!AMnoMathML) {\n frg = AMstrarr2docFrag(arr,n.nodeType==8);\n var len = frg.childNodes.length;\n n.parentNode.replaceChild(frg,n);\n return len-1;\n } else return 0;\n }\n }\n } else return 0;\n } else if (n.nodeName!=\"math\") {\n for (i=0; i<n.childNodes.length; i++)\n i += AMprocessNodeR(n.childNodes[i], linebreaks);\n }\n return 0;\n}\n\nfunction AMprocessNode(n, linebreaks, spanclassAM) {\n var frag,st;\n if (spanclassAM!=null) {\n frag = document.getElementsByTagName(\"span\")\n for (var i=0;i<frag.length;i++)\n if (frag[i].className == \"AM\")\n AMprocessNodeR(frag[i],linebreaks);\n } else {\n try {\n st = n.innerHTML;\n } catch(err) {}\n if (st==null || \n st.indexOf(AMdelimiter1)!=-1 || st.indexOf(AMdelimiter2)!=-1) \n AMprocessNodeR(n,linebreaks);\n }\n if (isIE) { //needed to match size and font of formula to surrounding text\n frag = document.getElementsByTagName('math');\n for (var i=0;i<frag.length;i++) frag[i].update()\n }\n}\n\nvar AMbody;\nvar AMnoMathML = false, AMtranslated = false;\n\nfunction translate(spanclassAM) {\n if (!AMtranslated) { // run this only once\n AMtranslated = true;\n AMinitSymbols();\n AMbody = document.getElementsByTagName(\"body\")[0];\n AMprocessNode(AMbody, false, spanclassAM);\n }\n}\n\nif (isIE) { // avoid adding MathPlayer info explicitly to each webpage\n document.write(\"<object id=\\\"mathplayer\\\"\\\n classid=\\\"clsid:32F66A20-7614-11D4-BD11-00104BD3F987\\\"></object>\");\n document.write(\"<?import namespace=\\\"m\\\" implementation=\\\"#mathplayer\\\"?>\");\n}\n\n// GO1.1 Generic onload by Brothercake \n// http://www.brothercake.com/\n//onload function (replaces the onload=\"translate()\" in the <body> tag)\nfunction generic()\n{\n translate();\n};\n//setup onload function\nif(typeof window.addEventListener != 'undefined')\n{\n //.. gecko, safari, konqueror and standard\n window.addEventListener('load', generic, false);\n}\nelse if(typeof document.addEventListener != 'undefined')\n{\n //.. opera 7\n document.addEventListener('load', generic, false);\n}\nelse if(typeof window.attachEvent != 'undefined')\n{\n //.. win/ie\n window.attachEvent('onload', generic);\n}\n//** remove this condition to degrade older browsers\nelse\n{\n //.. mac/ie5 and anything else that gets this far\n //if there's an existing onload function\n if(typeof window.onload == 'function')\n {\n //store it\n var existing = onload;\n //add new onload handler\n window.onload = function()\n {\n //call existing onload function\n existing();\n //call generic onload function\n generic();\n };\n }\n else\n {\n //setup onload function\n window.onload = generic;\n }\n}\n</script>\n"
+
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
new file mode 100644
index 000000000..08ff3905e
--- /dev/null
+++ b/src/Text/Pandoc/Definition.hs
@@ -0,0 +1,50 @@
+-- | Definition of 'Pandoc' data structure for format-neutral representation
+-- of documents.
+module Text.Pandoc.Definition where
+
+data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show)
+
+-- | Bibliographic information for the document: title (list of 'Inline'),
+-- authors (list of strings), date (string).
+data Meta = Meta [Inline] -- title
+ [String] -- authors
+ String -- date
+ deriving (Eq, Show, Read)
+
+-- | Block element.
+data Block
+ = Plain [Inline] -- ^ Plain text, not a paragraph
+ | Blank -- ^ A blank line
+ | Null -- ^ Nothing
+ | Para [Inline] -- ^ Paragraph
+ | Key [Inline] Target -- ^ Reference key: name (list of inlines) and 'Target'
+ | CodeBlock String -- ^ Code block (literal)
+ | RawHtml String -- ^ Raw HTML block (literal)
+ | BlockQuote [Block] -- ^ Block quote (list of blocks)
+ | OrderedList [[Block]] -- ^ Ordered list (list of items, each a list of blocks)
+ | BulletList [[Block]] -- ^ Bullet list (list of items, each a list of blocks)
+ | Header Int [Inline] -- ^ Header - level (integer) and text (list of inlines)
+ | HorizontalRule -- ^ Horizontal rule
+ | Note String [Block] -- ^ Footnote or endnote - reference (string), text (list of blocks)
+ deriving (Eq, Read, Show)
+
+-- | Target for a link: either a URL or an indirect (labeled) reference.
+data Target
+ = Src String String -- ^ First string is URL, second is title
+ | Ref [Inline] -- ^ Label (list of inlines) for an indirect reference
+ deriving (Show, Eq, Read)
+
+-- | Inline elements.
+data Inline
+ = Str String -- ^ Text (string)
+ | Emph [Inline] -- ^ Emphasized text (list of inlines)
+ | Strong [Inline] -- ^ Strongly emphasized text (list of inlines)
+ | Code String -- ^ Inline code (literal)
+ | Space -- ^ Inter-word space
+ | LineBreak -- ^ Hard line break
+ | TeX String -- ^ LaTeX code (literal)
+ | HtmlInline String -- ^ HTML code (literal)
+ | Link [Inline] Target -- ^ Hyperlink: text (list of inlines) and target
+ | Image [Inline] Target -- ^ Image: alternative text (list of inlines) and target
+ | NoteRef String -- ^ Footnote or endnote reference
+ deriving (Show, Eq, Read)
diff --git a/src/Text/Pandoc/HtmlEntities.hs b/src/Text/Pandoc/HtmlEntities.hs
new file mode 100644
index 000000000..bbb438ef5
--- /dev/null
+++ b/src/Text/Pandoc/HtmlEntities.hs
@@ -0,0 +1,306 @@
+-- | Functions for encoding unicode characters as HTML entity
+-- references, and vice versa.
+module Text.Pandoc.HtmlEntities (
+ htmlEntityToChar,
+ charToHtmlEntity,
+ decodeEntities,
+ encodeEntities
+ ) where
+import Char ( chr, ord )
+import Text.Regex ( mkRegex, matchRegexAll )
+import Maybe ( fromMaybe )
+
+-- regexs for entities
+decimalCodedEntity = mkRegex "&#([0-9]+);"
+characterEntity = mkRegex "&#[0-9]+;|&[A-Za-z0-9]+;"
+
+-- | Return a string with all entity references decoded to unicode characters
+-- where possible.
+decodeEntities :: String -> String
+decodeEntities str =
+ case (matchRegexAll characterEntity str) of
+ Nothing -> str
+ Just (before, match, rest, _) -> before ++ replacement ++ (decodeEntities rest)
+ where replacement = case (htmlEntityToChar match) of
+ Just ch -> [ch]
+ Nothing -> match
+
+-- | Returns a string with characters replaced with entity references where possible.
+encodeEntities :: String -> String
+encodeEntities = concatMap (\c -> fromMaybe [c] (charToHtmlEntity c))
+
+-- | If the string is a valid entity reference, returns @Just@ the character,
+-- otherwise @Nothing@.
+htmlEntityToChar :: String -> Maybe Char
+htmlEntityToChar entity =
+ case (lookup entity htmlEntityTable) of
+ Just ch -> Just ch
+ Nothing -> case (matchRegexAll decimalCodedEntity entity) of
+ Just (_, _, _, [sub]) -> Just (chr (read sub))
+ Nothing -> Nothing
+
+-- | If there is an entity reference corresponding to the character, returns
+-- @Just@ the entity reference, otherwise @Nothing@.
+charToHtmlEntity :: Char -> Maybe String
+charToHtmlEntity char =
+ let matches = filter (\(entity, character) -> (character == char)) htmlEntityTable in
+ if (length matches) == 0 then
+ Nothing
+ else
+ Just (fst (head matches))
+
+htmlEntityTable :: [(String, Char)]
+htmlEntityTable = [
+ ("&quot;", chr 34),
+ ("&amp;", chr 38),
+ ("&lt;", chr 60),
+ ("&gt;", chr 62),
+ ("&nbsp;", chr 160),
+ ("&iexcl;", chr 161),
+ ("&cent;", chr 162),
+ ("&pound;", chr 163),
+ ("&curren;", chr 164),
+ ("&yen;", chr 165),
+ ("&brvbar;", chr 166),
+ ("&sect;", chr 167),
+ ("&uml;", chr 168),
+ ("&copy;", chr 169),
+ ("&ordf;", chr 170),
+ ("&laquo;", chr 171),
+ ("&not;", chr 172),
+ ("&shy;", chr 173),
+ ("&reg;", chr 174),
+ ("&macr;", chr 175),
+ ("&deg;", chr 176),
+ ("&plusmn;", chr 177),
+ ("&sup2;", chr 178),
+ ("&sup3;", chr 179),
+ ("&acute;", chr 180),
+ ("&micro;", chr 181),
+ ("&para;", chr 182),
+ ("&middot;", chr 183),
+ ("&cedil;", chr 184),
+ ("&sup1;", chr 185),
+ ("&ordm;", chr 186),
+ ("&raquo;", chr 187),
+ ("&frac14;", chr 188),
+ ("&frac12;", chr 189),
+ ("&frac34;", chr 190),
+ ("&iquest;", chr 191),
+ ("&Agrave;", chr 192),
+ ("&Aacute;", chr 193),
+ ("&Acirc;", chr 194),
+ ("&Atilde;", chr 195),
+ ("&Auml;", chr 196),
+ ("&Aring;", chr 197),
+ ("&AElig;", chr 198),
+ ("&Ccedil;", chr 199),
+ ("&Egrave;", chr 200),
+ ("&Eacute;", chr 201),
+ ("&Ecirc;", chr 202),
+ ("&Euml;", chr 203),
+ ("&Igrave;", chr 204),
+ ("&Iacute;", chr 205),
+ ("&Icirc;", chr 206),
+ ("&Iuml;", chr 207),
+ ("&ETH;", chr 208),
+ ("&Ntilde;", chr 209),
+ ("&Ograve;", chr 210),
+ ("&Oacute;", chr 211),
+ ("&Ocirc;", chr 212),
+ ("&Otilde;", chr 213),
+ ("&Ouml;", chr 214),
+ ("&times;", chr 215),
+ ("&Oslash;", chr 216),
+ ("&Ugrave;", chr 217),
+ ("&Uacute;", chr 218),
+ ("&Ucirc;", chr 219),
+ ("&Uuml;", chr 220),
+ ("&Yacute;", chr 221),
+ ("&THORN;", chr 222),
+ ("&szlig;", chr 223),
+ ("&agrave;", chr 224),
+ ("&aacute;", chr 225),
+ ("&acirc;", chr 226),
+ ("&atilde;", chr 227),
+ ("&auml;", chr 228),
+ ("&aring;", chr 229),
+ ("&aelig;", chr 230),
+ ("&ccedil;", chr 231),
+ ("&egrave;", chr 232),
+ ("&eacute;", chr 233),
+ ("&ecirc;", chr 234),
+ ("&euml;", chr 235),
+ ("&igrave;", chr 236),
+ ("&iacute;", chr 237),
+ ("&icirc;", chr 238),
+ ("&iuml;", chr 239),
+ ("&eth;", chr 240),
+ ("&ntilde;", chr 241),
+ ("&ograve;", chr 242),
+ ("&oacute;", chr 243),
+ ("&ocirc;", chr 244),
+ ("&otilde;", chr 245),
+ ("&ouml;", chr 246),
+ ("&divide;", chr 247),
+ ("&oslash;", chr 248),
+ ("&ugrave;", chr 249),
+ ("&uacute;", chr 250),
+ ("&ucirc;", chr 251),
+ ("&uuml;", chr 252),
+ ("&yacute;", chr 253),
+ ("&thorn;", chr 254),
+ ("&yuml;", chr 255),
+ ("&OElig;", chr 338),
+ ("&oelig;", chr 339),
+ ("&Scaron;", chr 352),
+ ("&scaron;", chr 353),
+ ("&Yuml;", chr 376),
+ ("&fnof;", chr 402),
+ ("&circ;", chr 710),
+ ("&tilde;", chr 732),
+ ("&Alpha;", chr 913),
+ ("&Beta;", chr 914),
+ ("&Gamma;", chr 915),
+ ("&Delta;", chr 916),
+ ("&Epsilon;", chr 917),
+ ("&Zeta;", chr 918),
+ ("&Eta;", chr 919),
+ ("&Theta;", chr 920),
+ ("&Iota;", chr 921),
+ ("&Kappa;", chr 922),
+ ("&Lambda;", chr 923),
+ ("&Mu;", chr 924),
+ ("&Nu;", chr 925),
+ ("&Xi;", chr 926),
+ ("&Omicron;", chr 927),
+ ("&Pi;", chr 928),
+ ("&Rho;", chr 929),
+ ("&Sigma;", chr 931),
+ ("&Tau;", chr 932),
+ ("&Upsilon;", chr 933),
+ ("&Phi;", chr 934),
+ ("&Chi;", chr 935),
+ ("&Psi;", chr 936),
+ ("&Omega;", chr 937),
+ ("&alpha;", chr 945),
+ ("&beta;", chr 946),
+ ("&gamma;", chr 947),
+ ("&delta;", chr 948),
+ ("&epsilon;", chr 949),
+ ("&zeta;", chr 950),
+ ("&eta;", chr 951),
+ ("&theta;", chr 952),
+ ("&iota;", chr 953),
+ ("&kappa;", chr 954),
+ ("&lambda;", chr 955),
+ ("&mu;", chr 956),
+ ("&nu;", chr 957),
+ ("&xi;", chr 958),
+ ("&omicron;", chr 959),
+ ("&pi;", chr 960),
+ ("&rho;", chr 961),
+ ("&sigmaf;", chr 962),
+ ("&sigma;", chr 963),
+ ("&tau;", chr 964),
+ ("&upsilon;", chr 965),
+ ("&phi;", chr 966),
+ ("&chi;", chr 967),
+ ("&psi;", chr 968),
+ ("&omega;", chr 969),
+ ("&thetasym;", chr 977),
+ ("&upsih;", chr 978),
+ ("&piv;", chr 982),
+ ("&ensp;", chr 8194),
+ ("&emsp;", chr 8195),
+ ("&thinsp;", chr 8201),
+ ("&zwnj;", chr 8204),
+ ("&zwj;", chr 8205),
+ ("&lrm;", chr 8206),
+ ("&rlm;", chr 8207),
+ ("&ndash;", chr 8211),
+ ("&mdash;", chr 8212),
+ ("&lsquo;", chr 8216),
+ ("&rsquo;", chr 8217),
+ ("&sbquo;", chr 8218),
+ ("&ldquo;", chr 8220),
+ ("&rdquo;", chr 8221),
+ ("&bdquo;", chr 8222),
+ ("&dagger;", chr 8224),
+ ("&Dagger;", chr 8225),
+ ("&bull;", chr 8226),
+ ("&hellip;", chr 8230),
+ ("&permil;", chr 8240),
+ ("&prime;", chr 8242),
+ ("&Prime;", chr 8243),
+ ("&lsaquo;", chr 8249),
+ ("&rsaquo;", chr 8250),
+ ("&oline;", chr 8254),
+ ("&frasl;", chr 8260),
+ ("&euro;", chr 8364),
+ ("&image;", chr 8465),
+ ("&weierp;", chr 8472),
+ ("&real;", chr 8476),
+ ("&trade;", chr 8482),
+ ("&alefsym;", chr 8501),
+ ("&larr;", chr 8592),
+ ("&uarr;", chr 8593),
+ ("&rarr;", chr 8594),
+ ("&darr;", chr 8595),
+ ("&harr;", chr 8596),
+ ("&crarr;", chr 8629),
+ ("&lArr;", chr 8656),
+ ("&uArr;", chr 8657),
+ ("&rArr;", chr 8658),
+ ("&dArr;", chr 8659),
+ ("&hArr;", chr 8660),
+ ("&forall;", chr 8704),
+ ("&part;", chr 8706),
+ ("&exist;", chr 8707),
+ ("&empty;", chr 8709),
+ ("&nabla;", chr 8711),
+ ("&isin;", chr 8712),
+ ("&notin;", chr 8713),
+ ("&ni;", chr 8715),
+ ("&prod;", chr 8719),
+ ("&sum;", chr 8721),
+ ("&minus;", chr 8722),
+ ("&lowast;", chr 8727),
+ ("&radic;", chr 8730),
+ ("&prop;", chr 8733),
+ ("&infin;", chr 8734),
+ ("&ang;", chr 8736),
+ ("&and;", chr 8743),
+ ("&or;", chr 8744),
+ ("&cap;", chr 8745),
+ ("&cup;", chr 8746),
+ ("&int;", chr 8747),
+ ("&there4;", chr 8756),
+ ("&sim;", chr 8764),
+ ("&cong;", chr 8773),
+ ("&asymp;", chr 8776),
+ ("&ne;", chr 8800),
+ ("&equiv;", chr 8801),
+ ("&le;", chr 8804),
+ ("&ge;", chr 8805),
+ ("&sub;", chr 8834),
+ ("&sup;", chr 8835),
+ ("&nsub;", chr 8836),
+ ("&sube;", chr 8838),
+ ("&supe;", chr 8839),
+ ("&oplus;", chr 8853),
+ ("&otimes;", chr 8855),
+ ("&perp;", chr 8869),
+ ("&sdot;", chr 8901),
+ ("&lceil;", chr 8968),
+ ("&rceil;", chr 8969),
+ ("&lfloor;", chr 8970),
+ ("&rfloor;", chr 8971),
+ ("&lang;", chr 9001),
+ ("&rang;", chr 9002),
+ ("&loz;", chr 9674),
+ ("&spades;", chr 9824),
+ ("&clubs;", chr 9827),
+ ("&hearts;", chr 9829),
+ ("&diams;", chr 9830)
+ ]
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
new file mode 100644
index 000000000..054d9eb72
--- /dev/null
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -0,0 +1,434 @@
+-- | Converts HTML to 'Pandoc' document.
+module Text.Pandoc.Readers.HTML (
+ readHtml,
+ rawHtmlInline,
+ rawHtmlBlock,
+ anyHtmlBlockTag,
+ anyHtmlInlineTag
+ ) where
+
+import Text.Regex ( matchRegex, mkRegex )
+import Text.ParserCombinators.Parsec
+import Text.ParserCombinators.Pandoc
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.HtmlEntities ( decodeEntities, htmlEntityToChar )
+import Maybe ( fromMaybe )
+import Char ( toUpper, toLower )
+
+-- | Convert HTML-formatted string to 'Pandoc' document.
+readHtml :: ParserState -- ^ Parser state
+ -> String -- ^ String to parse
+ -> Pandoc
+readHtml = readWith parseHtml
+
+-- for testing
+testString :: String -> IO ()
+testString = testStringWith parseHtml
+
+--
+-- Constants
+--
+
+inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite",
+ "code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q",
+ "s", "samp", "select", "small", "span", "strike", "strong", "sub",
+ "sup", "textarea", "tt", "u", "var"]
+
+--
+-- HTML utility functions
+--
+
+-- | Read blocks until end tag.
+blocksTilEnd tag = try (do
+ blocks <- manyTill (do {b <- block; spaces; return b}) (htmlEndTag tag)
+ return blocks)
+
+-- | Read inlines until end tag.
+inlinesTilEnd tag = try (do
+ inlines <- manyTill inline (htmlEndTag tag)
+ return inlines)
+
+-- extract type from a tag: e.g. br from <br>, < br >, </br>, etc.
+extractTagType tag = case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of
+ Just [match] -> (map toLower match)
+ Nothing -> ""
+
+anyHtmlTag = try (do
+ char '<'
+ spaces
+ tag <- many1 alphaNum
+ attribs <- htmlAttributes
+ spaces
+ ender <- option "" (string "/")
+ let ender' = if (null ender) then "" else " /"
+ spaces
+ char '>'
+ return ("<" ++ tag ++ attribs ++ ender' ++ ">"))
+
+anyHtmlEndTag = try (do
+ char '<'
+ spaces
+ char '/'
+ spaces
+ tagType <- many1 alphaNum
+ spaces
+ char '>'
+ return ("</" ++ tagType ++ ">"))
+
+htmlTag :: String -> GenParser Char st (String, [(String, String)])
+htmlTag tag = try (do
+ char '<'
+ spaces
+ stringAnyCase tag
+ attribs <- many htmlAttribute
+ spaces
+ option "" (string "/")
+ spaces
+ char '>'
+ return (tag, (map (\(name, content, raw) -> (name, content)) attribs)))
+
+-- parses a quoted html attribute value
+quoted quoteChar = do
+ result <- between (char quoteChar) (char quoteChar) (many (noneOf [quoteChar]))
+ return (result, [quoteChar])
+
+htmlAttributes = do
+ attrList <- many htmlAttribute
+ return (concatMap (\(name, content, raw) -> raw) attrList)
+
+htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
+
+-- minimized boolean attribute (no = and value)
+htmlMinimizedAttribute = try (do
+ spaces
+ name <- many1 (choice [letter, oneOf ".-_:"])
+ spaces
+ notFollowedBy (char '=')
+ let content = name
+ return (name, content, (" " ++ name)))
+
+htmlRegularAttribute = try (do
+ spaces
+ name <- many1 (choice [letter, oneOf ".-_:"])
+ spaces
+ char '='
+ spaces
+ (content, quoteStr) <- choice [ (quoted '\''),
+ (quoted '"'),
+ (do{ a <- (many (alphaNum <|> (oneOf "-._:")));
+ return (a,"")} ) ]
+ return (name, content, (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr)))
+
+htmlEndTag tag = try (do
+ char '<'
+ spaces
+ char '/'
+ spaces
+ stringAnyCase tag
+ spaces
+ char '>'
+ return ("</" ++ tag ++ ">"))
+
+-- | Returns @True@ if the tag is an inline tag.
+isInline tag = (extractTagType tag) `elem` inlineHtmlTags
+
+anyHtmlBlockTag = try (do
+ tag <- choice [anyHtmlTag, anyHtmlEndTag]
+ if isInline tag then
+ fail "inline tag"
+ else
+ return tag)
+
+anyHtmlInlineTag = try (do
+ tag <- choice [ anyHtmlTag, anyHtmlEndTag ]
+ if isInline tag then
+ return tag
+ else
+ fail "not an inline tag")
+
+-- scripts must be treated differently, because they can contain <> etc.
+htmlScript = try (do
+ open <- string "<script"
+ rest <- manyTill anyChar (htmlEndTag "script")
+ return (open ++ rest ++ "</script>"))
+
+rawHtmlBlock = do
+ notFollowedBy (do {choice [htmlTag "/body", htmlTag "/html"]; return ' '})
+ body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition]
+ sp <- (many space)
+ state <- getState
+ if stateParseRaw state then
+ return (RawHtml (body ++ sp))
+ else
+ return Null
+
+htmlComment = try (do
+ string "<!--"
+ comment <- manyTill anyChar (try (string "-->"))
+ return ("<!--" ++ comment ++ "-->"))
+
+--
+-- parsing documents
+--
+
+xmlDec = try (do
+ string "<?"
+ rest <- manyTill anyChar (char '>')
+ return ("<?" ++ rest ++ ">"))
+
+definition = try (do
+ string "<!"
+ rest <- manyTill anyChar (char '>')
+ return ("<!" ++ rest ++ ">"))
+
+nonTitleNonHead = try (do
+ notFollowedBy' (htmlTag "title")
+ notFollowedBy' (htmlTag "/head")
+ result <- choice [do {rawHtmlBlock; return ' '}, anyChar]
+ return result)
+
+parseTitle = try (do
+ (tag, attribs) <- htmlTag "title"
+ contents <- inlinesTilEnd tag
+ spaces
+ return contents)
+
+-- parse header and return meta-information (for now, just title)
+parseHead = try (do
+ htmlTag "head"
+ spaces
+ skipMany nonTitleNonHead
+ contents <- option [] parseTitle
+ skipMany nonTitleNonHead
+ htmlTag "/head"
+ return (contents, [], ""))
+
+skipHtmlTag tag = option ("",[]) (htmlTag tag)
+
+-- h1 class="title" representation of title in body
+bodyTitle = try (do
+ (tag, attribs) <- htmlTag "h1"
+ cl <- case (extractAttribute "class" attribs) of
+ Just "title" -> do {return ""}
+ otherwise -> fail "not title"
+ inlinesTilEnd "h1"
+ return "")
+
+parseHtml = do
+ sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
+ skipHtmlTag "html"
+ spaces
+ (title, authors, date) <- option ([], [], "") parseHead
+ spaces
+ skipHtmlTag "body"
+ spaces
+ option "" bodyTitle -- skip title in body, because it's represented in meta
+ blocks <- parseBlocks
+ spaces
+ option "" (htmlEndTag "body")
+ spaces
+ option "" (htmlEndTag "html")
+ many anyChar -- ignore anything after </html>
+ eof
+ state <- getState
+ let keyBlocks = stateKeyBlocks state
+ return (Pandoc (Meta title authors date) (blocks ++ (reverse keyBlocks)))
+
+--
+-- parsing blocks
+--
+
+parseBlocks = do
+ spaces
+ result <- sepEndBy block spaces
+ return result
+
+block = choice [ codeBlock, header, hrule, list, blockQuote, para, plain,
+ rawHtmlBlock ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
+
+headerLevel n = try (do
+ let level = "h" ++ show n
+ (tag, attribs) <- htmlTag level
+ contents <- inlinesTilEnd level
+ return (Header n (normalizeSpaces contents)))
+
+--
+-- hrule block
+--
+
+hrule = try (do
+ (tag, attribs) <- htmlTag "hr"
+ state <- getState
+ if (not (null attribs)) && (stateParseRaw state) then
+ unexpected "attributes in hr" -- in this case we want to parse it as raw html
+ else
+ return HorizontalRule)
+
+--
+-- code blocks
+--
+
+codeBlock = choice [ preCodeBlock, bareCodeBlock ] <?> "code block"
+
+preCodeBlock = try (do
+ htmlTag "pre"
+ spaces
+ htmlTag "code"
+ result <- manyTill anyChar (htmlEndTag "code")
+ spaces
+ htmlEndTag "pre"
+ return (CodeBlock (decodeEntities result)))
+
+bareCodeBlock = try (do
+ htmlTag "code"
+ result <- manyTill anyChar (htmlEndTag "code")
+ return (CodeBlock (decodeEntities result)))
+
+--
+-- block quotes
+--
+
+blockQuote = try (do
+ tag <- htmlTag "blockquote"
+ spaces
+ blocks <- blocksTilEnd "blockquote"
+ return (BlockQuote blocks))
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList ] <?> "list"
+
+orderedList = try (do
+ tag <- htmlTag "ol"
+ spaces
+ items <- sepEndBy1 listItem spaces
+ htmlEndTag "ol"
+ return (OrderedList items))
+
+bulletList = try (do
+ tag <- htmlTag "ul"
+ spaces
+ items <- sepEndBy1 listItem spaces
+ htmlEndTag "ul"
+ return (BulletList items))
+
+listItem = try (do
+ tag <- htmlTag "li"
+ spaces
+ blocks <- blocksTilEnd "li"
+ return blocks)
+
+--
+-- paragraph block
+--
+
+para = try (do
+ tag <- htmlTag "p"
+ result <- inlinesTilEnd "p"
+ return (Para (normalizeSpaces result)))
+
+--
+-- plain block
+--
+
+plain = do
+ result <- many1 inline
+ return (Plain (normalizeSpaces result))
+
+--
+-- inline
+--
+
+inline = choice [ text, special ] <?> "inline"
+
+text = choice [ entity, strong, emph, code, str, linebreak, whitespace ] <?> "text"
+
+special = choice [ link, image, rawHtmlInline ] <?> "link, inline html, or image"
+
+entity = try (do
+ char '&'
+ body <- choice [(many1 letter),
+ (try (do{ char '#'; num <- many1 digit; return ("#" ++ num)}))]
+ char ';'
+ return (Str [fromMaybe '?' (htmlEntityToChar ("&" ++ body ++ ";"))]))
+
+code = try (do
+ htmlTag "code"
+ result <- manyTill anyChar (htmlEndTag "code")
+ -- remove internal line breaks, leading and trailing space, and decode entities
+ let result' = decodeEntities $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+ return (Code result'))
+
+rawHtmlInline = do
+ result <- choice [htmlScript, anyHtmlInlineTag]
+ state <- getState
+ if stateParseRaw state then
+ return (HtmlInline result)
+ else
+ return (Str "")
+
+betweenTags tag = try (do
+ htmlTag tag
+ result <- inlinesTilEnd tag
+ return (normalizeSpaces result))
+
+emph = try (do
+ result <- choice [betweenTags "em", betweenTags "it"]
+ return (Emph result))
+
+strong = try (do
+ result <- choice [betweenTags "b", betweenTags "strong"]
+ return (Strong result))
+
+whitespace = do
+ many1 space
+ return Space
+
+-- hard line break
+linebreak = do
+ htmlTag "br"
+ return LineBreak
+
+str = do
+ result <- many1 (noneOf "<& \t\n")
+ return (Str (decodeEntities result))
+
+--
+-- links and images
+--
+
+-- extract contents of attribute (attribute names are case-insensitive)
+extractAttribute name [] = Nothing
+extractAttribute name ((attrName, contents):rest) =
+ let name' = map toLower name
+ attrName' = map toLower attrName in
+ if (attrName' == name') then Just contents else extractAttribute name rest
+
+link = try (do
+ (tag, attributes) <- htmlTag "a"
+ url <- case (extractAttribute "href" attributes) of
+ Just url -> do {return url}
+ Nothing -> fail "no href"
+ let title = fromMaybe "" (extractAttribute "title" attributes)
+ label <- inlinesTilEnd "a"
+ ref <- generateReference url title
+ return (Link (normalizeSpaces label) ref))
+
+image = try (do
+ (tag, attributes) <- htmlTag "img"
+ url <- case (extractAttribute "src" attributes) of
+ Just url -> do {return url}
+ Nothing -> fail "no src"
+ let title = fromMaybe "" (extractAttribute "title" attributes)
+ let alt = fromMaybe "" (extractAttribute "alt" attributes)
+ ref <- generateReference url title
+ return (Image [Str alt] ref))
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
new file mode 100644
index 000000000..3bf3dfd23
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -0,0 +1,585 @@
+-- | Converts LaTeX to 'Pandoc' document.
+module Text.Pandoc.Readers.LaTeX (
+ readLaTeX,
+ rawLaTeXInline,
+ rawLaTeXEnvironment
+ ) where
+
+import Text.ParserCombinators.Parsec
+import Text.ParserCombinators.Pandoc
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Maybe ( fromMaybe )
+import Char ( chr )
+
+-- | Parse LaTeX from string and return 'Pandoc' document.
+readLaTeX :: ParserState -- ^ Parser state, including options for parser
+ -> String -- ^ String to parse
+ -> Pandoc
+readLaTeX = readWith parseLaTeX
+
+-- for testing
+testString = testStringWith parseLaTeX
+
+-- characters with special meaning
+specialChars = "\\$%&^&_~#{}\n \t|<>"
+
+--
+-- utility functions
+--
+
+-- | Change quotation marks in a string back to "basic" quotes.
+normalizeQuotes :: String -> String
+normalizeQuotes = gsub "''" "\"" . gsub "`" "'"
+
+-- | Change LaTeX En dashes between digits to hyphens.
+normalizeDashes :: String -> String
+normalizeDashes = gsub "([0-9])--([0-9])" "\\1-\\2"
+
+normalizePunctuation :: String -> String
+normalizePunctuation = normalizeDashes . normalizeQuotes
+
+-- | Returns command option (between []) if any, or empty string.
+commandOpt = option "" (between (char '[') (char ']') (many1 (noneOf "]")))
+
+-- | Returns text between brackets and its matching pair.
+bracketedText = try (do
+ char '{'
+ result <- many (choice [ try (do{ char '\\';
+ b <- oneOf "{}";
+ return (['\\', b])}), -- escaped bracket
+ count 1 (noneOf "{}"),
+ do {text <- bracketedText; return ("{" ++ text ++ "}")} ])
+ char '}'
+ return (concat result))
+
+-- | Parses list of arguments of LaTeX command.
+commandArgs = many bracketedText
+
+-- | Parses LaTeX command, returns (name, star, option, list of arguments).
+command = try (do
+ char '\\'
+ name <- many1 alphaNum
+ star <- option "" (string "*") -- some commands have starred versions
+ opt <- commandOpt
+ args <- commandArgs
+ return (name, star, opt, args))
+
+begin name = try (do
+ string "\\begin{"
+ string name
+ char '}'
+ option "" commandOpt
+ option [] commandArgs
+ spaces
+ return name)
+
+end name = try (do
+ string "\\end{"
+ string name
+ char '}'
+ spaces
+ return name)
+
+-- | Returns a list of block elements containing the contents of an environment.
+environment name = try (do
+ begin name
+ spaces
+ contents <- manyTill block (end name)
+ return contents)
+
+anyEnvironment = try (do
+ string "\\begin{"
+ name <- many alphaNum
+ star <- option "" (string "*") -- some environments have starred variants
+ char '}'
+ option "" commandOpt
+ option [] commandArgs
+ spaces
+ contents <- manyTill block (end (name ++ star))
+ return (BlockQuote contents))
+
+--
+-- parsing documents
+--
+
+-- | Skip everything up through \begin{document}
+skipLaTeXHeader = try (do
+ manyTill anyChar (begin "document")
+ spaces
+ return "")
+
+-- | Parse LaTeX and return 'Pandoc'.
+parseLaTeX = do
+ option "" skipLaTeXHeader -- if parsing a fragment, this might not be present
+ blocks <- parseBlocks
+ spaces
+ option "" (string "\\end{document}") -- if parsing a fragment, this might not be present
+ spaces
+ eof
+ state <- getState
+ let keyBlocks = stateKeyBlocks state
+ let noteBlocks = stateNoteBlocks state
+ let blocks' = filter (/= Null) blocks
+ return (Pandoc (Meta [] [] "") (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
+
+--
+-- parsing blocks
+--
+
+parseBlocks = do
+ spaces
+ result <- many block
+ return result
+
+block = choice [ hrule, codeBlock, header, list, blockQuote, mathBlock, comment,
+ bibliographic, para, specialEnvironment, itemBlock, unknownEnvironment,
+ unknownCommand ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
+
+headerLevel n = try (do
+ let subs = concat $ replicate (n - 1) "sub"
+ string ("\\" ++ subs ++ "section")
+ option ' ' (char '*')
+ char '{'
+ title <- manyTill inline (char '}')
+ spaces
+ return (Header n (normalizeSpaces title)))
+
+--
+-- hrule block
+--
+
+hrule = try (do
+ oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", "\\newpage" ]
+ spaces
+ return HorizontalRule)
+
+--
+-- code blocks
+--
+
+codeBlock = try (do
+ string "\\begin{verbatim}" -- don't use begin function because it gobbles whitespace
+ option "" blanklines -- we want to gobble blank lines, but not leading space
+ contents <- manyTill anyChar (try (string "\\end{verbatim}"))
+ spaces
+ return (CodeBlock (stripTrailingNewlines contents)))
+
+--
+-- block quotes
+--
+
+blockQuote = choice [ blockQuote1, blockQuote2 ] <?> "blockquote"
+
+blockQuote1 = try (do
+ blocks <- environment "quote"
+ spaces
+ return (BlockQuote blocks))
+
+blockQuote2 = try (do
+ blocks <- environment "quotation"
+ spaces
+ return (BlockQuote blocks))
+
+--
+-- math block
+--
+
+mathBlock = mathBlockWith (begin "equation") (end "equation") <|>
+ mathBlockWith (begin "displaymath") (end "displaymath") <|>
+ mathBlockWith (string "\\[") (string "\\]") <?> "math block"
+
+mathBlockWith start end = try (do
+ start
+ spaces
+ result <- manyTill anyChar end
+ spaces
+ return (BlockQuote [Para [TeX ("$" ++ result ++ "$")]]))
+
+--
+-- list blocks
+--
+
+list = bulletList <|> orderedList <?> "list"
+
+listItem = try (do
+ ("item", _, _, _) <- command
+ spaces
+ state <- getState
+ let oldParserContext = stateParserContext state
+ updateState (\state -> state {stateParserContext = ListItemState})
+ blocks <- many block
+ updateState (\state -> state {stateParserContext = oldParserContext})
+ return blocks)
+
+orderedList = try (do
+ begin "enumerate"
+ spaces
+ items <- many listItem
+ end "enumerate"
+ spaces
+ return (OrderedList items))
+
+bulletList = try (do
+ begin "itemize"
+ spaces
+ items <- many listItem
+ end "itemize"
+ spaces
+ return (BulletList items))
+
+--
+-- paragraph block
+--
+
+para = try (do
+ result <- many1 inline
+ spaces
+ return (Para (normalizeSpaces result)))
+
+--
+-- title authors date
+--
+
+bibliographic = choice [ maketitle, title, authors, date ]
+
+maketitle = try (do
+ string "\\maketitle"
+ spaces
+ return Null)
+
+title = try (do
+ string "\\title{"
+ tit <- manyTill inline (char '}')
+ spaces
+ updateState (\state -> state { stateTitle = tit })
+ return Null)
+
+authors = try (do
+ string "\\author{"
+ authors <- manyTill anyChar (char '}')
+ spaces
+ let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\" "\n" authors
+ updateState (\state -> state { stateAuthors = authors' })
+ return Null)
+
+date = try (do
+ string "\\date{"
+ date' <- manyTill anyChar (char '}')
+ spaces
+ updateState (\state -> state { stateDate = date' })
+ return Null)
+
+--
+-- item block
+-- for use in unknown environments that aren't being parsed as raw latex
+--
+
+-- this forces items to be parsed in different blocks
+itemBlock = try (do
+ ("item", _, opt, _) <- command
+ state <- getState
+ if (stateParserContext state == ListItemState) then
+ fail "item should be handled by list block"
+ else
+ if null opt then
+ return Null
+ else
+ return (Plain [Str opt]))
+
+--
+-- raw LaTeX
+--
+
+specialEnvironment = do -- these are always parsed as raw
+ followedBy' (choice (map (\name -> begin name) ["tabular", "figure", "tabbing", "eqnarry",
+ "picture", "table", "verse", "theorem"]))
+ rawLaTeXEnvironment
+
+-- | Parse any LaTeX environment and return a Para block containing
+-- the whole literal environment as raw TeX.
+rawLaTeXEnvironment :: GenParser Char st Block
+rawLaTeXEnvironment = try (do
+ string "\\begin"
+ char '{'
+ name <- many1 alphaNum
+ star <- option "" (string "*") -- for starred variants
+ let name' = name ++ star
+ char '}'
+ opt <- option "" commandOpt
+ args <- option [] commandArgs
+ let optStr = if (null opt) then "" else "[" ++ opt ++ "]"
+ let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args
+ contents <- manyTill (choice [(many1 (noneOf "\\")),
+ (do{ (Para [TeX str]) <- rawLaTeXEnvironment; return str }),
+ string "\\"]) (end name')
+ spaces
+ return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ optStr ++ argStr ++
+ (concat contents) ++ "\\end{" ++ name' ++ "}")]))
+
+unknownEnvironment = try (do
+ state <- getState
+ result <- if stateParseRaw state then -- check to see whether we should include raw TeX
+ rawLaTeXEnvironment -- if so, get the whole raw environment
+ else
+ anyEnvironment -- otherwise just the contents
+ return result)
+
+unknownCommand = try (do
+ notFollowedBy' (string "\\end{itemize}")
+ notFollowedBy' (string "\\end{enumerate}")
+ notFollowedBy' (string "\\end{document}")
+ (name, star, opt, args) <- command
+ spaces
+ let optStr = if null opt then "" else "[" ++ opt ++ "]"
+ let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args
+ state <- getState
+ if (name == "item") && ((stateParserContext state) == ListItemState) then
+ fail "should not be parsed as raw"
+ else
+ string ""
+ if stateParseRaw state then
+ return (Plain [TeX ("\\" ++ name ++ star ++ optStr ++ argStr)])
+ else
+ return (Plain [Str (joinWithSep " " args)]))
+
+-- latex comment
+comment = try (do
+ char '%'
+ result <- manyTill anyChar newline
+ spaces
+ return Null)
+
+--
+-- inline
+--
+
+inline = choice [ strong, emph, ref, lab, code, linebreak, math, ldots, accentedChar,
+ specialChar, specialInline, escapedChar, unescapedChar, str,
+ endline, whitespace ] <?> "inline"
+
+specialInline = choice [ link, image, footnote, rawLaTeXInline ] <?>
+ "link, raw TeX, note, or image"
+
+ldots = try (do
+ string "\\ldots"
+ return (Str "..."))
+
+accentedChar = normalAccentedChar <|> specialAccentedChar
+
+normalAccentedChar = try (do
+ char '\\'
+ accent <- oneOf "'`^\"~"
+ character <- choice [ between (char '{') (char '}') anyChar, anyChar ]
+ let table = fromMaybe [] $ lookup character accentTable
+ let result = case lookup accent table of
+ Just num -> chr num
+ Nothing -> '?'
+ return (Str [result]))
+
+-- an association list of letters and association list of accents
+-- and decimal character numbers.
+accentTable =
+ [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
+ ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
+ ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
+ ('N', [('~', 209)]),
+ ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
+ ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
+ ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
+ ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
+ ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
+ ('n', [('~', 241)]),
+ ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
+ ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
+
+specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, oslash, pound,
+ euro, copyright, sect ]
+
+ccedil = try (do
+ char '\\'
+ letter <- choice [try (string "cc"), try (string "cC")]
+ let num = if letter == "cc" then 231 else 199
+ return (Str [chr num]))
+
+aring = try (do
+ char '\\'
+ letter <- choice [try (string "aa"), try (string "AA")]
+ let num = if letter == "aa" then 229 else 197
+ return (Str [chr num]))
+
+iuml = try (do
+ string "\\\""
+ choice [try (string "\\i"), try (string "{\\i}")]
+ return (Str [chr 239]))
+
+icirc = try (do
+ string "\\^"
+ choice [try (string "\\i"), try (string "{\\i}")]
+ return (Str [chr 238]))
+
+szlig = try (do
+ string "\\ss"
+ return (Str [chr 223]))
+
+oslash = try (do
+ char '\\'
+ letter <- choice [char 'o', char 'O']
+ let num = if letter == 'o' then 248 else 216
+ return (Str [chr num]))
+
+aelig = try (do
+ char '\\'
+ letter <- choice [try (string "ae"), try (string "AE")]
+ let num = if letter == "ae" then 230 else 198
+ return (Str [chr num]))
+
+pound = try (do
+ string "\\pounds"
+ return (Str [chr 163]))
+
+euro = try (do
+ string "\\euro"
+ return (Str [chr 8364]))
+
+copyright = try (do
+ string "\\copyright"
+ return (Str [chr 169]))
+
+sect = try (do
+ string "\\S"
+ return (Str [chr 167]))
+
+escapedChar = escaped (oneOf " $%^&_#{}")
+
+unescapedChar = do -- ignore standalone, nonescaped special characters
+ oneOf "$^&_#{}|<>"
+ return (Str "")
+
+specialChar = choice [ backslash, bar, lt, gt ]
+
+backslash = try (do
+ string "\\textbackslash"
+ return (Str "\\"))
+
+bar = try (do
+ string "\\textbar"
+ return (Str "\\"))
+
+lt = try (do
+ string "\\textless"
+ return (Str "<"))
+
+gt = try (do
+ string "\\textgreater"
+ return (Str ">"))
+
+code = try (do
+ string "\\verb"
+ marker <- anyChar
+ result <- manyTill anyChar (char marker)
+ let result' = removeLeadingTrailingSpace result
+ return (Code result'))
+
+emph = try (do
+ oneOfStrings [ "\\emph{", "\\textit{" ]
+ result <- manyTill inline (char '}')
+ return (Emph result))
+
+lab = try (do
+ string "\\label{"
+ result <- manyTill anyChar (char '}')
+ return (Str ("(" ++ result ++ ")")))
+
+ref = try (do
+ string "\\ref{"
+ result <- manyTill anyChar (char '}')
+ return (Str (result)))
+
+strong = try (do
+ string "\\textbf{"
+ result <- manyTill inline (char '}')
+ return (Strong result))
+
+whitespace = do
+ many1 (oneOf "~ \t")
+ return Space
+
+-- hard line break
+linebreak = try (do
+ string "\\\\"
+ return LineBreak)
+
+str = do
+ result <- many1 (noneOf specialChars)
+ return (Str (normalizePunctuation result))
+
+-- endline internal to paragraph
+endline = try (do
+ newline
+ notFollowedBy blankline
+ return Space)
+
+-- math
+math = math1 <|> math2 <?> "math"
+
+math1 = try (do
+ char '$'
+ result <- many (noneOf "$")
+ char '$'
+ return (TeX ("$" ++ result ++ "$")))
+
+math2 = try (do
+ string "\\("
+ result <- many (noneOf "$")
+ string "\\)"
+ return (TeX ("$" ++ result ++ "$")))
+
+--
+-- links and images
+--
+
+link = try (do
+ string "\\href{"
+ url <- manyTill anyChar (char '}')
+ char '{'
+ label <- manyTill inline (char '}')
+ ref <- generateReference url ""
+ return (Link (normalizeSpaces label) ref))
+
+image = try (do
+ ("includegraphics", _, _, (src:lst)) <- command
+ return (Image [Str "image"] (Src src "")))
+
+footnote = try (do
+ ("footnote", _, _, (contents:[])) <- command
+ let blocks = case runParser parseBlocks defaultParserState "footnote" contents of
+ Left err -> error $ "Input:\n" ++ show contents ++
+ "\nError:\n" ++ show err
+ Right result -> result
+ state <- getState
+ let notes = stateNoteBlocks state
+ let nextRef = case notes of
+ [] -> "1"
+ (Note ref body):rest -> (show ((read ref) + 1))
+ setState (state { stateNoteBlocks = (Note nextRef blocks):notes })
+ return (NoteRef nextRef))
+
+-- | Parse any LaTeX command and return it in a raw TeX inline element.
+rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline = try (do
+ (name, star, opt, args) <- command
+ let optStr = if (null opt) then "" else "[" ++ opt ++ "]"
+ let argStr = concatMap (\arg -> "{" ++ arg ++ "}") args
+ state <- getState
+ if ((name == "begin") || (name == "end") || (name == "item")) then
+ fail "not an inline command"
+ else
+ string ""
+ return (TeX ("\\" ++ name ++ star ++ optStr ++ argStr)))
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
new file mode 100644
index 000000000..60ac40fd7
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -0,0 +1,582 @@
+-- | Convert markdown to Pandoc document.
+module Text.Pandoc.Readers.Markdown (
+ readMarkdown
+ ) where
+
+import Text.ParserCombinators.Pandoc
+import Text.Pandoc.Definition
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.HTML ( rawHtmlInline, rawHtmlBlock, anyHtmlBlockTag,
+ anyHtmlInlineTag )
+import Text.Pandoc.HtmlEntities ( decodeEntities )
+import Text.Regex ( matchRegex, mkRegex )
+import Text.ParserCombinators.Parsec
+
+-- | Read markdown from an input string and return a Pandoc document.
+readMarkdown :: ParserState -> String -> Pandoc
+readMarkdown = readWith parseMarkdown
+
+-- | Parse markdown string with default options and print result (for testing).
+testString :: String -> IO ()
+testString = testStringWith parseMarkdown
+
+--
+-- Constants and data structure definitions
+--
+
+spaceChars = " \t"
+endLineChars = "\n"
+labelStart = '['
+labelEnd = ']'
+labelSep = ':'
+srcStart = '('
+srcEnd = ')'
+imageStart = '!'
+noteStart = '^'
+codeStart = '`'
+codeEnd = '`'
+emphStart = '*'
+emphEnd = '*'
+emphStartAlt = '_'
+emphEndAlt = '_'
+autoLinkStart = '<'
+autoLinkEnd = '>'
+mathStart = '$'
+mathEnd = '$'
+bulletListMarkers = "*+-"
+orderedListDelimiters = "."
+escapeChar = '\\'
+hruleChars = "*-_"
+quoteChars = "'\""
+atxHChar = '#'
+titleOpeners = "\"'("
+setextHChars = ['=','-']
+blockQuoteChar = '>'
+hyphenChar = '-'
+
+-- treat these as potentially non-text when parsing inline:
+specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd, emphStartAlt,
+ emphEndAlt, codeStart, codeEnd, autoLinkEnd, autoLinkStart, mathStart,
+ mathEnd, imageStart, noteStart, hyphenChar]
+
+--
+-- auxiliary functions
+--
+
+-- | Skip a single endline if there is one.
+skipEndline = option Space endline
+
+indentSpaces = do
+ state <- getState
+ let tabStop = stateTabStop state
+ oneOfStrings [ "\t", (replicate tabStop ' ') ] <?> "indentation"
+
+skipNonindentSpaces = do
+ state <- getState
+ let tabStop = stateTabStop state
+ choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)]))
+
+--
+-- document structure
+--
+
+titleLine = try (do
+ char '%'
+ skipSpaces
+ line <- manyTill inline newline
+ return line)
+
+authorsLine = try (do
+ char '%'
+ skipSpaces
+ authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
+ newline
+ return (map removeLeadingTrailingSpace authors))
+
+dateLine = try (do
+ char '%'
+ skipSpaces
+ date <- many (noneOf "\n")
+ newline
+ return (removeTrailingSpace date))
+
+titleBlock = try (do
+ title <- option [] titleLine
+ author <- option [] authorsLine
+ date <- option "" dateLine
+ option "" blanklines
+ return (title, author, date))
+
+parseMarkdown = do
+ updateState (\state -> state { stateParseRaw = True }) -- need to parse raw HTML
+ (title, author, date) <- option ([],[],"") titleBlock
+ blocks <- parseBlocks
+ state <- getState
+ let keys = reverse $ stateKeyBlocks state
+ return (Pandoc (Meta title author date) (blocks ++ keys))
+
+--
+-- parsing blocks
+--
+
+parseBlocks = do
+ result <- manyTill block eof
+ return result
+
+block = choice [ codeBlock, referenceKey, note, header, hrule, list, blockQuote, rawHtmlBlocks,
+ rawLaTeXEnvironment, para, plain, blankBlock, nullBlock ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = choice [ setextHeader, atxHeader ] <?> "header"
+
+atxHeader = try (do
+ lead <- many1 (char atxHChar)
+ skipSpaces
+ txt <- many1 (do {notFollowedBy' atxClosing; inline})
+ atxClosing
+ return (Header (length lead) (normalizeSpaces txt)))
+
+atxClosing = try (do
+ skipMany (char atxHChar)
+ skipSpaces
+ newline
+ option "" blanklines)
+
+setextHeader = choice (map (\x -> setextH x) (enumFromTo 1 (length setextHChars)))
+
+setextH n = try (do
+ txt <- many1 (do {notFollowedBy newline; inline})
+ endline
+ many1 (char (setextHChars !! (n-1)))
+ skipSpaces
+ newline
+ option "" blanklines
+ return (Header n (normalizeSpaces txt)))
+
+--
+-- hrule block
+--
+
+hruleWith chr =
+ try (do
+ skipSpaces
+ char chr
+ skipSpaces
+ char chr
+ skipSpaces
+ char chr
+ skipMany (oneOf (chr:spaceChars))
+ newline
+ option "" blanklines
+ return HorizontalRule)
+
+hrule = choice (map hruleWith hruleChars) <?> "hrule"
+
+--
+-- code blocks
+--
+
+indentedLine = try (do
+ indentSpaces
+ result <- manyTill anyChar newline
+ return (result ++ "\n"))
+
+-- two or more indented lines, possibly separated by blank lines
+indentedBlock = try (do
+ res1 <- indentedLine
+ blanks <- many blankline
+ res2 <- choice [indentedBlock, indentedLine]
+ return (res1 ++ blanks ++ res2))
+
+codeBlock = do
+ result <- choice [indentedBlock, indentedLine]
+ option "" blanklines
+ return (CodeBlock result)
+
+--
+-- note block
+--
+
+note = try (do
+ (NoteRef ref) <- noteRef
+ skipSpaces
+ raw <- sepBy (many (choice [nonEndline,
+ (try (do {endline; notFollowedBy (char noteStart); return '\n'}))
+ ])) (try (do {newline; char noteStart; option ' ' (char ' ')}))
+ newline
+ blanklines
+ -- parse the extracted block, which may contain various block elements:
+ state <- getState
+ let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of
+ Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err
+ Right result -> result
+ return (Note ref parsed))
+
+--
+-- block quotes
+--
+
+emacsBoxQuote = try (do
+ string ",----"
+ manyTill anyChar newline
+ raw <- manyTill (try (do{ char '|';
+ option ' ' (char ' ');
+ result <- manyTill anyChar newline;
+ return result}))
+ (string "`----")
+ manyTill anyChar newline
+ option "" blanklines
+ return raw)
+
+emailBlockQuoteStart = try (do
+ skipNonindentSpaces
+ char blockQuoteChar
+ option ' ' (char ' ')
+ return "> ")
+
+emailBlockQuote = try (do
+ emailBlockQuoteStart
+ raw <- sepBy (many (choice [nonEndline,
+ (try (do{ endline;
+ notFollowedBy' emailBlockQuoteStart;
+ return '\n'}))]))
+ (try (do {newline; emailBlockQuoteStart}))
+ newline <|> (do{ eof; return '\n'})
+ option "" blanklines
+ return raw)
+
+blockQuote = do
+ raw <- choice [ emailBlockQuote, emacsBoxQuote ]
+ -- parse the extracted block, which may contain various block elements:
+ state <- getState
+ let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState}) "block" ((joinWithSep "\n" raw) ++ "\n\n") of
+ Left err -> error $ "Raw block:\n" ++ show raw ++ "\nError:\n" ++ show err
+ Right result -> result
+ return (BlockQuote parsed)
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList ] <?> "list"
+
+bulletListStart =
+ try (do
+ option ' ' newline -- if preceded by a Plain block in a list context
+ skipNonindentSpaces
+ notFollowedBy' hrule -- because hrules start out just like lists
+ oneOf bulletListMarkers
+ spaceChar
+ skipSpaces)
+
+orderedListStart =
+ try (do
+ option ' ' newline -- if preceded by a Plain block in a list context
+ skipNonindentSpaces
+ many1 digit
+ oneOf orderedListDelimiters
+ oneOf spaceChars
+ skipSpaces)
+
+-- parse a line of a list item (start = parser for beginning of list item)
+listLine start = try (do
+ notFollowedBy' start
+ notFollowedBy blankline
+ notFollowedBy' (try (do{ indentSpaces;
+ many (spaceChar);
+ choice [bulletListStart, orderedListStart]}))
+ line <- manyTill anyChar newline
+ return (line ++ "\n"))
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem start =
+ try (do
+ start
+ result <- many1 (listLine start)
+ blanks <- many blankline
+ return ((concat result) ++ blanks))
+
+-- continuation of a list item - indented and separated by blankline
+-- or (in compact lists) endline.
+-- note: nested lists are parsed as continuations
+listContinuation start =
+ try (do
+ followedBy' indentSpaces
+ result <- many1 (listContinuationLine start)
+ blanks <- many blankline
+ return ((concat result) ++ blanks))
+
+listContinuationLine start = try (do
+ notFollowedBy blankline
+ notFollowedBy' start
+ option "" indentSpaces
+ result <- manyTill anyChar newline
+ return (result ++ "\n"))
+
+listItem start =
+ try (do
+ first <- rawListItem start
+ rest <- many (listContinuation start)
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState})
+ "block" raw of
+ Left err -> error $ "Raw block:\n" ++ raw ++ "\nError:\n" ++ show err
+ Right result -> result
+ where raw = concat (first:rest)
+ return parsed)
+
+orderedList =
+ try (do
+ items <- many1 (listItem orderedListStart)
+ let items' = compactify items
+ return (OrderedList items'))
+
+bulletList =
+ try (do
+ items <- many1 (listItem bulletListStart)
+ let items' = compactify items
+ return (BulletList items'))
+
+--
+-- paragraph block
+--
+
+para = try (do
+ result <- many1 inline
+ newline
+ choice [ (do{ followedBy' (oneOfStrings [">", ",----"]); return "" }), blanklines ]
+ let result' = normalizeSpaces result
+ return (Para result'))
+
+plain = do
+ result <- many1 inline
+ let result' = normalizeSpaces result
+ return (Plain result')
+
+--
+-- raw html
+--
+
+rawHtmlBlocks = try (do
+ htmlBlocks <- many1 rawHtmlBlock
+ let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
+ let combined' = if (last combined == '\n') then
+ init combined -- strip extra newline
+ else
+ combined
+ return (RawHtml combined'))
+
+--
+-- reference key
+--
+
+referenceKey =
+ try (do
+ skipSpaces
+ label <- reference
+ char labelSep
+ skipSpaces
+ option ' ' (char autoLinkStart)
+ src <- many (noneOf (titleOpeners ++ [autoLinkEnd] ++ endLineChars))
+ option ' ' (char autoLinkEnd)
+ tit <- option "" title
+ blanklines
+ return (Key label (Src (removeTrailingSpace src) tit)))
+
+--
+-- inline
+--
+
+text = choice [ math, strong, emph, code2, code1, str, linebreak, tabchar,
+ whitespace, endline ] <?> "text"
+
+inline = choice [ rawLaTeXInline, escapedChar, special, hyphens, text, ltSign, symbol ] <?> "inline"
+
+special = choice [ link, referenceLink, rawHtmlInline, autoLink,
+ image, noteRef ] <?> "link, inline html, note, or image"
+
+escapedChar = escaped anyChar
+
+ltSign = do
+ notFollowedBy' rawHtmlBlocks -- don't return < if it starts html
+ char '<'
+ return (Str ['<'])
+
+specialCharsMinusLt = filter (/= '<') specialChars
+
+symbol = do
+ result <- oneOf specialCharsMinusLt
+ return (Str [result])
+
+hyphens = try (do
+ result <- many1 (char '-')
+ if (length result) == 1 then
+ skipEndline -- don't want to treat endline after hyphen as a space
+ else
+ do{ string ""; return Space }
+ return (Str result))
+
+-- parses inline code, between codeStart and codeEnd
+code1 =
+ try (do
+ char codeStart
+ result <- many (noneOf [codeEnd])
+ char codeEnd
+ let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines
+ return (Code result'))
+
+-- parses inline code, between 2 codeStarts and 2 codeEnds
+code2 =
+ try (do
+ string [codeStart, codeStart]
+ result <- manyTill anyChar (try (string [codeEnd, codeEnd]))
+ let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result -- get rid of any internal newlines
+ return (Code result'))
+
+mathWord = many1 (choice [(noneOf (" \t\n\\" ++ [mathEnd])), (try (do {c <- char '\\'; notFollowedBy (char mathEnd); return c}))])
+
+math = try (do
+ char mathStart
+ notFollowedBy space
+ words <- sepBy1 mathWord (many1 space)
+ char mathEnd
+ return (TeX ("$" ++ (joinWithSep " " words) ++ "$")))
+
+emph = do
+ result <- choice [ (enclosed (char emphStart) (char emphEnd) inline),
+ (enclosed (char emphStartAlt) (char emphEndAlt) inline) ]
+ return (Emph (normalizeSpaces result))
+
+strong = do
+ result <- choice [ (enclosed (count 2 (char emphStart)) (count 2 (char emphEnd)) inline),
+ (enclosed (count 2 (char emphStartAlt)) (count 2 (char emphEndAlt)) inline)]
+ return (Strong (normalizeSpaces result))
+
+whitespace = do
+ many1 (oneOf spaceChars) <?> "whitespace"
+ return Space
+
+tabchar = do
+ tab
+ return (Str "\t")
+
+-- hard line break
+linebreak = try (do
+ oneOf spaceChars
+ many1 (oneOf spaceChars)
+ endline
+ return LineBreak )
+
+nonEndline = noneOf endLineChars
+
+str = do
+ result <- many1 ((noneOf (specialChars ++ spaceChars ++ endLineChars)))
+ return (Str (decodeEntities result))
+
+-- an endline character that can be treated as a space, not a structural break
+endline =
+ try (do
+ newline
+ -- next line would allow block quotes without preceding blank line
+ -- Markdown.pl does allow this, but there's a chance of a wrapped
+ -- greater-than sign triggering a block quote by accident...
+-- notFollowedBy (try (do { choice [emailBlockQuoteStart, string ",----"]; return ' ' }))
+ notFollowedBy blankline
+ -- parse potential list starts at beginning of line differently if in a list:
+ st <- getState
+ if (stateParserContext st) == ListItemState then
+ do
+ notFollowedBy' orderedListStart
+ notFollowedBy' bulletListStart
+ else
+ option () pzero
+ return Space)
+
+--
+-- links
+--
+
+-- a reference label for a link
+reference = do
+ char labelStart
+ label <- manyTill inline (char labelEnd)
+ return (normalizeSpaces label)
+
+-- source for a link, with optional title
+source =
+ try (do
+ char srcStart
+ option ' ' (char autoLinkStart)
+ src <- many (noneOf ([srcEnd, autoLinkEnd] ++ titleOpeners))
+ option ' ' (char autoLinkEnd)
+ tit <- option "" title
+ skipSpaces
+ char srcEnd
+ return (Src (removeTrailingSpace src) tit))
+
+titleWith startChar endChar =
+ try (do
+ skipSpaces
+ skipEndline -- a title can be on the next line from the source
+ skipSpaces
+ char startChar
+ tit <- manyTill (choice [ try (do {char '\\'; char endChar}),
+ (noneOf (endChar:endLineChars)) ]) (char endChar)
+ let tit' = gsub "\"" "&quot;" tit
+ return tit')
+
+title = choice [titleWith '(' ')', titleWith '"' '"', titleWith '\'' '\''] <?> "title"
+
+link = choice [explicitLink, referenceLink] <?> "link"
+
+explicitLink =
+ try (do
+ label <- reference
+ src <- source
+ return (Link label src))
+
+referenceLink = choice [referenceLinkDouble, referenceLinkSingle]
+
+referenceLinkDouble = -- a link like [this][/url/]
+ try (do
+ label <- reference
+ skipSpaces
+ skipEndline
+ skipSpaces
+ ref <- reference
+ return (Link label (Ref ref)))
+
+referenceLinkSingle = -- a link like [this]
+ try (do
+ label <- reference
+ return (Link label (Ref [])))
+
+autoLink = -- a link <like.this.com>
+ try (do
+ notFollowedBy (do {anyHtmlBlockTag; return ' '})
+ src <- between (char autoLinkStart) (char autoLinkEnd)
+ (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd])))
+ case (matchRegex emailAddress src) of
+ Just _ -> return (Link [Str src] (Src ("mailto:" ++ src) ""))
+ Nothing -> return (Link [Str src] (Src src "")))
+
+emailAddress = mkRegex "([^@:/]+)@(([^.]+[.]?)*([^.]+))" -- presupposes no whitespace
+
+image =
+ try (do
+ char imageStart
+ (Link label src) <- link
+ return (Image label src))
+
+noteRef = try (do
+ char noteStart
+ ref <- between (char '(') (char ')') (many1 (noneOf " \t\n)"))
+ return (NoteRef ref))
+
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
new file mode 100644
index 000000000..82e5ea303
--- /dev/null
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -0,0 +1,644 @@
+-- | Parse reStructuredText and return Pandoc document.
+module Text.Pandoc.Readers.RST (
+ readRST
+ ) where
+import Text.Pandoc.Definition
+import Text.ParserCombinators.Pandoc
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.HTML ( anyHtmlBlockTag, anyHtmlInlineTag )
+import Text.Regex ( matchRegex, mkRegex )
+import Text.ParserCombinators.Parsec
+import Data.Maybe ( fromMaybe )
+import List ( findIndex )
+import Char ( toUpper )
+
+-- | Parse reStructuredText string and return Pandoc document.
+readRST :: ParserState -> String -> Pandoc
+readRST = readWith parseRST
+
+-- | Parse a string and print result (for testing).
+testString :: String -> IO ()
+testString = testStringWith parseRST
+
+--
+-- Constants and data structure definitions
+---
+
+bulletListMarkers = "*+-"
+underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars = "\\`|*_<>$:[-"
+
+--
+-- parsing documents
+--
+
+isAnonKeyBlock block = case block of
+ (Key [Str "_"] str) -> True
+ otherwise -> False
+
+isNotAnonKeyBlock block = not (isAnonKeyBlock block)
+
+isHeader1 :: Block -> Bool
+isHeader1 (Header 1 _) = True
+isHeader1 _ = False
+
+isHeader2 :: Block -> Bool
+isHeader2 (Header 2 _) = True
+isHeader2 _ = False
+
+-- | Promote all headers in a list of blocks. (Part of
+-- title transformation for RST.)
+promoteHeaders :: Int -> [Block] -> [Block]
+promoteHeaders num ((Header level text):rest) =
+ (Header (level - num) text):(promoteHeaders num rest)
+promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
+promoteHeaders num [] = []
+
+-- | If list of blocks starts with a header (or a header and subheader)
+-- of level that are not found elsewhere, return it as a title and
+-- promote all the other headers.
+titleTransform :: [Block] -- ^ list of blocks
+ -> ([Block], [Inline]) -- ^ modified list of blocks, title
+titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title and subtitle
+ if (any isHeader1 rest) || (any isHeader2 rest) then
+ ((Header 1 head1):(Header 2 head2):rest, [])
+ else
+ ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
+titleTransform ((Header 1 head1):rest) = -- title, no subtitle
+ if (any isHeader1 rest) then
+ ((Header 1 head1):rest, [])
+ else
+ ((promoteHeaders 1 rest), head1)
+titleTransform blocks = (blocks, [])
+
+parseRST = do
+ state <- getState
+ input <- getInput
+ blocks <- parseBlocks -- first pass
+ let anonymousKeys = filter isAnonKeyBlock blocks
+ let blocks' = if (null anonymousKeys) then
+ blocks
+ else -- run parser again to fill in anonymous links...
+ case runParser parseBlocks (state { stateKeyBlocks = anonymousKeys })
+ "RST source, second pass" input of
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result -> (filter isNotAnonKeyBlock result)
+ let (blocks'', title) = if stateStandalone state then
+ titleTransform blocks'
+ else
+ (blocks', [])
+ state <- getState
+ let authors = stateAuthors state
+ let date = stateDate state
+ let title' = if (null title) then (stateTitle state) else title
+ return (Pandoc (Meta title' authors date) blocks'')
+
+--
+-- parsing blocks
+--
+
+parseBlocks = do
+ result <- manyTill block eof
+ return result
+
+block = choice [ codeBlock, rawHtmlBlock, rawLaTeXBlock, blockQuote, referenceKey,
+ imageBlock, unknownDirective, header, hrule, list, fieldList, lineBlock,
+ para, plain, blankBlock, nullBlock ] <?> "block"
+
+--
+-- field list
+--
+
+fieldListItem = try (do
+ char ':'
+ name <- many1 alphaNum
+ string ": "
+ skipSpaces
+ first <- manyTill anyChar newline
+ rest <- many (do{ notFollowedBy (char ':');
+ notFollowedBy blankline;
+ skipSpaces;
+ manyTill anyChar newline })
+ return (name, (joinWithSep " " (first:rest))))
+
+fieldList = try (do
+ items <- many1 fieldListItem
+ blanklines
+ let authors = case (lookup "Authors" items) of
+ Just auth -> [auth]
+ Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
+ let date = case (lookup "Date" items) of
+ Just dat -> dat
+ Nothing -> ""
+ let title = case (lookup "Title" items) of
+ Just tit -> [Str tit]
+ Nothing -> []
+ let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && (x /= "Date") &&
+ (x /= "Title")) items
+ let result = map (\(x,y) -> Para [Strong [Str x], Str ":", Space, Str y]) remaining
+ updateState (\st -> st { stateAuthors = authors, stateDate = date, stateTitle = title })
+ return (BlockQuote result))
+
+--
+-- line block
+--
+
+lineBlockLine = try (do
+ string "| "
+ white <- many (oneOf " \t")
+ line <- manyTill inline newline
+ let line' = (if null white then [] else [Str white]) ++ line ++ [LineBreak]
+ return line')
+
+lineBlock = try (do
+ lines <- many1 lineBlockLine
+ blanklines
+ return $ Para (concat lines))
+
+--
+-- paragraph block
+--
+
+para = choice [ paraBeforeCodeBlock, paraNormal ] <?> "paragraph"
+
+codeBlockStart = try (do
+ string "::"
+ blankline
+ blankline)
+
+-- paragraph that ends in a :: starting a code block
+paraBeforeCodeBlock = try (do
+ result <- many1 (do {notFollowedBy' codeBlockStart; inline})
+ followedBy' (string "::")
+ return (Para (if (last result == Space) then
+ normalizeSpaces result
+ else
+ (normalizeSpaces result) ++ [Str ":"])))
+
+-- regular paragraph
+paraNormal = try (do
+ result <- many1 inline
+ newline
+ blanklines
+ let result' = normalizeSpaces result
+ return (Para result'))
+
+plain = do
+ result <- many1 inline
+ let result' = normalizeSpaces result
+ return (Plain result')
+
+--
+-- image block
+--
+
+imageBlock = try (do
+ string ".. image:: "
+ src <- manyTill anyChar newline
+ return (Plain [Image [Str "image"] (Src src "")]))
+
+--
+-- header blocks
+--
+
+header = choice [ doubleHeader, singleHeader ] <?> "header"
+
+-- a header with lines on top and bottom
+doubleHeader = try (do
+ c <- oneOf underlineChars
+ rest <- many (char c) -- the top line
+ let lenTop = length (c:rest)
+ skipSpaces
+ newline
+ txt <- many1 (do {notFollowedBy blankline; inline})
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ if (len > lenTop) then fail "title longer than border" else (do {return ()})
+ blankline -- spaces and newline
+ count lenTop (char c) -- the bottom line
+ blanklines
+ -- check to see if we've had this kind of header before.
+ -- if so, get appropriate level. if not, add to list.
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable', level) = case findIndex (== DoubleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return (Header level (normalizeSpaces txt)))
+
+-- a header with line on the bottom only
+singleHeader = try (do
+ notFollowedBy' whitespace
+ txt <- many1 (do {notFollowedBy blankline; inline})
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ blankline
+ c <- oneOf underlineChars
+ rest <- count (len - 1) (char c)
+ many (char c)
+ blanklines
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable', level) = case findIndex (== SingleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return (Header level (normalizeSpaces txt)))
+
+--
+-- hrule block
+--
+
+hruleWith chr =
+ try (do
+ count 4 (char chr)
+ skipMany (char chr)
+ skipSpaces
+ newline
+ blanklines
+ return HorizontalRule)
+
+hrule = choice (map hruleWith underlineChars) <?> "hrule"
+
+--
+-- code blocks
+--
+
+-- read a line indented by a given string
+indentedLine indents = try (do
+ string indents
+ result <- manyTill anyChar newline
+ return (result ++ "\n"))
+
+-- two or more indented lines, possibly separated by blank lines
+-- if variable = True, then any indent will work, but it must be consistent through the block
+-- if variable = False, indent should be one tab or equivalent in spaces
+indentedBlock variable = try (do
+ state <- getState
+ let tabStop = stateTabStop state
+ indents <- if variable then
+ many1 (oneOf " \t")
+ else
+ oneOfStrings ["\t", (replicate tabStop ' ')]
+ firstline <- manyTill anyChar newline
+ rest <- many (choice [ indentedLine indents,
+ try (do {b <- blanklines; l <- indentedLine indents; return (b ++ l)})])
+ option "" blanklines
+ return (firstline ++ "\n" ++ (concat rest)))
+
+codeBlock = try (do
+ codeBlockStart
+ result <- indentedBlock False -- the False means we want one tab stop indent on each line
+ return (CodeBlock result))
+
+--
+-- raw html
+--
+
+rawHtmlBlock = try (do
+ string ".. raw:: html"
+ blanklines
+ result <- indentedBlock True
+ return (RawHtml result))
+
+--
+-- raw latex
+--
+
+rawLaTeXBlock = try (do
+ string ".. raw:: latex"
+ blanklines
+ result <- indentedBlock True
+ return (Para [(TeX result)]))
+
+--
+-- block quotes
+--
+
+blockQuote = try (do
+ block <- indentedBlock True
+ -- parse the extracted block, which may contain various block elements:
+ state <- getState
+ let parsed = case runParser parseBlocks (state {stateParserContext = BlockQuoteState})
+ "block" (block ++ "\n\n") of
+ Left err -> error $ "Raw block:\n" ++ show block ++ "\nError:\n" ++ show err
+ Right result -> result
+ return (BlockQuote parsed))
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList ] <?> "list"
+
+-- parses bullet list start and returns its length (inc. following whitespace)
+bulletListStart =
+ try (do
+ notFollowedBy' hrule -- because hrules start out just like lists
+ marker <- oneOf bulletListMarkers
+ white <- many1 spaceChar
+ let len = length (marker:white)
+ return len)
+
+withPeriodSuffix parser = try (do
+ a <- parser
+ b <- char '.'
+ return (a ++ [b]))
+
+withParentheses parser = try (do
+ a <- char '('
+ b <- parser
+ c <- char ')'
+ return ([a] ++ b ++ [c]))
+
+withRightParen parser = try (do
+ a <- parser
+ b <- char ')'
+ return (a ++ [b]))
+
+upcaseWord = map toUpper
+
+romanNumeral = do
+ let lowerNumerals = ["i", "ii", "iii", "iiii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx", "xxi", "xxii", "xxiii", "xxiv" ]
+ let upperNumerals = map upcaseWord lowerNumerals
+ result <- choice $ map string (lowerNumerals ++ upperNumerals)
+ return result
+
+orderedListEnumerator = choice [ many1 digit,
+ string "#",
+ count 1 letter,
+ romanNumeral ]
+
+-- parses ordered list start and returns its length (inc. following whitespace)
+orderedListStart =
+ try (do
+ marker <- choice [ withPeriodSuffix orderedListEnumerator,
+ withParentheses orderedListEnumerator,
+ withRightParen orderedListEnumerator ]
+ white <- many1 spaceChar
+ let len = length (marker ++ white)
+ return len)
+
+-- parse a line of a list item
+listLine markerLength = try (do
+ notFollowedBy blankline
+ indentWith markerLength
+ line <- manyTill anyChar newline
+ return (line ++ "\n"))
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith num = do
+ state <- getState
+ let tabStop = stateTabStop state
+ if (num < tabStop) then
+ count num (char ' ')
+ else
+ choice [ try (count num (char ' ')),
+ (try (do {char '\t'; count (num - tabStop) (char ' ')})) ]
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem start =
+ try (do
+ markerLength <- start
+ firstLine <- manyTill anyChar newline
+ restLines <- many (listLine markerLength)
+ return (markerLength, (firstLine ++ "\n" ++ (concat restLines))))
+
+-- continuation of a list item - indented and separated by blankline or (in compact lists)
+-- endline. Note: nested lists are parsed as continuations.
+listContinuation markerLength =
+ try (do
+ blanks <- many1 blankline
+ result <- many1 (listLine markerLength)
+ return (blanks ++ (concat result)))
+
+listItem start =
+ try (do
+ (markerLength, first) <- rawListItem start
+ rest <- many (listContinuation markerLength)
+ blanks <- choice [ try (do {b <- many blankline; followedBy' start; return b}),
+ many1 blankline ] -- whole list must end with blank
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let parsed = case runParser parseBlocks (state {stateParserContext = ListItemState})
+ "list item" raw of
+ Left err -> error $ "Raw:\n" ++ raw ++ "\nError:\n" ++ show err
+ Right result -> result
+ where raw = concat (first:rest) ++ blanks
+ return parsed)
+
+orderedList =
+ try (do
+ items <- many1 (listItem orderedListStart)
+ let items' = compactify items
+ return (OrderedList items'))
+
+bulletList =
+ try (do
+ items <- many1 (listItem bulletListStart)
+ let items' = compactify items
+ return (BulletList items'))
+
+--
+-- unknown directive (e.g. comment)
+--
+
+unknownDirective = try (do
+ string ".. "
+ manyTill anyChar newline
+ many (do {string " ";
+ char ':';
+ many1 (noneOf "\n:");
+ char ':';
+ many1 (noneOf "\n");
+ newline})
+ option "" blanklines
+ return Null)
+
+--
+-- reference key
+--
+
+referenceKey = choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
+
+imageKey = try (do
+ string ".. |"
+ ref <- manyTill inline (char '|')
+ skipSpaces
+ string "image::"
+ src <- manyTill anyChar newline
+ return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) "")))
+
+anonymousKey = try (do
+ choice [string ".. __:", string "__"]
+ skipSpaces
+ src <- manyTill anyChar newline
+ state <- getState
+ return (Key [Str "_"] (Src (removeLeadingTrailingSpace src) "")))
+
+regularKeyQuoted = try (do
+ string ".. _`"
+ ref <- manyTill inline (string "`:")
+ skipSpaces
+ src <- manyTill anyChar newline
+ return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) "")))
+
+regularKey = try (do
+ string ".. _"
+ ref <- manyTill inline (char ':')
+ skipSpaces
+ src <- manyTill anyChar newline
+ return (Key (normalizeSpaces ref) (Src (removeLeadingTrailingSpace src) "")))
+
+ --
+ -- inline
+ --
+
+text = choice [ strong, emph, code, str, tabchar, whitespace, endline ] <?> "text"
+
+inline = choice [ escapedChar, special, hyphens, text, symbol ] <?> "inline"
+
+special = choice [ link, image ] <?> "link, inline html, or image"
+
+hyphens = try (do
+ result <- many1 (char '-')
+ option Space endline -- don't want to treat endline after hyphen or dash as a space
+ return (Str result))
+
+escapedChar = escaped anyChar
+
+symbol = do
+ result <- oneOf specialChars
+ return (Str [result])
+
+-- parses inline code, between codeStart and codeEnd
+code =
+ try (do
+ string "``"
+ result <- manyTill anyChar (string "``")
+ let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+ return (Code result'))
+
+emph = do
+ result <- enclosed (char '*') (char '*') inline
+ return (Emph (normalizeSpaces result))
+
+strong = do
+ result <- enclosed (string "**") (string "**") inline
+ return (Strong (normalizeSpaces result))
+
+whitespace = do
+ many1 spaceChar <?> "whitespace"
+ return Space
+
+tabchar = do
+ tab
+ return (Str "\t")
+
+str = do
+ notFollowedBy' oneWordReferenceLink
+ result <- many1 (noneOf (specialChars ++ "\t\n "))
+ return (Str result)
+
+-- an endline character that can be treated as a space, not a structural break
+endline =
+ try (do
+ newline
+ notFollowedBy blankline
+ -- parse potential list starts at beginning of line differently if in a list:
+ st <- getState
+ if ((stateParserContext st) == ListItemState) then
+ notFollowedBy' (choice [orderedListStart, bulletListStart])
+ else
+ option () pzero
+ return Space)
+
+--
+-- links
+--
+
+link = choice [explicitLink, referenceLink, autoLink, oneWordReferenceLink] <?> "link"
+
+explicitLink =
+ try (do
+ char '`'
+ label <- manyTill inline (try (do {spaces; char '<'}))
+ src <- manyTill (noneOf ">\n ") (char '>')
+ skipSpaces
+ string "`_"
+ return (Link (normalizeSpaces label) (Src (removeLeadingTrailingSpace src) "")))
+
+anonymousLinkEnding =
+ try (do
+ char '_'
+ state <- getState
+ let anonKeys = stateKeyBlocks state
+ -- if there's a list of anon key refs (from previous pass), pop one off.
+ -- otherwise return an anon key ref for the next pass to take care of...
+ case anonKeys of
+ (Key [Str "_"] src):rest ->
+ do{ setState (state { stateKeyBlocks = rest });
+ return src }
+ otherwise -> return (Ref [Str "_"]))
+
+referenceLink =
+ try (do
+ char '`'
+ label <- manyTill inline (string "`_")
+ src <- option (Ref []) anonymousLinkEnding
+ return (Link (normalizeSpaces label) src))
+
+oneWordReferenceLink =
+ try (do
+ label <- many1 alphaNum
+ char '_'
+ src <- option (Ref []) anonymousLinkEnding
+ notFollowedBy alphaNum -- because this_is_not a link
+ return (Link [Str label] src))
+
+uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", "mailto:",
+ "news:", "telnet:" ]
+
+uri = try (do
+ scheme <- uriScheme
+ identifier <- many1 (noneOf " \t\n")
+ return (scheme ++ identifier))
+
+autoURI = try (do
+ src <- uri
+ return (Link [Str src] (Src src "")))
+
+emailChar = alphaNum <|> oneOf "-+_."
+
+emailAddress = try (do
+ firstLetter <- alphaNum
+ restAddr <- many emailChar
+ let addr = firstLetter:restAddr
+ char '@'
+ dom <- domain
+ return (addr ++ '@':dom))
+
+domainChar = alphaNum <|> char '-'
+
+domain = try (do
+ first <- many1 domainChar
+ dom <- many1 (try (do{ char '.'; many1 domainChar }))
+ return (joinWithSep "." (first:dom)))
+
+autoEmail = try (do
+ src <- emailAddress
+ return (Link [Str src] (Src ("mailto:" ++ src) "")))
+
+autoLink = autoURI <|> autoEmail
+
+-- For now, we assume that all substitution references are for images.
+image =
+ try (do
+ char '|'
+ ref <- manyTill inline (char '|')
+ return (Image (normalizeSpaces ref) (Ref ref)))
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
new file mode 100644
index 000000000..dc58dd6b4
--- /dev/null
+++ b/src/Text/Pandoc/Shared.hs
@@ -0,0 +1,417 @@
+-- | Utility functions and definitions used by the various Pandoc modules.
+module Text.Pandoc.Shared (
+ -- * Text processing
+ gsub,
+ joinWithSep,
+ tabsToSpaces,
+ backslashEscape,
+ escapePreservingRegex,
+ endsWith,
+ stripTrailingNewlines,
+ removeLeadingTrailingSpace,
+ removeLeadingSpace,
+ removeTrailingSpace,
+ -- * Parsing
+ readWith,
+ testStringWith,
+ HeaderType (..),
+ ParserContext (..),
+ ParserState (..),
+ defaultParserState,
+ -- * Native format prettyprinting
+ prettyPandoc,
+ -- * Pandoc block list processing
+ consolidateList,
+ isNoteBlock,
+ splitBySpace,
+ normalizeSpaces,
+ compactify,
+ generateReference,
+ WriterOptions (..),
+ KeyTable,
+ keyTable,
+ lookupKeySrc,
+ refsMatch,
+ replaceReferenceLinks,
+ replaceRefLinksBlockList
+ ) where
+import Text.Pandoc.Definition
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.HtmlEntities ( decodeEntities )
+import Text.Regex ( matchRegexAll, mkRegex, subRegex, Regex )
+import Char ( toLower )
+import List ( find, groupBy )
+
+-- | Parse a string with a given parser and state.
+readWith :: GenParser Char ParserState a -- ^ parser
+ -> ParserState -- ^ initial state
+ -> String -- ^ input string
+ -> a
+readWith parser state input =
+ case runParser parser state "source" input of
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result -> result
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: (Show a) =>
+ GenParser Char ParserState a
+ -> String
+ -> IO ()
+testStringWith parser str = putStrLn $ show $ readWith parser defaultParserState str
+
+-- | Parser state
+
+data HeaderType
+ = SingleHeader Char -- ^ Single line of characters underneath
+ | DoubleHeader Char -- ^ Lines of characters above and below
+ deriving (Eq, Show)
+
+data ParserContext
+ = BlockQuoteState -- ^ Used when running parser on contents of blockquote
+ | ListItemState -- ^ Used when running parser on list item contents
+ | NullState -- ^ Default state
+ deriving (Eq, Show)
+
+data ParserState = ParserState
+ { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML and LaTeX?
+ stateParserContext :: ParserContext, -- ^ What are we parsing?
+ stateKeyBlocks :: [Block], -- ^ List of reference key blocks
+ stateKeysUsed :: [[Inline]], -- ^ List of references used so far
+ stateNoteBlocks :: [Block], -- ^ List of note blocks
+ stateTabStop :: Int, -- ^ Tab stop
+ stateStandalone :: Bool, -- ^ If @True@, parse bibliographic info
+ stateTitle :: [Inline], -- ^ Title of document
+ stateAuthors :: [String], -- ^ Authors of document
+ stateDate :: String, -- ^ Date of document
+ stateHeaderTable :: [HeaderType] } -- ^ List of header types used, in what order (for reStructuredText only)
+ deriving Show
+
+defaultParserState :: ParserState
+defaultParserState =
+ ParserState { stateParseRaw = False,
+ stateParserContext = NullState,
+ stateKeyBlocks = [],
+ stateKeysUsed = [],
+ stateNoteBlocks = [],
+ stateTabStop = 4,
+ stateStandalone = False,
+ stateTitle = [],
+ stateAuthors = [],
+ stateDate = [],
+ stateHeaderTable = [] }
+
+-- | Consolidate @Str@s and @Space@s in an inline list into one big @Str@.
+-- Collapse adjacent @Space@s.
+consolidateList :: [Inline] -> [Inline]
+consolidateList ((Str a):(Str b):rest) = consolidateList ((Str (a ++ b)):rest)
+consolidateList ((Str a):Space:rest) = consolidateList ((Str (a ++ " ")):rest)
+consolidateList (Space:(Str a):rest) = consolidateList ((Str (" " ++ a)):rest)
+consolidateList (Space:Space:rest) = consolidateList ((Str " "):rest)
+consolidateList (inline:rest) = inline:(consolidateList rest)
+consolidateList [] = []
+
+-- | Indent string as a block.
+indentBy :: Int -- ^ Number of spaces to indent the block
+ -> Int -- ^ Number of spaces to indent first line, relative to block
+ -> String -- ^ Contents of block to indent
+ -> String
+indentBy num first [] = ""
+indentBy num first str =
+ let (firstLine:restLines) = lines str
+ firstLineIndent = num + first in
+ (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ (joinWithSep "\n" $ map (\line -> (replicate num ' ') ++ line) restLines)
+
+-- | Prettyprint list of Pandoc blocks elements.
+prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
+ -> [Block] -- ^ List of blocks
+ -> String
+prettyBlockList indent [] = indentBy indent 0 "[]"
+prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]"
+
+-- | Prettyprint Pandoc block element.
+prettyBlock :: Block -> String
+prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ (prettyBlockList 2 blocks)
+prettyBlock (Note ref blocks) = "Note " ++ (show ref) ++ "\n " ++ (prettyBlockList 2 blocks)
+prettyBlock (OrderedList blockLists) = "OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
+prettyBlock (BulletList blockLists) = "BulletList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
+prettyBlock block = show block
+
+-- | Prettyprint Pandoc document.
+prettyPandoc :: Pandoc -> String
+prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++ ")\n" ++ (prettyBlockList 0 blocks)
+
+-- | Convert tabs to spaces (with adjustable tab stop).
+tabsToSpaces :: Int -- ^ Tabstop
+ -> String -- ^ String to convert
+ -> String
+tabsToSpaces tabstop str =
+ unlines (map (tabsInLine tabstop tabstop) (lines str))
+
+-- | Convert tabs to spaces in one line.
+tabsInLine :: Int -- ^ Number of spaces to next tab stop
+ -> Int -- ^ Tabstop
+ -> String -- ^ Line to convert
+ -> String
+tabsInLine num tabstop "" = ""
+tabsInLine num tabstop (c:cs) =
+ let replacement = (if (c == '\t') then (replicate num ' ') else [c]) in
+ let nextnumraw = (num - (length replacement)) in
+ let nextnum = if (nextnumraw < 1) then (nextnumraw + tabstop) else nextnumraw in
+ replacement ++ (tabsInLine nextnum tabstop cs)
+
+-- | Substitute string for every occurrence of regular expression.
+gsub :: String -- ^ Regular expression (as string) to substitute for
+ -> String -- ^ String to substitute for the regex
+ -> String -- ^ String to be substituted in
+ -> String
+gsub regex replacement str = subRegex (mkRegex regex) str replacement
+
+-- | Escape designated characters with backslash.
+backslashEscape :: [Char] -- ^ list of special characters to escape
+ -> String -- ^ string input
+ -> String
+backslashEscape special [] = []
+backslashEscape special (x:xs) = if x `elem` special then
+ '\\':x:(backslashEscape special xs)
+ else
+ x:(backslashEscape special xs)
+
+-- | Escape string by applying a function, but don't touch anything that matches regex.
+escapePreservingRegex :: (String -> String) -- ^ Escaping function
+ -> Regex -- ^ Regular expression
+ -> String -- ^ String to be escaped
+ -> String
+escapePreservingRegex escapeFunction regex str =
+ case (matchRegexAll regex str) of
+ Nothing -> escapeFunction str
+ Just (before, matched, after, _) ->
+ (escapeFunction before) ++ matched ++
+ (escapePreservingRegex escapeFunction regex after)
+
+-- | Returns @True@ if string ends with given character.
+endsWith :: Char -> [Char] -> Bool
+endsWith char [] = False
+endsWith char str = (char == last str)
+
+-- | Returns @True@ if block is a @Note@ block
+isNoteBlock :: Block -> Bool
+isNoteBlock (Note ref blocks) = True
+isNoteBlock _ = False
+
+-- | Joins a list of lists, separated by another list.
+joinWithSep :: [a] -- ^ List to use as separator
+ -> [[a]] -- ^ Lists to join
+ -> [a]
+joinWithSep sep [] = []
+joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
+
+-- | Strip trailing newlines from string.
+stripTrailingNewlines :: String -> String
+stripTrailingNewlines "" = ""
+stripTrailingNewlines str =
+ if (last str) == '\n' then
+ stripTrailingNewlines (init str)
+ else
+ str
+
+-- | Remove leading and trailing space (including newlines) from string.
+removeLeadingTrailingSpace :: String -> String
+removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
+
+-- | Remove leading space (including newlines) from string.
+removeLeadingSpace :: String -> String
+removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') || (x == '\t'))
+
+-- | Remove trailing space (including newlines) from string.
+removeTrailingSpace :: String -> String
+removeTrailingSpace = reverse . removeLeadingSpace . reverse
+
+-- | Split list of inlines into groups separated by a space.
+splitBySpace :: [Inline] -> [[Inline]]
+splitBySpace lst = filter (\a -> (/= Space) (head a))
+ (groupBy (\a b -> (/= Space) a && (/= Space) b) lst)
+
+-- | Normalize a list of inline elements: remove leading and trailing
+-- @Space@ elements, and collapse double @Space@s into singles.
+normalizeSpaces :: [Inline] -> [Inline]
+normalizeSpaces [] = []
+normalizeSpaces list =
+ let removeDoubles [] = []
+ removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
+ removeDoubles (x:rest) = x:(removeDoubles rest) in
+ let removeLeading [] = []
+ removeLeading lst = if ((head lst) == Space) then tail lst else lst in
+ let removeTrailing [] = []
+ removeTrailing lst = if ((last lst) == Space) then init lst else lst in
+ removeLeading $ removeTrailing $ removeDoubles list
+
+-- | Change final list item from @Para@ to @Plain@ if the list should be compact.
+compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
+ -> [[Block]]
+compactify [] = []
+compactify items =
+ let final = last items
+ others = init items in
+ case final of
+ [Para a] -> if any containsPara others then items else others ++ [[Plain a]]
+ otherwise -> items
+
+containsPara :: [Block] -> Bool
+containsPara [] = False
+containsPara ((Para a):rest) = True
+containsPara ((BulletList items):rest) = (any containsPara items) || (containsPara rest)
+containsPara ((OrderedList items):rest) = (any containsPara items) || (containsPara rest)
+containsPara (x:rest) = containsPara rest
+
+-- | Options for writers
+data WriterOptions = WriterOptions
+ { writerStandalone :: Bool -- ^ If @True@, writer header and footer
+ , writerTitlePrefix :: String -- ^ Prefix for HTML titles
+ , writerHeader :: String -- ^ Header for the document
+ , writerIncludeBefore :: String -- ^ String to include before the document body
+ , writerIncludeAfter :: String -- ^ String to include after the document body
+ , writerSmartypants :: Bool -- ^ If @True@, use smart quotes, dashes, and ellipses
+ , writerS5 :: Bool -- ^ @True@ if we're writing S5 instead of normal HTML
+ , writerIncremental :: Bool -- ^ If @True@, display S5 lists incrementally
+ , writerNumberSections :: Bool -- ^ If @True@, number sections in LaTeX
+ , writerTabStop :: Int } -- ^ Tabstop for conversion between spaces and tabs
+ deriving Show
+
+--
+-- Functions for constructing lists of reference keys
+--
+
+-- | Returns @Just@ numerical key reference if there's already a key
+-- for the specified target in the list of blocks, otherwise @Nothing@.
+keyFoundIn :: [Block] -- ^ List of key blocks to search
+ -> Target -- ^ Target to search for
+ -> Maybe String
+keyFoundIn [] src = Nothing
+keyFoundIn ((Key [Str num] src1):rest) src = if (src1 == src) then
+ Just num
+ else
+ keyFoundIn rest src
+keyFoundIn (_:rest) src = keyFoundIn rest src
+
+-- | Return next unique numerical key, given keyList
+nextUniqueKey :: [[Inline]] -> String
+nextUniqueKey keys =
+ let nums = [1..10000]
+ notAKey n = not (any (== [Str (show n)]) keys) in
+ case (find notAKey nums) of
+ Just x -> show x
+ Nothing -> error "Could not find unique key for reference link"
+
+-- | Generate a reference for a URL (either an existing reference, if
+-- there is one, or a new one, if there isn't) and update parser state.
+generateReference :: String -- ^ URL
+ -> String -- ^ Title
+ -> GenParser tok ParserState Target
+generateReference url title = do
+ let src = Src (decodeEntities url) (decodeEntities title)
+ state <- getState
+ let keyBlocks = stateKeyBlocks state
+ let keysUsed = stateKeysUsed state
+ case (keyFoundIn keyBlocks src) of
+ Just num -> return (Ref [Str num])
+ Nothing -> do
+ let nextNum = nextUniqueKey keysUsed
+ updateState (\st -> st {stateKeyBlocks = (Key [Str nextNum] src):keyBlocks,
+ stateKeysUsed = [Str nextNum]:keysUsed})
+ return (Ref [Str nextNum])
+
+--
+-- code to replace reference links with real links and remove unneeded key blocks
+--
+
+type KeyTable = [([Inline], Target)]
+
+-- | Returns @True@ if block is a Key block
+isRefBlock :: Block -> Bool
+isRefBlock (Key _ _) = True
+isRefBlock _ = False
+
+-- | Returns a pair of a list of pairs of keys and associated sources, and a new
+-- list of blocks with the included key blocks deleted.
+keyTable :: [Block] -> (KeyTable, [Block])
+keyTable [] = ([],[])
+keyTable ((Key ref target):lst) = (((ref, target):table), rest)
+ where (table, rest) = keyTable lst
+keyTable (Null:lst) = keyTable lst -- get rid of Nulls
+keyTable (Blank:lst) = keyTable lst -- get rid of Blanks
+keyTable ((BlockQuote blocks):lst) = ((table1 ++ table2), ((BlockQuote rest1):rest2))
+ where (table1, rest1) = keyTable blocks
+ (table2, rest2) = keyTable lst
+keyTable ((Note ref blocks):lst) = ((table1 ++ table2), ((Note ref rest1):rest2))
+ where (table1, rest1) = keyTable blocks
+ (table2, rest2) = keyTable lst
+keyTable ((OrderedList blockLists):lst) = ((table1 ++ table2), ((OrderedList rest1):rest2))
+ where results = map keyTable blockLists
+ rest1 = map snd results
+ table1 = concatMap fst results
+ (table2, rest2) = keyTable lst
+keyTable ((BulletList blockLists):lst) = ((table1 ++ table2), ((BulletList rest1):rest2))
+ where results = map keyTable blockLists
+ rest1 = map snd results
+ table1 = concatMap fst results
+ (table2, rest2) = keyTable lst
+keyTable (other:lst) = (table, (other:rest))
+ where (table, rest) = keyTable lst
+
+-- | Look up key in key table and return target object.
+lookupKeySrc :: KeyTable -- ^ Key table
+ -> [Inline] -- ^ Key
+ -> Maybe Target
+lookupKeySrc table key = case table of
+ [] -> Nothing
+ (k, src):rest -> if (refsMatch k key) then Just src else lookupKeySrc rest key
+
+-- | Returns @True@ if keys match (case insensitive).
+refsMatch :: [Inline] -> [Inline] -> Bool
+refsMatch ((Str x):restx) ((Str y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Code x):restx) ((Code y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((TeX x):restx) ((TeX y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((NoteRef x):restx) ((NoteRef y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Emph x):restx) ((Emph y):resty) = refsMatch x y && refsMatch restx resty
+refsMatch ((Strong x):restx) ((Strong y):resty) = refsMatch x y && refsMatch restx resty
+refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
+refsMatch [] x = null x
+refsMatch x [] = null x
+
+-- | Replace reference links with explicit links in list of blocks, removing key blocks.
+replaceReferenceLinks :: [Block] -> [Block]
+replaceReferenceLinks blocks =
+ let (keytable, purged) = keyTable blocks in
+ replaceRefLinksBlockList keytable purged
+
+-- | Use key table to replace reference links with explicit links in a list of blocks
+replaceRefLinksBlockList :: KeyTable -> [Block] -> [Block]
+replaceRefLinksBlockList keytable lst = map (replaceRefLinksBlock keytable) lst
+
+-- | Use key table to replace reference links with explicit links in a block
+replaceRefLinksBlock :: KeyTable -> Block -> Block
+replaceRefLinksBlock keytable (Plain lst) = Plain (map (replaceRefLinksInline keytable) lst)
+replaceRefLinksBlock keytable (Para lst) = Para (map (replaceRefLinksInline keytable) lst)
+replaceRefLinksBlock keytable (Header lvl lst) = Header lvl (map (replaceRefLinksInline keytable) lst)
+replaceRefLinksBlock keytable (BlockQuote lst) = BlockQuote (map (replaceRefLinksBlock keytable) lst)
+replaceRefLinksBlock keytable (Note ref lst) = Note ref (map (replaceRefLinksBlock keytable) lst)
+replaceRefLinksBlock keytable (OrderedList lst) = OrderedList (map (replaceRefLinksBlockList keytable) lst)
+replaceRefLinksBlock keytable (BulletList lst) = BulletList (map (replaceRefLinksBlockList keytable) lst)
+replaceRefLinksBlock keytable other = other
+
+-- | Use key table to replace reference links with explicit links in an inline element.
+replaceRefLinksInline :: KeyTable -> Inline -> Inline
+replaceRefLinksInline keytable (Link text (Ref ref)) = (Link newText newRef)
+ where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of
+ Nothing -> (Ref ref)
+ Just src -> src
+ newText = map (replaceRefLinksInline keytable) text
+replaceRefLinksInline keytable (Image text (Ref ref)) = (Image newText newRef)
+ where newRef = case lookupKeySrc keytable (if (null ref) then text else ref) of
+ Nothing -> (Ref ref)
+ Just src -> src
+ newText = map (replaceRefLinksInline keytable) text
+replaceRefLinksInline keytable (Emph lst) = Emph (map (replaceRefLinksInline keytable) lst)
+replaceRefLinksInline keytable (Strong lst) = Strong (map (replaceRefLinksInline keytable) lst)
+replaceRefLinksInline keytable other = other
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
new file mode 100644
index 000000000..66590809f
--- /dev/null
+++ b/src/Text/Pandoc/UTF8.hs
@@ -0,0 +1,43 @@
+-- | Functions for converting Unicode strings to UTF-8 and vice versa.
+--
+-- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>.
+-- (c) 2003, OGI School of Science & Engineering, Oregon Health and
+-- Science University.
+--
+-- Modified by Martin Norbaeck to pass illegal UTF-8 sequences through unchanged.
+module Text.Pandoc.UTF8 (
+ decodeUTF8,
+ encodeUTF8
+ ) where
+
+-- From the Char module supplied with HBC.
+
+-- | Take a UTF-8 string and decode it into a Unicode string.
+decodeUTF8 :: String -> String
+decodeUTF8 "" = ""
+decodeUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
+ '\x80' <= c' && c' <= '\xbf' =
+ toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : decodeUTF8 cs
+decodeUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' &&
+ '\x80' <= c' && c' <= '\xbf' &&
+ '\x80' <= c'' && c'' <= '\xbf' =
+ toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : decodeUTF8 cs
+decodeUTF8 (c:cs) = c : decodeUTF8 cs
+
+-- | Take a Unicode string and encode it as a UTF-8 string.
+encodeUTF8 :: String -> String
+encodeUTF8 "" = ""
+encodeUTF8 (c:cs) =
+ if c > '\x0000' && c < '\x0080' then
+ c : encodeUTF8 cs
+ else if c < toEnum 0x0800 then
+ let i = fromEnum c
+ in toEnum (0xc0 + i `div` 0x40) :
+ toEnum (0x80 + i `mod` 0x40) :
+ encodeUTF8 cs
+ else
+ let i = fromEnum c
+ in toEnum (0xe0 + i `div` 0x1000) :
+ toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) :
+ toEnum (0x80 + i `mod` 0x40) :
+ encodeUTF8 cs
diff --git a/src/Text/Pandoc/Writers/DefaultHeaders.hs b/src/Text/Pandoc/Writers/DefaultHeaders.hs
new file mode 100644
index 000000000..87dd7d8ff
--- /dev/null
+++ b/src/Text/Pandoc/Writers/DefaultHeaders.hs
@@ -0,0 +1,27 @@
+----------------------------------------------------
+-- Do not edit this file by hand. Edit
+-- 'templates/DefaultHeaders.hs'
+-- and run ./fillTemplates.pl Text/Pandoc/Writers/DefaultHeaders.hs
+----------------------------------------------------
+
+-- | Default headers for Pandoc writers.
+module Text.Pandoc.Writers.DefaultHeaders (
+ defaultLaTeXHeader,
+ defaultHtmlHeader,
+ defaultS5Header,
+ defaultRTFHeader
+ ) where
+import Text.Pandoc.Writers.S5
+
+defaultLaTeXHeader :: String
+defaultLaTeXHeader = "\\documentclass{article}\n\\usepackage{hyperref}\n\\usepackage{ucs}\n\\usepackage[utf8x]{inputenc}\n\\usepackage{graphicx}\n\\setlength{\\parindent}{0pt}\n\\setlength{\\parskip}{6pt plus 2pt minus 1pt}\n% This is needed for code blocks in footnotes:\n\\usepackage{fancyvrb}\n\\VerbatimFootnotes\n"
+
+defaultHtmlHeader :: String
+defaultHtmlHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n \"http://www.w3.org/TR/html4/loose.dtd\">\n<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<meta name=\"generator\" content=\"pandoc\" />\n<style type=\"text/css\">\ndiv.pandocNote { border-left: 1px solid grey; padding-left: 1em; }\nspan.pandocNoteRef { vertical-align: super; font-size: 80%; }\nspan.pandocNoteMarker { }\n</style>\n"
+
+defaultS5Header :: String
+defaultS5Header = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<!-- configuration parameters -->\n<meta name=\"defaultView\" content=\"slideshow\" />\n<meta name=\"controlVis\" content=\"hidden\" />\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<meta name=\"generator\" content=\"pandoc\" />\n" ++ s5CSS ++ s5Javascript
+
+defaultRTFHeader :: String
+defaultRTFHeader = "{\\rtf1\\ansi\\deff0{\\fonttbl{\\f0 Times New Roman;}{\\f1 Courier;}}\n{\\colortbl;\\red255\\green0\\blue0;\\red0\\green0\\blue255;}\n\\widowctrl\\hyphauto\n\n"
+
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
new file mode 100644
index 000000000..9eecf2761
--- /dev/null
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -0,0 +1,197 @@
+-- | Converts Pandoc to HTML.
+module Text.Pandoc.Writers.HTML (
+ writeHtml
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Html ( stringToHtmlString )
+import Text.Regex ( mkRegex )
+import Numeric ( showHex )
+import Char ( ord )
+import List ( isPrefixOf )
+
+-- | Convert Pandoc document to string in HTML format.
+writeHtml :: WriterOptions -> Pandoc -> String
+writeHtml options (Pandoc (Meta title authors date) blocks) =
+ let titlePrefix = writerTitlePrefix options in
+ let topTitle = if not (null titlePrefix) then
+ [Str titlePrefix] ++ (if not (null title) then [Str " - "] ++ title else [])
+ else
+ title in
+ let head = if (writerStandalone options) then
+ htmlHeader options (Meta topTitle authors date)
+ else
+ ""
+ titleBlocks = if (writerStandalone options) && (not (null title)) &&
+ (not (writerS5 options)) then
+ [RawHtml "<h1 class=\"title\">", Plain title, RawHtml "</h1>\n"]
+ else
+ []
+ foot = if (writerStandalone options) then "</body>\n</html>\n" else ""
+ body = (writerIncludeBefore options) ++
+ concatMap (blockToHtml options) (replaceReferenceLinks (titleBlocks ++ blocks)) ++
+ (writerIncludeAfter options) in
+ head ++ body ++ foot
+
+-- | Obfuscate a "mailto:" link using Javascript.
+obfuscateLink :: WriterOptions -> [Inline] -> String -> String
+obfuscateLink options text src =
+ let text' = inlineListToHtml options text in
+ let linkText = if src == ("mailto:" ++ text') then "e" else "'" ++ text' ++ "'"
+ altText = if src == ("mailto:" ++ text') then "\\1 [at] \\2" else text' ++ " (\\1 [at] \\2)" in
+ gsub "mailto:([^@]*)@(.*)" ("<script type=\"text/javascript\">h='\\2';n='\\1';e=n+'@'+h;document.write('<a href=\"mailto:'+e+'\">'+" ++ linkText ++ "+'<\\/a>');</script><noscript>" ++ altText ++ "</noscript>") src
+
+-- | Obfuscate character as entity.
+obfuscateChar :: Char -> String
+obfuscateChar char = let num = ord char in
+ let numstr = if even num then (show num) else ("x" ++ (showHex num "")) in
+ "&#" ++ numstr ++ ";"
+
+-- | Escape string, preserving character entities and quote.
+stringToHtml :: String -> String
+stringToHtml str = escapePreservingRegex stringToHtmlString (mkRegex "\"|(&[[:alnum:]]*;)") str
+
+-- | Escape string as in 'stringToHtml' but add smartypants filter.
+stringToSmartHtml :: String -> String
+stringToSmartHtml =
+ let escapeDoubleQuotes =
+ gsub "(\"|&quot;|'')" "&rdquo;" . -- rest are right quotes
+ gsub "([[:space:]])(\"|&quot;)" "\\1&ldquo;" . -- never right quo after space
+ gsub "(\"|&quot;|``)('|`|&lsquo;)([^[:punct:][:space:]])" "&ldquo;&lsquo;\\3" . -- "'word left
+ gsub "(\"|&quot;|``)([^[:punct:][:space:]])" "&ldquo;\\2" -- "word left
+ escapeSingleQuotes =
+ gsub "'" "&rsquo;" . -- otherwise right
+ gsub "([[:space:]])'" "\\1&lsquo;" . -- never right quo after space
+ gsub "`" "&lsquo;" . -- ` is left
+ gsub "([^[:punct:][:space:]])'" "\\1&rsquo;" . -- word' right
+ gsub "('|`)(\"|&quot;|&ldquo;|``)" "&lsquo;&ldquo;" . -- '"word left
+ gsub "^('|`)([^[:punct:][:space:]])" "&lsquo;\\2" . -- 'word left
+ gsub "([^[:punct:][:space:]])'(s|S)" "\\1&rsquo;\\2" . -- possessive
+ gsub "([[:space:]])'([^[:punct:][:space:]])" "\\1&lsquo;\\2" . -- 'word left
+ gsub "'([0-9][0-9](s|S))" "&rsquo;\\1" -- '80s - decade abbrevs.
+ escapeDashes = gsub " ?-- ?" "&mdash;" .
+ gsub " ?--- ?" "&mdash;" .
+ gsub "([0-9])--?([0-9])" "\\1&ndash;\\2"
+ escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "&hellip;" in
+ escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses . stringToHtml
+
+-- | Escape code string as needed for HTML.
+codeStringToHtml :: String -> String
+codeStringToHtml [] = []
+codeStringToHtml (x:xs) = case x of
+ '&' -> "&amp;" ++ codeStringToHtml xs
+ '<' -> "&lt;" ++ codeStringToHtml xs
+ _ -> x:(codeStringToHtml xs)
+
+-- | Escape string to HTML appropriate for attributes
+attributeStringToHtml :: String -> String
+attributeStringToHtml = gsub "\"" "&quot;"
+
+-- | Returns an HTML header with appropriate bibliographic information.
+htmlHeader :: WriterOptions -> Meta -> String
+htmlHeader options (Meta title authors date) =
+ let titletext = if (null title) then
+ ""
+ else
+ "<title>" ++ (inlineListToHtml options title) ++ "</title>\n"
+ authortext = if (null authors) then
+ ""
+ else
+ "<meta name=\"author\" content=\"" ++
+ (joinWithSep ", " (map stringToHtml authors)) ++ "\" />\n"
+ datetext = if (date == "") then
+ ""
+ else
+ "<meta name=\"date\" content=\"" ++ (stringToHtml date) ++ "\" />\n" in
+ (writerHeader options) ++ authortext ++ datetext ++ titletext ++ "</head>\n<body>\n"
+
+-- | Convert Pandoc block element to HTML.
+blockToHtml :: WriterOptions -> Block -> String
+blockToHtml options Blank = "\n"
+blockToHtml options Null = ""
+blockToHtml options (Plain lst) = inlineListToHtml options lst
+blockToHtml options (Para lst) = "<p>" ++ (inlineListToHtml options lst) ++ "</p>\n"
+blockToHtml options (BlockQuote blocks) =
+ if (writerS5 options) then -- in S5, treat list in blockquote specially
+ -- if default is incremental, make it nonincremental; otherwise incremental
+ let inc = not (writerIncremental options) in
+ case blocks of
+ [BulletList lst] -> blockToHtml (options {writerIncremental = inc}) (BulletList lst)
+ [OrderedList lst] -> blockToHtml (options {writerIncremental = inc}) (OrderedList lst)
+ otherwise -> "<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++
+ "</blockquote>\n"
+ else
+ "<blockquote>\n" ++ (concatMap (blockToHtml options) blocks) ++ "</blockquote>\n"
+blockToHtml options (Note ref lst) =
+ let marker = "<span class=\"pandocNoteMarker\"><a name=\"note_" ++ ref ++
+ "\" href=\"#ref_" ++ ref ++ "\">(" ++ ref ++ ")</a></span> " in
+ let contents = (concatMap (blockToHtml options) lst) in
+ let contents' = case contents of
+ ('<':'p':'>':rest) -> "<p class=\"first\">" ++ marker ++ rest ++ "\n"
+ otherwise -> marker ++ contents ++ "\n" in
+ "<div class=\"pandocNote\">\n" ++ contents' ++ "</div>\n"
+blockToHtml options (Key _ _) = ""
+blockToHtml options (CodeBlock str) = "<pre><code>" ++ (codeStringToHtml str) ++
+ "</code></pre>\n"
+blockToHtml options (RawHtml str) = str
+blockToHtml options (BulletList lst) =
+ let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in
+ "<ul" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ "</ul>\n"
+blockToHtml options (OrderedList lst) =
+ let attribs = if (writerIncremental options) then " class=\"incremental\"" else "" in
+ "<ol" ++ attribs ++ ">\n" ++ (concatMap (listItemToHtml options) lst) ++ "</ol>\n"
+blockToHtml options HorizontalRule = "<hr />\n"
+blockToHtml options (Header level lst) = if ((level > 0) && (level <= 6)) then
+ "<h" ++ (show level) ++ ">" ++
+ (inlineListToHtml options lst) ++
+ "</h" ++ (show level) ++ ">\n"
+ else
+ "<p>" ++ (inlineListToHtml options lst) ++ "</p>\n"
+listItemToHtml options list = "<li>" ++ (concatMap (blockToHtml options) list) ++ "</li>\n"
+
+-- | Convert list of Pandoc inline elements to HTML.
+inlineListToHtml :: WriterOptions -> [Inline] -> String
+inlineListToHtml options lst =
+ -- consolidate adjacent Str and Space elements for more intelligent
+ -- smartypants filtering
+ let lst' = consolidateList lst in
+ concatMap (inlineToHtml options) lst'
+
+-- | Convert Pandoc inline element to HTML.
+inlineToHtml :: WriterOptions -> Inline -> String
+inlineToHtml options (Emph lst) = "<em>" ++ (inlineListToHtml options lst) ++ "</em>"
+inlineToHtml options (Strong lst) = "<strong>" ++ (inlineListToHtml options lst) ++ "</strong>"
+inlineToHtml options (Code str) = "<code>" ++ (codeStringToHtml str) ++ "</code>"
+inlineToHtml options (Str str) = if (writerSmartypants options) then
+ stringToSmartHtml str
+ else
+ stringToHtml str
+inlineToHtml options (TeX str) = (codeStringToHtml str)
+inlineToHtml options (HtmlInline str) = str
+inlineToHtml options (LineBreak) = "<br />\n"
+inlineToHtml options Space = " "
+inlineToHtml options (Link text (Src src tit)) =
+ let title = attributeStringToHtml tit in
+ if (isPrefixOf "mailto:" src) then
+ obfuscateLink options text src
+ else
+ "<a href=\"" ++ (codeStringToHtml src) ++ "\"" ++
+ (if tit /= "" then " title=\"" ++ title ++ "\">" else ">") ++
+ (inlineListToHtml options text) ++ "</a>"
+inlineToHtml options (Link text (Ref [])) = "[" ++ (inlineListToHtml options text) ++ "]"
+inlineToHtml options (Link text (Ref ref)) = "[" ++ (inlineListToHtml options text) ++ "][" ++
+ (inlineListToHtml options ref) ++ "]" -- this is what markdown does, for better or worse
+inlineToHtml options (Image alt (Src source tit)) =
+ let title = attributeStringToHtml tit
+ alternate = inlineListToHtml options alt in
+ "<img src=\"" ++ source ++ "\"" ++
+ (if tit /= "" then " title=\"" ++ title ++ "\"" else "") ++
+ (if alternate /= "" then " alt=\"" ++ alternate ++ "\"" else "") ++ ">"
+inlineToHtml options (Image alternate (Ref [])) =
+ "![" ++ (inlineListToHtml options alternate) ++ "]"
+inlineToHtml options (Image alternate (Ref ref)) =
+ "![" ++ (inlineListToHtml options alternate) ++ "][" ++ (inlineListToHtml options ref) ++ "]"
+inlineToHtml options (NoteRef ref) =
+ "<span class=\"pandocNoteRef\"><a name=\"ref_" ++ ref ++ "\" href=\"#note_" ++ ref ++
+ "\">(" ++ ref ++ ")</a></span>"
+
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
new file mode 100644
index 000000000..b77789e90
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -0,0 +1,164 @@
+-- | Convert Pandoc to LaTeX.
+module Text.Pandoc.Writers.LaTeX (
+ writeLaTeX
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import List ( (\\) )
+
+-- | Convert Pandoc to LaTeX.
+writeLaTeX :: WriterOptions -> Pandoc -> String
+writeLaTeX options (Pandoc meta blocks) =
+ let notes = filter isNoteBlock blocks in -- assumes all notes are at outer level
+ let body = (writerIncludeBefore options) ++
+ (concatMap (blockToLaTeX notes) (replaceReferenceLinks blocks)) ++
+ (writerIncludeAfter options) in
+ let head = if writerStandalone options then
+ latexHeader notes options meta
+ else
+ "" in
+ let foot = if writerStandalone options then "\n\\end{document}\n" else "" in
+ head ++ body ++ foot
+
+-- | Insert bibliographic information into LaTeX header.
+latexHeader :: [Block] -- ^ List of note blocks to use in resolving note refs
+ -> WriterOptions -- ^ Options, including LaTeX header
+ -> Meta -- ^ Meta with bibliographic information
+ -> String
+latexHeader notes options (Meta title authors date) =
+ let titletext = if null title then
+ ""
+ else
+ "\\title{" ++ inlineListToLaTeX notes title ++ "}\n"
+ authorstext = if null authors then
+ ""
+ else
+ "\\author{" ++ (joinWithSep "\\\\" (map stringToLaTeX authors)) ++ "}\n"
+ datetext = if date == "" then
+ ""
+ else
+ "\\date{" ++ stringToLaTeX date ++ "}\n"
+ maketitle = if null title then
+ ""
+ else
+ "\\maketitle\n"
+ secnumline = if (writerNumberSections options) then
+ ""
+ else
+ "\\setcounter{secnumdepth}{0}\n"
+ header = writerHeader options in
+ header ++ secnumline ++ titletext ++ authorstext ++ datetext ++ "\\begin{document}\n" ++ maketitle
+
+
+-- escape things as needed for LaTeX (also ldots, dashes, quotes, etc.)
+
+escapeBrackets = backslashEscape "{}"
+escapeSpecial = backslashEscape "$%&~_#"
+
+escapeBackslash = gsub "\\\\" "\\\\textbackslash{}"
+fixBackslash = gsub "\\\\textbackslash\\\\\\{\\\\\\}" "\\\\textbackslash{}"
+escapeHat = gsub "\\^" "\\\\^{}"
+escapeBar = gsub "\\|" "\\\\textbar{}"
+escapeLt = gsub "<" "\\\\textless{}"
+escapeGt = gsub ">" "\\\\textgreater{}"
+
+escapeDoubleQuotes =
+ gsub "\"" "''" . -- rest are right quotes
+ gsub "([[:space:]])\"" "\\1``" . -- never right quote after space
+ gsub "\"('|`)([^[:punct:][:space:]])" "``{}`\\2" . -- "'word left
+ gsub "\"([^[:punct:][:space:]])" "``\\1" -- "word left
+
+escapeSingleQuotes =
+ gsub "('|`)(\"|``)" "`{}``" . -- '"word left
+ gsub "([^[:punct:][:space:]])`(s|S)" "\\1'\\2" . -- catch possessives
+ gsub "^'([^[:punct:][:space:]])" "`\\1" . -- 'word left
+ gsub "([[:space:]])'" "\\1`" . -- never right quote after space
+ gsub "([[:space:]])'([^[:punct:][:space:]])" "\\1`\\2" -- 'word left (leave possessives)
+
+escapeEllipses = gsub "\\.\\.\\.|\\. \\. \\." "\\ldots{}"
+
+escapeDashes = gsub "([0-9])-([0-9])" "\\1--\\2" .
+ gsub " -- " "---" .
+ gsub "([^[:punct:][:space:]])--([^[:punct:][:space:]])" "\\1---\\2"
+
+escapeSmart = escapeSingleQuotes . escapeDoubleQuotes . escapeDashes . escapeEllipses
+
+-- | Escape string for LaTeX (including smart quotes, dashes, ellipses)
+stringToLaTeX :: String -> String
+stringToLaTeX = escapeSmart . escapeGt . escapeLt . escapeBar . escapeHat .
+ escapeSpecial . fixBackslash . escapeBrackets . escapeBackslash
+
+-- | Remove all code elements from list of inline elements
+-- (because it's illegal to have a \\verb inside a command argument)
+deVerb :: [Inline] -> [Inline]
+deVerb [] = []
+deVerb ((Code str):rest) = (Str str):(deVerb rest)
+deVerb (other:rest) = other:(deVerb rest)
+
+-- | Convert Pandoc block element to LaTeX.
+blockToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs
+ -> Block -- ^ Block to convert
+ -> String
+blockToLaTeX notes Blank = "\n"
+blockToLaTeX notes Null = ""
+blockToLaTeX notes (Plain lst) = inlineListToLaTeX notes lst ++ "\n"
+blockToLaTeX notes (Para lst) = (inlineListToLaTeX notes lst) ++ "\n\n"
+blockToLaTeX notes (BlockQuote lst) =
+ "\\begin{quote}\n" ++ (concatMap (blockToLaTeX notes) lst) ++ "\\end{quote}\n"
+blockToLaTeX notes (Note ref lst) = ""
+blockToLaTeX notes (Key _ _) = ""
+blockToLaTeX notes (CodeBlock str) = "\\begin{verbatim}\n" ++ str ++ "\\end{verbatim}\n"
+blockToLaTeX notes (RawHtml str) = ""
+blockToLaTeX notes (BulletList lst) =
+ "\\begin{itemize}\n" ++ (concatMap (listItemToLaTeX notes) lst) ++ "\\end{itemize}\n"
+blockToLaTeX notes (OrderedList lst) =
+ "\\begin{enumerate}\n" ++ (concatMap (listItemToLaTeX notes) lst) ++ "\\end{enumerate}\n"
+blockToLaTeX notes HorizontalRule = "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n"
+blockToLaTeX notes (Header level lst) =
+ if (level > 0) && (level <= 3) then
+ "\\" ++ (concat (replicate (level - 1) "sub")) ++ "section{" ++
+ (inlineListToLaTeX notes (deVerb lst)) ++ "}\n\n"
+ else
+ (inlineListToLaTeX notes lst) ++ "\n\n"
+listItemToLaTeX notes list = "\\item " ++ (concatMap (blockToLaTeX notes) list)
+
+-- | Convert list of inline elements to LaTeX.
+inlineListToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs
+ -> [Inline] -- ^ Inlines to convert
+ -> String
+inlineListToLaTeX notes lst =
+ -- first, consolidate Str and Space for more effective smartquotes:
+ let lst' = consolidateList lst in
+ concatMap (inlineToLaTeX notes) lst'
+
+-- | Convert inline element to LaTeX
+inlineToLaTeX :: [Block] -- ^ List of note blocks to use in resolving note refs
+ -> Inline -- ^ Inline to convert
+ -> String
+inlineToLaTeX notes (Emph lst) = "\\emph{" ++ (inlineListToLaTeX notes (deVerb lst)) ++ "}"
+inlineToLaTeX notes (Strong lst) = "\\textbf{" ++ (inlineListToLaTeX notes (deVerb lst)) ++ "}"
+inlineToLaTeX notes (Code str) = "\\verb" ++ [chr] ++ stuffing ++ [chr]
+ where stuffing = str
+ chr = ((enumFromTo '!' '~') \\ stuffing) !! 0
+inlineToLaTeX notes (Str str) = stringToLaTeX str
+inlineToLaTeX notes (TeX str) = str
+inlineToLaTeX notes (HtmlInline str) = ""
+inlineToLaTeX notes (LineBreak) = "\\\\\n"
+inlineToLaTeX notes Space = " "
+inlineToLaTeX notes (Link text (Src src tit)) =
+ "\\href{" ++ src ++ "}{" ++ (inlineListToLaTeX notes (deVerb text)) ++ "}"
+inlineToLaTeX notes (Link text (Ref [])) = "[" ++ (inlineListToLaTeX notes text) ++ "]"
+inlineToLaTeX notes (Link text (Ref ref)) = "[" ++ (inlineListToLaTeX notes text) ++ "][" ++
+ (inlineListToLaTeX notes ref) ++ "]" -- this is what markdown does, for better or worse
+inlineToLaTeX notes (Image alternate (Src source tit)) = "\\includegraphics{" ++ source ++ "}"
+inlineToLaTeX notes (Image alternate (Ref [])) =
+ "![" ++ (inlineListToLaTeX notes alternate) ++ "]"
+inlineToLaTeX notes (Image alternate (Ref ref)) =
+ "![" ++ (inlineListToLaTeX notes alternate) ++ "][" ++ (inlineListToLaTeX notes ref) ++ "]"
+inlineToLaTeX [] (NoteRef ref) = ""
+inlineToLaTeX ((Note firstref firstblocks):rest) (NoteRef ref) =
+ if (firstref == ref) then
+ "\\footnote{" ++ (stripTrailingNewlines (concatMap (blockToLaTeX rest) firstblocks)) ++ "}"
+ else
+ inlineToLaTeX rest (NoteRef ref)
+
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
new file mode 100644
index 000000000..b73090f62
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -0,0 +1,149 @@
+-- | Converts Pandoc to Markdown.
+module Text.Pandoc.Writers.Markdown (
+ writeMarkdown
+ ) where
+import Text.Regex ( matchRegex, mkRegex )
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+-- | Convert Pandoc to Markdown.
+writeMarkdown :: WriterOptions -> Pandoc -> String
+writeMarkdown options (Pandoc meta blocks) =
+ let body = text (writerIncludeBefore options) <>
+ vcat (map (blockToMarkdown (writerTabStop options)) (formatKeys blocks)) $$
+ text (writerIncludeAfter options) in
+ let head = if (writerStandalone options) then
+ ((metaToMarkdown meta) $$ text (writerHeader options))
+ else
+ empty in
+ render $ head <> body
+
+-- | Escape special characters for Markdown.
+escapeString :: String -> String
+escapeString = backslashEscape "`<\\*_^"
+
+-- | Escape embedded \" in link title.
+escapeLinkTitle :: String -> String
+escapeLinkTitle = gsub "\"" "\\\\\""
+
+-- | Take list of inline elements and return wrapped doc.
+wrappedMarkdown :: [Inline] -> Doc
+wrappedMarkdown lst = fsep $ map (fcat . (map inlineToMarkdown)) (splitBySpace lst)
+
+-- | Insert Blank block between key and non-key
+formatKeys :: [Block] -> [Block]
+formatKeys [] = []
+formatKeys [x] = [x]
+formatKeys ((Key x1 y1):(Key x2 y2):rest) = (Key x1 y1):(formatKeys ((Key x2 y2):rest))
+formatKeys ((Key x1 y1):rest) = (Key x1 y1):Blank:(formatKeys rest)
+formatKeys (x:(Key x1 y1):rest) = x:Blank:(formatKeys ((Key x1 y1):rest))
+formatKeys (x:rest) = x:(formatKeys rest)
+
+-- | Convert bibliographic information into Markdown header.
+metaToMarkdown :: Meta -> Doc
+metaToMarkdown (Meta [] [] "") = empty
+metaToMarkdown (Meta title [] "") = (titleToMarkdown title) <> (text "\n")
+metaToMarkdown (Meta title authors "") =
+ (titleToMarkdown title) <> (text "\n") <> (authorsToMarkdown authors) <> (text "\n")
+metaToMarkdown (Meta title authors date) =
+ (titleToMarkdown title) <> (text "\n") <> (authorsToMarkdown authors) <>
+ (text "\n") <> (dateToMarkdown date) <> (text "\n")
+
+titleToMarkdown :: [Inline] -> Doc
+titleToMarkdown lst = text "% " <> (inlineListToMarkdown lst)
+
+authorsToMarkdown :: [String] -> Doc
+authorsToMarkdown lst = text "% " <> text (joinWithSep ", " (map escapeString lst))
+
+dateToMarkdown :: String -> Doc
+dateToMarkdown str = text "% " <> text (escapeString str)
+
+-- | Convert Pandoc block element to markdown.
+blockToMarkdown :: Int -- ^ Tab stop
+ -> Block -- ^ Block element
+ -> Doc
+blockToMarkdown tabStop Blank = text ""
+blockToMarkdown tabStop Null = empty
+blockToMarkdown tabStop (Plain lst) = wrappedMarkdown lst
+blockToMarkdown tabStop (Para lst) = (wrappedMarkdown lst) <> (text "\n")
+blockToMarkdown tabStop (BlockQuote lst) =
+ (vcat $ map (\line -> (text "> ") <> (text line)) $ lines $ render $ vcat $
+ map (blockToMarkdown tabStop) lst) <> (text "\n")
+blockToMarkdown tabStop (Note ref lst) =
+ let lns = lines $ render $ vcat $ map (blockToMarkdown tabStop) lst in
+ if null lns then
+ empty
+ else
+ let first = head lns
+ rest = tail lns in
+ text ("^(" ++ (escapeString ref) ++ ") ") <> (text first) $$ (vcat $
+ map (\line -> (text "^ ") <> (text line)) rest) <> (text "\n")
+blockToMarkdown tabStop (Key txt (Src src tit)) =
+ text " " <> char '[' <> inlineListToMarkdown txt <> char ']' <> text ": " <> text src <>
+ (if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty)
+blockToMarkdown tabStop (CodeBlock str) = (nest tabStop $ vcat $ map text (lines str)) <>
+ (if (endsWith '\n' str) then empty else text "\n") <> text "\n"
+blockToMarkdown tabStop (RawHtml str) = text str
+blockToMarkdown tabStop (BulletList lst) =
+ vcat (map (bulletListItemToMarkdown tabStop) lst) <> text "\n"
+blockToMarkdown tabStop (OrderedList lst) =
+ vcat (zipWith (orderedListItemToMarkdown tabStop) (enumFromTo 1 (length lst)) lst) <>
+ text "\n"
+blockToMarkdown tabStop HorizontalRule = text "\n* * * * *\n"
+blockToMarkdown tabStop (Header level lst) =
+ text ((replicate level '#') ++ " ") <> (inlineListToMarkdown lst) <> (text "\n")
+bulletListItemToMarkdown tabStop list =
+ hang (text "- ") tabStop (vcat (map (blockToMarkdown tabStop) list))
+
+-- | Convert ordered list item (a list of blocks) to markdown.
+orderedListItemToMarkdown :: Int -- ^ tab stop
+ -> Int -- ^ ordinal number of list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> Doc
+orderedListItemToMarkdown tabStop num list =
+ hang (text ((show num) ++ "." ++ spacer)) tabStop (vcat (map (blockToMarkdown tabStop) list))
+ where spacer = if (num < 10) then " " else ""
+
+-- | Convert list of Pandoc inline elements to markdown.
+inlineListToMarkdown :: [Inline] -> Doc
+inlineListToMarkdown lst = hcat $ map inlineToMarkdown lst
+
+-- | Convert Pandoc inline element to markdown.
+inlineToMarkdown :: Inline -> Doc
+inlineToMarkdown (Emph lst) = text "*" <> (inlineListToMarkdown lst) <> text "*"
+inlineToMarkdown (Strong lst) = text "**" <> (inlineListToMarkdown lst) <> text "**"
+inlineToMarkdown (Code str) =
+ case (matchRegex (mkRegex "``") str) of
+ Just match -> text ("` " ++ str ++ " `")
+ Nothing -> case (matchRegex (mkRegex "`") str) of
+ Just match -> text ("`` " ++ str ++ " ``")
+ Nothing -> text ("`" ++ str ++ "`")
+inlineToMarkdown (Str str) = text $ escapeString str
+inlineToMarkdown (TeX str) = text str
+inlineToMarkdown (HtmlInline str) = text str
+inlineToMarkdown (LineBreak) = text " \n"
+inlineToMarkdown Space = char ' '
+inlineToMarkdown (Link txt (Src src tit)) =
+ let linktext = if (null txt) || (txt == [Str ""]) then
+ text "link"
+ else
+ inlineListToMarkdown txt in
+ char '[' <> linktext <> char ']' <> char '(' <> text src <>
+ (if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty) <> char ')'
+inlineToMarkdown (Link txt (Ref [])) = char '[' <> inlineListToMarkdown txt <> text "][]"
+inlineToMarkdown (Link txt (Ref ref)) = char '[' <> inlineListToMarkdown txt <> char ']' <>
+ char '[' <> inlineListToMarkdown ref <> char ']'
+inlineToMarkdown (Image alternate (Src source tit)) =
+ let alt = if (null alternate) || (alternate == [Str ""]) then
+ text "image"
+ else
+ inlineListToMarkdown alternate in
+ char '!' <> char '[' <> alt <> char ']' <> char '(' <> text source <>
+ (if tit /= "" then (text (" \"" ++ (escapeLinkTitle tit) ++ "\"")) else empty) <> char ')'
+inlineToMarkdown (Image alternate (Ref [])) =
+ char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']'
+inlineToMarkdown (Image alternate (Ref ref)) =
+ char '!' <> char '[' <> inlineListToMarkdown alternate <> char ']' <>
+ char '[' <> inlineListToMarkdown ref <> char ']'
+inlineToMarkdown (NoteRef ref) = char '^' <> char '(' <> text (escapeString ref) <> char ')'
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
new file mode 100644
index 000000000..37d895336
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -0,0 +1,188 @@
+-- | Converts Pandoc to reStructuredText.
+module Text.Pandoc.Writers.RST (
+ writeRST
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import List ( nubBy )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+-- | Convert Pandoc to reStructuredText.
+writeRST :: WriterOptions -> Pandoc -> String
+writeRST options (Pandoc meta blocks) =
+ let (main, refs) = unzip $ map (blockToRST (writerTabStop options))
+ (reformatBlocks $ replaceReferenceLinks blocks)
+ top = if (writerStandalone options) then
+ (metaToRST meta) $$ text (writerHeader options)
+ else
+ empty in
+ let refs' = nubBy (\x y -> (render x) == (render y)) refs in -- remove duplicate keys
+ let body = text (writerIncludeBefore options) <>
+ vcat main $$ text (writerIncludeAfter options) in
+ render $ top <> body $$ vcat refs'
+
+-- | Escape special RST characters.
+escapeString :: String -> String
+escapeString = backslashEscape "`\\|*_"
+
+-- | Convert list of inline elements into one 'Doc' of wrapped text and another
+-- containing references.
+wrappedRST :: [Inline] -> (Doc, Doc)
+wrappedRST lst =
+ let words = splitBySpace lst in
+ (fsep $ map (fcat . (map (fst . inlineToRST))) words, vcat (map (snd . inlineToRST) lst))
+
+-- | Remove reference keys, and make sure there are blanks before each list.
+reformatBlocks :: [Block] -> [Block]
+reformatBlocks [] = []
+reformatBlocks ((Plain x):(OrderedList y):rest) =
+ (Para x):(reformatBlocks ((OrderedList y):rest))
+reformatBlocks ((Plain x):(BulletList y):rest) = (Para x):(reformatBlocks ((BulletList y):rest))
+reformatBlocks ((OrderedList x):rest) =
+ (OrderedList (map reformatBlocks x)):(reformatBlocks rest)
+reformatBlocks ((BulletList x):rest) = (BulletList (map reformatBlocks x)):(reformatBlocks rest)
+reformatBlocks ((BlockQuote x):rest) = (BlockQuote (reformatBlocks x)):(reformatBlocks rest)
+reformatBlocks ((Note ref x):rest) = (Note ref (reformatBlocks x)):(reformatBlocks rest)
+reformatBlocks ((Key x1 y1):rest) = reformatBlocks rest
+reformatBlocks (x:rest) = x:(reformatBlocks rest)
+
+-- | Convert bibliographic information to 'Doc'.
+metaToRST :: Meta -> Doc
+metaToRST (Meta title authors date) =
+ (titleToRST title) <> (authorsToRST authors) <> (dateToRST date)
+
+-- | Convert title to 'Doc'.
+titleToRST :: [Inline] -> Doc
+titleToRST [] = empty
+titleToRST lst =
+ let title = fst $ inlineListToRST lst in
+ let titleLength = length $ render title in
+ let border = text (replicate titleLength '=') in
+ border <> char '\n' <> title <> char '\n' <> border <> text "\n\n"
+
+-- | Convert author list to 'Doc'.
+authorsToRST :: [String] -> Doc
+authorsToRST [] = empty
+authorsToRST (first:rest) = text ":Author: " <> text first <> char '\n' <> (authorsToRST rest)
+
+-- | Convert date to 'Doc'.
+dateToRST :: String -> Doc
+dateToRST [] = empty
+dateToRST str = text ":Date: " <> text (escapeString str) <> char '\n'
+
+-- | Convert Pandoc block element to a 'Doc' containing the main text and
+-- another one containing any references.
+blockToRST :: Int -- ^ tab stop
+ -> Block -- ^ block element to convert
+ -> (Doc, Doc) -- ^ first element is text, second is references for end of file
+blockToRST tabStop Blank = (text "\n", empty)
+blockToRST tabStop Null = (empty, empty)
+blockToRST tabStop (Plain lst) = wrappedRST lst
+blockToRST tabStop (Para [TeX str]) = -- raw latex block
+ let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
+ (hang (text "\n.. raw:: latex\n") 3 (vcat $ map text (lines str')), empty)
+blockToRST tabStop (Para lst) = ((fst $ wrappedRST lst) <> (text "\n"), snd $ wrappedRST lst)
+blockToRST tabStop (BlockQuote lst) =
+ let (main, refs) = unzip $ map (blockToRST tabStop) lst in
+ ((nest tabStop $ vcat $ main) <> text "\n", vcat refs)
+blockToRST tabStop (Note ref blocks) =
+ let (main, refs) = unzip $ map (blockToRST tabStop) blocks in
+ ((hang (text ".. [" <> text (escapeString ref) <> text "] ") 3 (vcat main)), vcat refs)
+blockToRST tabStop (Key txt (Src src tit)) =
+ (text "ERROR - KEY FOUND", empty) -- shouldn't have a key here
+blockToRST tabStop (CodeBlock str) =
+ (hang (text "::\n") tabStop (vcat $ map text (lines ('\n':(str ++ "\n")))), empty)
+blockToRST tabStop (RawHtml str) =
+ let str' = if (endsWith '\n' str) then (str ++ "\n") else (str ++ "\n\n") in
+ (hang (text "\n.. raw:: html\n") 3 (vcat $ map text (lines str')), empty)
+blockToRST tabStop (BulletList lst) =
+ let (main, refs) = unzip $ map (bulletListItemToRST tabStop) lst in
+ (vcat main <> text "\n", vcat refs)
+blockToRST tabStop (OrderedList lst) =
+ let (main, refs) =
+ unzip $ zipWith (orderedListItemToRST tabStop) (enumFromTo 1 (length lst)) lst in
+ (vcat main <> text "\n", vcat refs)
+blockToRST tabStop HorizontalRule = (text "--------------\n", empty)
+blockToRST tabStop (Header level lst) =
+ let (headerText, refs) = inlineListToRST lst in
+ let headerLength = length $ render headerText in
+ let headerChar = if (level > 5) then ' ' else "=-~^'" !! (level - 1) in
+ let border = text $ replicate headerLength headerChar in
+ (headerText <> char '\n' <> border <> char '\n', refs)
+
+-- | Convert bullet list item (list of blocks) to reStructuredText.
+-- Returns a pair of 'Doc', the first the main text, the second references
+bulletListItemToRST :: Int -- ^ tab stop
+ -> [Block] -- ^ list item (list of blocks)
+ -> (Doc, Doc)
+bulletListItemToRST tabStop list =
+ let (main, refs) = unzip $ map (blockToRST tabStop) list in
+ (hang (text "- ") tabStop (vcat main), (vcat refs))
+
+-- | Convert an ordered list item (list of blocks) to reStructuredText.
+-- Returns a pair of 'Doc', the first the main text, the second references
+orderedListItemToRST :: Int -- ^ tab stop
+ -> Int -- ^ ordinal number of list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> (Doc, Doc)
+orderedListItemToRST tabStop num list =
+ let (main, refs) = unzip $ map (blockToRST tabStop) list
+ spacer = if (length (show num) < 2) then " " else "" in
+ (hang (text ((show num) ++ "." ++ spacer)) tabStop (vcat main), (vcat refs))
+
+-- | Convert a list of inline elements to reStructuredText.
+-- Returns a pair of 'Doc', the first the main text, the second references.
+inlineListToRST :: [Inline] -> (Doc, Doc)
+inlineListToRST lst = let (main, refs) = unzip $ map inlineToRST lst in
+ (hcat main, hcat refs)
+
+-- | Convert an inline element to reStructuredText.
+-- Returns a pair of 'Doc', the first the main text, the second references.
+inlineToRST :: Inline -> (Doc, Doc) -- second Doc is list of refs for end of file
+inlineToRST (Emph lst) = let (main, refs) = inlineListToRST lst in
+ (text "*" <> main <> text "*", refs)
+inlineToRST (Strong lst) = let (main, refs) = inlineListToRST lst in
+ (text "**" <> main <> text "**", refs)
+inlineToRST (Code str) = (text $ "``" ++ str ++ "``", empty)
+inlineToRST (Str str) = (text $ escapeString str, empty)
+inlineToRST (TeX str) = (text str, empty)
+inlineToRST (HtmlInline str) = (empty, empty)
+inlineToRST (LineBreak) = inlineToRST Space -- RST doesn't have line breaks
+inlineToRST Space = (char ' ', empty)
+--
+-- Note: can assume reference links have been replaced where possible with explicit links.
+--
+inlineToRST (Link txt (Src src tit)) =
+ let (linktext, ref') = if (null txt) || (txt == [Str ""]) then
+ (text "link", empty)
+ else
+ inlineListToRST $ normalizeSpaces txt in
+ let link = char '`' <> linktext <> text "`_"
+ linktext' = render linktext in
+ let linktext'' = if (':' `elem` linktext') then "`" ++ linktext' ++ "`" else linktext' in
+ let ref = text ".. _" <> text linktext'' <> text ": " <> text src in
+ (link, ref' $$ ref)
+inlineToRST (Link txt (Ref [])) =
+ let (linktext, refs) = inlineListToRST txt in
+ (char '[' <> linktext <> char ']', refs)
+inlineToRST (Link txt (Ref ref)) =
+ let (linktext, refs1) = inlineListToRST txt
+ (reftext, refs2) = inlineListToRST ref in
+ (char '[' <> linktext <> text "][" <> reftext <> char ']', refs1 $$ refs2)
+inlineToRST (Image alternate (Src source tit)) =
+ let (alt, ref') = if (null alternate) || (alternate == [Str ""]) then
+ (text "image", empty)
+ else
+ inlineListToRST $ normalizeSpaces alternate in
+ let link = char '|' <> alt <> char '|' in
+ let ref = text ".. " <> link <> text " image:: " <> text source in
+ (link, ref' $$ ref)
+inlineToRST (Image alternate (Ref [])) =
+ let (alttext, refs) = inlineListToRST alternate in
+ (char '|' <> alttext <> char '|', refs)
+-- The following case won't normally occur...
+inlineToRST (Image alternate (Ref ref)) =
+ let (alttext, refs1) = inlineListToRST alternate
+ (reftext, refs2) = inlineListToRST ref in
+ (char '|' <> alttext <> char '|', refs1 $$ refs2)
+inlineToRST (NoteRef ref) = (text " [" <> text (escapeString ref) <> char ']' <> char '_', empty)
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
new file mode 100644
index 000000000..64f17cc74
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -0,0 +1,194 @@
+-- | Convert Pandoc to rich text format.
+module Text.Pandoc.Writers.RTF (
+ writeRTF
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import List ( isSuffixOf )
+import Char ( ord, chr )
+
+-- | Convert Pandoc to a string in rich text format.
+writeRTF :: WriterOptions -> Pandoc -> String
+writeRTF options (Pandoc meta blocks) =
+ let notes = filter isNoteBlock blocks in -- assumes all notes are at outer level
+ let head = if writerStandalone options then
+ rtfHeader notes (writerHeader options) meta
+ else
+ ""
+ foot = if writerStandalone options then "\n}\n" else ""
+ body = (writerIncludeBefore options) ++
+ (concatMap (blockToRTF notes 0) (replaceReferenceLinks blocks)) ++
+ (writerIncludeAfter options) in
+ head ++ body ++ foot
+
+-- | Convert unicode characters (> 127) into rich text format representation.
+handleUnicode :: String -> String
+handleUnicode [] = []
+handleUnicode (c:cs) = if (ord c) > 127 then
+ '\\':'u':(show (ord c)) ++ "?" ++ (handleUnicode cs)
+ else
+ c:(handleUnicode cs)
+
+escapeSpecial = backslashEscape "{\\}"
+escapeTab = gsub "\\\\t" "\\\\tab "
+
+-- | Escape strings as needed for rich text format.
+stringToRTF :: String -> String
+stringToRTF = handleUnicode . escapeSpecial . escapeTab
+
+-- | Escape raw LaTeX strings for RTF. Don't escape \t; it might
+-- be the first letter of a command!
+latexStringToRTF :: String -> String
+latexStringToRTF = handleUnicode . escapeSpecial
+
+-- | Escape things as needed for code block in RTF.
+codeStringToRTF :: String -> String
+codeStringToRTF str = joinWithSep "\\line\n" (lines (stringToRTF str))
+
+-- | Deal with raw LaTeX.
+latexToRTF :: String -> String
+latexToRTF str = "{\\cf1 " ++ (latexStringToRTF str) ++ "\\cf0 } "
+
+-- | Make a paragraph with first-line indent, block indent, and space after.
+rtfParSpaced :: Int -- ^ space after (in twips)
+ -> Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> String -- ^ string with content
+ -> String
+rtfParSpaced spaceAfter indent firstLineIndent content =
+ "{\\pard \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
+ " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n"
+
+-- | Default paragraph.
+rtfPar :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> String -- ^ string with content
+ -> String
+rtfPar = rtfParSpaced 180
+
+-- | Compact paragraph (e.g. for compact list items).
+rtfCompact :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> String -- ^ string with content
+ -> String
+rtfCompact = rtfParSpaced 0
+
+-- number of twips to indent
+indentIncrement = 720
+listIncrement = 360
+
+-- | Returns appropriate bullet list marker for indent level.
+bulletMarker :: Int -> String
+bulletMarker indent = case (indent `mod` 720) of
+ 0 -> "\\bullet "
+ otherwise -> "\\endash "
+
+-- | Returns appropriate (list of) ordered list markers for indent level.
+orderedMarkers :: Int -> [String]
+orderedMarkers indent = case (indent `mod` 720) of
+ 0 -> map (\x -> show x ++ ".") [1..]
+ otherwise -> map (\x -> show x ++ ".") $ cycle ['a'..'z']
+
+-- | Returns RTF header.
+rtfHeader :: [Block] -- ^ list of note blocks
+ -> String -- ^ header text
+ -> Meta -- ^ bibliographic information
+ -> String
+rtfHeader notes headerText (Meta title authors date) =
+ let titletext = if null title then
+ ""
+ else
+ rtfPar 0 0 ("\\qc \\b \\fs36 " ++ inlineListToRTF notes title)
+ authorstext = if null authors then
+ ""
+ else
+ rtfPar 0 0 ("\\qc " ++ (joinWithSep "\\" (map stringToRTF authors)))
+ datetext = if date == "" then "" else rtfPar 0 0 ("\\qc " ++ stringToRTF date) in
+ let spacer = if null (titletext ++ authorstext ++ datetext) then "" else rtfPar 0 0 "" in
+ headerText ++ titletext ++ authorstext ++ datetext ++ spacer
+
+-- | Convert Pandoc block element to RTF.
+blockToRTF :: [Block] -- ^ list of note blocks
+ -> Int -- ^ indent level
+ -> Block -- ^ block to convert
+ -> String
+blockToRTF notes indent Blank = rtfPar indent 0 ""
+blockToRTF notes indent Null = ""
+blockToRTF notes indent (Plain lst) = rtfCompact indent 0 (inlineListToRTF notes lst)
+blockToRTF notes indent (Para lst) = rtfPar indent 0 (inlineListToRTF notes lst)
+blockToRTF notes indent (BlockQuote lst) =
+ concatMap (blockToRTF notes (indent + indentIncrement)) lst
+blockToRTF notes indent (Note ref lst) = "" -- there shouldn't be any after filtering
+blockToRTF notes indent (Key _ _) = ""
+blockToRTF notes indent (CodeBlock str) = rtfPar indent 0 ("\\f1 " ++ (codeStringToRTF str))
+blockToRTF notes indent (RawHtml str) = ""
+blockToRTF notes indent (BulletList lst) =
+ spaceAtEnd $ concatMap (listItemToRTF notes indent (bulletMarker indent)) lst
+blockToRTF notes indent (OrderedList lst) =
+ spaceAtEnd $ concat $ zipWith (listItemToRTF notes indent) (orderedMarkers indent) lst
+blockToRTF notes indent HorizontalRule =
+ rtfPar indent 0 "\\qc \\emdash\\emdash\\emdash\\emdash\\emdash"
+blockToRTF notes indent (Header level lst) =
+ rtfPar indent 0 ("\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++
+ (inlineListToRTF notes lst))
+
+-- | Ensure that there's the same amount of space after compact
+-- lists as after regular lists.
+spaceAtEnd :: String -> String
+spaceAtEnd str =
+ if isSuffixOf "\\par}\n" str then
+ (take ((length str) - 6) str) ++ "\\sa180\\par}\n"
+ else
+ str
+
+-- | Convert list item (list of blocks) to RTF.
+listItemToRTF :: [Block] -- ^ list of note blocks
+ -> Int -- ^ indent level
+ -> String -- ^ list start marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> [Char]
+listItemToRTF notes indent marker [] =
+ rtfCompact (indent + listIncrement) (0 - listIncrement)
+ (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
+listItemToRTF notes indent marker list =
+ let (first:rest) = map (blockToRTF notes (indent + listIncrement)) list in
+ let modFirst = gsub "\\\\fi-?[0-9]+" ("\\\\fi" ++ (show (0 - listIncrement)) ++
+ " " ++ marker ++ "\\\\tx" ++ (show listIncrement) ++ "\\\\tab") first in
+ modFirst ++ (concat rest)
+
+-- | Convert list of inline items to RTF.
+inlineListToRTF :: [Block] -- ^ list of note blocks
+ -> [Inline] -- ^ list of inlines to convert
+ -> String
+inlineListToRTF notes lst = concatMap (inlineToRTF notes) lst
+
+-- | Convert inline item to RTF.
+inlineToRTF :: [Block] -- ^ list of note blocks
+ -> Inline -- ^ inline to convert
+ -> String
+inlineToRTF notes (Emph lst) = "{\\i " ++ (inlineListToRTF notes lst) ++ "} "
+inlineToRTF notes (Strong lst) = "{\\b " ++ (inlineListToRTF notes lst) ++ "} "
+inlineToRTF notes (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} "
+inlineToRTF notes (Str str) = stringToRTF str
+inlineToRTF notes (TeX str) = latexToRTF str
+inlineToRTF notes (HtmlInline str) = ""
+inlineToRTF notes (LineBreak) = "\\line "
+inlineToRTF notes Space = " "
+inlineToRTF notes (Link text (Src src tit)) =
+ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ "\"}}{\\fldrslt{\\ul\n"
+ ++ (inlineListToRTF notes text) ++ "\n}}}\n"
+inlineToRTF notes (Link text (Ref [])) = "[" ++ (inlineListToRTF notes text) ++ "]"
+inlineToRTF notes (Link text (Ref ref)) = "[" ++ (inlineListToRTF notes text) ++ "][" ++
+ (inlineListToRTF notes ref) ++ "]" -- this is what markdown does, for better or worse
+inlineToRTF notes (Image alternate (Src source tit)) = "{\\cf1 [image: " ++ source ++ "]\\cf0}"
+inlineToRTF notes (Image alternate (Ref [])) = "![" ++ (inlineListToRTF notes alternate) ++ "]"
+inlineToRTF notes (Image alternate (Ref ref)) = "![" ++ (inlineListToRTF notes alternate) ++
+ "][" ++ (inlineListToRTF notes ref) ++ "]"
+inlineToRTF [] (NoteRef ref) = ""
+inlineToRTF ((Note firstref firstblocks):rest) (NoteRef ref) =
+ if firstref == ref then
+ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
+ (concatMap (blockToRTF rest 0) firstblocks) ++ "}"
+ else
+ inlineToRTF rest (NoteRef ref)
+
diff --git a/src/Text/Pandoc/Writers/S5.hs b/src/Text/Pandoc/Writers/S5.hs
new file mode 100644
index 000000000..2d0b913a3
--- /dev/null
+++ b/src/Text/Pandoc/Writers/S5.hs
@@ -0,0 +1,95 @@
+----------------------------------------------------
+-- Do not edit this file by hand. Edit
+-- 'templates/S5.hs'
+-- and run ./fillTemplates.pl Text/Pandoc/Writers/S5.hs
+----------------------------------------------------
+
+-- | Definitions for creation of S5 powerpoint-like HTML.
+-- (See <http://meyerweb.com/eric/tools/s5/>.)
+module Text.Pandoc.Writers.S5 (
+ -- * Strings
+ s5Javascript,
+ s5CSS,
+ s5Links,
+ -- * Functions
+ writeS5,
+ insertS5Structure
+ ) where
+import Text.Pandoc.Shared ( joinWithSep, WriterOptions )
+import Text.Pandoc.Writers.HTML ( writeHtml )
+import Text.Pandoc.Definition
+
+s5Javascript :: String
+s5Javascript = "<script type=\"text/javascript\">\n// S5 v1.1 slides.js -- released into the Public Domain\n//\n// Please see http://www.meyerweb.com/eric/tools/s5/credits.html for information \n// about all the wonderful and talented contributors to this code!\n\nvar undef;\nvar slideCSS = '';\nvar snum = 0;\nvar smax = 1;\nvar incpos = 0;\nvar number = undef;\nvar s5mode = true;\nvar defaultView = 'slideshow';\nvar controlVis = 'visible';\n\nvar isIE = navigator.appName == 'Microsoft Internet Explorer' && navigator.userAgent.indexOf('Opera') < 1 ? 1 : 0;\nvar isOp = navigator.userAgent.indexOf('Opera') > -1 ? 1 : 0;\nvar isGe = navigator.userAgent.indexOf('Gecko') > -1 && navigator.userAgent.indexOf('Safari') < 1 ? 1 : 0;\n\nfunction hasClass(object, className) {\n\tif (!object.className) return false;\n\treturn (object.className.search('(^|\\\\s)' + className + '(\\\\s|$)') != -1);\n}\n\nfunction hasValue(object, value) {\n\tif (!object) return false;\n\treturn (object.search('(^|\\\\s)' + value + '(\\\\s|$)') != -1);\n}\n\nfunction removeClass(object,className) {\n\tif (!object) return;\n\tobject.className = object.className.replace(new RegExp('(^|\\\\s)'+className+'(\\\\s|$)'), RegExp.$1+RegExp.$2);\n}\n\nfunction addClass(object,className) {\n\tif (!object || hasClass(object, className)) return;\n\tif (object.className) {\n\t\tobject.className += ' '+className;\n\t} else {\n\t\tobject.className = className;\n\t}\n}\n\nfunction GetElementsWithClassName(elementName,className) {\n\tvar allElements = document.getElementsByTagName(elementName);\n\tvar elemColl = new Array();\n\tfor (var i = 0; i< allElements.length; i++) {\n\t\tif (hasClass(allElements[i], className)) {\n\t\t\telemColl[elemColl.length] = allElements[i];\n\t\t}\n\t}\n\treturn elemColl;\n}\n\nfunction isParentOrSelf(element, id) {\n\tif (element == null || element.nodeName=='BODY') return false;\n\telse if (element.id == id) return true;\n\telse return isParentOrSelf(element.parentNode, id);\n}\n\nfunction nodeValue(node) {\n\tvar result = \"\";\n\tif (node.nodeType == 1) {\n\t\tvar children = node.childNodes;\n\t\tfor (var i = 0; i < children.length; ++i) {\n\t\t\tresult += nodeValue(children[i]);\n\t\t}\t\t\n\t}\n\telse if (node.nodeType == 3) {\n\t\tresult = node.nodeValue;\n\t}\n\treturn(result);\n}\n\nfunction slideLabel() {\n\tvar slideColl = GetElementsWithClassName('*','slide');\n\tvar list = document.getElementById('jumplist');\n\tsmax = slideColl.length;\n\tfor (var n = 0; n < smax; n++) {\n\t\tvar obj = slideColl[n];\n\n\t\tvar did = 'slide' + n.toString();\n\t\tobj.setAttribute('id',did);\n\t\tif (isOp) continue;\n\n\t\tvar otext = '';\n\t\tvar menu = obj.firstChild;\n\t\tif (!menu) continue; // to cope with empty slides\n\t\twhile (menu && menu.nodeType == 3) {\n\t\t\tmenu = menu.nextSibling;\n\t\t}\n\t \tif (!menu) continue; // to cope with slides with only text nodes\n\n\t\tvar menunodes = menu.childNodes;\n\t\tfor (var o = 0; o < menunodes.length; o++) {\n\t\t\totext += nodeValue(menunodes[o]);\n\t\t}\n\t\tlist.options[list.length] = new Option(n + ' : ' + otext, n);\n\t}\n}\n\nfunction currentSlide() {\n\tvar cs;\n\tif (document.getElementById) {\n\t\tcs = document.getElementById('currentSlide');\n\t} else {\n\t\tcs = document.currentSlide;\n\t}\n\tcs.innerHTML = '<span id=\"csHere\">' + snum + '<\\/span> ' + \n\t\t'<span id=\"csSep\">\\/<\\/span> ' + \n\t\t'<span id=\"csTotal\">' + (smax-1) + '<\\/span>';\n\tif (snum == 0) {\n\t\tcs.style.visibility = 'hidden';\n\t} else {\n\t\tcs.style.visibility = 'visible';\n\t}\n}\n\nfunction go(step) {\n\tif (document.getElementById('slideProj').disabled || step == 0) return;\n\tvar jl = document.getElementById('jumplist');\n\tvar cid = 'slide' + snum;\n\tvar ce = document.getElementById(cid);\n\tif (incrementals[snum].length > 0) {\n\t\tfor (var i = 0; i < incrementals[snum].length; i++) {\n\t\t\tremoveClass(incrementals[snum][i], 'current');\n\t\t\tremoveClass(incrementals[snum][i], 'incremental');\n\t\t}\n\t}\n\tif (step != 'j') {\n\t\tsnum += step;\n\t\tlmax = smax - 1;\n\t\tif (snum > lmax) snum = lmax;\n\t\tif (snum < 0) snum = 0;\n\t} else\n\t\tsnum = parseInt(jl.value);\n\tvar nid = 'slide' + snum;\n\tvar ne = document.getElementById(nid);\n\tif (!ne) {\n\t\tne = document.getElementById('slide0');\n\t\tsnum = 0;\n\t}\n\tif (step < 0) {incpos = incrementals[snum].length} else {incpos = 0;}\n\tif (incrementals[snum].length > 0 && incpos == 0) {\n\t\tfor (var i = 0; i < incrementals[snum].length; i++) {\n\t\t\tif (hasClass(incrementals[snum][i], 'current'))\n\t\t\t\tincpos = i + 1;\n\t\t\telse\n\t\t\t\taddClass(incrementals[snum][i], 'incremental');\n\t\t}\n\t}\n\tif (incrementals[snum].length > 0 && incpos > 0)\n\t\taddClass(incrementals[snum][incpos - 1], 'current');\n\tce.style.visibility = 'hidden';\n\tne.style.visibility = 'visible';\n\tjl.selectedIndex = snum;\n\tcurrentSlide();\n\tnumber = 0;\n}\n\nfunction goTo(target) {\n\tif (target >= smax || target == snum) return;\n\tgo(target - snum);\n}\n\nfunction subgo(step) {\n\tif (step > 0) {\n\t\tremoveClass(incrementals[snum][incpos - 1],'current');\n\t\tremoveClass(incrementals[snum][incpos], 'incremental');\n\t\taddClass(incrementals[snum][incpos],'current');\n\t\tincpos++;\n\t} else {\n\t\tincpos--;\n\t\tremoveClass(incrementals[snum][incpos],'current');\n\t\taddClass(incrementals[snum][incpos], 'incremental');\n\t\taddClass(incrementals[snum][incpos - 1],'current');\n\t}\n}\n\nfunction toggle() {\n\tvar slideColl = GetElementsWithClassName('*','slide');\n\tvar slides = document.getElementById('slideProj');\n\tvar outline = document.getElementById('outlineStyle');\n\tif (!slides.disabled) {\n\t\tslides.disabled = true;\n\t\toutline.disabled = false;\n\t\ts5mode = false;\n\t\tfontSize('1em');\n\t\tfor (var n = 0; n < smax; n++) {\n\t\t\tvar slide = slideColl[n];\n\t\t\tslide.style.visibility = 'visible';\n\t\t}\n\t} else {\n\t\tslides.disabled = false;\n\t\toutline.disabled = true;\n\t\ts5mode = true;\n\t\tfontScale();\n\t\tfor (var n = 0; n < smax; n++) {\n\t\t\tvar slide = slideColl[n];\n\t\t\tslide.style.visibility = 'hidden';\n\t\t}\n\t\tslideColl[snum].style.visibility = 'visible';\n\t}\n}\n\nfunction showHide(action) {\n\tvar obj = GetElementsWithClassName('*','hideme')[0];\n\tswitch (action) {\n\tcase 's': obj.style.visibility = 'visible'; break;\n\tcase 'h': obj.style.visibility = 'hidden'; break;\n\tcase 'k':\n\t\tif (obj.style.visibility != 'visible') {\n\t\t\tobj.style.visibility = 'visible';\n\t\t} else {\n\t\t\tobj.style.visibility = 'hidden';\n\t\t}\n\tbreak;\n\t}\n}\n\n// 'keys' code adapted from MozPoint (http://mozpoint.mozdev.org/)\nfunction keys(key) {\n\tif (!key) {\n\t\tkey = event;\n\t\tkey.which = key.keyCode;\n\t}\n\tif (key.which == 84) {\n\t\ttoggle();\n\t\treturn;\n\t}\n\tif (s5mode) {\n\t\tswitch (key.which) {\n\t\t\tcase 10: // return\n\t\t\tcase 13: // enter\n\t\t\t\tif (window.event && isParentOrSelf(window.event.srcElement, 'controls')) return;\n\t\t\t\tif (key.target && isParentOrSelf(key.target, 'controls')) return;\n\t\t\t\tif(number != undef) {\n\t\t\t\t\tgoTo(number);\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\t\t\tcase 32: // spacebar\n\t\t\tcase 34: // page down\n\t\t\tcase 39: // rightkey\n\t\t\tcase 40: // downkey\n\t\t\t\tif(number != undef) {\n\t\t\t\t\tgo(number);\n\t\t\t\t} else if (!incrementals[snum] || incpos >= incrementals[snum].length) {\n\t\t\t\t\tgo(1);\n\t\t\t\t} else {\n\t\t\t\t\tsubgo(1);\n\t\t\t\t}\n\t\t\t\tbreak;\n\t\t\tcase 33: // page up\n\t\t\tcase 37: // leftkey\n\t\t\tcase 38: // upkey\n\t\t\t\tif(number != undef) {\n\t\t\t\t\tgo(-1 * number);\n\t\t\t\t} else if (!incrementals[snum] || incpos <= 0) {\n\t\t\t\t\tgo(-1);\n\t\t\t\t} else {\n\t\t\t\t\tsubgo(-1);\n\t\t\t\t}\n\t\t\t\tbreak;\n\t\t\tcase 36: // home\n\t\t\t\tgoTo(0);\n\t\t\t\tbreak;\n\t\t\tcase 35: // end\n\t\t\t\tgoTo(smax-1);\n\t\t\t\tbreak;\n\t\t\tcase 67: // c\n\t\t\t\tshowHide('k');\n\t\t\t\tbreak;\n\t\t}\n\t\tif (key.which < 48 || key.which > 57) {\n\t\t\tnumber = undef;\n\t\t} else {\n\t\t\tif (window.event && isParentOrSelf(window.event.srcElement, 'controls')) return;\n\t\t\tif (key.target && isParentOrSelf(key.target, 'controls')) return;\n\t\t\tnumber = (((number != undef) ? number : 0) * 10) + (key.which - 48);\n\t\t}\n\t}\n\treturn false;\n}\n\nfunction clicker(e) {\n\tnumber = undef;\n\tvar target;\n\tif (window.event) {\n\t\ttarget = window.event.srcElement;\n\t\te = window.event;\n\t} else target = e.target;\n\tif (target.getAttribute('href') != null || hasValue(target.rel, 'external') || isParentOrSelf(target, 'controls') || isParentOrSelf(target,'embed') || isParentOrSelf(target,'object')) return true;\n\tif (!e.which || e.which == 1) {\n\t\tif (!incrementals[snum] || incpos >= incrementals[snum].length) {\n\t\t\tgo(1);\n\t\t} else {\n\t\t\tsubgo(1);\n\t\t}\n\t}\n}\n\nfunction findSlide(hash) {\n\tvar target = null;\n\tvar slides = GetElementsWithClassName('*','slide');\n\tfor (var i = 0; i < slides.length; i++) {\n\t\tvar targetSlide = slides[i];\n\t\tif ( (targetSlide.name && targetSlide.name == hash)\n\t\t || (targetSlide.id && targetSlide.id == hash) ) {\n\t\t\ttarget = targetSlide;\n\t\t\tbreak;\n\t\t}\n\t}\n\twhile(target != null && target.nodeName != 'BODY') {\n\t\tif (hasClass(target, 'slide')) {\n\t\t\treturn parseInt(target.id.slice(5));\n\t\t}\n\t\ttarget = target.parentNode;\n\t}\n\treturn null;\n}\n\nfunction slideJump() {\n\tif (window.location.hash == null) return;\n\tvar sregex = /^#slide(\\d+)$/;\n\tvar matches = sregex.exec(window.location.hash);\n\tvar dest = null;\n\tif (matches != null) {\n\t\tdest = parseInt(matches[1]);\n\t} else {\n\t\tdest = findSlide(window.location.hash.slice(1));\n\t}\n\tif (dest != null)\n\t\tgo(dest - snum);\n}\n\nfunction fixLinks() {\n\tvar thisUri = window.location.href;\n\tthisUri = thisUri.slice(0, thisUri.length - window.location.hash.length);\n\tvar aelements = document.getElementsByTagName('A');\n\tfor (var i = 0; i < aelements.length; i++) {\n\t\tvar a = aelements[i].href;\n\t\tvar slideID = a.match('\\#slide[0-9]{1,2}');\n\t\tif ((slideID) && (slideID[0].slice(0,1) == '#')) {\n\t\t\tvar dest = findSlide(slideID[0].slice(1));\n\t\t\tif (dest != null) {\n\t\t\t\tif (aelements[i].addEventListener) {\n\t\t\t\t\taelements[i].addEventListener(\"click\", new Function(\"e\",\n\t\t\t\t\t\t\"if (document.getElementById('slideProj').disabled) return;\" +\n\t\t\t\t\t\t\"go(\"+dest+\" - snum); \" +\n\t\t\t\t\t\t\"if (e.preventDefault) e.preventDefault();\"), true);\n\t\t\t\t} else if (aelements[i].attachEvent) {\n\t\t\t\t\taelements[i].attachEvent(\"onclick\", new Function(\"\",\n\t\t\t\t\t\t\"if (document.getElementById('slideProj').disabled) return;\" +\n\t\t\t\t\t\t\"go(\"+dest+\" - snum); \" +\n\t\t\t\t\t\t\"event.returnValue = false;\"));\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n}\n\nfunction externalLinks() {\n\tif (!document.getElementsByTagName) return;\n\tvar anchors = document.getElementsByTagName('a');\n\tfor (var i=0; i<anchors.length; i++) {\n\t\tvar anchor = anchors[i];\n\t\tif (anchor.getAttribute('href') && hasValue(anchor.rel, 'external')) {\n\t\t\tanchor.target = '_blank';\n\t\t\taddClass(anchor,'external');\n\t\t}\n\t}\n}\n\nfunction createControls() {\n\tvar controlsDiv = document.getElementById(\"controls\");\n\tif (!controlsDiv) return;\n\tvar hider = ' onmouseover=\"showHide(\\'s\\');\" onmouseout=\"showHide(\\'h\\');\"';\n\tvar hideDiv, hideList = '';\n\tif (controlVis == 'hidden') {\n\t\thideDiv = hider;\n\t} else {\n\t\thideList = hider;\n\t}\n\tcontrolsDiv.innerHTML = '<form action=\"#\" id=\"controlForm\"' + hideDiv + '>' +\n\t'<div id=\"navLinks\">' +\n\t'<a accesskey=\"t\" id=\"toggle\" href=\"javascript:toggle();\">&#216;<\\/a>' +\n\t'<a accesskey=\"z\" id=\"prev\" href=\"javascript:go(-1);\">&laquo;<\\/a>' +\n\t'<a accesskey=\"x\" id=\"next\" href=\"javascript:go(1);\">&raquo;<\\/a>' +\n\t'<div id=\"navList\"' + hideList + '><select id=\"jumplist\" onchange=\"go(\\'j\\');\"><\\/select><\\/div>' +\n\t'<\\/div><\\/form>';\n\tif (controlVis == 'hidden') {\n\t\tvar hidden = document.getElementById('navLinks');\n\t} else {\n\t\tvar hidden = document.getElementById('jumplist');\n\t}\n\taddClass(hidden,'hideme');\n}\n\nfunction fontScale() { // causes layout problems in FireFox that get fixed if browser's Reload is used; same may be true of other Gecko-based browsers\n\tif (!s5mode) return false;\n\tvar vScale = 22; // both yield 32 (after rounding) at 1024x768\n\tvar hScale = 32; // perhaps should auto-calculate based on theme's declared value?\n\tif (window.innerHeight) {\n\t\tvar vSize = window.innerHeight;\n\t\tvar hSize = window.innerWidth;\n\t} else if (document.documentElement.clientHeight) {\n\t\tvar vSize = document.documentElement.clientHeight;\n\t\tvar hSize = document.documentElement.clientWidth;\n\t} else if (document.body.clientHeight) {\n\t\tvar vSize = document.body.clientHeight;\n\t\tvar hSize = document.body.clientWidth;\n\t} else {\n\t\tvar vSize = 700; // assuming 1024x768, minus chrome and such\n\t\tvar hSize = 1024; // these do not account for kiosk mode or Opera Show\n\t}\n\tvar newSize = Math.min(Math.round(vSize/vScale),Math.round(hSize/hScale));\n\tfontSize(newSize + 'px');\n\tif (isGe) { // hack to counter incremental reflow bugs\n\t\tvar obj = document.getElementsByTagName('body')[0];\n\t\tobj.style.display = 'none';\n\t\tobj.style.display = 'block';\n\t}\n}\n\nfunction fontSize(value) {\n\tif (!(s5ss = document.getElementById('s5ss'))) {\n\t\tif (!isIE) {\n\t\t\tdocument.getElementsByTagName('head')[0].appendChild(s5ss = document.createElement('style'));\n\t\t\ts5ss.setAttribute('media','screen, projection');\n\t\t\ts5ss.setAttribute('id','s5ss');\n\t\t} else {\n\t\t\tdocument.createStyleSheet();\n\t\t\tdocument.s5ss = document.styleSheets[document.styleSheets.length - 1];\n\t\t}\n\t}\n\tif (!isIE) {\n\t\twhile (s5ss.lastChild) s5ss.removeChild(s5ss.lastChild);\n\t\ts5ss.appendChild(document.createTextNode('body {font-size: ' + value + ' !important;}'));\n\t} else {\n\t\tdocument.s5ss.addRule('body','font-size: ' + value + ' !important;');\n\t}\n}\n\nfunction notOperaFix() {\n\tslideCSS = document.getElementById('slideProj').href;\n\tvar slides = document.getElementById('slideProj');\n\tvar outline = document.getElementById('outlineStyle');\n\tslides.setAttribute('media','screen');\n\toutline.disabled = true;\n\tif (isGe) {\n\t\tslides.setAttribute('href','null'); // Gecko fix\n\t\tslides.setAttribute('href',slideCSS); // Gecko fix\n\t}\n\tif (isIE && document.styleSheets && document.styleSheets[0]) {\n\t\tdocument.styleSheets[0].addRule('img', 'behavior: url(ui/default/iepngfix.htc)');\n\t\tdocument.styleSheets[0].addRule('div', 'behavior: url(ui/default/iepngfix.htc)');\n\t\tdocument.styleSheets[0].addRule('.slide', 'behavior: url(ui/default/iepngfix.htc)');\n\t}\n}\n\nfunction getIncrementals(obj) {\n\tvar incrementals = new Array();\n\tif (!obj) \n\t\treturn incrementals;\n\tvar children = obj.childNodes;\n\tfor (var i = 0; i < children.length; i++) {\n\t\tvar child = children[i];\n\t\tif (hasClass(child, 'incremental')) {\n\t\t\tif (child.nodeName == 'OL' || child.nodeName == 'UL') {\n\t\t\t\tremoveClass(child, 'incremental');\n\t\t\t\tfor (var j = 0; j < child.childNodes.length; j++) {\n\t\t\t\t\tif (child.childNodes[j].nodeType == 1) {\n\t\t\t\t\t\taddClass(child.childNodes[j], 'incremental');\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tincrementals[incrementals.length] = child;\n\t\t\t\tremoveClass(child,'incremental');\n\t\t\t}\n\t\t}\n\t\tif (hasClass(child, 'show-first')) {\n\t\t\tif (child.nodeName == 'OL' || child.nodeName == 'UL') {\n\t\t\t\tremoveClass(child, 'show-first');\n\t\t\t\tif (child.childNodes[isGe].nodeType == 1) {\n\t\t\t\t\tremoveClass(child.childNodes[isGe], 'incremental');\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tincrementals[incrementals.length] = child;\n\t\t\t}\n\t\t}\n\t\tincrementals = incrementals.concat(getIncrementals(child));\n\t}\n\treturn incrementals;\n}\n\nfunction createIncrementals() {\n\tvar incrementals = new Array();\n\tfor (var i = 0; i < smax; i++) {\n\t\tincrementals[i] = getIncrementals(document.getElementById('slide'+i));\n\t}\n\treturn incrementals;\n}\n\nfunction defaultCheck() {\n\tvar allMetas = document.getElementsByTagName('meta');\n\tfor (var i = 0; i< allMetas.length; i++) {\n\t\tif (allMetas[i].name == 'defaultView') {\n\t\t\tdefaultView = allMetas[i].content;\n\t\t}\n\t\tif (allMetas[i].name == 'controlVis') {\n\t\t\tcontrolVis = allMetas[i].content;\n\t\t}\n\t}\n}\n\n// Key trap fix, new function body for trap()\nfunction trap(e) {\n\tif (!e) {\n\t\te = event;\n\t\te.which = e.keyCode;\n\t}\n\ttry {\n\t\tmodifierKey = e.ctrlKey || e.altKey || e.metaKey;\n\t}\n\tcatch(e) {\n\t\tmodifierKey = false;\n\t}\n\treturn modifierKey || e.which == 0;\n}\n\nfunction startup() {\n\tdefaultCheck();\n\tif (!isOp) \n\t\tcreateControls();\n\tslideLabel();\n\tfixLinks();\n\texternalLinks();\n\tfontScale();\n\tif (!isOp) {\n\t\tnotOperaFix();\n\t\tincrementals = createIncrementals();\n\t\tslideJump();\n\t\tif (defaultView == 'outline') {\n\t\t\ttoggle();\n\t\t}\n\t\tdocument.onkeyup = keys;\n\t\tdocument.onkeypress = trap;\n\t\tdocument.onclick = clicker;\n\t}\n}\n\nwindow.onload = startup;\nwindow.onresize = function(){setTimeout('fontScale()', 50);}</script>\n"
+
+s5CoreCSS :: String
+s5CoreCSS = "/* Do not edit or override these styles! The system will likely break if you do. */\n\ndiv#header, div#footer, div#controls, .slide {position: absolute;}\nhtml>body div#header, html>body div#footer, \n html>body div#controls, html>body .slide {position: fixed;}\n.handout {display: none;}\n.layout {display: block;}\n.slide, .hideme, .incremental {visibility: hidden;}\n#slide0 {visibility: visible;}\n"
+
+s5FramingCSS :: String
+s5FramingCSS = "/* The following styles size, place, and layer the slide components.\n Edit these if you want to change the overall slide layout.\n The commented lines can be uncommented (and modified, if necessary) \n to help you with the rearrangement process. */\n\n/* target = 1024x768 */\n\ndiv#header, div#footer, .slide {width: 100%; top: 0; left: 0;}\ndiv#header {top: 0; height: 3em; z-index: 1;}\ndiv#footer {top: auto; bottom: 0; height: 2.5em; z-index: 5;}\n.slide {top: 0; width: 92%; padding: 3.5em 4% 4%; z-index: 2; list-style: none;}\ndiv#controls {left: 50%; bottom: 0; width: 50%; z-index: 100;}\ndiv#controls form {position: absolute; bottom: 0; right: 0; width: 100%;\n margin: 0;}\n#currentSlide {position: absolute; width: 10%; left: 45%; bottom: 1em; z-index: 10;}\nhtml>body #currentSlide {position: fixed;}\n\n/*\ndiv#header {background: #FCC;}\ndiv#footer {background: #CCF;}\ndiv#controls {background: #BBD;}\ndiv#currentSlide {background: #FFC;}\n*/\n"
+
+s5PrettyCSS :: String
+s5PrettyCSS = "/* Following are the presentation styles -- edit away! */\n\nbody {background: #FFF url(bodybg.gif) -16px 0 no-repeat; color: #000; font-size: 2em;}\n:link, :visited {text-decoration: none; color: #00C;}\n#controls :active {color: #88A !important;}\n#controls :focus {outline: 1px dotted #227;}\nh1, h2, h3, h4 {font-size: 100%; margin: 0; padding: 0; font-weight: inherit;}\nul, pre {margin: 0; line-height: 1em;}\nhtml, body {margin: 0; padding: 0;}\n\nblockquote, q {font-style: italic;}\nblockquote {padding: 0 2em 0.5em; margin: 0 1.5em 0.5em; text-align: center; font-size: 1em;}\nblockquote p {margin: 0;}\nblockquote i {font-style: normal;}\nblockquote b {display: block; margin-top: 0.5em; font-weight: normal; font-size: smaller; font-style: normal;}\nblockquote b i {font-style: italic;}\n\nkbd {font-weight: bold; font-size: 1em;}\nsup {font-size: smaller; line-height: 1px;}\n\n.slide code {padding: 2px 0.25em; font-weight: bold; color: #533;}\n.slide code.bad, code del {color: red;}\n.slide code.old {color: silver;}\n.slide pre {padding: 0; margin: 0.25em 0 0.5em 0.5em; color: #533; font-size: 90%;}\n.slide pre code {display: block;}\n.slide ul {margin-left: 5%; margin-right: 7%; list-style: disc;}\n.slide li {margin-top: 0.75em; margin-right: 0;}\n.slide ul ul {line-height: 1;}\n.slide ul ul li {margin: .2em; font-size: 85%; list-style: square;}\n.slide img.leader {display: block; margin: 0 auto;}\n\ndiv#header, div#footer {background: #005; color: #AAB;\n font-family: Verdana, Helvetica, sans-serif;}\ndiv#header {background: #005 url(bodybg.gif) -16px 0 no-repeat;\n line-height: 1px;}\ndiv#footer {font-size: 0.5em; font-weight: bold; padding: 1em 0;}\n#footer h1, #footer h2 {display: block; padding: 0 1em;}\n#footer h2 {font-style: italic;}\n\ndiv.long {font-size: 0.75em;}\n.slide h1 {position: absolute; top: 0.7em; left: 87px; z-index: 1;\n margin: 0; padding: 0.3em 0 0 50px; white-space: nowrap;\n font: bold 150%/1em Helvetica, sans-serif; text-transform: capitalize;\n color: #DDE; background: #005;}\n.slide h3 {font-size: 130%;}\nh1 abbr {font-variant: small-caps;}\n\ndiv#controls {position: absolute; left: 50%; bottom: 0;\n width: 50%;\n text-align: right; font: bold 0.9em Verdana, Helvetica, sans-serif;}\nhtml>body div#controls {position: fixed; padding: 0 0 1em 0;\n top: auto;}\ndiv#controls form {position: absolute; bottom: 0; right: 0; width: 100%;\n margin: 0; padding: 0;}\n#controls #navLinks a {padding: 0; margin: 0 0.5em; \n background: #005; border: none; color: #779; \n cursor: pointer;}\n#controls #navList {height: 1em;}\n#controls #navList #jumplist {position: absolute; bottom: 0; right: 0; background: #DDD; color: #227;}\n\n#currentSlide {text-align: center; font-size: 0.5em; color: #449;}\n\n#slide0 {padding-top: 3.5em; font-size: 90%;}\n#slide0 h1 {position: static; margin: 1em 0 0; padding: 0;\n font: bold 2em Helvetica, sans-serif; white-space: normal;\n color: #000; background: transparent;}\n#slide0 h2 {font: bold italic 1em Helvetica, sans-serif; margin: 0.25em;}\n#slide0 h3 {margin-top: 1.5em; font-size: 1.5em;}\n#slide0 h4 {margin-top: 0; font-size: 1em;}\n\nul.urls {list-style: none; display: inline; margin: 0;}\n.urls li {display: inline; margin: 0;}\n.note {display: none;}\n.external {border-bottom: 1px dotted gray;}\nhtml>body .external {border-bottom: none;}\n.external:after {content: \" \\274F\"; font-size: smaller; color: #77B;}\n\n.incremental, .incremental *, .incremental *:after {color: #DDE; visibility: visible;}\nimg.incremental {visibility: hidden;}\n.slide .current {color: #B02;}\n\n\n/* diagnostics\n\nli:after {content: \" [\" attr(class) \"]\"; color: #F88;}\n */"
+
+s5OperaCSS :: String
+s5OperaCSS = "/* DO NOT CHANGE THESE unless you really want to break Opera Show */\n.slide {\n\tvisibility: visible !important;\n\tposition: static !important;\n\tpage-break-before: always;\n}\n#slide0 {page-break-before: avoid;}\n"
+
+s5OutlineCSS :: String
+s5OutlineCSS = "/* don't change this unless you want the layout stuff to show up in the outline view! */\n\n.layout div, #footer *, #controlForm * {display: none;}\n#footer, #controls, #controlForm, #navLinks, #toggle {\n display: block; visibility: visible; margin: 0; padding: 0;}\n#toggle {float: right; padding: 0.5em;}\nhtml>body #toggle {position: fixed; top: 0; right: 0;}\n\n/* making the outline look pretty-ish */\n\n#slide0 h1, #slide0 h2, #slide0 h3, #slide0 h4 {border: none; margin: 0;}\n#slide0 h1 {padding-top: 1.5em;}\n.slide h1 {margin: 1.5em 0 0; padding-top: 0.25em;\n border-top: 1px solid #888; border-bottom: 1px solid #AAA;}\n#toggle {border: 1px solid; border-width: 0 0 1px 1px; background: #FFF;}\n"
+
+s5PrintCSS :: String
+s5PrintCSS = "/* The following rule is necessary to have all slides appear in print! DO NOT REMOVE IT! */ .slide, ul {page-break-inside: avoid; visibility: visible !important;} h1 {page-break-after: avoid;} body {font-size: 12pt; background: white;} * {color: black;} #slide0 h1 {font-size: 200%; border: none; margin: 0.5em 0 0.25em;} #slide0 h3 {margin: 0; padding: 0;} #slide0 h4 {margin: 0 0 0.5em; padding: 0;} #slide0 {margin-bottom: 3em;} h1 {border-top: 2pt solid gray; border-bottom: 1px dotted silver;} .extra {background: transparent !important;} div.extra, pre.extra, .example {font-size: 10pt; color: #333;} ul.extra a {font-weight: bold;} p.example {display: none;} #header {display: none;} #footer h1 {margin: 0; border-bottom: 1px solid; color: gray; font-style: italic;} #footer h2, #controls {display: none;} /* The following rule keeps the layout stuff out of print. Remove at your own risk! */ .layout, .layout * {display: none !important;} "
+
+s5CSS :: String
+s5CSS = "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n"
+
+s5Links :: String
+s5Links = "<!-- style sheet links -->\n<link rel=\"stylesheet\" href=\"ui/default/slides.css\" type=\"text/css\" media=\"projection\" id=\"slideProj\" />\n<link rel=\"stylesheet\" href=\"ui/default/outline.css\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" />\n<link rel=\"stylesheet\" href=\"ui/default/print.css\" type=\"text/css\" media=\"print\" id=\"slidePrint\" />\n<link rel=\"stylesheet\" href=\"ui/default/opera.css\" type=\"text/css\" media=\"projection\" id=\"operaFix\" />\n<!-- S5 JS -->\n<script src=\"ui/default/slides.js\" type=\"text/javascript\"></script>\n"
+
+-- | Converts 'Pandoc' to an S5 HTML presentation.
+writeS5 :: WriterOptions -> Pandoc -> String
+writeS5 options = writeHtml options . insertS5Structure
+
+-- | Inserts HTML needed for an S5 presentation (e.g. around slides).
+layoutDiv :: [Inline] -- ^ Title of document (for header or footer)
+ -> String -- ^ Date of document (for header or footer)
+ -> [Block] -- ^ List of block elements returned
+layoutDiv title date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 [Str date]), (Header 2 title), (RawHtml "</div>\n</div>\n")]
+
+presentationStart = (RawHtml "<div class=\"presentation\">\n\n")
+
+presentationEnd = (RawHtml "</div>\n")
+
+slideStart = (RawHtml "<div class=\"slide\">\n")
+
+slideEnd = (RawHtml "</div>\n")
+
+-- | Returns 'True' if block is a Header 1.
+isH1 :: Block -> Bool
+isH1 (Header 1 _) = True
+isH1 _ = False
+
+-- | Insert HTML around sections to make individual slides.
+insertSlides :: Bool -> [Block] -> [Block]
+insertSlides beginning blocks =
+ let (beforeHead, rest) = break isH1 blocks in
+ if (null rest) then
+ if beginning then
+ beforeHead
+ else
+ beforeHead ++ [slideEnd]
+ else
+ if beginning then
+ beforeHead ++ slideStart:(head rest):(insertSlides False (tail rest))
+ else
+ beforeHead ++ slideEnd:slideStart:(head rest):(insertSlides False (tail rest))
+
+-- | Insert blocks into 'Pandoc' for slide structure.
+insertS5Structure :: Pandoc -> Pandoc
+insertS5Structure (Pandoc meta []) = Pandoc meta []
+insertS5Structure (Pandoc (Meta title authors date) blocks) =
+ let slides = insertSlides True blocks
+ firstSlide = if (not (null title)) then [slideStart, (Header 1 title), (Header 3 [Str (joinWithSep ", " authors)]), (Header 4 [Str date]), slideEnd] else [] in
+ let newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++ slides ++ [presentationEnd] in
+ Pandoc (Meta title authors date) newBlocks
+