aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/ASCIIMathML.js945
-rw-r--r--src/Main.hs512
-rw-r--r--src/Text/Pandoc.hs110
-rw-r--r--src/Text/Pandoc/Blocks.hs145
-rw-r--r--src/Text/Pandoc/CharacterReferences.hs327
-rw-r--r--src/Text/Pandoc/Definition.hs116
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs496
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs651
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs909
-rw-r--r--src/Text/Pandoc/Readers/RST.hs640
-rw-r--r--src/Text/Pandoc/Shared.hs792
-rw-r--r--src/Text/Pandoc/UTF8.hs45
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs248
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs299
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs458
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs310
-rw-r--r--src/Text/Pandoc/Writers/Man.hs293
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs373
-rw-r--r--src/Text/Pandoc/Writers/RST.hs325
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs286
-rw-r--r--src/headers/ConTeXtHeader61
-rw-r--r--src/headers/DocbookHeader3
-rw-r--r--src/headers/LaTeXHeader5
-rw-r--r--src/headers/RTFHeader4
-rw-r--r--src/headers/S5Header3
-rw-r--r--src/templates/ASCIIMathML.hs7
-rw-r--r--src/templates/DefaultHeaders.hs52
-rw-r--r--src/templates/Makefile20
-rw-r--r--src/templates/S5.hs133
-rwxr-xr-xsrc/templates/fillTemplates.pl131
-rw-r--r--src/ui/default/blank.gifbin0 -> 49 bytes
-rw-r--r--src/ui/default/bodybg.gifbin0 -> 10119 bytes
-rw-r--r--src/ui/default/framing.css23
-rw-r--r--src/ui/default/iepngfix.htc42
-rw-r--r--src/ui/default/opera.css7
-rw-r--r--src/ui/default/outline.css15
-rw-r--r--src/ui/default/pretty.css86
-rw-r--r--src/ui/default/print.css24
-rw-r--r--src/ui/default/s5-core.css9
-rw-r--r--src/ui/default/slides.css3
-rw-r--r--src/ui/default/slides.js553
-rw-r--r--src/wrappers/common.sh43
-rw-r--r--src/wrappers/hsmarkdown.in5
-rw-r--r--src/wrappers/html2markdown.in162
-rw-r--r--src/wrappers/markdown2pdf.in81
-rw-r--r--src/wrappers/tempdir.sh18
46 files changed, 9770 insertions, 0 deletions
diff --git a/src/ASCIIMathML.js b/src/ASCIIMathML.js
new file mode 100644
index 000000000..282cc15fb
--- /dev/null
+++ b/src/ASCIIMathML.js
@@ -0,0 +1,945 @@
+/*
+ASCIIMathML.js
+==============
+This file contains JavaScript functions to convert ASCII math notation
+to Presentation MathML. The conversion is done while the (X)HTML page
+loads, and should work with Firefox/Mozilla/Netscape 7+ and Internet
+Explorer 6+MathPlayer (http://www.dessci.com/en/products/mathplayer/).
+Just add the next line to your (X)HTML page with this file in the same folder:
+<script type="text/javascript" src="ASCIIMathML.js"></script>
+This is a convenient and inexpensive solution for authoring MathML.
+
+Version 1.4.7 Dec 15, 2005, (c) Peter Jipsen http://www.chapman.edu/~jipsen
+Latest version at http://www.chapman.edu/~jipsen/mathml/ASCIIMathML.js
+For changes see http://www.chapman.edu/~jipsen/mathml/asciimathchanges.txt
+If you use it on a webpage, please send the URL to jipsen@chapman.edu
+
+Modified July 2006 by John MacFarlane (added CODE to list of contexts
+in which replacement does not occur, modified AMisMathMLAvailable
+to better identify Safari browser, changed mathcolor to "").
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License (at http://www.gnu.org/copyleft/gpl.html)
+for more details.
+*/
+
+var checkForMathML = true; // check if browser can display MathML
+var notifyIfNoMathML = true; // display note if no MathML capability
+var alertIfNoMathML = false; // show alert box if no MathML capability
+var mathcolor = ""; // change it to "" (to inherit) or any other color
+var mathfontfamily = "serif"; // change to "" to inherit (works in IE)
+ // or another family (e.g. "arial")
+var displaystyle = true; // puts limits above and below large operators
+var showasciiformulaonhover = true; // helps students learn ASCIIMath
+var decimalsign = "."; // change to "," if you like, beware of `(1,2)`!
+var AMdelimiter1 = "`", AMescape1 = "\\\\`"; // can use other characters
+var AMdelimiter2 = "$", AMescape2 = "\\\\\\$", AMdelimiter2regexp = "\\$";
+var doubleblankmathdelimiter = false; // if true, x+1 is equal to `x+1`
+ // for IE this works only in <!-- -->
+//var separatetokens;// has been removed (email me if this is a problem)
+var isIE = document.createElementNS==null;
+
+if (document.getElementById==null)
+ alert("This webpage requires a recent browser such as\
+\nMozilla/Netscape 7+ or Internet Explorer 6+MathPlayer")
+
+// all further global variables start with "AM"
+
+function AMcreateElementXHTML(t) {
+ if (isIE) return document.createElement(t);
+ else return document.createElementNS("http://www.w3.org/1999/xhtml",t);
+}
+
+function AMnoMathMLNote() {
+ var nd = AMcreateElementXHTML("h3");
+ nd.setAttribute("align","center")
+ nd.appendChild(AMcreateElementXHTML("p"));
+ nd.appendChild(document.createTextNode("To view the "));
+ var an = AMcreateElementXHTML("a");
+ an.appendChild(document.createTextNode("ASCIIMathML"));
+ an.setAttribute("href","http://www.chapman.edu/~jipsen/asciimath.html");
+ nd.appendChild(an);
+ nd.appendChild(document.createTextNode(" notation use Internet Explorer 6+"));
+ an = AMcreateElementXHTML("a");
+ an.appendChild(document.createTextNode("MathPlayer"));
+ an.setAttribute("href","http://www.dessci.com/en/products/mathplayer/download.htm");
+ nd.appendChild(an);
+ nd.appendChild(document.createTextNode(" or Netscape/Mozilla/Firefox"));
+ nd.appendChild(AMcreateElementXHTML("p"));
+ return nd;
+}
+
+function AMisMathMLavailable() {
+ var regex = /KHTML/; /* This line and the next two modified by JM for better Safari detection */
+ if (navigator.appName.slice(0,8)=="Netscape")
+ if (navigator.appVersion.slice(0,1)>="5" && !regex.test(navigator.userAgent)) return null;
+ else return AMnoMathMLNote();
+ else if (navigator.appName.slice(0,9)=="Microsoft")
+ try {
+ var ActiveX = new ActiveXObject("MathPlayer.Factory.1");
+ return null;
+ } catch (e) {
+ return AMnoMathMLNote();
+ }
+ else return AMnoMathMLNote();
+}
+
+// character lists for Mozilla/Netscape fonts
+var AMcal = [0xEF35,0x212C,0xEF36,0xEF37,0x2130,0x2131,0xEF38,0x210B,0x2110,0xEF39,0xEF3A,0x2112,0x2133,0xEF3B,0xEF3C,0xEF3D,0xEF3E,0x211B,0xEF3F,0xEF40,0xEF41,0xEF42,0xEF43,0xEF44,0xEF45,0xEF46];
+var AMfrk = [0xEF5D,0xEF5E,0x212D,0xEF5F,0xEF60,0xEF61,0xEF62,0x210C,0x2111,0xEF63,0xEF64,0xEF65,0xEF66,0xEF67,0xEF68,0xEF69,0xEF6A,0x211C,0xEF6B,0xEF6C,0xEF6D,0xEF6E,0xEF6F,0xEF70,0xEF71,0x2128];
+var AMbbb = [0xEF8C,0xEF8D,0x2102,0xEF8E,0xEF8F,0xEF90,0xEF91,0x210D,0xEF92,0xEF93,0xEF94,0xEF95,0xEF96,0x2115,0xEF97,0x2119,0x211A,0x211D,0xEF98,0xEF99,0xEF9A,0xEF9B,0xEF9C,0xEF9D,0xEF9E,0x2124];
+
+var CONST = 0, UNARY = 1, BINARY = 2, INFIX = 3, LEFTBRACKET = 4,
+ RIGHTBRACKET = 5, SPACE = 6, UNDEROVER = 7, DEFINITION = 8,
+ LEFTRIGHT = 9, TEXT = 10; // token types
+
+var AMsqrt = {input:"sqrt", tag:"msqrt", output:"sqrt", tex:null, ttype:UNARY},
+ AMroot = {input:"root", tag:"mroot", output:"root", tex:null, ttype:BINARY},
+ AMfrac = {input:"frac", tag:"mfrac", output:"/", tex:null, ttype:BINARY},
+ AMdiv = {input:"/", tag:"mfrac", output:"/", tex:null, ttype:INFIX},
+ AMover = {input:"stackrel", tag:"mover", output:"stackrel", tex:null, ttype:BINARY},
+ AMsub = {input:"_", tag:"msub", output:"_", tex:null, ttype:INFIX},
+ AMsup = {input:"^", tag:"msup", output:"^", tex:null, ttype:INFIX},
+ AMtext = {input:"text", tag:"mtext", output:"text", tex:null, ttype:TEXT},
+ AMmbox = {input:"mbox", tag:"mtext", output:"mbox", tex:null, ttype:TEXT},
+ AMquote = {input:"\"", tag:"mtext", output:"mbox", tex:null, ttype:TEXT};
+
+var AMsymbols = [
+//some greek symbols
+{input:"alpha", tag:"mi", output:"\u03B1", tex:null, ttype:CONST},
+{input:"beta", tag:"mi", output:"\u03B2", tex:null, ttype:CONST},
+{input:"chi", tag:"mi", output:"\u03C7", tex:null, ttype:CONST},
+{input:"delta", tag:"mi", output:"\u03B4", tex:null, ttype:CONST},
+{input:"Delta", tag:"mo", output:"\u0394", tex:null, ttype:CONST},
+{input:"epsi", tag:"mi", output:"\u03B5", tex:"epsilon", ttype:CONST},
+{input:"varepsilon", tag:"mi", output:"\u025B", tex:null, ttype:CONST},
+{input:"eta", tag:"mi", output:"\u03B7", tex:null, ttype:CONST},
+{input:"gamma", tag:"mi", output:"\u03B3", tex:null, ttype:CONST},
+{input:"Gamma", tag:"mo", output:"\u0393", tex:null, ttype:CONST},
+{input:"iota", tag:"mi", output:"\u03B9", tex:null, ttype:CONST},
+{input:"kappa", tag:"mi", output:"\u03BA", tex:null, ttype:CONST},
+{input:"lambda", tag:"mi", output:"\u03BB", tex:null, ttype:CONST},
+{input:"Lambda", tag:"mo", output:"\u039B", tex:null, ttype:CONST},
+{input:"mu", tag:"mi", output:"\u03BC", tex:null, ttype:CONST},
+{input:"nu", tag:"mi", output:"\u03BD", tex:null, ttype:CONST},
+{input:"omega", tag:"mi", output:"\u03C9", tex:null, ttype:CONST},
+{input:"Omega", tag:"mo", output:"\u03A9", tex:null, ttype:CONST},
+{input:"phi", tag:"mi", output:"\u03C6", tex:null, ttype:CONST},
+{input:"varphi", tag:"mi", output:"\u03D5", tex:null, ttype:CONST},
+{input:"Phi", tag:"mo", output:"\u03A6", tex:null, ttype:CONST},
+{input:"pi", tag:"mi", output:"\u03C0", tex:null, ttype:CONST},
+{input:"Pi", tag:"mo", output:"\u03A0", tex:null, ttype:CONST},
+{input:"psi", tag:"mi", output:"\u03C8", tex:null, ttype:CONST},
+{input:"Psi", tag:"mi", output:"\u03A8", tex:null, ttype:CONST},
+{input:"rho", tag:"mi", output:"\u03C1", tex:null, ttype:CONST},
+{input:"sigma", tag:"mi", output:"\u03C3", tex:null, ttype:CONST},
+{input:"Sigma", tag:"mo", output:"\u03A3", tex:null, ttype:CONST},
+{input:"tau", tag:"mi", output:"\u03C4", tex:null, ttype:CONST},
+{input:"theta", tag:"mi", output:"\u03B8", tex:null, ttype:CONST},
+{input:"vartheta", tag:"mi", output:"\u03D1", tex:null, ttype:CONST},
+{input:"Theta", tag:"mo", output:"\u0398", tex:null, ttype:CONST},
+{input:"upsilon", tag:"mi", output:"\u03C5", tex:null, ttype:CONST},
+{input:"xi", tag:"mi", output:"\u03BE", tex:null, ttype:CONST},
+{input:"Xi", tag:"mo", output:"\u039E", tex:null, ttype:CONST},
+{input:"zeta", tag:"mi", output:"\u03B6", tex:null, ttype:CONST},
+
+//binary operation symbols
+{input:"*", tag:"mo", output:"\u22C5", tex:"cdot", ttype:CONST},
+{input:"**", tag:"mo", output:"\u22C6", tex:"star", ttype:CONST},
+{input:"//", tag:"mo", output:"/", tex:null, ttype:CONST},
+{input:"\\\\", tag:"mo", output:"\\", tex:"backslash", ttype:CONST},
+{input:"setminus", tag:"mo", output:"\\", tex:null, ttype:CONST},
+{input:"xx", tag:"mo", output:"\u00D7", tex:"times", ttype:CONST},
+{input:"-:", tag:"mo", output:"\u00F7", tex:"divide", ttype:CONST},
+{input:"@", tag:"mo", output:"\u2218", tex:"circ", ttype:CONST},
+{input:"o+", tag:"mo", output:"\u2295", tex:"oplus", ttype:CONST},
+{input:"ox", tag:"mo", output:"\u2297", tex:"otimes", ttype:CONST},
+{input:"o.", tag:"mo", output:"\u2299", tex:"odot", ttype:CONST},
+{input:"sum", tag:"mo", output:"\u2211", tex:null, ttype:UNDEROVER},
+{input:"prod", tag:"mo", output:"\u220F", tex:null, ttype:UNDEROVER},
+{input:"^^", tag:"mo", output:"\u2227", tex:"wedge", ttype:CONST},
+{input:"^^^", tag:"mo", output:"\u22C0", tex:"bigwedge", ttype:UNDEROVER},
+{input:"vv", tag:"mo", output:"\u2228", tex:"vee", ttype:CONST},
+{input:"vvv", tag:"mo", output:"\u22C1", tex:"bigvee", ttype:UNDEROVER},
+{input:"nn", tag:"mo", output:"\u2229", tex:"cap", ttype:CONST},
+{input:"nnn", tag:"mo", output:"\u22C2", tex:"bigcap", ttype:UNDEROVER},
+{input:"uu", tag:"mo", output:"\u222A", tex:"cup", ttype:CONST},
+{input:"uuu", tag:"mo", output:"\u22C3", tex:"bigcup", ttype:UNDEROVER},
+
+//binary relation symbols
+{input:"!=", tag:"mo", output:"\u2260", tex:"ne", ttype:CONST},
+{input:":=", tag:"mo", output:":=", tex:null, ttype:CONST},
+{input:"lt", tag:"mo", output:"<", tex:null, ttype:CONST},
+{input:"<=", tag:"mo", output:"\u2264", tex:"le", ttype:CONST},
+{input:"lt=", tag:"mo", output:"\u2264", tex:"leq", ttype:CONST},
+{input:">=", tag:"mo", output:"\u2265", tex:"ge", ttype:CONST},
+{input:"geq", tag:"mo", output:"\u2265", tex:null, ttype:CONST},
+{input:"-<", tag:"mo", output:"\u227A", tex:"prec", ttype:CONST},
+{input:"-lt", tag:"mo", output:"\u227A", tex:null, ttype:CONST},
+{input:">-", tag:"mo", output:"\u227B", tex:"succ", ttype:CONST},
+{input:"-<=", tag:"mo", output:"\u2AAF", tex:"preceq", ttype:CONST},
+{input:">-=", tag:"mo", output:"\u2AB0", tex:"succeq", ttype:CONST},
+{input:"in", tag:"mo", output:"\u2208", tex:null, ttype:CONST},
+{input:"!in", tag:"mo", output:"\u2209", tex:"notin", ttype:CONST},
+{input:"sub", tag:"mo", output:"\u2282", tex:"subset", ttype:CONST},
+{input:"sup", tag:"mo", output:"\u2283", tex:"supset", ttype:CONST},
+{input:"sube", tag:"mo", output:"\u2286", tex:"subseteq", ttype:CONST},
+{input:"supe", tag:"mo", output:"\u2287", tex:"supseteq", ttype:CONST},
+{input:"-=", tag:"mo", output:"\u2261", tex:"equiv", ttype:CONST},
+{input:"~=", tag:"mo", output:"\u2245", tex:"cong", ttype:CONST},
+{input:"~~", tag:"mo", output:"\u2248", tex:"approx", ttype:CONST},
+{input:"prop", tag:"mo", output:"\u221D", tex:"propto", ttype:CONST},
+
+//logical symbols
+{input:"and", tag:"mtext", output:"and", tex:null, ttype:SPACE},
+{input:"or", tag:"mtext", output:"or", tex:null, ttype:SPACE},
+{input:"not", tag:"mo", output:"\u00AC", tex:"neg", ttype:CONST},
+{input:"=>", tag:"mo", output:"\u21D2", tex:"implies", ttype:CONST},
+{input:"if", tag:"mo", output:"if", tex:null, ttype:SPACE},
+{input:"<=>", tag:"mo", output:"\u21D4", tex:"iff", ttype:CONST},
+{input:"AA", tag:"mo", output:"\u2200", tex:"forall", ttype:CONST},
+{input:"EE", tag:"mo", output:"\u2203", tex:"exists", ttype:CONST},
+{input:"_|_", tag:"mo", output:"\u22A5", tex:"bot", ttype:CONST},
+{input:"TT", tag:"mo", output:"\u22A4", tex:"top", ttype:CONST},
+{input:"|--", tag:"mo", output:"\u22A2", tex:"vdash", ttype:CONST},
+{input:"|==", tag:"mo", output:"\u22A8", tex:"models", ttype:CONST},
+
+//grouping brackets
+{input:"(", tag:"mo", output:"(", tex:null, ttype:LEFTBRACKET},
+{input:")", tag:"mo", output:")", tex:null, ttype:RIGHTBRACKET},
+{input:"[", tag:"mo", output:"[", tex:null, ttype:LEFTBRACKET},
+{input:"]", tag:"mo", output:"]", tex:null, ttype:RIGHTBRACKET},
+{input:"{", tag:"mo", output:"{", tex:null, ttype:LEFTBRACKET},
+{input:"}", tag:"mo", output:"}", tex:null, ttype:RIGHTBRACKET},
+{input:"|", tag:"mo", output:"|", tex:null, ttype:LEFTRIGHT},
+//{input:"||", tag:"mo", output:"||", tex:null, ttype:LEFTRIGHT},
+{input:"(:", tag:"mo", output:"\u2329", tex:"langle", ttype:LEFTBRACKET},
+{input:":)", tag:"mo", output:"\u232A", tex:"rangle", ttype:RIGHTBRACKET},
+{input:"<<", tag:"mo", output:"\u2329", tex:null, ttype:LEFTBRACKET},
+{input:">>", tag:"mo", output:"\u232A", tex:null, ttype:RIGHTBRACKET},
+{input:"{:", tag:"mo", output:"{:", tex:null, ttype:LEFTBRACKET, invisible:true},
+{input:":}", tag:"mo", output:":}", tex:null, ttype:RIGHTBRACKET, invisible:true},
+
+//miscellaneous symbols
+{input:"int", tag:"mo", output:"\u222B", tex:null, ttype:CONST},
+{input:"dx", tag:"mi", output:"{:d x:}", tex:null, ttype:DEFINITION},
+{input:"dy", tag:"mi", output:"{:d y:}", tex:null, ttype:DEFINITION},
+{input:"dz", tag:"mi", output:"{:d z:}", tex:null, ttype:DEFINITION},
+{input:"dt", tag:"mi", output:"{:d t:}", tex:null, ttype:DEFINITION},
+{input:"oint", tag:"mo", output:"\u222E", tex:null, ttype:CONST},
+{input:"del", tag:"mo", output:"\u2202", tex:"partial", ttype:CONST},
+{input:"grad", tag:"mo", output:"\u2207", tex:"nabla", ttype:CONST},
+{input:"+-", tag:"mo", output:"\u00B1", tex:"pm", ttype:CONST},
+{input:"O/", tag:"mo", output:"\u2205", tex:"emptyset", ttype:CONST},
+{input:"oo", tag:"mo", output:"\u221E", tex:"infty", ttype:CONST},
+{input:"aleph", tag:"mo", output:"\u2135", tex:null, ttype:CONST},
+{input:"...", tag:"mo", output:"...", tex:"ldots", ttype:CONST},
+{input:":.", tag:"mo", output:"\u2234", tex:"therefore", ttype:CONST},
+{input:"/_", tag:"mo", output:"\u2220", tex:"angle", ttype:CONST},
+{input:"\\ ", tag:"mo", output:"\u00A0", tex:null, ttype:CONST},
+{input:"quad", tag:"mo", output:"\u00A0\u00A0", tex:null, ttype:CONST},
+{input:"qquad", tag:"mo", output:"\u00A0\u00A0\u00A0\u00A0", tex:null, ttype:CONST},
+{input:"cdots", tag:"mo", output:"\u22EF", tex:null, ttype:CONST},
+{input:"vdots", tag:"mo", output:"\u22EE", tex:null, ttype:CONST},
+{input:"ddots", tag:"mo", output:"\u22F1", tex:null, ttype:CONST},
+{input:"diamond", tag:"mo", output:"\u22C4", tex:null, ttype:CONST},
+{input:"square", tag:"mo", output:"\u25A1", tex:null, ttype:CONST},
+{input:"|__", tag:"mo", output:"\u230A", tex:"lfloor", ttype:CONST},
+{input:"__|", tag:"mo", output:"\u230B", tex:"rfloor", ttype:CONST},
+{input:"|~", tag:"mo", output:"\u2308", tex:"lceiling", ttype:CONST},
+{input:"~|", tag:"mo", output:"\u2309", tex:"rceiling", ttype:CONST},
+{input:"CC", tag:"mo", output:"\u2102", tex:null, ttype:CONST},
+{input:"NN", tag:"mo", output:"\u2115", tex:null, ttype:CONST},
+{input:"QQ", tag:"mo", output:"\u211A", tex:null, ttype:CONST},
+{input:"RR", tag:"mo", output:"\u211D", tex:null, ttype:CONST},
+{input:"ZZ", tag:"mo", output:"\u2124", tex:null, ttype:CONST},
+{input:"f", tag:"mi", output:"f", tex:null, ttype:UNARY, func:true},
+{input:"g", tag:"mi", output:"g", tex:null, ttype:UNARY, func:true},
+
+//standard functions
+{input:"lim", tag:"mo", output:"lim", tex:null, ttype:UNDEROVER},
+{input:"Lim", tag:"mo", output:"Lim", tex:null, ttype:UNDEROVER},
+{input:"sin", tag:"mo", output:"sin", tex:null, ttype:UNARY, func:true},
+{input:"cos", tag:"mo", output:"cos", tex:null, ttype:UNARY, func:true},
+{input:"tan", tag:"mo", output:"tan", tex:null, ttype:UNARY, func:true},
+{input:"sinh", tag:"mo", output:"sinh", tex:null, ttype:UNARY, func:true},
+{input:"cosh", tag:"mo", output:"cosh", tex:null, ttype:UNARY, func:true},
+{input:"tanh", tag:"mo", output:"tanh", tex:null, ttype:UNARY, func:true},
+{input:"cot", tag:"mo", output:"cot", tex:null, ttype:UNARY, func:true},
+{input:"sec", tag:"mo", output:"sec", tex:null, ttype:UNARY, func:true},
+{input:"csc", tag:"mo", output:"csc", tex:null, ttype:UNARY, func:true},
+{input:"log", tag:"mo", output:"log", tex:null, ttype:UNARY, func:true},
+{input:"ln", tag:"mo", output:"ln", tex:null, ttype:UNARY, func:true},
+{input:"det", tag:"mo", output:"det", tex:null, ttype:UNARY, func:true},
+{input:"dim", tag:"mo", output:"dim", tex:null, ttype:CONST},
+{input:"mod", tag:"mo", output:"mod", tex:null, ttype:CONST},
+{input:"gcd", tag:"mo", output:"gcd", tex:null, ttype:UNARY, func:true},
+{input:"lcm", tag:"mo", output:"lcm", tex:null, ttype:UNARY, func:true},
+{input:"lub", tag:"mo", output:"lub", tex:null, ttype:CONST},
+{input:"glb", tag:"mo", output:"glb", tex:null, ttype:CONST},
+{input:"min", tag:"mo", output:"min", tex:null, ttype:UNDEROVER},
+{input:"max", tag:"mo", output:"max", tex:null, ttype:UNDEROVER},
+
+//arrows
+{input:"uarr", tag:"mo", output:"\u2191", tex:"uparrow", ttype:CONST},
+{input:"darr", tag:"mo", output:"\u2193", tex:"downarrow", ttype:CONST},
+{input:"rarr", tag:"mo", output:"\u2192", tex:"rightarrow", ttype:CONST},
+{input:"->", tag:"mo", output:"\u2192", tex:"to", ttype:CONST},
+{input:"|->", tag:"mo", output:"\u21A6", tex:"mapsto", ttype:CONST},
+{input:"larr", tag:"mo", output:"\u2190", tex:"leftarrow", ttype:CONST},
+{input:"harr", tag:"mo", output:"\u2194", tex:"leftrightarrow", ttype:CONST},
+{input:"rArr", tag:"mo", output:"\u21D2", tex:"Rightarrow", ttype:CONST},
+{input:"lArr", tag:"mo", output:"\u21D0", tex:"Leftarrow", ttype:CONST},
+{input:"hArr", tag:"mo", output:"\u21D4", tex:"Leftrightarrow", ttype:CONST},
+
+//commands with argument
+AMsqrt, AMroot, AMfrac, AMdiv, AMover, AMsub, AMsup,
+{input:"hat", tag:"mover", output:"\u005E", tex:null, ttype:UNARY, acc:true},
+{input:"bar", tag:"mover", output:"\u00AF", tex:"overline", ttype:UNARY, acc:true},
+{input:"vec", tag:"mover", output:"\u2192", tex:null, ttype:UNARY, acc:true},
+{input:"dot", tag:"mover", output:".", tex:null, ttype:UNARY, acc:true},
+{input:"ddot", tag:"mover", output:"..", tex:null, ttype:UNARY, acc:true},
+{input:"ul", tag:"munder", output:"\u0332", tex:"underline", ttype:UNARY, acc:true},
+AMtext, AMmbox, AMquote,
+{input:"bb", tag:"mstyle", atname:"fontweight", atval:"bold", output:"bb", tex:null, ttype:UNARY},
+{input:"mathbf", tag:"mstyle", atname:"fontweight", atval:"bold", output:"mathbf", tex:null, ttype:UNARY},
+{input:"sf", tag:"mstyle", atname:"fontfamily", atval:"sans-serif", output:"sf", tex:null, ttype:UNARY},
+{input:"mathsf", tag:"mstyle", atname:"fontfamily", atval:"sans-serif", output:"mathsf", tex:null, ttype:UNARY},
+{input:"bbb", tag:"mstyle", atname:"mathvariant", atval:"double-struck", output:"bbb", tex:null, ttype:UNARY, codes:AMbbb},
+{input:"mathbb", tag:"mstyle", atname:"mathvariant", atval:"double-struck", output:"mathbb", tex:null, ttype:UNARY, codes:AMbbb},
+{input:"cc", tag:"mstyle", atname:"mathvariant", atval:"script", output:"cc", tex:null, ttype:UNARY, codes:AMcal},
+{input:"mathcal", tag:"mstyle", atname:"mathvariant", atval:"script", output:"mathcal", tex:null, ttype:UNARY, codes:AMcal},
+{input:"tt", tag:"mstyle", atname:"fontfamily", atval:"monospace", output:"tt", tex:null, ttype:UNARY},
+{input:"mathtt", tag:"mstyle", atname:"fontfamily", atval:"monospace", output:"mathtt", tex:null, ttype:UNARY},
+{input:"fr", tag:"mstyle", atname:"mathvariant", atval:"fraktur", output:"fr", tex:null, ttype:UNARY, codes:AMfrk},
+{input:"mathfrak", tag:"mstyle", atname:"mathvariant", atval:"fraktur", output:"mathfrak", tex:null, ttype:UNARY, codes:AMfrk}
+];
+
+function compareNames(s1,s2) {
+ if (s1.input > s2.input) return 1
+ else return -1;
+}
+
+var AMnames = []; //list of input symbols
+
+function AMinitSymbols() {
+ var texsymbols = [], i;
+ for (i=0; i<AMsymbols.length; i++)
+ if (AMsymbols[i].tex)
+ texsymbols[texsymbols.length] = {input:AMsymbols[i].tex,
+ tag:AMsymbols[i].tag, output:AMsymbols[i].output, ttype:AMsymbols[i].ttype};
+ AMsymbols = AMsymbols.concat(texsymbols);
+ AMsymbols.sort(compareNames);
+ for (i=0; i<AMsymbols.length; i++) AMnames[i] = AMsymbols[i].input;
+}
+
+var AMmathml = "http://www.w3.org/1998/Math/MathML";
+
+function AMcreateElementMathML(t) {
+ if (isIE) return document.createElement("m:"+t);
+ else return document.createElementNS(AMmathml,t);
+}
+
+function AMcreateMmlNode(t,frag) {
+// var node = AMcreateElementMathML(name);
+ if (isIE) var node = document.createElement("m:"+t);
+ else var node = document.createElementNS(AMmathml,t);
+ node.appendChild(frag);
+ return node;
+}
+
+function newcommand(oldstr,newstr) {
+ AMsymbols = AMsymbols.concat([{input:oldstr, tag:"mo", output:newstr,
+ tex:null, ttype:DEFINITION}]);
+}
+
+function AMremoveCharsAndBlanks(str,n) {
+//remove n characters and any following blanks
+ var st;
+ if (str.charAt(n)=="\\" && str.charAt(n+1)!="\\" && str.charAt(n+1)!=" ")
+ st = str.slice(n+1);
+ else st = str.slice(n);
+ for (var i=0; i<st.length && st.charCodeAt(i)<=32; i=i+1);
+ return st.slice(i);
+}
+
+function AMposition(arr, str, n) {
+// return position >=n where str appears or would be inserted
+// assumes arr is sorted
+ if (n==0) {
+ var h,m;
+ n = -1;
+ h = arr.length;
+ while (n+1<h) {
+ m = (n+h) >> 1;
+ if (arr[m]<str) n = m; else h = m;
+ }
+ return h;
+ } else
+ for (var i=n; i<arr.length && arr[i]<str; i++);
+ return i; // i=arr.length || arr[i]>=str
+}
+
+function AMgetSymbol(str) {
+//return maximal initial substring of str that appears in names
+//return null if there is none
+ var k = 0; //new pos
+ var j = 0; //old pos
+ var mk; //match pos
+ var st;
+ var tagst;
+ var match = "";
+ var more = true;
+ for (var i=1; i<=str.length && more; i++) {
+ st = str.slice(0,i); //initial substring of length i
+ j = k;
+ k = AMposition(AMnames, st, j);
+ if (k<AMnames.length && str.slice(0,AMnames[k].length)==AMnames[k]){
+ match = AMnames[k];
+ mk = k;
+ i = match.length;
+ }
+ more = k<AMnames.length && str.slice(0,AMnames[k].length)>=AMnames[k];
+ }
+ AMpreviousSymbol=AMcurrentSymbol;
+ if (match!=""){
+ AMcurrentSymbol=AMsymbols[mk].ttype;
+ return AMsymbols[mk];
+ }
+// if str[0] is a digit or - return maxsubstring of digits.digits
+ AMcurrentSymbol=CONST;
+ k = 1;
+ st = str.slice(0,1);
+ var integ = true;
+ while ("0"<=st && st<="9" && k<=str.length) {
+ st = str.slice(k,k+1);
+ k++;
+ }
+ if (st == decimalsign) {
+ st = str.slice(k,k+1);
+ if ("0"<=st && st<="9") {
+ integ = false;
+ k++;
+ while ("0"<=st && st<="9" && k<=str.length) {
+ st = str.slice(k,k+1);
+ k++;
+ }
+ }
+ }
+ if ((integ && k>1) || k>2) {
+ st = str.slice(0,k-1);
+ tagst = "mn";
+ } else {
+ k = 2;
+ st = str.slice(0,1); //take 1 character
+ tagst = (("A">st || st>"Z") && ("a">st || st>"z")?"mo":"mi");
+ }
+ if (st=="-" && AMpreviousSymbol==INFIX) {
+ AMcurrentSymbol = INFIX; //trick "/" into recognizing "-" on second parse
+ return {input:st, tag:tagst, output:st, ttype:UNARY, func:true};
+ }
+ return {input:st, tag:tagst, output:st, ttype:CONST};
+}
+
+function AMremoveBrackets(node) {
+ var st;
+ if (node.nodeName=="mrow") {
+ st = node.firstChild.firstChild.nodeValue;
+ if (st=="(" || st=="[" || st=="{") node.removeChild(node.firstChild);
+ }
+ if (node.nodeName=="mrow") {
+ st = node.lastChild.firstChild.nodeValue;
+ if (st==")" || st=="]" || st=="}") node.removeChild(node.lastChild);
+ }
+}
+
+/*Parsing ASCII math expressions with the following grammar
+v ::= [A-Za-z] | greek letters | numbers | other constant symbols
+u ::= sqrt | text | bb | other unary symbols for font commands
+b ::= frac | root | stackrel binary symbols
+l ::= ( | [ | { | (: | {: left brackets
+r ::= ) | ] | } | :) | :} right brackets
+S ::= v | lEr | uS | bSS Simple expression
+I ::= S_S | S^S | S_S^S | S Intermediate expression
+E ::= IE | I/I Expression
+Each terminal symbol is translated into a corresponding mathml node.*/
+
+var AMnestingDepth,AMpreviousSymbol,AMcurrentSymbol;
+
+function AMparseSexpr(str) { //parses str and returns [node,tailstr]
+ var symbol, node, result, i, st,// rightvert = false,
+ newFrag = document.createDocumentFragment();
+ str = AMremoveCharsAndBlanks(str,0);
+ symbol = AMgetSymbol(str); //either a token or a bracket or empty
+ if (symbol == null || symbol.ttype == RIGHTBRACKET && AMnestingDepth > 0) {
+ return [null,str];
+ }
+ if (symbol.ttype == DEFINITION) {
+ str = symbol.output+AMremoveCharsAndBlanks(str,symbol.input.length);
+ symbol = AMgetSymbol(str);
+ }
+ switch (symbol.ttype) {
+ case UNDEROVER:
+ case CONST:
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ return [AMcreateMmlNode(symbol.tag, //its a constant
+ document.createTextNode(symbol.output)),str];
+ case LEFTBRACKET: //read (expr+)
+ AMnestingDepth++;
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ result = AMparseExpr(str,true);
+ AMnestingDepth--;
+ if (typeof symbol.invisible == "boolean" && symbol.invisible)
+ node = AMcreateMmlNode("mrow",result[0]);
+ else {
+ node = AMcreateMmlNode("mo",document.createTextNode(symbol.output));
+ node = AMcreateMmlNode("mrow",node);
+ node.appendChild(result[0]);
+ }
+ return [node,result[1]];
+ case TEXT:
+ if (symbol!=AMquote) str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ if (str.charAt(0)=="{") i=str.indexOf("}");
+ else if (str.charAt(0)=="(") i=str.indexOf(")");
+ else if (str.charAt(0)=="[") i=str.indexOf("]");
+ else if (symbol==AMquote) i=str.slice(1).indexOf("\"")+1;
+ else i = 0;
+ if (i==-1) i = str.length;
+ st = str.slice(1,i);
+ if (st.charAt(0) == " ") {
+ node = AMcreateElementMathML("mspace");
+ node.setAttribute("width","1ex");
+ newFrag.appendChild(node);
+ }
+ newFrag.appendChild(
+ AMcreateMmlNode(symbol.tag,document.createTextNode(st)));
+ if (st.charAt(st.length-1) == " ") {
+ node = AMcreateElementMathML("mspace");
+ node.setAttribute("width","1ex");
+ newFrag.appendChild(node);
+ }
+ str = AMremoveCharsAndBlanks(str,i+1);
+ return [AMcreateMmlNode("mrow",newFrag),str];
+ case UNARY:
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ result = AMparseSexpr(str);
+ if (result[0]==null) return [AMcreateMmlNode(symbol.tag,
+ document.createTextNode(symbol.output)),str];
+ if (typeof symbol.func == "boolean" && symbol.func) { // functions hack
+ st = str.charAt(0);
+ if (st=="^" || st=="_" || st=="/" || st=="|" || st==",") {
+ return [AMcreateMmlNode(symbol.tag,
+ document.createTextNode(symbol.output)),str];
+ } else {
+ node = AMcreateMmlNode("mrow",
+ AMcreateMmlNode(symbol.tag,document.createTextNode(symbol.output)));
+ node.appendChild(result[0]);
+ return [node,result[1]];
+ }
+ }
+ AMremoveBrackets(result[0]);
+ if (symbol.input == "sqrt") { // sqrt
+ return [AMcreateMmlNode(symbol.tag,result[0]),result[1]];
+ } else if (typeof symbol.acc == "boolean" && symbol.acc) { // accent
+ node = AMcreateMmlNode(symbol.tag,result[0]);
+ node.appendChild(AMcreateMmlNode("mo",document.createTextNode(symbol.output)));
+ return [node,result[1]];
+ } else { // font change command
+ if (!isIE && typeof symbol.codes != "undefined") {
+ for (i=0; i<result[0].childNodes.length; i++)
+ if (result[0].childNodes[i].nodeName=="mi" || result[0].nodeName=="mi") {
+ st = (result[0].nodeName=="mi"?result[0].firstChild.nodeValue:
+ result[0].childNodes[i].firstChild.nodeValue);
+ var newst = [];
+ for (var j=0; j<st.length; j++)
+ if (st.charCodeAt(j)>64 && st.charCodeAt(j)<91) newst = newst +
+ String.fromCharCode(symbol.codes[st.charCodeAt(j)-65]);
+ else newst = newst + st.charAt(j);
+ if (result[0].nodeName=="mi")
+ result[0]=AMcreateElementMathML("mo").
+ appendChild(document.createTextNode(newst));
+ else result[0].replaceChild(AMcreateElementMathML("mo").
+ appendChild(document.createTextNode(newst)),result[0].childNodes[i]);
+ }
+ }
+ node = AMcreateMmlNode(symbol.tag,result[0]);
+ node.setAttribute(symbol.atname,symbol.atval);
+ return [node,result[1]];
+ }
+ case BINARY:
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ result = AMparseSexpr(str);
+ if (result[0]==null) return [AMcreateMmlNode("mo",
+ document.createTextNode(symbol.input)),str];
+ AMremoveBrackets(result[0]);
+ var result2 = AMparseSexpr(result[1]);
+ if (result2[0]==null) return [AMcreateMmlNode("mo",
+ document.createTextNode(symbol.input)),str];
+ AMremoveBrackets(result2[0]);
+ if (symbol.input=="root" || symbol.input=="stackrel")
+ newFrag.appendChild(result2[0]);
+ newFrag.appendChild(result[0]);
+ if (symbol.input=="frac") newFrag.appendChild(result2[0]);
+ return [AMcreateMmlNode(symbol.tag,newFrag),result2[1]];
+ case INFIX:
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ return [AMcreateMmlNode("mo",document.createTextNode(symbol.output)),str];
+ case SPACE:
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ node = AMcreateElementMathML("mspace");
+ node.setAttribute("width","1ex");
+ newFrag.appendChild(node);
+ newFrag.appendChild(
+ AMcreateMmlNode(symbol.tag,document.createTextNode(symbol.output)));
+ node = AMcreateElementMathML("mspace");
+ node.setAttribute("width","1ex");
+ newFrag.appendChild(node);
+ return [AMcreateMmlNode("mrow",newFrag),str];
+ case LEFTRIGHT:
+// if (rightvert) return [null,str]; else rightvert = true;
+ AMnestingDepth++;
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ result = AMparseExpr(str,false);
+ AMnestingDepth--;
+ var st = "";
+ if (result[0].lastChild!=null)
+ st = result[0].lastChild.firstChild.nodeValue;
+ if (st == "|") { // its an absolute value subterm
+ node = AMcreateMmlNode("mo",document.createTextNode(symbol.output));
+ node = AMcreateMmlNode("mrow",node);
+ node.appendChild(result[0]);
+ return [node,result[1]];
+ } else { // the "|" is a \mid
+ node = AMcreateMmlNode("mo",document.createTextNode(symbol.output));
+ node = AMcreateMmlNode("mrow",node);
+ return [node,str];
+ }
+ default:
+//alert("default");
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ return [AMcreateMmlNode(symbol.tag, //its a constant
+ document.createTextNode(symbol.output)),str];
+ }
+}
+
+function AMparseIexpr(str) {
+ var symbol, sym1, sym2, node, result, underover;
+ str = AMremoveCharsAndBlanks(str,0);
+ sym1 = AMgetSymbol(str);
+ result = AMparseSexpr(str);
+ node = result[0];
+ str = result[1];
+ symbol = AMgetSymbol(str);
+ if (symbol.ttype == INFIX && symbol.input != "/") {
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+// if (symbol.input == "/") result = AMparseIexpr(str); else ...
+ result = AMparseSexpr(str);
+ if (result[0] == null) // show box in place of missing argument
+ result[0] = AMcreateMmlNode("mo",document.createTextNode("\u25A1"));
+ else AMremoveBrackets(result[0]);
+ str = result[1];
+// if (symbol.input == "/") AMremoveBrackets(node);
+ if (symbol.input == "_") {
+ sym2 = AMgetSymbol(str);
+ underover = (sym1.ttype == UNDEROVER);
+ if (sym2.input == "^") {
+ str = AMremoveCharsAndBlanks(str,sym2.input.length);
+ var res2 = AMparseSexpr(str);
+ AMremoveBrackets(res2[0]);
+ str = res2[1];
+ node = AMcreateMmlNode((underover?"munderover":"msubsup"),node);
+ node.appendChild(result[0]);
+ node.appendChild(res2[0]);
+ node = AMcreateMmlNode("mrow",node); // so sum does not stretch
+ } else {
+ node = AMcreateMmlNode((underover?"munder":"msub"),node);
+ node.appendChild(result[0]);
+ }
+ } else {
+ node = AMcreateMmlNode(symbol.tag,node);
+ node.appendChild(result[0]);
+ }
+ }
+ return [node,str];
+}
+
+function AMparseExpr(str,rightbracket) {
+ var symbol, node, result, i, nodeList = [],
+ newFrag = document.createDocumentFragment();
+ do {
+ str = AMremoveCharsAndBlanks(str,0);
+ result = AMparseIexpr(str);
+ node = result[0];
+ str = result[1];
+ symbol = AMgetSymbol(str);
+ if (symbol.ttype == INFIX && symbol.input == "/") {
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ result = AMparseIexpr(str);
+ if (result[0] == null) // show box in place of missing argument
+ result[0] = AMcreateMmlNode("mo",document.createTextNode("\u25A1"));
+ else AMremoveBrackets(result[0]);
+ str = result[1];
+ AMremoveBrackets(node);
+ node = AMcreateMmlNode(symbol.tag,node);
+ node.appendChild(result[0]);
+ newFrag.appendChild(node);
+ symbol = AMgetSymbol(str);
+ }
+ else if (node!=undefined) newFrag.appendChild(node);
+ } while ((symbol.ttype != RIGHTBRACKET &&
+ (symbol.ttype != LEFTRIGHT || rightbracket)
+ || AMnestingDepth == 0) && symbol!=null && symbol.output!="");
+ if (symbol.ttype == RIGHTBRACKET || symbol.ttype == LEFTRIGHT) {
+// if (AMnestingDepth > 0) AMnestingDepth--;
+ var len = newFrag.childNodes.length;
+ if (len>0 && newFrag.childNodes[len-1].nodeName == "mrow" && len>1 &&
+ newFrag.childNodes[len-2].nodeName == "mo" &&
+ newFrag.childNodes[len-2].firstChild.nodeValue == ",") { //matrix
+ var right = newFrag.childNodes[len-1].lastChild.firstChild.nodeValue;
+ if (right==")" || right=="]") {
+ var left = newFrag.childNodes[len-1].firstChild.firstChild.nodeValue;
+ if (left=="(" && right==")" && symbol.output != "}" ||
+ left=="[" && right=="]") {
+ var pos = []; // positions of commas
+ var matrix = true;
+ var m = newFrag.childNodes.length;
+ for (i=0; matrix && i<m; i=i+2) {
+ pos[i] = [];
+ node = newFrag.childNodes[i];
+ if (matrix) matrix = node.nodeName=="mrow" &&
+ (i==m-1 || node.nextSibling.nodeName=="mo" &&
+ node.nextSibling.firstChild.nodeValue==",")&&
+ node.firstChild.firstChild.nodeValue==left &&
+ node.lastChild.firstChild.nodeValue==right;
+ if (matrix)
+ for (var j=0; j<node.childNodes.length; j++)
+ if (node.childNodes[j].firstChild.nodeValue==",")
+ pos[i][pos[i].length]=j;
+ if (matrix && i>1) matrix = pos[i].length == pos[i-2].length;
+ }
+ if (matrix) {
+ var row, frag, n, k, table = document.createDocumentFragment();
+ for (i=0; i<m; i=i+2) {
+ row = document.createDocumentFragment();
+ frag = document.createDocumentFragment();
+ node = newFrag.firstChild; // <mrow>(-,-,...,-,-)</mrow>
+ n = node.childNodes.length;
+ k = 0;
+ node.removeChild(node.firstChild); //remove (
+ for (j=1; j<n-1; j++) {
+ if (typeof pos[i][k] != "undefined" && j==pos[i][k]){
+ node.removeChild(node.firstChild); //remove ,
+ row.appendChild(AMcreateMmlNode("mtd",frag));
+ k++;
+ } else frag.appendChild(node.firstChild);
+ }
+ row.appendChild(AMcreateMmlNode("mtd",frag));
+ if (newFrag.childNodes.length>2) {
+ newFrag.removeChild(newFrag.firstChild); //remove <mrow>)</mrow>
+ newFrag.removeChild(newFrag.firstChild); //remove <mo>,</mo>
+ }
+ table.appendChild(AMcreateMmlNode("mtr",row));
+ }
+ node = AMcreateMmlNode("mtable",table);
+ if (typeof symbol.invisible == "boolean" && symbol.invisible) node.setAttribute("columnalign","left");
+ newFrag.replaceChild(node,newFrag.firstChild);
+ }
+ }
+ }
+ }
+ str = AMremoveCharsAndBlanks(str,symbol.input.length);
+ if (typeof symbol.invisible != "boolean" || !symbol.invisible) {
+ node = AMcreateMmlNode("mo",document.createTextNode(symbol.output));
+ newFrag.appendChild(node);
+ }
+ }
+ return [newFrag,str];
+}
+
+function AMparseMath(str) {
+ var result, node = AMcreateElementMathML("mstyle");
+ if (mathcolor != "") node.setAttribute("mathcolor",mathcolor);
+ if (displaystyle) node.setAttribute("displaystyle","true");
+ if (mathfontfamily != "") node.setAttribute("fontfamily",mathfontfamily);
+ AMnestingDepth = 0;
+ node.appendChild(AMparseExpr(str.replace(/^\s+/g,""),false)[0]);
+ node = AMcreateMmlNode("math",node);
+ if (showasciiformulaonhover) //fixed by djhsu so newline
+ node.setAttribute("title",str.replace(/\s+/g," "));//does not show in Gecko
+ if (mathfontfamily != "" && (isIE || mathfontfamily != "serif")) {
+ var fnode = AMcreateElementXHTML("font");
+ fnode.setAttribute("face",mathfontfamily);
+ fnode.appendChild(node);
+ return fnode;
+ }
+ return node;
+}
+
+function AMstrarr2docFrag(arr, linebreaks) {
+ var newFrag=document.createDocumentFragment();
+ var expr = false;
+ for (var i=0; i<arr.length; i++) {
+ if (expr) newFrag.appendChild(AMparseMath(arr[i]));
+ else {
+ var arri = (linebreaks ? arr[i].split("\n\n") : [arr[i]]);
+ newFrag.appendChild(AMcreateElementXHTML("span").
+ appendChild(document.createTextNode(arri[0])));
+ for (var j=1; j<arri.length; j++) {
+ newFrag.appendChild(AMcreateElementXHTML("p"));
+ newFrag.appendChild(AMcreateElementXHTML("span").
+ appendChild(document.createTextNode(arri[j])));
+ }
+ }
+ expr = !expr;
+ }
+ return newFrag;
+}
+
+function AMprocessNodeR(n, linebreaks) {
+ var mtch, str, arr, frg, i;
+ if (n.childNodes.length == 0) {
+ if ((n.nodeType!=8 || linebreaks) &&
+ n.parentNode.nodeName!="form" && n.parentNode.nodeName!="FORM" &&
+ n.parentNode.nodeName!="textarea" && n.parentNode.nodeName!="TEXTAREA" &&
+ n.parentNode.nodeName!="code" && n.parentNode.nodeName!="CODE" && /* added by JM */
+ n.parentNode.nodeName!="pre" && n.parentNode.nodeName!="PRE") {
+ str = n.nodeValue;
+ if (!(str == null)) {
+ str = str.replace(/\r\n\r\n/g,"\n\n");
+ if (doubleblankmathdelimiter) {
+ str = str.replace(/\x20\x20\./g," "+AMdelimiter1+".");
+ str = str.replace(/\x20\x20,/g," "+AMdelimiter1+",");
+ str = str.replace(/\x20\x20/g," "+AMdelimiter1+" ");
+ }
+ str = str.replace(/\x20+/g," ");
+ str = str.replace(/\s*\r\n/g," ");
+ mtch = false;
+ str = str.replace(new RegExp(AMescape2, "g"),
+ function(st){mtch=true;return "AMescape2"});
+ str = str.replace(new RegExp(AMescape1, "g"),
+ function(st){mtch=true;return "AMescape1"});
+ str = str.replace(new RegExp(AMdelimiter2regexp, "g"),AMdelimiter1);
+ arr = str.split(AMdelimiter1);
+ for (i=0; i<arr.length; i++)
+ arr[i]=arr[i].replace(/AMescape2/g,AMdelimiter2).
+ replace(/AMescape1/g,AMdelimiter1);
+ if (arr.length>1 || mtch) {
+ if (checkForMathML) {
+ checkForMathML = false;
+ var nd = AMisMathMLavailable();
+ AMnoMathML = nd != null;
+ if (AMnoMathML && notifyIfNoMathML)
+ if (alertIfNoMathML)
+ alert("To view the ASCIIMathML notation use Internet Explorer 6 +\nMathPlayer (free from www.dessci.com)\n\
+ or Firefox/Mozilla/Netscape");
+ else AMbody.insertBefore(nd,AMbody.childNodes[0]);
+ }
+ if (!AMnoMathML) {
+ frg = AMstrarr2docFrag(arr,n.nodeType==8);
+ var len = frg.childNodes.length;
+ n.parentNode.replaceChild(frg,n);
+ return len-1;
+ } else return 0;
+ }
+ }
+ } else return 0;
+ } else if (n.nodeName!="math") {
+ for (i=0; i<n.childNodes.length; i++)
+ i += AMprocessNodeR(n.childNodes[i], linebreaks);
+ }
+ return 0;
+}
+
+function AMprocessNode(n, linebreaks, spanclassAM) {
+ var frag,st;
+ if (spanclassAM!=null) {
+ frag = document.getElementsByTagName("span")
+ for (var i=0;i<frag.length;i++)
+ if (frag[i].className == "AM")
+ AMprocessNodeR(frag[i],linebreaks);
+ } else {
+ try {
+ st = n.innerHTML;
+ } catch(err) {}
+ if (st==null ||
+ st.indexOf(AMdelimiter1)!=-1 || st.indexOf(AMdelimiter2)!=-1)
+ AMprocessNodeR(n,linebreaks);
+ }
+ if (isIE) { //needed to match size and font of formula to surrounding text
+ frag = document.getElementsByTagName('math');
+ for (var i=0;i<frag.length;i++) frag[i].update()
+ }
+}
+
+var AMbody;
+var AMnoMathML = false, AMtranslated = false;
+
+function translate(spanclassAM) {
+ if (!AMtranslated) { // run this only once
+ AMtranslated = true;
+ AMinitSymbols();
+ AMbody = document.getElementsByTagName("body")[0];
+ AMprocessNode(AMbody, false, spanclassAM);
+ }
+}
+
+if (isIE) { // avoid adding MathPlayer info explicitly to each webpage
+ document.write("<object id=\"mathplayer\"\
+ classid=\"clsid:32F66A20-7614-11D4-BD11-00104BD3F987\"></object>");
+ document.write("<?import namespace=\"m\" implementation=\"#mathplayer\"?>");
+}
+
+// GO1.1 Generic onload by Brothercake
+// http://www.brothercake.com/
+//onload function (replaces the onload="translate()" in the <body> tag)
+function generic()
+{
+ translate();
+};
+//setup onload function
+if(typeof window.addEventListener != 'undefined')
+{
+ //.. gecko, safari, konqueror and standard
+ window.addEventListener('load', generic, false);
+}
+else if(typeof document.addEventListener != 'undefined')
+{
+ //.. opera 7
+ document.addEventListener('load', generic, false);
+}
+else if(typeof window.attachEvent != 'undefined')
+{
+ //.. win/ie
+ window.attachEvent('onload', generic);
+}
+//** remove this condition to degrade older browsers
+else
+{
+ //.. mac/ie5 and anything else that gets this far
+ //if there's an existing onload function
+ if(typeof window.onload == 'function')
+ {
+ //store it
+ var existing = onload;
+ //add new onload handler
+ window.onload = function()
+ {
+ //call existing onload function
+ existing();
+ //call generic onload function
+ generic();
+ };
+ }
+ else
+ {
+ //setup onload function
+ window.onload = generic;
+ }
+}
diff --git a/src/Main.hs b/src/Main.hs
new file mode 100644
index 000000000..ae9f61f7f
--- /dev/null
+++ b/src/Main.hs
@@ -0,0 +1,512 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Main
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley@edu>
+ Stability : alpha
+ Portability : portable
+
+Parses command-line options and calls the appropriate readers and
+writers.
+-}
+module Main where
+import Text.Pandoc
+import Text.Pandoc.UTF8
+import Text.Pandoc.Shared ( joinWithSep )
+import Text.Regex ( mkRegex, matchRegex )
+import System.Environment ( getArgs, getProgName, getEnvironment )
+import System.Exit ( exitWith, ExitCode (..) )
+import System.Console.GetOpt
+import System.IO
+import Data.Maybe ( fromMaybe )
+import Data.List ( isPrefixOf )
+import Data.Char ( toLower )
+import Control.Monad ( (>>=) )
+
+copyrightMessage :: String
+copyrightMessage = "\nCopyright (C) 2006-7 John MacFarlane\n\
+ \Web: http://johnmacfarlane.net/pandoc\n\
+ \This is free software; see the source for copying conditions. There is no\n\
+ \warranty, not even for merchantability or fitness for a particular purpose."
+
+-- | Association list of formats and readers.
+readers :: [(String, ParserState -> String -> Pandoc)]
+readers = [("native" , readPandoc)
+ ,("markdown" , readMarkdown)
+ ,("rst" , readRST)
+ ,("html" , readHtml)
+ ,("latex" , readLaTeX)
+ ]
+
+-- | Reader for native Pandoc format.
+readPandoc :: ParserState -> String -> Pandoc
+readPandoc state input = read input
+
+-- | Association list of formats and pairs of writers and default headers.
+writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ]
+writers = [("native" , (writeDoc, ""))
+ ,("html" , (writeHtmlString, ""))
+ ,("s5" , (writeS5String, defaultS5Header))
+ ,("docbook" , (writeDocbook, defaultDocbookHeader))
+ ,("latex" , (writeLaTeX, defaultLaTeXHeader))
+ ,("context" , (writeConTeXt, defaultConTeXtHeader))
+ ,("man" , (writeMan, ""))
+ ,("markdown" , (writeMarkdown, ""))
+ ,("rst" , (writeRST, ""))
+ ,("rtf" , (writeRTF, defaultRTFHeader))
+ ]
+
+-- | Writer for Pandoc native format.
+writeDoc :: WriterOptions -> Pandoc -> String
+writeDoc options = prettyPandoc
+
+-- | Data structure for command line options.
+data Opt = Opt
+ { optPreserveTabs :: Bool -- ^ Convert tabs to spaces
+ , optTabStop :: Int -- ^ Number of spaces per tab
+ , optStandalone :: Bool -- ^ Include header, footer
+ , optReader :: String -- ^ Reader format
+ , optWriter :: String -- ^ Writer format
+ , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX
+ , optCSS :: String -- ^ CSS file to link to
+ , optTableOfContents :: Bool -- ^ Include table of contents
+ , optIncludeInHeader :: String -- ^ File to include in header
+ , optIncludeBeforeBody :: String -- ^ File to include at top of body
+ , optIncludeAfterBody :: String -- ^ File to include at end of body
+ , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT"
+ , optTitlePrefix :: String -- ^ Optional prefix for HTML title
+ , optOutputFile :: String -- ^ Name of output file
+ , optNumberSections :: Bool -- ^ Number sections in LaTeX
+ , optIncremental :: Bool -- ^ Use incremental lists in S5
+ , optSmart :: Bool -- ^ Use smart typography
+ , optUseASCIIMathML :: Bool -- ^ Use ASCIIMathML
+ , optASCIIMathMLURL :: Maybe String -- ^ URL to ASCIIMathML.js
+ , optDumpArgs :: Bool -- ^ Output command-line arguments
+ , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
+ , optStrict :: Bool -- ^ Use strict markdown syntax
+ , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , optWrapText :: Bool -- ^ Wrap text
+ }
+
+-- | Defaults for command-line options.
+defaultOpts :: Opt
+defaultOpts = Opt
+ { optPreserveTabs = False
+ , optTabStop = 4
+ , optStandalone = False
+ , optReader = "" -- null for default reader
+ , optWriter = "" -- null for default writer
+ , optParseRaw = False
+ , optCSS = ""
+ , optTableOfContents = False
+ , optIncludeInHeader = ""
+ , optIncludeBeforeBody = ""
+ , optIncludeAfterBody = ""
+ , optCustomHeader = "DEFAULT"
+ , optTitlePrefix = ""
+ , optOutputFile = "-" -- "-" means stdout
+ , optNumberSections = False
+ , optIncremental = False
+ , optSmart = False
+ , optUseASCIIMathML = False
+ , optASCIIMathMLURL = Nothing
+ , optDumpArgs = False
+ , optIgnoreArgs = False
+ , optStrict = False
+ , optReferenceLinks = False
+ , optWrapText = True
+ }
+
+-- | A list of functions, each transforming the options data structure
+-- in response to a command-line option.
+options :: [OptDescr (Opt -> IO Opt)]
+options =
+ [ Option "fr" ["from","read"]
+ (ReqArg
+ (\arg opt -> return opt { optReader = map toLower arg })
+ "FORMAT")
+ "" -- ("(" ++ (joinWithSep ", " $ map fst readers) ++ ")")
+
+ , Option "tw" ["to","write"]
+ (ReqArg
+ (\arg opt -> return opt { optWriter = map toLower arg })
+ "FORMAT")
+ "" -- ("(" ++ (joinWithSep ", " $ map fst writers) ++ ")")
+
+ , Option "s" ["standalone"]
+ (NoArg
+ (\opt -> return opt { optStandalone = True }))
+ "" -- "Include needed header and footer on output"
+
+ , Option "o" ["output"]
+ (ReqArg
+ (\arg opt -> return opt { optOutputFile = arg })
+ "FILENAME")
+ "" -- "Name of output file"
+
+ , Option "p" ["preserve-tabs"]
+ (NoArg
+ (\opt -> return opt { optPreserveTabs = True }))
+ "" -- "Preserve tabs instead of converting to spaces"
+
+ , Option "" ["tab-stop"]
+ (ReqArg
+ (\arg opt -> return opt { optTabStop = (read arg) } )
+ "TABSTOP")
+ "" -- "Tab stop (default 4)"
+
+ , Option "" ["strict"]
+ (NoArg
+ (\opt -> return opt { optStrict = True } ))
+ "" -- "Disable markdown syntax extensions"
+
+ , Option "" ["reference-links"]
+ (NoArg
+ (\opt -> return opt { optReferenceLinks = True } ))
+ "" -- "Use reference links in parsing HTML"
+
+ , Option "R" ["parse-raw"]
+ (NoArg
+ (\opt -> return opt { optParseRaw = True }))
+ "" -- "Parse untranslatable HTML codes and LaTeX environments as raw"
+
+ , Option "S" ["smart"]
+ (NoArg
+ (\opt -> return opt { optSmart = True }))
+ "" -- "Use smart quotes, dashes, and ellipses"
+
+ , Option "m" ["asciimathml"]
+ (OptArg
+ (\arg opt -> return opt { optUseASCIIMathML = True,
+ optASCIIMathMLURL = arg,
+ optStandalone = True })
+ "URL")
+ "" -- "Use ASCIIMathML script in html output"
+
+ , Option "i" ["incremental"]
+ (NoArg
+ (\opt -> return opt { optIncremental = True }))
+ "" -- "Make list items display incrementally in S5"
+
+ , Option "N" ["number-sections"]
+ (NoArg
+ (\opt -> return opt { optNumberSections = True }))
+ "" -- "Number sections in LaTeX"
+
+ , Option "" ["no-wrap"]
+ (NoArg
+ (\opt -> return opt { optWrapText = False }))
+ "" -- "Do not wrap text in output"
+
+ , Option "" ["toc", "table-of-contents"]
+ (NoArg
+ (\opt -> return opt { optTableOfContents = True }))
+ "" -- "Include table of contents"
+
+ , Option "c" ["css"]
+ (ReqArg
+ (\arg opt -> return opt { optCSS = arg,
+ optStandalone = True })
+ "CSS")
+ "" -- "Link to CSS style sheet"
+
+ , Option "H" ["include-in-header"]
+ (ReqArg
+ (\arg opt -> do
+ text <- readFile arg
+ return opt { optIncludeInHeader = fromUTF8 text,
+ optStandalone = True })
+ "FILENAME")
+ "" -- "File to include at end of header (implies -s)"
+
+ , Option "B" ["include-before-body"]
+ (ReqArg
+ (\arg opt -> do
+ text <- readFile arg
+ return opt { optIncludeBeforeBody = fromUTF8 text })
+ "FILENAME")
+ "" -- "File to include before document body"
+
+ , Option "A" ["include-after-body"]
+ (ReqArg
+ (\arg opt -> do
+ text <- readFile arg
+ return opt { optIncludeAfterBody = fromUTF8 text })
+ "FILENAME")
+ "" -- "File to include after document body"
+
+ , Option "C" ["custom-header"]
+ (ReqArg
+ (\arg opt -> do
+ text <- readFile arg
+ return opt { optCustomHeader = fromUTF8 text,
+ optStandalone = True })
+ "FILENAME")
+ "" -- "File to use for custom header (implies -s)"
+
+ , Option "T" ["title-prefix"]
+ (ReqArg
+ (\arg opt -> return opt { optTitlePrefix = arg,
+ optStandalone = True })
+ "STRING")
+ "" -- "String to prefix to HTML window title"
+
+ , Option "D" ["print-default-header"]
+ (ReqArg
+ (\arg opt -> do
+ let header = case (lookup arg writers) of
+ Just (writer, head) -> head
+ Nothing -> error ("Unknown reader: " ++ arg)
+ hPutStr stdout header
+ exitWith ExitSuccess)
+ "FORMAT")
+ "" -- "Print default header for FORMAT"
+
+ , Option "" ["dump-args"]
+ (NoArg
+ (\opt -> return opt { optDumpArgs = True }))
+ "" -- "Print output filename and arguments to stdout."
+
+ , Option "" ["ignore-args"]
+ (NoArg
+ (\opt -> return opt { optIgnoreArgs = True }))
+ "" -- "Ignore command-line arguments."
+
+ , Option "v" ["version"]
+ (NoArg
+ (\_ -> do
+ prg <- getProgName
+ hPutStrLn stderr (prg ++ " " ++ pandocVersion ++
+ copyrightMessage)
+ exitWith $ ExitFailure 4))
+ "" -- "Print version"
+
+ , Option "h" ["help"]
+ (NoArg
+ (\_ -> do
+ prg <- getProgName
+ hPutStr stderr (usageMessage prg options)
+ exitWith $ ExitFailure 2))
+ "" -- "Show help"
+ ]
+
+-- Returns usage message
+usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
+usageMessage programName options = usageInfo
+ (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
+ (joinWithSep ", " $ map fst readers) ++ "\nOutput formats: " ++
+ (joinWithSep ", " $ map fst writers) ++ "\nOptions:")
+ options
+
+-- Determine default reader based on source file extensions
+defaultReaderName :: [String] -> String
+defaultReaderName [] = "markdown"
+defaultReaderName (x:xs) =
+ let x' = map toLower x in
+ case (matchRegex (mkRegex ".*\\.(.*)") x') of
+ Nothing -> defaultReaderName xs -- no extension
+ Just ["xhtml"] -> "html"
+ Just ["html"] -> "html"
+ Just ["htm"] -> "html"
+ Just ["tex"] -> "latex"
+ Just ["latex"] -> "latex"
+ Just ["ltx"] -> "latex"
+ Just ["rst"] -> "rst"
+ Just ["native"] -> "native"
+ Just _ -> "markdown"
+
+-- Determine default writer based on output file extension
+defaultWriterName :: String -> String
+defaultWriterName "-" = "html" -- no output file
+defaultWriterName x =
+ let x' = map toLower x in
+ case (matchRegex (mkRegex ".*\\.(.*)") x') of
+ Nothing -> "markdown" -- no extension
+ Just [""] -> "markdown" -- empty extension
+ Just ["tex"] -> "latex"
+ Just ["latex"] -> "latex"
+ Just ["ltx"] -> "latex"
+ Just ["context"] -> "context"
+ Just ["ctx"] -> "context"
+ Just ["rtf"] -> "rtf"
+ Just ["rst"] -> "rst"
+ Just ["s5"] -> "s5"
+ Just ["native"] -> "native"
+ Just ["txt"] -> "markdown"
+ Just ["text"] -> "markdown"
+ Just ["md"] -> "markdown"
+ Just ["markdown"] -> "markdown"
+ Just ["db"] -> "docbook"
+ Just ["xml"] -> "docbook"
+ Just ["sgml"] -> "docbook"
+ Just [[x]] | x `elem` ['1'..'9'] -> "man"
+ Just _ -> "html"
+
+main = do
+
+ rawArgs <- getArgs
+ prg <- getProgName
+ let compatMode = (prg == "hsmarkdown")
+
+ let (actions, args, errors) = if compatMode
+ then ([], rawArgs, [])
+ else getOpt Permute options rawArgs
+
+ if (not (null errors))
+ then do
+ name <- getProgName
+ mapM (\e -> hPutStrLn stderr e) errors
+ hPutStr stderr (usageMessage name options)
+ exitWith $ ExitFailure 2
+ else
+ return ()
+
+ let defaultOpts' = if compatMode
+ then defaultOpts { optReader = "markdown"
+ , optWriter = "html"
+ , optStrict = True }
+ else defaultOpts
+
+ -- thread option data structure through all supplied option actions
+ opts <- foldl (>>=) (return defaultOpts') actions
+
+ let Opt { optPreserveTabs = preserveTabs
+ , optTabStop = tabStop
+ , optStandalone = standalone
+ , optReader = readerName
+ , optWriter = writerName
+ , optParseRaw = parseRaw
+ , optCSS = css
+ , optTableOfContents = toc
+ , optIncludeInHeader = includeHeader
+ , optIncludeBeforeBody = includeBefore
+ , optIncludeAfterBody = includeAfter
+ , optCustomHeader = customHeader
+ , optTitlePrefix = titlePrefix
+ , optOutputFile = outputFile
+ , optNumberSections = numberSections
+ , optIncremental = incremental
+ , optSmart = smart
+ , optUseASCIIMathML = useAsciiMathML
+ , optASCIIMathMLURL = asciiMathMLURL
+ , optDumpArgs = dumpArgs
+ , optIgnoreArgs = ignoreArgs
+ , optStrict = strict
+ , optReferenceLinks = referenceLinks
+ , optWrapText = wrap
+ } = opts
+
+ if dumpArgs
+ then do
+ hPutStrLn stdout outputFile
+ mapM (\arg -> hPutStrLn stdout arg) args
+ exitWith $ ExitSuccess
+ else return ()
+
+ let sources = if ignoreArgs then [] else args
+
+ -- assign reader and writer based on options and filenames
+ let readerName' = if null readerName
+ then defaultReaderName sources
+ else readerName
+
+ let writerName' = if null writerName
+ then defaultWriterName outputFile
+ else writerName
+
+ reader <- case (lookup readerName' readers) of
+ Just r -> return r
+ Nothing -> error ("Unknown reader: " ++ readerName')
+
+ (writer, defaultHeader) <- case (lookup writerName' writers) of
+ Just (w,h) -> return (w, h)
+ Nothing -> error ("Unknown writer: " ++ writerName')
+
+ output <- if (outputFile == "-")
+ then return stdout
+ else openFile outputFile WriteMode
+
+ environment <- getEnvironment
+ let columns = case lookup "COLUMNS" environment of
+ Just cols -> read cols
+ Nothing -> stateColumns defaultParserState
+
+ let tabFilter _ [] = ""
+ tabFilter _ ('\n':xs) = '\n':(tabFilter tabStop xs)
+ -- remove DOS line endings
+ tabFilter _ ('\r':'\n':xs) = '\n':(tabFilter tabStop xs)
+ tabFilter _ ('\r':xs) = '\n':(tabFilter tabStop xs)
+ tabFilter spsToNextStop ('\t':xs) =
+ if preserveTabs
+ then '\t':(tabFilter tabStop xs)
+ else replicate spsToNextStop ' ' ++ tabFilter tabStop xs
+ tabFilter 1 (x:xs) =
+ x:(tabFilter tabStop xs)
+ tabFilter spsToNextStop (x:xs) =
+ x:(tabFilter (spsToNextStop - 1) xs)
+
+ let startParserState =
+ defaultParserState { stateParseRaw = parseRaw,
+ stateTabStop = tabStop,
+ stateStandalone = standalone && (not strict),
+ stateSmart = smart || writerName' `elem`
+ ["latex", "context"],
+ stateColumns = columns,
+ stateStrict = strict }
+ let csslink = if (css == "")
+ then ""
+ else "<link rel=\"stylesheet\" href=\"" ++ css ++
+ "\" type=\"text/css\" media=\"all\" />\n"
+ let header = (if (customHeader == "DEFAULT")
+ then defaultHeader
+ else customHeader) ++ csslink ++ includeHeader
+ let writerOptions = WriterOptions { writerStandalone = standalone &&
+ (not strict),
+ writerHeader = header,
+ writerTitlePrefix = titlePrefix,
+ writerTabStop = tabStop,
+ writerTableOfContents = toc &&
+ (not strict) &&
+ writerName/="s5",
+ writerUseASCIIMathML = useAsciiMathML,
+ writerASCIIMathMLURL = asciiMathMLURL,
+ writerS5 = (writerName=="s5"),
+ writerIgnoreNotes = False,
+ writerIncremental = incremental,
+ writerNumberSections = numberSections,
+ writerIncludeBefore = includeBefore,
+ writerIncludeAfter = includeAfter,
+ writerStrictMarkdown = strict,
+ writerReferenceLinks = referenceLinks,
+ writerWrapText = wrap }
+
+ (readSources sources) >>= (hPutStrLn output . toUTF8 .
+ (writer writerOptions) .
+ (reader startParserState) . tabFilter tabStop .
+ fromUTF8 . (joinWithSep "\n")) >>
+ hClose output
+
+ where
+ readSources [] = mapM readSource ["-"]
+ readSources sources = mapM readSource sources
+ readSource "-" = getContents
+ readSource source = readFile source
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
new file mode 100644
index 000000000..7633bf7ef
--- /dev/null
+++ b/src/Text/Pandoc.hs
@@ -0,0 +1,110 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This helper module exports the main writers, readers, and data
+structure definitions from the Pandoc libraries.
+
+A typical application will chain together a reader and a writer
+to convert strings from one format to another. For example, the
+following simple program will act as a filter converting markdown
+fragments to reStructuredText, using reference-style links instead of
+inline links:
+
+> module Main where
+> import Text.Pandoc
+>
+> markdownToRST :: String -> String
+> markdownToRST = toUTF8 .
+> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
+> (readMarkdown defaultParserState) . fromUTF8
+>
+> main = interact markdownToRST
+
+-}
+
+module Text.Pandoc
+ (
+ -- * Definitions
+ module Text.Pandoc.Definition
+ -- * Readers: converting /to/ Pandoc format
+ , readMarkdown
+ , readRST
+ , readLaTeX
+ , readHtml
+ -- * Parser state used in readers
+ , ParserState (..)
+ , defaultParserState
+ , ParserContext (..)
+ , QuoteContext (..)
+ , KeyTable
+ , NoteTable
+ , HeaderType (..)
+ -- * Writers: converting /from/ Pandoc format
+ , writeMarkdown
+ , writeRST
+ , writeLaTeX
+ , writeConTeXt
+ , writeHtml
+ , writeHtmlString
+ , writeS5
+ , writeS5String
+ , writeDocbook
+ , writeMan
+ , writeRTF
+ , prettyPandoc
+ -- * Writer options used in writers
+ , WriterOptions (..)
+ , defaultWriterOptions
+ -- * Default headers for various output formats
+ , module Text.Pandoc.Writers.DefaultHeaders
+ -- * Functions for converting to and from UTF-8
+ , module Text.Pandoc.UTF8
+ -- * Version
+ , pandocVersion
+ ) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Readers.Markdown
+import Text.Pandoc.Readers.RST
+import Text.Pandoc.Readers.LaTeX
+import Text.Pandoc.Readers.HTML
+import Text.Pandoc.Writers.Markdown
+import Text.Pandoc.Writers.RST
+import Text.Pandoc.Writers.LaTeX
+import Text.Pandoc.Writers.ConTeXt
+import Text.Pandoc.Writers.HTML
+import Text.Pandoc.Writers.S5
+import Text.Pandoc.Writers.Docbook
+import Text.Pandoc.Writers.Man
+import Text.Pandoc.Writers.RTF
+import Text.Pandoc.Writers.DefaultHeaders
+import Text.Pandoc.UTF8
+import Text.Pandoc.Shared
+
+-- | Version number of pandoc library.
+pandocVersion :: String
+pandocVersion = "0.45"
diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs
new file mode 100644
index 000000000..cfc22cb3e
--- /dev/null
+++ b/src/Text/Pandoc/Blocks.hs
@@ -0,0 +1,145 @@
+{-
+Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Blocks
+ Copyright : Copyright (C) 2007 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for the manipulation of fixed-width blocks of text.
+These are used in the construction of plain-text tables.
+-}
+
+module Text.Pandoc.Blocks
+ (
+ TextBlock (..),
+ docToBlock,
+ blockToDoc,
+ widthOfBlock,
+ heightOfBlock,
+ hcatBlocks,
+ hsepBlocks,
+ centerAlignBlock,
+ leftAlignBlock,
+ rightAlignBlock
+ )
+where
+import Text.PrettyPrint
+import Data.List ( intersperse )
+
+-- | A fixed-width block of text. Parameters are width of block,
+-- height of block, and list of lines.
+data TextBlock = TextBlock Int Int [String]
+instance Show TextBlock where
+ show x = show $ blockToDoc x
+
+-- | Break lines in a list of lines so that none are greater than
+-- a given width.
+breakLines :: Int -- ^ Maximum length of lines.
+ -> [String] -- ^ List of lines.
+ -> [String]
+breakLines width [] = []
+breakLines width (l:ls) =
+ if length l > width
+ then (take width l):(breakLines width ((drop width l):ls))
+ else l:(breakLines width ls)
+
+-- | Convert a @Doc@ element into a @TextBlock@ with a specified width.
+docToBlock :: Int -- ^ Width of text block.
+ -> Doc -- ^ @Doc@ to convert.
+ -> TextBlock
+docToBlock width doc =
+ let rendered = renderStyle (style {lineLength = width,
+ ribbonsPerLine = 1}) doc
+ lns = breakLines width $ lines rendered
+ in TextBlock width (length lns) lns
+
+-- | Convert a @TextBlock@ to a @Doc@ element.
+blockToDoc :: TextBlock -> Doc
+blockToDoc (TextBlock _ _ lns) =
+ if null lns
+ then empty
+ else vcat $ map text lns
+
+-- | Returns width of a @TextBlock@ (number of columns).
+widthOfBlock :: TextBlock -> Int
+widthOfBlock (TextBlock width _ _) = width
+
+-- | Returns height of a @TextBlock@ (number of rows).
+heightOfBlock :: TextBlock -> Int
+heightOfBlock (TextBlock _ height _) = height
+
+-- | Pads a string out to a given width using spaces.
+hPad :: Int -- ^ Desired width.
+ -> String -- ^ String to pad.
+ -> String
+hPad width line =
+ let lineLength = length line
+ in if lineLength <= width
+ then line ++ replicate (width - lineLength) ' '
+ else take width line
+
+-- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in
+-- which they appear side by side.
+hcatBlocks :: [TextBlock] -> TextBlock
+hcatBlocks [] = TextBlock 0 0 []
+hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd.
+hcatBlocks ((TextBlock width1 height1 lns1):xs) =
+ let (TextBlock width2 height2 lns2) = hcatBlocks xs
+ height = max height1 height2
+ width = width1 + width2
+ lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) ""
+ lns2' = lns2 ++ replicate (height - height2) ""
+ lns = zipWith (++) lns1' lns2'
+ in TextBlock width height lns
+
+-- | Like @hcatBlocks@, but inserts space between the @TextBlock@s.
+hsepBlocks :: [TextBlock] -> TextBlock
+hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "]))
+
+isWhitespace x = x `elem` " \t"
+
+-- | Left-aligns the contents of a @TextBlock@ within the block.
+leftAlignBlock :: TextBlock -> TextBlock
+leftAlignBlock (TextBlock width height lns) =
+ TextBlock width height $ map (dropWhile isWhitespace) lns
+
+-- | Right-aligns the contents of a @TextBlock@ within the block.
+rightAlignBlock :: TextBlock -> TextBlock
+rightAlignBlock (TextBlock width height lns) =
+ let rightAlignLine ln =
+ let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln
+ in reverse (rest ++ spaces)
+ in TextBlock width height $ map rightAlignLine lns
+
+-- | Centers the contents of a @TextBlock@ within the block.
+centerAlignBlock :: TextBlock -> TextBlock
+centerAlignBlock (TextBlock width height lns) =
+ let centerAlignLine ln =
+ let ln' = hPad width ln
+ (startSpaces, rest) = span isWhitespace ln'
+ endSpaces = takeWhile isWhitespace (reverse ln')
+ numSpaces = length (startSpaces ++ endSpaces)
+ startSpaces' = replicate (quot numSpaces 2) ' '
+ in startSpaces' ++ rest
+ in TextBlock width height $ map centerAlignLine lns
+
diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs
new file mode 100644
index 000000000..466f5d8f4
--- /dev/null
+++ b/src/Text/Pandoc/CharacterReferences.hs
@@ -0,0 +1,327 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.CharacterReferences
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for parsing character references.
+-}
+module Text.Pandoc.CharacterReferences (
+ characterReference,
+ decodeCharacterReferences,
+ ) where
+import Data.Char ( chr )
+import Text.ParserCombinators.Parsec
+import qualified Data.Map as Map
+
+-- | Parse character entity.
+characterReference :: GenParser Char st Char
+characterReference = try $ do
+ st <- char '&'
+ character <- numRef <|> entity
+ end <- char ';'
+ return character
+
+numRef :: GenParser Char st Char
+numRef = do
+ char '#'
+ num <- hexNum <|> decNum
+ return $ chr $ num
+
+hexNum :: GenParser Char st Int
+hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . ("0x" ++)
+
+decNum :: GenParser Char st Int
+decNum = many1 digit >>= return . read
+
+entity :: GenParser Char st Char
+entity = do
+ body <- many1 alphaNum
+ return $ Map.findWithDefault '?' body entityTable
+
+-- | Convert entities in a string to characters.
+decodeCharacterReferences :: String -> String
+decodeCharacterReferences str =
+ case parse (many (characterReference <|> anyChar)) str str of
+ Left err -> error $ "\nError: " ++ show err
+ Right result -> result
+
+entityTable :: Map.Map String Char
+entityTable = Map.fromList entityTableList
+
+entityTableList :: [(String, Char)]
+entityTableList = [
+ ("quot", chr 34),
+ ("amp", chr 38),
+ ("lt", chr 60),
+ ("gt", chr 62),
+ ("nbsp", chr 160),
+ ("iexcl", chr 161),
+ ("cent", chr 162),
+ ("pound", chr 163),
+ ("curren", chr 164),
+ ("yen", chr 165),
+ ("brvbar", chr 166),
+ ("sect", chr 167),
+ ("uml", chr 168),
+ ("copy", chr 169),
+ ("ordf", chr 170),
+ ("laquo", chr 171),
+ ("not", chr 172),
+ ("shy", chr 173),
+ ("reg", chr 174),
+ ("macr", chr 175),
+ ("deg", chr 176),
+ ("plusmn", chr 177),
+ ("sup2", chr 178),
+ ("sup3", chr 179),
+ ("acute", chr 180),
+ ("micro", chr 181),
+ ("para", chr 182),
+ ("middot", chr 183),
+ ("cedil", chr 184),
+ ("sup1", chr 185),
+ ("ordm", chr 186),
+ ("raquo", chr 187),
+ ("frac14", chr 188),
+ ("frac12", chr 189),
+ ("frac34", chr 190),
+ ("iquest", chr 191),
+ ("Agrave", chr 192),
+ ("Aacute", chr 193),
+ ("Acirc", chr 194),
+ ("Atilde", chr 195),
+ ("Auml", chr 196),
+ ("Aring", chr 197),
+ ("AElig", chr 198),
+ ("Ccedil", chr 199),
+ ("Egrave", chr 200),
+ ("Eacute", chr 201),
+ ("Ecirc", chr 202),
+ ("Euml", chr 203),
+ ("Igrave", chr 204),
+ ("Iacute", chr 205),
+ ("Icirc", chr 206),
+ ("Iuml", chr 207),
+ ("ETH", chr 208),
+ ("Ntilde", chr 209),
+ ("Ograve", chr 210),
+ ("Oacute", chr 211),
+ ("Ocirc", chr 212),
+ ("Otilde", chr 213),
+ ("Ouml", chr 214),
+ ("times", chr 215),
+ ("Oslash", chr 216),
+ ("Ugrave", chr 217),
+ ("Uacute", chr 218),
+ ("Ucirc", chr 219),
+ ("Uuml", chr 220),
+ ("Yacute", chr 221),
+ ("THORN", chr 222),
+ ("szlig", chr 223),
+ ("agrave", chr 224),
+ ("aacute", chr 225),
+ ("acirc", chr 226),
+ ("atilde", chr 227),
+ ("auml", chr 228),
+ ("aring", chr 229),
+ ("aelig", chr 230),
+ ("ccedil", chr 231),
+ ("egrave", chr 232),
+ ("eacute", chr 233),
+ ("ecirc", chr 234),
+ ("euml", chr 235),
+ ("igrave", chr 236),
+ ("iacute", chr 237),
+ ("icirc", chr 238),
+ ("iuml", chr 239),
+ ("eth", chr 240),
+ ("ntilde", chr 241),
+ ("ograve", chr 242),
+ ("oacute", chr 243),
+ ("ocirc", chr 244),
+ ("otilde", chr 245),
+ ("ouml", chr 246),
+ ("divide", chr 247),
+ ("oslash", chr 248),
+ ("ugrave", chr 249),
+ ("uacute", chr 250),
+ ("ucirc", chr 251),
+ ("uuml", chr 252),
+ ("yacute", chr 253),
+ ("thorn", chr 254),
+ ("yuml", chr 255),
+ ("OElig", chr 338),
+ ("oelig", chr 339),
+ ("Scaron", chr 352),
+ ("scaron", chr 353),
+ ("Yuml", chr 376),
+ ("fnof", chr 402),
+ ("circ", chr 710),
+ ("tilde", chr 732),
+ ("Alpha", chr 913),
+ ("Beta", chr 914),
+ ("Gamma", chr 915),
+ ("Delta", chr 916),
+ ("Epsilon", chr 917),
+ ("Zeta", chr 918),
+ ("Eta", chr 919),
+ ("Theta", chr 920),
+ ("Iota", chr 921),
+ ("Kappa", chr 922),
+ ("Lambda", chr 923),
+ ("Mu", chr 924),
+ ("Nu", chr 925),
+ ("Xi", chr 926),
+ ("Omicron", chr 927),
+ ("Pi", chr 928),
+ ("Rho", chr 929),
+ ("Sigma", chr 931),
+ ("Tau", chr 932),
+ ("Upsilon", chr 933),
+ ("Phi", chr 934),
+ ("Chi", chr 935),
+ ("Psi", chr 936),
+ ("Omega", chr 937),
+ ("alpha", chr 945),
+ ("beta", chr 946),
+ ("gamma", chr 947),
+ ("delta", chr 948),
+ ("epsilon", chr 949),
+ ("zeta", chr 950),
+ ("eta", chr 951),
+ ("theta", chr 952),
+ ("iota", chr 953),
+ ("kappa", chr 954),
+ ("lambda", chr 955),
+ ("mu", chr 956),
+ ("nu", chr 957),
+ ("xi", chr 958),
+ ("omicron", chr 959),
+ ("pi", chr 960),
+ ("rho", chr 961),
+ ("sigmaf", chr 962),
+ ("sigma", chr 963),
+ ("tau", chr 964),
+ ("upsilon", chr 965),
+ ("phi", chr 966),
+ ("chi", chr 967),
+ ("psi", chr 968),
+ ("omega", chr 969),
+ ("thetasym", chr 977),
+ ("upsih", chr 978),
+ ("piv", chr 982),
+ ("ensp", chr 8194),
+ ("emsp", chr 8195),
+ ("thinsp", chr 8201),
+ ("zwnj", chr 8204),
+ ("zwj", chr 8205),
+ ("lrm", chr 8206),
+ ("rlm", chr 8207),
+ ("ndash", chr 8211),
+ ("mdash", chr 8212),
+ ("lsquo", chr 8216),
+ ("rsquo", chr 8217),
+ ("sbquo", chr 8218),
+ ("ldquo", chr 8220),
+ ("rdquo", chr 8221),
+ ("bdquo", chr 8222),
+ ("dagger", chr 8224),
+ ("Dagger", chr 8225),
+ ("bull", chr 8226),
+ ("hellip", chr 8230),
+ ("permil", chr 8240),
+ ("prime", chr 8242),
+ ("Prime", chr 8243),
+ ("lsaquo", chr 8249),
+ ("rsaquo", chr 8250),
+ ("oline", chr 8254),
+ ("frasl", chr 8260),
+ ("euro", chr 8364),
+ ("image", chr 8465),
+ ("weierp", chr 8472),
+ ("real", chr 8476),
+ ("trade", chr 8482),
+ ("alefsym", chr 8501),
+ ("larr", chr 8592),
+ ("uarr", chr 8593),
+ ("rarr", chr 8594),
+ ("darr", chr 8595),
+ ("harr", chr 8596),
+ ("crarr", chr 8629),
+ ("lArr", chr 8656),
+ ("uArr", chr 8657),
+ ("rArr", chr 8658),
+ ("dArr", chr 8659),
+ ("hArr", chr 8660),
+ ("forall", chr 8704),
+ ("part", chr 8706),
+ ("exist", chr 8707),
+ ("empty", chr 8709),
+ ("nabla", chr 8711),
+ ("isin", chr 8712),
+ ("notin", chr 8713),
+ ("ni", chr 8715),
+ ("prod", chr 8719),
+ ("sum", chr 8721),
+ ("minus", chr 8722),
+ ("lowast", chr 8727),
+ ("radic", chr 8730),
+ ("prop", chr 8733),
+ ("infin", chr 8734),
+ ("ang", chr 8736),
+ ("and", chr 8743),
+ ("or", chr 8744),
+ ("cap", chr 8745),
+ ("cup", chr 8746),
+ ("int", chr 8747),
+ ("there4", chr 8756),
+ ("sim", chr 8764),
+ ("cong", chr 8773),
+ ("asymp", chr 8776),
+ ("ne", chr 8800),
+ ("equiv", chr 8801),
+ ("le", chr 8804),
+ ("ge", chr 8805),
+ ("sub", chr 8834),
+ ("sup", chr 8835),
+ ("nsub", chr 8836),
+ ("sube", chr 8838),
+ ("supe", chr 8839),
+ ("oplus", chr 8853),
+ ("otimes", chr 8855),
+ ("perp", chr 8869),
+ ("sdot", chr 8901),
+ ("lceil", chr 8968),
+ ("rceil", chr 8969),
+ ("lfloor", chr 8970),
+ ("rfloor", chr 8971),
+ ("lang", chr 9001),
+ ("rang", chr 9002),
+ ("loz", chr 9674),
+ ("spades", chr 9824),
+ ("clubs", chr 9827),
+ ("hearts", chr 9829),
+ ("diams", chr 9830)
+ ]
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
new file mode 100644
index 000000000..7d1125c5a
--- /dev/null
+++ b/src/Text/Pandoc/Definition.hs
@@ -0,0 +1,116 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Definition
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definition of 'Pandoc' data structure for format-neutral representation
+of documents.
+-}
+module Text.Pandoc.Definition where
+
+data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show)
+
+-- | Bibliographic information for the document: title (list of 'Inline'),
+-- authors (list of strings), date (string).
+data Meta = Meta [Inline] -- title
+ [String] -- authors
+ String -- date
+ deriving (Eq, Show, Read)
+
+-- | Alignment of a table column.
+data Alignment = AlignLeft
+ | AlignRight
+ | AlignCenter
+ | AlignDefault deriving (Eq, Show, Read)
+
+-- | List attributes.
+type ListAttributes = (Int, ListNumberStyle, ListNumberDelim)
+
+-- | Style of list numbers.
+data ListNumberStyle = DefaultStyle
+ | Decimal
+ | LowerRoman
+ | UpperRoman
+ | LowerAlpha
+ | UpperAlpha deriving (Eq, Show, Read)
+
+-- | Delimiter of list numbers.
+data ListNumberDelim = DefaultDelim
+ | Period
+ | OneParen
+ | TwoParens deriving (Eq, Show, Read)
+
+-- | Block element.
+data Block
+ = Plain [Inline] -- ^ Plain text, not a paragraph
+ | Para [Inline] -- ^ Paragraph
+ | CodeBlock String -- ^ Code block (literal)
+ | RawHtml String -- ^ Raw HTML block (literal)
+ | BlockQuote [Block] -- ^ Block quote (list of blocks)
+ | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes
+ -- and a list of items, each a list of blocks)
+ | BulletList [[Block]] -- ^ Bullet list (list of items, each
+ -- a list of blocks)
+ | DefinitionList [([Inline],[Block])] -- ^ Definition list
+ -- (list of items, each a pair of an inline list,
+ -- the term, and a block list)
+ | Header Int [Inline] -- ^ Header - level (integer) and text (inlines)
+ | HorizontalRule -- ^ Horizontal rule
+ | Table [Inline] [Alignment] [Float] [[Block]] [[[Block]]] -- ^ Table,
+ -- with caption, column alignments,
+ -- relative column widths, column headers
+ -- (each a list of blocks), and rows
+ -- (each a list of lists of blocks)
+ | Null -- ^ Nothing
+ deriving (Eq, Read, Show)
+
+-- | Type of quotation marks to use in Quoted inline.
+data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read)
+
+type Target = (String, String) -- ^ Link target (URL, title)
+
+-- | Inline elements.
+data Inline
+ = Str String -- ^ Text (string)
+ | Emph [Inline] -- ^ Emphasized text (list of inlines)
+ | Strong [Inline] -- ^ Strongly emphasized text (list of inlines)
+ | Strikeout [Inline] -- ^ Strikeout text (list of inlines)
+ | Superscript [Inline] -- ^ Superscripted text (list of inlines)
+ | Subscript [Inline] -- ^ Subscripted text (list of inlines)
+ | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines)
+ | Code String -- ^ Inline code (literal)
+ | Space -- ^ Inter-word space
+ | EmDash -- ^ Em dash
+ | EnDash -- ^ En dash
+ | Apostrophe -- ^ Apostrophe
+ | Ellipses -- ^ Ellipses
+ | LineBreak -- ^ Hard line break
+ | TeX String -- ^ LaTeX code (literal)
+ | HtmlInline String -- ^ HTML code (literal)
+ | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target
+ | Image [Inline] Target -- ^ Image: alt text (list of inlines), target
+ -- and target
+ | Note [Block] -- ^ Footnote or endnote
+ deriving (Show, Eq, Read)
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
new file mode 100644
index 000000000..70a071152
--- /dev/null
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -0,0 +1,496 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.HTML
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of HTML to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.HTML (
+ readHtml,
+ rawHtmlInline,
+ rawHtmlBlock,
+ anyHtmlBlockTag,
+ anyHtmlInlineTag,
+ anyHtmlTag,
+ anyHtmlEndTag,
+ htmlEndTag,
+ extractTagType,
+ htmlBlockElement
+ ) where
+
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.CharacterReferences ( characterReference,
+ decodeCharacterReferences )
+import Data.Maybe ( fromMaybe )
+import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf )
+import Data.Char ( toUpper, toLower, isAlphaNum )
+
+-- | Convert HTML-formatted string to 'Pandoc' document.
+readHtml :: ParserState -- ^ Parser state
+ -> String -- ^ String to parse
+ -> Pandoc
+readHtml = readWith parseHtml
+
+--
+-- Constants
+--
+
+eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
+ "map", "area", "object", "script"]
+
+inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
+ "br", "cite", "code", "dfn", "em", "font", "i", "img",
+ "input", "kbd", "label", "q", "s", "samp", "select",
+ "small", "span", "strike", "strong", "sub", "sup",
+ "textarea", "tt", "u", "var"] ++ eitherBlockOrInline
+
+blockHtmlTags = ["address", "blockquote", "center", "dir", "div",
+ "dl", "fieldset", "form", "h1", "h2", "h3", "h4",
+ "h5", "h6", "hr", "isindex", "menu", "noframes",
+ "noscript", "ol", "p", "pre", "table", "ul", "dd",
+ "dt", "frameset", "li", "tbody", "td", "tfoot",
+ "th", "thead", "tr"] ++ eitherBlockOrInline
+
+--
+-- HTML utility functions
+--
+
+-- | Read blocks until end tag.
+blocksTilEnd tag = do
+ blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
+ return $ filter (/= Null) blocks
+
+-- | Read inlines until end tag.
+inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
+
+-- | Parse blocks between open and close tag.
+blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
+
+-- | Parse inlines between open and close tag.
+inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
+
+-- | Extract type from a tag: e.g. @br@ from @\<br\>@
+extractTagType :: String -> String
+extractTagType ('<':rest) =
+ let isSpaceOrSlash c = c `elem` "/ \n\t" in
+ map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
+extractTagType _ = ""
+
+-- | Parse any HTML tag (opening or self-closing) and return text of tag
+anyHtmlTag = try $ do
+ char '<'
+ spaces
+ tag <- many1 alphaNum
+ attribs <- many htmlAttribute
+ spaces
+ ender <- option "" (string "/")
+ let ender' = if null ender then "" else " /"
+ spaces
+ char '>'
+ return $ "<" ++ tag ++
+ concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
+
+anyHtmlEndTag = try $ do
+ char '<'
+ spaces
+ char '/'
+ spaces
+ tagType <- many1 alphaNum
+ spaces
+ char '>'
+ return $ "</" ++ tagType ++ ">"
+
+htmlTag :: String -> GenParser Char st (String, [(String, String)])
+htmlTag tag = try $ do
+ char '<'
+ spaces
+ stringAnyCase tag
+ attribs <- many htmlAttribute
+ spaces
+ optional (string "/")
+ spaces
+ char '>'
+ return (tag, (map (\(name, content, raw) -> (name, content)) attribs))
+
+-- parses a quoted html attribute value
+quoted quoteChar = do
+ result <- between (char quoteChar) (char quoteChar)
+ (many (noneOf [quoteChar]))
+ return (result, [quoteChar])
+
+htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
+
+-- minimized boolean attribute
+htmlMinimizedAttribute = try $ do
+ many1 space
+ name <- many1 (choice [letter, oneOf ".-_:"])
+ return (name, name, name)
+
+htmlRegularAttribute = try $ do
+ many1 space
+ name <- many1 (choice [letter, oneOf ".-_:"])
+ spaces
+ char '='
+ spaces
+ (content, quoteStr) <- choice [ (quoted '\''),
+ (quoted '"'),
+ (do
+ a <- many (alphaNum <|> (oneOf "-._:"))
+ return (a,"")) ]
+ return (name, content,
+ (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
+
+-- | Parse an end tag of type 'tag'
+htmlEndTag tag = try $ do
+ char '<'
+ spaces
+ char '/'
+ spaces
+ stringAnyCase tag
+ spaces
+ char '>'
+ return $ "</" ++ tag ++ ">"
+
+-- | Returns @True@ if the tag is (or can be) an inline tag.
+isInline tag = (extractTagType tag) `elem` inlineHtmlTags
+
+-- | Returns @True@ if the tag is (or can be) a block tag.
+isBlock tag = (extractTagType tag) `elem` blockHtmlTags
+
+anyHtmlBlockTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if isBlock tag then return tag else fail "inline tag"
+
+anyHtmlInlineTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if isInline tag then return tag else fail "not an inline tag"
+
+-- | Parses material between script tags.
+-- Scripts must be treated differently, because they can contain '<>' etc.
+htmlScript = try $ do
+ open <- string "<script"
+ rest <- manyTill anyChar (htmlEndTag "script")
+ return $ open ++ rest ++ "</script>"
+
+htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]
+
+rawHtmlBlock = try $ do
+ notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
+ body <- htmlBlockElement <|> anyHtmlTag <|> anyHtmlEndTag
+ sp <- many space
+ state <- getState
+ if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null
+
+-- | Parses an HTML comment.
+htmlComment = try $ do
+ string "<!--"
+ comment <- manyTill anyChar (try (string "-->"))
+ return $ "<!--" ++ comment ++ "-->"
+
+--
+-- parsing documents
+--
+
+xmlDec = try $ do
+ string "<?"
+ rest <- manyTill anyChar (char '>')
+ return $ "<?" ++ rest ++ ">"
+
+definition = try $ do
+ string "<!"
+ rest <- manyTill anyChar (char '>')
+ return $ "<!" ++ rest ++ ">"
+
+nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >>
+ ((rawHtmlBlock >> return ' ') <|> anyChar)
+
+parseTitle = try $ do
+ (tag, _) <- htmlTag "title"
+ contents <- inlinesTilEnd tag
+ spaces
+ return contents
+
+-- parse header and return meta-information (for now, just title)
+parseHead = try $ do
+ htmlTag "head"
+ spaces
+ skipMany nonTitleNonHead
+ contents <- option [] parseTitle
+ skipMany nonTitleNonHead
+ htmlTag "/head"
+ return (contents, [], "")
+
+skipHtmlTag tag = optional (htmlTag tag)
+
+-- h1 class="title" representation of title in body
+bodyTitle = try $ do
+ (tag, attribs) <- htmlTag "h1"
+ cl <- case (extractAttribute "class" attribs) of
+ Just "title" -> return ""
+ otherwise -> fail "not title"
+ inlinesTilEnd "h1"
+
+parseHtml = do
+ sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
+ skipHtmlTag "html"
+ spaces
+ (title, authors, date) <- option ([], [], "") parseHead
+ spaces
+ skipHtmlTag "body"
+ spaces
+ optional bodyTitle -- skip title in body, because it's represented in meta
+ blocks <- parseBlocks
+ spaces
+ optional (htmlEndTag "body")
+ spaces
+ optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html>
+ eof
+ return $ Pandoc (Meta title authors date) blocks
+
+--
+-- parsing blocks
+--
+
+parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
+
+block = choice [ codeBlock
+ , header
+ , hrule
+ , list
+ , blockQuote
+ , para
+ , plain
+ , rawHtmlBlock ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
+
+headerLevel n = try $ do
+ let level = "h" ++ show n
+ (tag, attribs) <- htmlTag level
+ contents <- inlinesTilEnd level
+ return $ Header n (normalizeSpaces contents)
+
+--
+-- hrule block
+--
+
+hrule = try $ do
+ (tag, attribs) <- htmlTag "hr"
+ state <- getState
+ if not (null attribs) && stateParseRaw state
+ then unexpected "attributes in hr" -- parse as raw in this case
+ else return HorizontalRule
+
+--
+-- code blocks
+--
+
+-- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
+-- skipped, because they are not portable to output formats other than HTML.
+codeBlock = try $ do
+ htmlTag "pre"
+ result <- manyTill
+ (many1 (satisfy (/= '<')) <|>
+ ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
+ (htmlEndTag "pre")
+ let result' = concat result
+ -- drop leading newline if any
+ let result'' = if "\n" `isPrefixOf` result'
+ then drop 1 result'
+ else result'
+ -- drop trailing newline if any
+ let result''' = if "\n" `isSuffixOf` result''
+ then init result''
+ else result''
+ return $ CodeBlock $ decodeCharacterReferences result'''
+
+--
+-- block quotes
+--
+
+blockQuote = try $ htmlTag "blockquote" >> spaces >>
+ blocksTilEnd "blockquote" >>= (return . BlockQuote)
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+orderedList = try $ do
+ (_, attribs) <- htmlTag "ol"
+ (start, style) <- option (1, DefaultStyle) $
+ do failIfStrict
+ let sta = fromMaybe "1" $
+ lookup "start" attribs
+ let sty = fromMaybe (fromMaybe "" $
+ lookup "style" attribs) $
+ lookup "class" attribs
+ let sty' = case sty of
+ "lower-roman" -> LowerRoman
+ "upper-roman" -> UpperRoman
+ "lower-alpha" -> LowerAlpha
+ "upper-alpha" -> UpperAlpha
+ "decimal" -> Decimal
+ _ -> DefaultStyle
+ return (read sta, sty')
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ol"
+ return $ OrderedList (start, style, DefaultDelim) items
+
+bulletList = try $ do
+ htmlTag "ul"
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ul"
+ return $ BulletList items
+
+definitionList = try $ do
+ failIfStrict -- def lists not part of standard markdown
+ tag <- htmlTag "dl"
+ spaces
+ items <- sepEndBy1 definitionListItem spaces
+ htmlEndTag "dl"
+ return $ DefinitionList items
+
+definitionListItem = try $ do
+ terms <- sepEndBy1 (inlinesIn "dt") spaces
+ defs <- sepEndBy1 (blocksIn "dd") spaces
+ let term = joinWithSep [LineBreak] terms
+ return (term, concat defs)
+
+--
+-- paragraph block
+--
+
+para = try $ htmlTag "p" >> inlinesTilEnd "p" >>=
+ return . Para . normalizeSpaces
+
+--
+-- plain block
+--
+
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+--
+-- inline
+--
+
+inline = choice [ charRef
+ , strong
+ , emph
+ , superscript
+ , subscript
+ , strikeout
+ , spanStrikeout
+ , code
+ , str
+ , linebreak
+ , whitespace
+ , link
+ , image
+ , rawHtmlInline
+ ] <?> "inline"
+
+code = try $ do
+ htmlTag "code"
+ result <- manyTill anyChar (htmlEndTag "code")
+ -- remove internal line breaks, leading and trailing space,
+ -- and decode character references
+ return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
+ joinWithSep " " $ lines result
+
+rawHtmlInline = do
+ result <- htmlScript <|> htmlComment <|> anyHtmlInlineTag
+ state <- getState
+ if stateParseRaw state then return (HtmlInline result) else return (Str "")
+
+betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
+ return . normalizeSpaces
+
+emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph
+
+strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
+
+superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
+
+subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
+
+strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
+ return . Strikeout
+
+spanStrikeout = try $ do
+ failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
+ (tag, attributes) <- htmlTag "span"
+ result <- case (extractAttribute "class" attributes) of
+ Just "strikeout" -> inlinesTilEnd "span"
+ _ -> fail "not a strikeout"
+ return $ Strikeout result
+
+whitespace = many1 space >> return Space
+
+-- hard line break
+linebreak = htmlTag "br" >> optional newline >> return LineBreak
+
+str = many1 (noneOf "<& \t\n") >>= return . Str
+
+--
+-- links and images
+--
+
+-- extract contents of attribute (attribute names are case-insensitive)
+extractAttribute name [] = Nothing
+extractAttribute name ((attrName, contents):rest) =
+ let name' = map toLower name
+ attrName' = map toLower attrName
+ in if attrName' == name'
+ then Just (decodeCharacterReferences contents)
+ else extractAttribute name rest
+
+link = try $ do
+ (tag, attributes) <- htmlTag "a"
+ url <- case (extractAttribute "href" attributes) of
+ Just url -> return url
+ Nothing -> fail "no href"
+ let title = fromMaybe "" $ extractAttribute "title" attributes
+ label <- inlinesTilEnd "a"
+ return $ Link (normalizeSpaces label) (url, title)
+
+image = try $ do
+ (tag, attributes) <- htmlTag "img"
+ url <- case (extractAttribute "src" attributes) of
+ Just url -> return url
+ Nothing -> fail "no src"
+ let title = fromMaybe "" $ extractAttribute "title" attributes
+ let alt = fromMaybe "" (extractAttribute "alt" attributes)
+ return $ Image [Str alt] (url, title)
+
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
new file mode 100644
index 000000000..37cc2bfe4
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -0,0 +1,651 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of LaTeX to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.LaTeX (
+ readLaTeX,
+ rawLaTeXInline,
+ rawLaTeXEnvironment
+ ) where
+
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Data.Maybe ( fromMaybe )
+import Data.Char ( chr )
+import Data.List ( isPrefixOf, isSuffixOf )
+
+-- | Parse LaTeX from string and return 'Pandoc' document.
+readLaTeX :: ParserState -- ^ Parser state, including options for parser
+ -> String -- ^ String to parse
+ -> Pandoc
+readLaTeX = readWith parseLaTeX
+
+-- characters with special meaning
+specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"
+
+--
+-- utility functions
+--
+
+-- | Returns text between brackets and its matching pair.
+bracketedText openB closeB = do
+ result <- charsInBalanced' openB closeB
+ return $ [openB] ++ result ++ [closeB]
+
+-- | Returns an option or argument of a LaTeX command.
+optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
+
+-- | True if the string begins with '{'.
+isArg ('{':rest) = True
+isArg other = False
+
+-- | Returns list of options and arguments of a LaTeX command.
+commandArgs = many optOrArg
+
+-- | Parses LaTeX command, returns (name, star, list of options or arguments).
+command = do
+ char '\\'
+ name <- many1 letter
+ star <- option "" (string "*") -- some commands have starred versions
+ args <- commandArgs
+ return (name, star, args)
+
+begin name = try $ do
+ string $ "\\begin{" ++ name ++ "}"
+ optional commandArgs
+ spaces
+ return name
+
+end name = try $ do
+ string $ "\\end{" ++ name ++ "}"
+ spaces
+ return name
+
+-- | Returns a list of block elements containing the contents of an
+-- environment.
+environment name = try $ begin name >> spaces >> manyTill block (end name)
+
+anyEnvironment = try $ do
+ string "\\begin{"
+ name <- many letter
+ star <- option "" (string "*") -- some environments have starred variants
+ char '}'
+ optional commandArgs
+ spaces
+ contents <- manyTill block (end (name ++ star))
+ return $ BlockQuote contents
+
+--
+-- parsing documents
+--
+
+-- | Process LaTeX preamble, extracting metadata.
+processLaTeXPreamble = try $ manyTill
+ (choice [bibliographic, comment, unknownCommand, nullBlock])
+ (try (string "\\begin{document}")) >>
+ spaces
+
+-- | Parse LaTeX and return 'Pandoc'.
+parseLaTeX = do
+ optional processLaTeXPreamble -- preamble might not be present (fragment)
+ spaces
+ blocks <- parseBlocks
+ spaces
+ optional $ try (string "\\end{document}" >> many anyChar)
+ -- might not be present (fragment)
+ spaces
+ eof
+ state <- getState
+ let blocks' = filter (/= Null) blocks
+ let title' = stateTitle state
+ let authors' = stateAuthors state
+ let date' = stateDate state
+ return $ Pandoc (Meta title' authors' date') blocks'
+
+--
+-- parsing blocks
+--
+
+parseBlocks = spaces >> many block
+
+block = choice [ hrule
+ , codeBlock
+ , header
+ , list
+ , blockQuote
+ , mathBlock
+ , comment
+ , bibliographic
+ , para
+ , specialEnvironment
+ , itemBlock
+ , unknownEnvironment
+ , unknownCommand ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = try $ do
+ char '\\'
+ subs <- many (try (string "sub"))
+ string "section"
+ optional (char '*')
+ char '{'
+ title <- manyTill inline (char '}')
+ spaces
+ return $ Header (length subs + 1) (normalizeSpaces title)
+
+--
+-- hrule block
+--
+
+hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
+ "\\newpage" ] >> spaces >> return HorizontalRule
+
+--
+-- code blocks
+--
+
+codeBlock = codeBlock1 <|> codeBlock2
+
+codeBlock1 = try $ do
+ string "\\begin{verbatim}" -- don't use begin function because it
+ -- gobbles whitespace
+ optional blanklines -- we want to gobble blank lines, but not
+ -- leading space
+ contents <- manyTill anyChar (try (string "\\end{verbatim}"))
+ spaces
+ return $ CodeBlock (stripTrailingNewlines contents)
+
+codeBlock2 = try $ do
+ string "\\begin{Verbatim}" -- used by fancyvrb package
+ option "" blanklines
+ contents <- manyTill anyChar (try (string "\\end{Verbatim}"))
+ spaces
+ return $ CodeBlock (stripTrailingNewlines contents)
+
+--
+-- block quotes
+--
+
+blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
+ return . BlockQuote
+
+--
+-- math block
+--
+
+mathBlock = mathBlockWith (begin "equation") (end "equation") <|>
+ mathBlockWith (begin "displaymath") (end "displaymath") <|>
+ mathBlockWith (string "\\[") (string "\\]") <?> "math block"
+
+mathBlockWith start end = try $ do
+ start
+ spaces
+ result <- manyTill anyChar end
+ spaces
+ return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]]
+
+--
+-- list blocks
+--
+
+list = bulletList <|> orderedList <|> definitionList <?> "list"
+
+listItem = try $ do
+ ("item", _, args) <- command
+ spaces
+ state <- getState
+ let oldParserContext = stateParserContext state
+ updateState (\state -> state {stateParserContext = ListItemState})
+ blocks <- many block
+ updateState (\state -> state {stateParserContext = oldParserContext})
+ opt <- case args of
+ ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
+ parseFromString (many inline) $ tail $ init x
+ _ -> return []
+ return (opt, blocks)
+
+orderedList = try $ do
+ string "\\begin{enumerate}"
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+ try $ do failIfStrict
+ char '['
+ res <- anyOrderedListMarker
+ char ']'
+ return res
+ spaces
+ option "" $ try $ do string "\\setlength{\\itemindent}"
+ char '{'
+ manyTill anyChar (char '}')
+ spaces
+ start <- option 1 $ try $ do failIfStrict
+ string "\\setcounter{enum"
+ many1 (oneOf "iv")
+ string "}{"
+ num <- many1 digit
+ char '}'
+ spaces
+ return $ (read num) + 1
+ items <- many listItem
+ end "enumerate"
+ spaces
+ return $ OrderedList (start, style, delim) $ map snd items
+
+bulletList = try $ do
+ begin "itemize"
+ spaces
+ items <- many listItem
+ end "itemize"
+ spaces
+ return (BulletList $ map snd items)
+
+definitionList = try $ do
+ begin "description"
+ spaces
+ items <- many listItem
+ end "description"
+ spaces
+ return (DefinitionList items)
+
+--
+-- paragraph block
+--
+
+para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
+
+--
+-- title authors date
+--
+
+bibliographic = choice [ maketitle, title, authors, date ]
+
+maketitle = try (string "\\maketitle") >> spaces >> return Null
+
+title = try $ do
+ string "\\title{"
+ tit <- manyTill inline (char '}')
+ spaces
+ updateState (\state -> state { stateTitle = tit })
+ return Null
+
+authors = try $ do
+ string "\\author{"
+ authors <- manyTill anyChar (char '}')
+ spaces
+ let authors' = map removeLeadingTrailingSpace $ lines $
+ substitute "\\\\" "\n" authors
+ updateState (\state -> state { stateAuthors = authors' })
+ return Null
+
+date = try $ do
+ string "\\date{"
+ date' <- manyTill anyChar (char '}')
+ spaces
+ updateState (\state -> state { stateDate = date' })
+ return Null
+
+--
+-- item block
+-- for use in unknown environments that aren't being parsed as raw latex
+--
+
+-- this forces items to be parsed in different blocks
+itemBlock = try $ do
+ ("item", _, args) <- command
+ state <- getState
+ if (stateParserContext state == ListItemState)
+ then fail "item should be handled by list block"
+ else if null args
+ then return Null
+ else return $ Plain [Str (stripFirstAndLast (head args))]
+
+--
+-- raw LaTeX
+--
+
+specialEnvironment = do -- these are always parsed as raw
+ lookAhead (choice (map (\name -> begin name) ["tabular", "figure",
+ "tabbing", "eqnarry", "picture", "table", "verse", "theorem"]))
+ rawLaTeXEnvironment
+
+-- | Parse any LaTeX environment and return a Para block containing
+-- the whole literal environment as raw TeX.
+rawLaTeXEnvironment :: GenParser Char st Block
+rawLaTeXEnvironment = try $ do
+ string "\\begin{"
+ name <- many1 letter
+ star <- option "" (string "*") -- for starred variants
+ let name' = name ++ star
+ char '}'
+ args <- option [] commandArgs
+ let argStr = concat args
+ contents <- manyTill (choice [ (many1 (noneOf "\\")),
+ (do
+ (Para [TeX str]) <- rawLaTeXEnvironment
+ return str),
+ string "\\" ])
+ (end name')
+ spaces
+ return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++
+ concat contents ++ "\\end{" ++ name' ++ "}"]
+
+unknownEnvironment = try $ do
+ state <- getState
+ result <- if stateParseRaw state -- check whether we should include raw TeX
+ then rawLaTeXEnvironment -- if so, get whole raw environment
+ else anyEnvironment -- otherwise just the contents
+ return result
+
+unknownCommand = try $ do
+ notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
+ "document"]
+ (name, star, args) <- command
+ spaces
+ let argStr = concat args
+ state <- getState
+ if name == "item" && (stateParserContext state) == ListItemState
+ then fail "should not be parsed as raw"
+ else string ""
+ if stateParseRaw state
+ then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
+ else return $ Plain [Str (joinWithSep " " args)]
+
+-- latex comment
+comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
+
+--
+-- inline
+--
+
+inline = choice [ str
+ , endline
+ , whitespace
+ , quoted
+ , apostrophe
+ , spacer
+ , strong
+ , math
+ , ellipses
+ , emDash
+ , enDash
+ , hyphen
+ , emph
+ , strikeout
+ , superscript
+ , subscript
+ , ref
+ , lab
+ , code
+ , url
+ , link
+ , image
+ , footnote
+ , linebreak
+ , accentedChar
+ , specialChar
+ , rawLaTeXInline
+ , escapedChar
+ , unescapedChar
+ ] <?> "inline"
+
+accentedChar = normalAccentedChar <|> specialAccentedChar
+
+normalAccentedChar = try $ do
+ char '\\'
+ accent <- oneOf "'`^\"~"
+ character <- (try $ char '{' >> letter >>~ char '}') <|> letter
+ let table = fromMaybe [] $ lookup character accentTable
+ let result = case lookup accent table of
+ Just num -> chr num
+ Nothing -> '?'
+ return $ Str [result]
+
+-- an association list of letters and association list of accents
+-- and decimal character numbers.
+accentTable =
+ [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
+ ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
+ ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
+ ('N', [('~', 209)]),
+ ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
+ ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
+ ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
+ ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
+ ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
+ ('n', [('~', 241)]),
+ ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
+ ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
+
+specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
+ oslash, pound, euro, copyright, sect ]
+
+ccedil = try $ do
+ char '\\'
+ letter <- oneOfStrings ["cc", "cC"]
+ let num = if letter == "cc" then 231 else 199
+ return $ Str [chr num]
+
+aring = try $ do
+ char '\\'
+ letter <- oneOfStrings ["aa", "AA"]
+ let num = if letter == "aa" then 229 else 197
+ return $ Str [chr num]
+
+iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
+ return (Str [chr 239])
+
+icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >>
+ return (Str [chr 238])
+
+szlig = try (string "\\ss") >> return (Str [chr 223])
+
+oslash = try $ do
+ char '\\'
+ letter <- choice [char 'o', char 'O']
+ let num = if letter == 'o' then 248 else 216
+ return $ Str [chr num]
+
+aelig = try $ do
+ char '\\'
+ letter <- oneOfStrings ["ae", "AE"]
+ let num = if letter == "ae" then 230 else 198
+ return $ Str [chr num]
+
+pound = try (string "\\pounds") >> return (Str [chr 163])
+
+euro = try (string "\\euro") >> return (Str [chr 8364])
+
+copyright = try (string "\\copyright") >> return (Str [chr 169])
+
+sect = try (string "\\S") >> return (Str [chr 167])
+
+escapedChar = do
+ result <- escaped (oneOf " $%&_#{}\n")
+ return $ if result == Str "\n" then Str " " else result
+
+-- ignore standalone, nonescaped special characters
+unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "")
+
+specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
+
+backslash = try (string "\\textbackslash") >> return (Str "\\")
+
+tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
+
+caret = try (string "\\^{}") >> return (Str "^")
+
+bar = try (string "\\textbar") >> return (Str "\\")
+
+lt = try (string "\\textless") >> return (Str "<")
+
+gt = try (string "\\textgreater") >> return (Str ">")
+
+doubleQuote = char '"' >> return (Str "\"")
+
+code = code1 <|> code2
+
+code1 = try $ do
+ string "\\verb"
+ marker <- anyChar
+ result <- manyTill anyChar (char marker)
+ return $ Code $ removeLeadingTrailingSpace result
+
+code2 = try $ do
+ string "\\texttt{"
+ result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
+ return $ Code result
+
+emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
+ manyTill inline (char '}') >>= return . Emph
+
+strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
+ return . Strikeout
+
+superscript = try $ string "\\textsuperscript{" >>
+ manyTill inline (char '}') >>= return . Superscript
+
+-- note: \textsubscript isn't a standard latex command, but we use
+-- a defined version in pandoc.
+subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
+ return . Subscript
+
+apostrophe = char '\'' >> return Apostrophe
+
+quoted = doubleQuoted <|> singleQuoted
+
+singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
+ return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
+ return . Quoted DoubleQuote . normalizeSpaces
+
+singleQuoteStart = char '`'
+
+singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
+
+doubleQuoteStart = string "``"
+
+doubleQuoteEnd = try $ string "''"
+
+ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >>
+ return Ellipses
+
+enDash = try (string "--") >> return EnDash
+
+emDash = try (string "---") >> return EmDash
+
+hyphen = char '-' >> return (Str "-")
+
+lab = try $ do
+ string "\\label{"
+ result <- manyTill anyChar (char '}')
+ return $ Str $ "(" ++ result ++ ")"
+
+ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
+
+strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
+ return . Strong
+
+whitespace = many1 (oneOf "~ \t") >> return Space
+
+-- hard line break
+linebreak = try (string "\\\\") >> return LineBreak
+
+spacer = try (string "\\,") >> return (Str "")
+
+str = many1 (noneOf specialChars) >>= return . Str
+
+-- endline internal to paragraph
+endline = try $ newline >> notFollowedBy blankline >> return Space
+
+-- math
+math = math1 <|> math2 <?> "math"
+
+math1 = try $ do
+ char '$'
+ result <- many (noneOf "$")
+ char '$'
+ return $ TeX ("$" ++ result ++ "$")
+
+math2 = try $ do
+ string "\\("
+ result <- many (noneOf "$")
+ string "\\)"
+ return $ TeX ("$" ++ result ++ "$")
+
+--
+-- links and images
+--
+
+url = try $ do
+ string "\\url"
+ url <- charsInBalanced '{' '}'
+ return $ Link [Code url] (url, "")
+
+link = try $ do
+ string "\\href{"
+ url <- manyTill anyChar (char '}')
+ char '{'
+ label <- manyTill inline (char '}')
+ return $ Link (normalizeSpaces label) (url, "")
+
+image = try $ do
+ ("includegraphics", _, args) <- command
+ let args' = filter isArg args -- filter out options
+ let src = if null args' then
+ ("", "")
+ else
+ (stripFirstAndLast (head args'), "")
+ return $ Image [Str "image"] src
+
+footnote = try $ do
+ (name, _, (contents:[])) <- command
+ if ((name == "footnote") || (name == "thanks"))
+ then string ""
+ else fail "not a footnote or thanks command"
+ let contents' = stripFirstAndLast contents
+ -- parse the extracted block, which may contain various block elements:
+ rest <- getInput
+ setInput $ contents'
+ blocks <- parseBlocks
+ setInput rest
+ return $ Note blocks
+
+-- | Parse any LaTeX command and return it in a raw TeX inline element.
+rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline = try $ do
+ (name, star, args) <- command
+ state <- getState
+ if ((name == "begin") || (name == "end") || (name == "item"))
+ then fail "not an inline command"
+ else string ""
+ return $ TeX ("\\" ++ name ++ star ++ concat args)
+
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
new file mode 100644
index 000000000..df84c0ac7
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -0,0 +1,909 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Markdown
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of markdown-formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Markdown (
+ readMarkdown
+ ) where
+
+import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy )
+import Data.Ord ( comparing )
+import Data.Char ( isAlphaNum )
+import Network.URI ( isURI )
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
+import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
+ anyHtmlInlineTag, anyHtmlTag,
+ anyHtmlEndTag, htmlEndTag, extractTagType,
+ htmlBlockElement )
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.ParserCombinators.Parsec
+
+-- | Read markdown from an input string and return a Pandoc document.
+readMarkdown :: ParserState -> String -> Pandoc
+readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
+
+--
+-- Constants and data structure definitions
+--
+
+spaceChars = " \t"
+bulletListMarkers = "*+-"
+hruleChars = "*-_"
+setextHChars = "=-"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars = "\\[]*_~`<>$!^-.&'\""
+
+--
+-- auxiliary functions
+--
+
+indentSpaces = try $ do
+ state <- getState
+ let tabStop = stateTabStop state
+ try (count tabStop (char ' ')) <|>
+ (many (char ' ') >> string "\t") <?> "indentation"
+
+nonindentSpaces = do
+ state <- getState
+ let tabStop = stateTabStop state
+ sps <- many (char ' ')
+ if length sps < tabStop
+ then return sps
+ else unexpected "indented line"
+
+-- | Fail unless we're at beginning of a line.
+failUnlessBeginningOfLine = do
+ pos <- getPosition
+ if sourceColumn pos == 1 then return () else fail "not beginning of line"
+
+-- | Fail unless we're in "smart typography" mode.
+failUnlessSmart = do
+ state <- getState
+ if stateSmart state then return () else fail "Smart typography feature"
+
+-- | Parse an inline Str element with a given content.
+inlineString str = try $ do
+ (Str res) <- inline
+ if res == str then return res else fail $ "unexpected Str content"
+
+-- | Parse a sequence of inline elements between a string
+-- @opener@ and a string @closer@, including inlines
+-- between balanced pairs of @opener@ and a @closer@.
+inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
+inlinesInBalanced opener closer = try $ do
+ string opener
+ result <- manyTill ( (do lookAhead (inlineString opener)
+ -- because it might be a link...
+ bal <- inlinesInBalanced opener closer
+ return $ [Str opener] ++ bal ++ [Str closer])
+ <|> (count 1 inline))
+ (try (string closer))
+ return $ concat result
+
+--
+-- document structure
+--
+
+titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
+
+authorsLine = try $ do
+ char '%'
+ skipSpaces
+ authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
+ newline
+ return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
+
+dateLine = try $ do
+ char '%'
+ skipSpaces
+ date <- many (noneOf "\n")
+ newline
+ return $ decodeCharacterReferences $ removeTrailingSpace date
+
+titleBlock = try $ do
+ failIfStrict
+ title <- option [] titleLine
+ author <- option [] authorsLine
+ date <- option "" dateLine
+ optional blanklines
+ return (title, author, date)
+
+parseMarkdown = do
+ -- markdown allows raw HTML
+ updateState (\state -> state { stateParseRaw = True })
+ startPos <- getPosition
+ -- go through once just to get list of reference keys
+ -- docMinusKeys is the raw document with blanks where the keys were...
+ docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
+ return . concat
+ setInput docMinusKeys
+ setPosition startPos
+ st <- getState
+ -- go through again for notes unless strict...
+ if stateStrict st
+ then return ()
+ else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
+ return . concat
+ st <- getState
+ let reversedNotes = stateNotes st
+ updateState $ \st -> st { stateNotes = reverse reversedNotes }
+ setInput docMinusNotes
+ setPosition startPos
+ -- now parse it for real...
+ (title, author, date) <- option ([],[],"") titleBlock
+ blocks <- parseBlocks
+ return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
+
+--
+-- initial pass for references and notes
+--
+
+referenceKey = try $ do
+ startPos <- getPosition
+ nonindentSpaces
+ label <- reference
+ char ':'
+ skipSpaces
+ optional (char '<')
+ src <- many (noneOf "> \n\t")
+ optional (char '>')
+ tit <- option "" referenceTitle
+ blanklines
+ endPos <- getPosition
+ let newkey = (label, (removeTrailingSpace src, tit))
+ st <- getState
+ let oldkeys = stateKeys st
+ updateState $ \st -> st { stateKeys = newkey : oldkeys }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+referenceTitle = try $ do
+ (many1 spaceChar >> option '\n' newline) <|> newline
+ skipSpaces
+ tit <- (charsInBalanced '(' ')' >>= return . unwords . words)
+ <|> do delim <- char '\'' <|> char '"'
+ manyTill anyChar (try (char delim >> skipSpaces >>
+ notFollowedBy (noneOf ")\n")))
+ return $ decodeCharacterReferences tit
+
+noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']')
+
+rawLine = do
+ notFollowedBy blankline
+ notFollowedBy' noteMarker
+ contents <- many1 nonEndline
+ end <- option "" (newline >> optional indentSpaces >> return "\n")
+ return $ contents ++ end
+
+rawLines = many1 rawLine >>= return . concat
+
+noteBlock = try $ do
+ startPos <- getPosition
+ ref <- noteMarker
+ char ':'
+ optional blankline
+ optional indentSpaces
+ raw <- sepBy rawLines (try (blankline >> indentSpaces))
+ optional blanklines
+ endPos <- getPosition
+ -- parse the extracted text, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
+ let newnote = (ref, contents)
+ st <- getState
+ let oldnotes = stateNotes st
+ updateState $ \st -> st { stateNotes = newnote : oldnotes }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+--
+-- parsing blocks
+--
+
+parseBlocks = manyTill block eof
+
+block = choice [ header
+ , table
+ , codeBlock
+ , hrule
+ , list
+ , blockQuote
+ , htmlBlock
+ , rawLaTeXEnvironment'
+ , para
+ , plain
+ , nullBlock ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = atxHeader <|> setextHeader <?> "header"
+
+atxHeader = try $ do
+ level <- many1 (char '#') >>= return . length
+ notFollowedBy (char '.' <|> char ')') -- this would be a list
+ skipSpaces
+ text <- manyTill inline atxClosing >>= return . normalizeSpaces
+ return $ Header level text
+
+atxClosing = try $ skipMany (char '#') >> blanklines
+
+setextHeader = try $ do
+ -- first, see if this block has any chance of being a setextHeader:
+ lookAhead (anyLine >> oneOf setextHChars)
+ text <- many1Till inline newline >>= return . normalizeSpaces
+ level <- choice $ zipWith
+ (\ch lev -> try (many1 $ char ch) >> blanklines >> return lev)
+ setextHChars [1..(length setextHChars)]
+ return $ Header level text
+
+--
+-- hrule block
+--
+
+hrule = try $ do
+ skipSpaces
+ start <- oneOf hruleChars
+ count 2 (skipSpaces >> char start)
+ skipMany (skipSpaces >> char start)
+ newline
+ optional blanklines
+ return HorizontalRule
+
+--
+-- code blocks
+--
+
+indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
+
+codeBlock = do
+ contents <- many1 (indentedLine <|>
+ try (do b <- blanklines
+ l <- indentedLine
+ return $ b ++ l))
+ optional blanklines
+ return $ CodeBlock $ stripTrailingNewlines $ concat contents
+
+--
+-- block quotes
+--
+
+emacsBoxQuote = try $ do
+ failIfStrict
+ string ",----"
+ manyTill anyChar newline
+ raw <- manyTill
+ (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
+ (try (string "`----"))
+ blanklines
+ return raw
+
+emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
+
+emailBlockQuote = try $ do
+ emailBlockQuoteStart
+ raw <- sepBy (many (nonEndline <|>
+ (try (endline >> notFollowedBy emailBlockQuoteStart >>
+ return '\n'))))
+ (try (newline >> emailBlockQuoteStart))
+ newline <|> (eof >> return '\n')
+ optional blanklines
+ return raw
+
+blockQuote = do
+ raw <- emailBlockQuote <|> emacsBoxQuote
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
+ return $ BlockQuote contents
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+bulletListStart = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ nonindentSpaces
+ notFollowedBy' hrule -- because hrules start out just like lists
+ oneOf bulletListMarkers
+ spaceChar
+ skipSpaces
+
+anyOrderedListStart = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ nonindentSpaces
+ notFollowedBy $ string "p." >> spaceChar >> digit -- page number
+ state <- getState
+ if stateStrict state
+ then do many1 digit
+ char '.'
+ spaceChar
+ return (1, DefaultStyle, DefaultDelim)
+ else anyOrderedListMarker >>~ spaceChar
+
+orderedListStart style delim = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ nonindentSpaces
+ state <- getState
+ num <- if stateStrict state
+ then do many1 digit
+ char '.'
+ return 1
+ else orderedListMarker style delim
+ if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
+ num `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then char '\t' <|> (spaceChar >> spaceChar)
+ else spaceChar
+ skipSpaces
+
+-- parse a line of a list item (start = parser for beginning of list item)
+listLine start = try $ do
+ notFollowedBy' start
+ notFollowedBy blankline
+ notFollowedBy' (do indentSpaces
+ many (spaceChar)
+ bulletListStart <|> (anyOrderedListStart >> return ()))
+ line <- manyTill anyChar newline
+ return $ line ++ "\n"
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem start = try $ do
+ start
+ result <- many1 (listLine start)
+ blanks <- many blankline
+ return $ concat result ++ blanks
+
+-- continuation of a list item - indented and separated by blankline
+-- or (in compact lists) endline.
+-- note: nested lists are parsed as continuations
+listContinuation start = try $ do
+ lookAhead indentSpaces
+ result <- many1 (listContinuationLine start)
+ blanks <- many blankline
+ return $ concat result ++ blanks
+
+listContinuationLine start = try $ do
+ notFollowedBy blankline
+ notFollowedBy' start
+ optional indentSpaces
+ result <- manyTill anyChar newline
+ return $ result ++ "\n"
+
+listItem start = try $ do
+ first <- rawListItem start
+ continuations <- many (listContinuation start)
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state {stateParserContext = ListItemState}
+ -- parse the extracted block, which may contain various block elements:
+ let raw = concat (first:continuations)
+ contents <- parseFromString parseBlocks raw
+ updateState (\st -> st {stateParserContext = oldContext})
+ return contents
+
+orderedList = try $ do
+ (start, style, delim) <- lookAhead anyOrderedListStart
+ items <- many1 (listItem (orderedListStart style delim))
+ return $ OrderedList (start, style, delim) $ compactify items
+
+bulletList = many1 (listItem bulletListStart) >>=
+ return . BulletList . compactify
+
+-- definition lists
+
+definitionListItem = try $ do
+ notFollowedBy blankline
+ notFollowedBy' indentSpaces
+ -- first, see if this has any chance of being a definition list:
+ lookAhead (anyLine >> char ':')
+ term <- manyTill inline newline
+ raw <- many1 defRawBlock
+ state <- getState
+ let oldContext = stateParserContext state
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ concat raw
+ updateState (\st -> st {stateParserContext = oldContext})
+ return ((normalizeSpaces term), contents)
+
+defRawBlock = try $ do
+ char ':'
+ state <- getState
+ let tabStop = stateTabStop state
+ try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
+ firstline <- anyLine
+ rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
+ trailing <- option "" blanklines
+ return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
+
+definitionList = do
+ failIfStrict
+ items <- many1 definitionListItem
+ let (terms, defs) = unzip items
+ let defs' = compactify defs
+ let items' = zip terms defs'
+ return $ DefinitionList items'
+
+--
+-- paragraph block
+--
+
+para = try $ do
+ result <- many1 inline
+ newline
+ blanklines <|> do st <- getState
+ if stateStrict st
+ then lookAhead (blockQuote <|> header) >> return ""
+ else lookAhead emacsBoxQuote >> return ""
+ return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+--
+-- raw html
+--
+
+htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
+
+htmlBlock = do
+ st <- getState
+ if stateStrict st
+ then try $ do failUnlessBeginningOfLine
+ first <- htmlElement
+ finalSpace <- many (oneOf spaceChars)
+ finalNewlines <- many newline
+ return $ RawHtml $ first ++ finalSpace ++ finalNewlines
+ else rawHtmlBlocks
+
+-- True if tag is self-closing
+isSelfClosing tag =
+ isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
+
+strictHtmlBlock = try $ do
+ tag <- anyHtmlBlockTag
+ let tag' = extractTagType tag
+ if isSelfClosing tag || tag' == "hr"
+ then return tag
+ else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
+ (htmlElement <|> (count 1 anyChar)))
+ end <- htmlEndTag tag'
+ return $ tag ++ concat contents ++ end
+
+rawHtmlBlocks = do
+ htmlBlocks <- many1 rawHtmlBlock
+ let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
+ let combined' = if not (null combined) && last combined == '\n'
+ then init combined -- strip extra newline
+ else combined
+ return $ RawHtml combined'
+
+--
+-- LaTeX
+--
+
+rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
+
+--
+-- Tables
+--
+
+-- Parse a dashed line with optional trailing spaces; return its length
+-- and the length including trailing space.
+dashedLine ch = do
+ dashes <- many1 (char ch)
+ sp <- many spaceChar
+ return $ (length dashes, length $ dashes ++ sp)
+
+-- Parse a table header with dashed lines of '-' preceded by
+-- one line of text.
+simpleTableHeader = try $ do
+ rawContent <- anyLine
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeads = tail $ splitByIndices (init indices) rawContent
+ let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+ return (rawHeads, aligns, indices)
+
+-- Parse a table footer - dashed lines followed by blank line.
+tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
+
+-- Parse a table separator - dashed line.
+tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
+
+-- Parse a raw line and split it into chunks by indices.
+rawTableLine indices = do
+ notFollowedBy' (blanklines <|> tableFooter)
+ line <- many1Till anyChar newline
+ return $ map removeLeadingTrailingSpace $ tail $
+ splitByIndices (init indices) line
+
+-- Parse a table line and return a list of lists of blocks (columns).
+tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
+
+-- Parse a multiline table row and return a list of blocks (columns).
+multilineRow indices = do
+ colLines <- many1 (rawTableLine indices)
+ optional blanklines
+ let cols = map unlines $ transpose colLines
+ mapM (parseFromString (many plain)) cols
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int -- Number of columns on terminal
+ -> [Int] -- Indices
+ -> [Float] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns indices =
+ let lengths = zipWith (-) indices (0:indices)
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
+
+-- Parses a table caption: inlines beginning with 'Table:'
+-- and followed by blank lines.
+tableCaption = try $ do
+ nonindentSpaces
+ string "Table:"
+ result <- many1 inline
+ blanklines
+ return $ normalizeSpaces result
+
+-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
+tableWith headerParser lineParser footerParser = try $ do
+ (rawHeads, aligns, indices) <- headerParser
+ lines <- many1Till (lineParser indices) footerParser
+ caption <- option [] tableCaption
+ heads <- mapM (parseFromString (many plain)) rawHeads
+ state <- getState
+ let numColumns = stateColumns state
+ let widths = widthsFromIndices numColumns indices
+ return $ Table caption aligns widths heads lines
+
+-- Parse a simple table with '---' header and one line per row.
+simpleTable = tableWith simpleTableHeader tableLine blanklines
+
+-- Parse a multiline table: starts with row of '-' on top, then header
+-- (which may be multiline), then the rows,
+-- which may be multiline, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+multilineTable = tableWith multilineTableHeader multilineRow tableFooter
+
+multilineTableHeader = try $ do
+ tableSep
+ rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeadsList = transpose $ map
+ (\ln -> tail $ splitByIndices (init indices) ln)
+ rawContent
+ let rawHeads = map (joinWithSep " ") rawHeadsList
+ let aligns = zipWith alignType rawHeadsList lengths
+ return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
+
+-- Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String] -> Int -> Alignment
+alignType [] len = AlignDefault
+alignType strLst len =
+ let str = head $ sortBy (comparing length) $
+ map removeTrailingSpace strLst
+ leftSpace = if null str then False else (str !! 0) `elem` " \t"
+ rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
+ in case (leftSpace, rightSpace) of
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
+
+table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
+
+--
+-- inline
+--
+
+inline = choice [ str
+ , smartPunctuation
+ , whitespace
+ , endline
+ , code
+ , charRef
+ , strong
+ , emph
+ , note
+ , inlineNote
+ , link
+ , image
+ , math
+ , strikeout
+ , superscript
+ , subscript
+ , autoLink
+ , rawHtmlInline'
+ , rawLaTeXInline'
+ , escapedChar
+ , symbol
+ , ltSign ] <?> "inline"
+
+escapedChar = do
+ char '\\'
+ state <- getState
+ result <- option '\\' $ if stateStrict state
+ then oneOf "\\`*_{}[]()>#+-.!~"
+ else satisfy (not . isAlphaNum)
+ return $ Str [result]
+
+ltSign = do
+ st <- getState
+ if stateStrict st
+ then char '<'
+ else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
+ return $ Str ['<']
+
+specialCharsMinusLt = filter (/= '<') specialChars
+
+symbol = do
+ result <- oneOf specialCharsMinusLt
+ return $ Str [result]
+
+-- parses inline code, between n `s and n `s
+code = try $ do
+ starts <- many1 (char '`')
+ skipSpaces
+ result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ (char '\n' >> return " "))
+ (try (skipSpaces >> count (length starts) (char '`') >>
+ notFollowedBy (char '`')))
+ return $ Code $ removeLeadingTrailingSpace $ concat result
+
+mathWord = many1 ((noneOf " \t\n\\$") <|>
+ (try (char '\\') >>~ notFollowedBy (char '$')))
+
+math = try $ do
+ failIfStrict
+ char '$'
+ notFollowedBy space
+ words <- sepBy1 mathWord (many1 space)
+ char '$'
+ return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
+
+emph = ((enclosed (char '*') (char '*') inline) <|>
+ (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>=
+ return . Emph . normalizeSpaces
+
+strong = ((enclosed (string "**") (try $ string "**") inline) <|>
+ (enclosed (string "__") (try $ string "__") inline)) >>=
+ return . Strong . normalizeSpaces
+
+strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
+ return . Strikeout . normalizeSpaces
+
+superscript = failIfStrict >> enclosed (char '^') (char '^')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Superscript
+
+subscript = failIfStrict >> enclosed (char '~') (char '~')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Subscript
+
+smartPunctuation = failUnlessSmart >>
+ choice [ quoted, apostrophe, dash, ellipses ]
+
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+
+quoted = doubleQuoted <|> singleQuoted
+
+withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
+
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
+ return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
+ return . Quoted DoubleQuote . normalizeSpaces
+
+failIfInQuoteContext context = do
+ st <- getState
+ if stateQuoteContext st == context
+ then fail "already inside quotes"
+ else return ()
+
+singleQuoteStart = do
+ failIfInQuoteContext InSingleQuote
+ char '\8216' <|>
+ do char '\''
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum))) -- possess/contraction
+ return '\''
+
+singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum
+
+doubleQuoteStart = failIfInQuoteContext InDoubleQuote >>
+ (char '"' <|> char '\8220') >>
+ notFollowedBy (oneOf " \t\n")
+
+doubleQuoteEnd = char '"' <|> char '\8221'
+
+ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
+
+dash = enDash <|> emDash
+
+enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
+
+emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >>
+ skipSpaces >> return EmDash
+
+whitespace = do
+ sps <- many1 (oneOf spaceChars)
+ if length sps >= 2
+ then option Space (endline >> return LineBreak)
+ else return Space <?> "whitespace"
+
+nonEndline = satisfy (/='\n')
+
+strChar = noneOf (specialChars ++ spaceChars ++ "\n")
+
+str = many1 strChar >>= return . Str
+
+-- an endline character that can be treated as a space, not a structural break
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ st <- getState
+ if stateStrict st
+ then do notFollowedBy emailBlockQuoteStart
+ notFollowedBy (char '#') -- atx header
+ else return ()
+ -- parse potential list-starts differently if in a list:
+ if stateParserContext st == ListItemState
+ then notFollowedBy' (bulletListStart <|>
+ (anyOrderedListStart >> return ()))
+ else return ()
+ return Space
+
+--
+-- links
+--
+
+-- a reference label for a link
+reference = notFollowedBy' (string "[^") >> -- footnote reference
+ inlinesInBalanced "[" "]" >>= (return . normalizeSpaces)
+
+-- source for a link, with optional title
+source = try $ do
+ char '('
+ optional (char '<')
+ src <- many (noneOf ")> \t\n")
+ optional (char '>')
+ tit <- option "" linkTitle
+ skipSpaces
+ char ')'
+ return (removeTrailingSpace src, tit)
+
+linkTitle = try $ do
+ (many1 spaceChar >> option '\n' newline) <|> newline
+ skipSpaces
+ delim <- char '\'' <|> char '"'
+ tit <- manyTill anyChar (try (char delim >> skipSpaces >>
+ notFollowedBy (noneOf ")\n")))
+ return $ decodeCharacterReferences tit
+
+link = try $ do
+ label <- reference
+ src <- source <|> referenceLink label
+ return $ Link label src
+
+-- a link like [this][ref] or [this][] or [this]
+referenceLink label = do
+ ref <- option [] (try (optional (char ' ') >>
+ optional (newline >> skipSpaces) >> reference))
+ let ref' = if null ref then label else ref
+ state <- getState
+ case lookupKeySrc (stateKeys state) ref' of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+
+emailAddress = try $ do
+ name <- many1 (alphaNum <|> char '+')
+ char '@'
+ first <- many1 alphaNum
+ rest <- many1 (char '.' >> many1 alphaNum)
+ return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest)
+
+uri = try $ do
+ str <- many1 (noneOf "\n\t >")
+ if isURI str
+ then return str
+ else fail "not a URI"
+
+autoLink = try $ do
+ char '<'
+ src <- uri <|> emailAddress
+ char '>'
+ let src' = if "mailto:" `isPrefixOf` src
+ then drop 7 src
+ else src
+ st <- getState
+ return $ if stateStrict st
+ then Link [Str src'] (src, "")
+ else Link [Code src'] (src, "")
+
+image = try $ do
+ char '!'
+ (Link label src) <- link
+ return $ Image label src
+
+note = try $ do
+ failIfStrict
+ ref <- noteMarker
+ state <- getState
+ let notes = stateNotes state
+ case lookup ref notes of
+ Nothing -> fail "note not found"
+ Just contents -> return $ Note contents
+
+inlineNote = try $ do
+ failIfStrict
+ char '^'
+ contents <- inlinesInBalanced "[" "]"
+ return $ Note [Para contents]
+
+rawLaTeXInline' = failIfStrict >> rawLaTeXInline
+
+rawHtmlInline' = do
+ st <- getState
+ result <- choice $ if stateStrict st
+ then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
+ else [htmlBlockElement, anyHtmlInlineTag]
+ return $ HtmlInline result
+
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
new file mode 100644
index 000000000..1239eb688
--- /dev/null
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -0,0 +1,640 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.RST
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion from reStructuredText to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.RST (
+ readRST
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.ParserCombinators.Parsec
+import Data.List ( findIndex, delete )
+
+-- | Parse reStructuredText string and return Pandoc document.
+readRST :: ParserState -> String -> Pandoc
+readRST state str = (readWith parseRST) state (str ++ "\n\n")
+
+--
+-- Constants and data structure definitions
+---
+
+bulletListMarkers = "*+-"
+underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars = "\\`|*_<>$:[-"
+
+--
+-- parsing documents
+--
+
+isAnonKey (ref, src) = ref == [Str "_"]
+
+isHeader :: Int -> Block -> Bool
+isHeader n (Header x _) = x == n
+isHeader _ _ = False
+
+-- | Promote all headers in a list of blocks. (Part of
+-- title transformation for RST.)
+promoteHeaders :: Int -> [Block] -> [Block]
+promoteHeaders num ((Header level text):rest) =
+ (Header (level - num) text):(promoteHeaders num rest)
+promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
+promoteHeaders num [] = []
+
+-- | If list of blocks starts with a header (or a header and subheader)
+-- of level that are not found elsewhere, return it as a title and
+-- promote all the other headers.
+titleTransform :: [Block] -- ^ list of blocks
+ -> ([Block], [Inline]) -- ^ modified list of blocks, title
+titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
+ if (any (isHeader 1) rest) || (any (isHeader 2) rest)
+ then ((Header 1 head1):(Header 2 head2):rest, [])
+ else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
+titleTransform ((Header 1 head1):rest) = -- title, no subtitle
+ if (any (isHeader 1) rest)
+ then ((Header 1 head1):rest, [])
+ else ((promoteHeaders 1 rest), head1)
+titleTransform blocks = (blocks, [])
+
+parseRST = do
+ startPos <- getPosition
+ -- go through once just to get list of reference keys
+ -- docMinusKeys is the raw document with blanks where the keys were...
+ docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
+ setInput docMinusKeys
+ setPosition startPos
+ st <- getState
+ let reversedKeys = stateKeys st
+ updateState $ \st -> st { stateKeys = reverse reversedKeys }
+ -- now parse it for real...
+ blocks <- parseBlocks
+ let blocks' = filter (/= Null) blocks
+ state <- getState
+ let (blocks'', title) = if stateStandalone state
+ then titleTransform blocks'
+ else (blocks', [])
+ let authors = stateAuthors state
+ let date = stateDate state
+ let title' = if (null title) then (stateTitle state) else title
+ return $ Pandoc (Meta title' authors date) blocks''
+
+--
+-- parsing blocks
+--
+
+parseBlocks = manyTill block eof
+
+block = choice [ codeBlock
+ , rawHtmlBlock
+ , rawLaTeXBlock
+ , fieldList
+ , blockQuote
+ , imageBlock
+ , unknownDirective
+ , header
+ , hrule
+ , list
+ , lineBlock
+ , para
+ , plain
+ , nullBlock ] <?> "block"
+
+--
+-- field list
+--
+
+fieldListItem indent = try $ do
+ string indent
+ char ':'
+ name <- many1 alphaNum
+ string ": "
+ skipSpaces
+ first <- manyTill anyChar newline
+ rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >>
+ indentedBlock
+ return (name, joinWithSep " " (first:(lines rest)))
+
+fieldList = try $ do
+ indent <- lookAhead $ many (oneOf " \t")
+ items <- many1 $ fieldListItem indent
+ blanklines
+ let authors = case lookup "Authors" items of
+ Just auth -> [auth]
+ Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
+ if null authors
+ then return ()
+ else updateState $ \st -> st {stateAuthors = authors}
+ case (lookup "Date" items) of
+ Just dat -> updateState $ \st -> st {stateDate = dat}
+ Nothing -> return ()
+ case (lookup "Title" items) of
+ Just tit -> parseFromString (many inline) tit >>=
+ \t -> updateState $ \st -> st {stateTitle = t}
+ Nothing -> return ()
+ let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") &&
+ (x /= "Date") && (x /= "Title")) items
+ if null remaining
+ then return Null
+ else do terms <- mapM (return . (:[]) . Str . fst) remaining
+ defs <- mapM (parseFromString (many block) . snd)
+ remaining
+ return $ DefinitionList $ zip terms defs
+
+--
+-- line block
+--
+
+lineBlockLine = try $ do
+ string "| "
+ white <- many (oneOf " \t")
+ line <- manyTill inline newline
+ return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
+
+lineBlock = try $ do
+ lines <- many1 lineBlockLine
+ blanklines
+ return $ Para (concat lines)
+
+--
+-- paragraph block
+--
+
+para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
+
+codeBlockStart = string "::" >> blankline >> blankline
+
+-- paragraph that ends in a :: starting a code block
+paraBeforeCodeBlock = try $ do
+ result <- many1 (notFollowedBy' codeBlockStart >> inline)
+ lookAhead (string "::")
+ return $ Para $ if last result == Space
+ then normalizeSpaces result
+ else (normalizeSpaces result) ++ [Str ":"]
+
+-- regular paragraph
+paraNormal = try $ do
+ result <- many1 inline
+ newline
+ blanklines
+ return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+--
+-- image block
+--
+
+imageBlock = try $ do
+ string ".. image:: "
+ src <- manyTill anyChar newline
+ fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
+ many1 $ fieldListItem indent
+ optional blanklines
+ case lookup "alt" fields of
+ Just alt -> return $ Plain [Image [Str alt] (src, alt)]
+ Nothing -> return $ Plain [Image [Str "image"] (src, "")]
+--
+-- header blocks
+--
+
+header = doubleHeader <|> singleHeader <?> "header"
+
+-- a header with lines on top and bottom
+doubleHeader = try $ do
+ c <- oneOf underlineChars
+ rest <- many (char c) -- the top line
+ let lenTop = length (c:rest)
+ skipSpaces
+ newline
+ txt <- many1 (notFollowedBy blankline >> inline)
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ if (len > lenTop) then fail "title longer than border" else return ()
+ blankline -- spaces and newline
+ count lenTop (char c) -- the bottom line
+ blanklines
+ -- check to see if we've had this kind of header before.
+ -- if so, get appropriate level. if not, add to list.
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return $ Header level (normalizeSpaces txt)
+
+-- a header with line on the bottom only
+singleHeader = try $ do
+ notFollowedBy' whitespace
+ txt <- many1 (do {notFollowedBy blankline; inline})
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ blankline
+ c <- oneOf underlineChars
+ rest <- count (len - 1) (char c)
+ many (char c)
+ blanklines
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return $ Header level (normalizeSpaces txt)
+
+--
+-- hrule block
+--
+
+hrule = try $ do
+ chr <- oneOf underlineChars
+ count 3 (char chr)
+ skipMany (char chr)
+ blankline
+ blanklines
+ return HorizontalRule
+
+--
+-- code blocks
+--
+
+-- read a line indented by a given string
+indentedLine indents = try $ do
+ string indents
+ result <- manyTill anyChar newline
+ return $ result ++ "\n"
+
+-- two or more indented lines, possibly separated by blank lines.
+-- any amount of indentation will work.
+indentedBlock = do
+ indents <- lookAhead $ many1 (oneOf " \t")
+ lns <- many $ choice $ [ indentedLine indents,
+ try $ do b <- blanklines
+ l <- indentedLine indents
+ return (b ++ l) ]
+ optional blanklines
+ return $ concat lns
+
+codeBlock = try $ do
+ codeBlockStart
+ result <- indentedBlock
+ return $ CodeBlock $ stripTrailingNewlines result
+
+--
+-- raw html
+--
+
+rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
+ indentedBlock >>= return . RawHtml
+
+--
+-- raw latex
+--
+
+rawLaTeXBlock = try $ do
+ string ".. raw:: latex"
+ blanklines
+ result <- indentedBlock
+ return $ Para [(TeX result)]
+
+--
+-- block quotes
+--
+
+blockQuote = do
+ raw <- indentedBlock
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ return $ BlockQuote contents
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+definitionListItem = try $ do
+ term <- many1Till inline endline
+ raw <- indentedBlock
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ return (normalizeSpaces term, contents)
+
+definitionList = many1 definitionListItem >>= return . DefinitionList
+
+-- parses bullet list start and returns its length (inc. following whitespace)
+bulletListStart = try $ do
+ notFollowedBy' hrule -- because hrules start out just like lists
+ marker <- oneOf bulletListMarkers
+ white <- many1 spaceChar
+ return $ length (marker:white)
+
+-- parses ordered list start and returns its length (inc following whitespace)
+orderedListStart style delim = try $ do
+ (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
+ white <- many1 spaceChar
+ return $ markerLen + length white
+
+-- parse a line of a list item
+listLine markerLength = try $ do
+ notFollowedBy blankline
+ indentWith markerLength
+ line <- manyTill anyChar newline
+ return $ line ++ "\n"
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith num = do
+ state <- getState
+ let tabStop = stateTabStop state
+ if (num < tabStop)
+ then count num (char ' ')
+ else choice [ try (count num (char ' ')),
+ (try (char '\t' >> count (num - tabStop) (char ' '))) ]
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem start = do
+ markerLength <- start
+ firstLine <- manyTill anyChar newline
+ restLines <- many (listLine markerLength)
+ return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+
+-- continuation of a list item - indented and separated by blankline or
+-- (in compact lists) endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation markerLength = try $ do
+ blanks <- many1 blankline
+ result <- many1 (listLine markerLength)
+ return $ blanks ++ concat result
+
+listItem start = try $ do
+ (markerLength, first) <- rawListItem start
+ rest <- many (listContinuation markerLength)
+ blanks <- choice [ try (many blankline >>~ lookAhead start),
+ many1 blankline ] -- whole list must end with blank.
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state {stateParserContext = ListItemState}
+ -- parse the extracted block, which may itself contain block elements
+ parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
+ updateState (\st -> st {stateParserContext = oldContext})
+ return parsed
+
+orderedList = do
+ (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
+ items <- many1 (listItem (orderedListStart style delim))
+ let items' = compactify items
+ return $ OrderedList (start, style, delim) items'
+
+bulletList = many1 (listItem bulletListStart) >>=
+ return . BulletList . compactify
+
+--
+-- unknown directive (e.g. comment)
+--
+
+unknownDirective = try $ do
+ string ".. "
+ manyTill anyChar newline
+ many (string " :" >> many1 (noneOf "\n:") >> char ':' >>
+ many1 (noneOf "\n") >> newline)
+ optional blanklines
+ return Null
+
+--
+-- reference key
+--
+
+referenceKey = do
+ startPos <- getPosition
+ key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
+ st <- getState
+ let oldkeys = stateKeys st
+ updateState $ \st -> st { stateKeys = key : oldkeys }
+ optional blanklines
+ endPos <- getPosition
+ -- return enough blanks to replace key
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+targetURI = do
+ skipSpaces
+ optional newline
+ contents <- many1 (try (many spaceChar >> newline >>
+ many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
+ blanklines
+ return contents
+
+imageKey = try $ do
+ string ".. |"
+ ref <- manyTill inline (char '|')
+ skipSpaces
+ string "image::"
+ src <- targetURI
+ return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+anonymousKey = try $ do
+ oneOfStrings [".. __:", "__"]
+ src <- targetURI
+ state <- getState
+ return ([Str "_"], (removeLeadingTrailingSpace src, ""))
+
+regularKeyQuoted = try $ do
+ string ".. _`"
+ ref <- manyTill inline (char '`')
+ char ':'
+ src <- targetURI
+ return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+regularKey = try $ do
+ string ".. _"
+ ref <- manyTill inline (char ':')
+ src <- targetURI
+ return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+ --
+ -- inline
+ --
+
+inline = choice [ link
+ , str
+ , whitespace
+ , endline
+ , strong
+ , emph
+ , code
+ , image
+ , hyphens
+ , superscript
+ , subscript
+ , escapedChar
+ , symbol ] <?> "inline"
+
+hyphens = do
+ result <- many1 (char '-')
+ option Space endline
+ -- don't want to treat endline after hyphen or dash as a space
+ return $ Str result
+
+escapedChar = escaped anyChar
+
+symbol = do
+ result <- oneOf specialChars
+ return $ Str [result]
+
+-- parses inline code, between codeStart and codeEnd
+code = try $ do
+ string "``"
+ result <- manyTill anyChar (try (string "``"))
+ return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+
+emph = enclosed (char '*') (char '*') inline >>=
+ return . Emph . normalizeSpaces
+
+strong = enclosed (string "**") (try $ string "**") inline >>=
+ return . Strong . normalizeSpaces
+
+interpreted role = try $ do
+ optional $ try $ string "\\ "
+ result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
+ nextChar <- lookAhead anyChar
+ try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
+ return [Str result]
+
+superscript = interpreted "sup" >>= (return . Superscript)
+
+subscript = interpreted "sub" >>= (return . Subscript)
+
+whitespace = many1 spaceChar >> return Space <?> "whitespace"
+
+str = notFollowedBy' oneWordReference >>
+ many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
+
+-- an endline character that can be treated as a space, not a structural break
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ -- parse potential list-starts at beginning of line differently in a list:
+ st <- getState
+ if (stateParserContext st) == ListItemState
+ then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
+ notFollowedBy' bulletListStart
+ else return ()
+ return Space
+
+--
+-- links
+--
+
+link = choice [explicitLink, referenceLink, autoLink] <?> "link"
+
+explicitLink = try $ do
+ char '`'
+ notFollowedBy (char '`') -- `` is marks start of inline code
+ label <- manyTill inline (try (do {spaces; char '<'}))
+ src <- manyTill (noneOf ">\n ") (char '>')
+ skipSpaces
+ string "`_"
+ return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "")
+
+reference = try $ do
+ char '`'
+ notFollowedBy (char '`')
+ label <- many1Till inline (char '`')
+ char '_'
+ return label
+
+oneWordReference = do
+ raw <- many1 alphaNum
+ char '_'
+ notFollowedBy alphaNum -- because this_is_not a link
+ return [Str raw]
+
+referenceLink = try $ do
+ label <- reference <|> oneWordReference
+ key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable key of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ -- if anonymous link, remove first anon key so it won't be used again
+ let keyTable' = if (key == [Str "_"]) -- anonymous link?
+ then delete ([Str "_"], src) keyTable -- remove first anon key
+ else keyTable
+ setState $ state { stateKeys = keyTable' }
+ return $ Link (normalizeSpaces label) src
+
+uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
+ "mailto:", "news:", "telnet:" ]
+
+uri = try $ do
+ scheme <- uriScheme
+ identifier <- many1 (noneOf " \t\n")
+ return $ scheme ++ identifier
+
+autoURI = do
+ src <- uri
+ return $ Link [Str src] (src, "")
+
+emailChar = alphaNum <|> oneOf "-+_."
+
+emailAddress = try $ do
+ firstLetter <- alphaNum
+ restAddr <- many emailChar
+ let addr = firstLetter:restAddr
+ char '@'
+ dom <- domain
+ return $ addr ++ '@':dom
+
+domainChar = alphaNum <|> char '-'
+
+domain = do
+ first <- many1 domainChar
+ dom <- many1 (try (do{ char '.'; many1 domainChar }))
+ return $ joinWithSep "." (first:dom)
+
+autoEmail = do
+ src <- emailAddress
+ return $ Link [Str src] ("mailto:" ++ src, "")
+
+autoLink = autoURI <|> autoEmail
+
+-- For now, we assume that all substitution references are for images.
+image = try $ do
+ char '|'
+ ref <- manyTill inline (char '|')
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable ref of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ return $ Image (normalizeSpaces ref) src
+
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
new file mode 100644
index 000000000..f27c3ae75
--- /dev/null
+++ b/src/Text/Pandoc/Shared.hs
@@ -0,0 +1,792 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Shared
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Utility functions and definitions used by the various Pandoc modules.
+-}
+module Text.Pandoc.Shared (
+ -- * List processing
+ splitBy,
+ splitByIndices,
+ substitute,
+ joinWithSep,
+ -- * Text processing
+ backslashEscapes,
+ escapeStringUsing,
+ stripTrailingNewlines,
+ removeLeadingTrailingSpace,
+ removeLeadingSpace,
+ removeTrailingSpace,
+ stripFirstAndLast,
+ camelCaseToHyphenated,
+ toRomanNumeral,
+ wrapped,
+ wrapIfNeeded,
+ -- * Parsing
+ (>>~),
+ anyLine,
+ many1Till,
+ notFollowedBy',
+ oneOfStrings,
+ spaceChar,
+ skipSpaces,
+ blankline,
+ blanklines,
+ enclosed,
+ stringAnyCase,
+ parseFromString,
+ lineClump,
+ charsInBalanced,
+ charsInBalanced',
+ romanNumeral,
+ withHorizDisplacement,
+ nullBlock,
+ failIfStrict,
+ escaped,
+ anyOrderedListMarker,
+ orderedListMarker,
+ charRef,
+ readWith,
+ testStringWith,
+ ParserState (..),
+ defaultParserState,
+ HeaderType (..),
+ ParserContext (..),
+ QuoteContext (..),
+ NoteTable,
+ KeyTable,
+ lookupKeySrc,
+ refsMatch,
+ -- * Native format prettyprinting
+ prettyPandoc,
+ -- * Pandoc block and inline list processing
+ orderedListMarkers,
+ normalizeSpaces,
+ compactify,
+ Element (..),
+ hierarchicalize,
+ isHeaderBlock,
+ -- * Writer options
+ WriterOptions (..),
+ defaultWriterOptions
+ ) where
+
+import Text.Pandoc.Definition
+import Text.ParserCombinators.Parsec
+import Text.PrettyPrint.HughesPJ ( Doc, fsep )
+import Text.Pandoc.CharacterReferences ( characterReference )
+import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
+import Data.List ( find, isPrefixOf )
+import Control.Monad ( join )
+
+--
+-- List processing
+--
+
+-- | Split list by groups of one or more sep.
+splitBy :: (Eq a) => a -> [a] -> [[a]]
+splitBy _ [] = []
+splitBy sep lst =
+ let (first, rest) = break (== sep) lst
+ rest' = dropWhile (== sep) rest
+ in first:(splitBy sep rest')
+
+-- | Split list into chunks divided at specified indices.
+splitByIndices :: [Int] -> [a] -> [[a]]
+splitByIndices [] lst = [lst]
+splitByIndices (x:xs) lst =
+ let (first, rest) = splitAt x lst in
+ first:(splitByIndices (map (\y -> y - x) xs) rest)
+
+-- | Replace each occurrence of one sublist in a list with another.
+substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
+substitute _ _ [] = []
+substitute [] _ lst = lst
+substitute target replacement lst =
+ if target `isPrefixOf` lst
+ then replacement ++ (substitute target replacement $ drop (length target) lst)
+ else (head lst):(substitute target replacement $ tail lst)
+
+-- | Joins a list of lists, separated by another list.
+joinWithSep :: [a] -- ^ List to use as separator
+ -> [[a]] -- ^ Lists to join
+ -> [a]
+joinWithSep _ [] = []
+joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
+
+--
+-- Text processing
+--
+
+-- | Returns an association list of backslash escapes for the
+-- designated characters.
+backslashEscapes :: [Char] -- ^ list of special characters to escape
+ -> [(Char, String)]
+backslashEscapes = map (\ch -> (ch, ['\\',ch]))
+
+-- | Escape a string of characters, using an association list of
+-- characters and strings.
+escapeStringUsing :: [(Char, String)] -> String -> String
+escapeStringUsing _ [] = ""
+escapeStringUsing escapeTable (x:xs) =
+ case (lookup x escapeTable) of
+ Just str -> str ++ rest
+ Nothing -> x:rest
+ where rest = escapeStringUsing escapeTable xs
+
+-- | Strip trailing newlines from string.
+stripTrailingNewlines :: String -> String
+stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
+
+-- | Remove leading and trailing space (including newlines) from string.
+removeLeadingTrailingSpace :: String -> String
+removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
+
+-- | Remove leading space (including newlines) from string.
+removeLeadingSpace :: String -> String
+removeLeadingSpace = dropWhile (`elem` " \n\t")
+
+-- | Remove trailing space (including newlines) from string.
+removeTrailingSpace :: String -> String
+removeTrailingSpace = reverse . removeLeadingSpace . reverse
+
+-- | Strip leading and trailing characters from string
+stripFirstAndLast :: String -> String
+stripFirstAndLast str =
+ drop 1 $ take ((length str) - 1) str
+
+-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
+camelCaseToHyphenated :: String -> String
+camelCaseToHyphenated [] = ""
+camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
+ a:'-':(toLower b):(camelCaseToHyphenated rest)
+camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
+
+-- | Convert number < 4000 to uppercase roman numeral.
+toRomanNumeral :: Int -> String
+toRomanNumeral x =
+ if x >= 4000 || x < 0
+ then "?"
+ else case x of
+ _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
+ _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
+ _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
+ _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
+ _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
+ _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
+ _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
+ _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
+ _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
+ _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
+ _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
+ _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
+ _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
+ _ -> ""
+
+-- | Wrap inlines to line length.
+wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
+wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
+ return . fsep
+
+wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->
+ [Inline] -> m Doc
+wrapIfNeeded opts = if writerWrapText opts
+ then wrapped
+ else ($)
+
+--
+-- Parsing
+--
+
+-- | Like >>, but returns the operation on the left.
+-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
+(>>~) :: (Monad m) => m a -> m b -> m a
+a >>~ b = a >>= \x -> b >> return x
+
+-- | Parse any line of text
+anyLine :: GenParser Char st [Char]
+anyLine = manyTill anyChar newline
+
+-- | Like @manyTill@, but reads at least one item.
+many1Till :: GenParser tok st a
+ -> GenParser tok st end
+ -> GenParser tok st [a]
+many1Till p end = do
+ first <- p
+ rest <- manyTill p end
+ return (first:rest)
+
+-- | A more general form of @notFollowedBy@. This one allows any
+-- type of parser to be specified, and succeeds only if that parser fails.
+-- It does not consume any input.
+notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
+notFollowedBy' p = try $ join $ do a <- try p
+ return (unexpected (show a))
+ <|>
+ return (return ())
+-- (This version due to Andrew Pimlott on the Haskell mailing list.)
+
+-- | Parses one of a list of strings (tried in order).
+oneOfStrings :: [String] -> GenParser Char st String
+oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
+
+-- | Parses a space or tab.
+spaceChar :: CharParser st Char
+spaceChar = char ' ' <|> char '\t'
+
+-- | Skips zero or more spaces or tabs.
+skipSpaces :: GenParser Char st ()
+skipSpaces = skipMany spaceChar
+
+-- | Skips zero or more spaces or tabs, then reads a newline.
+blankline :: GenParser Char st Char
+blankline = try $ skipSpaces >> newline
+
+-- | Parses one or more blank lines and returns a string of newlines.
+blanklines :: GenParser Char st [Char]
+blanklines = many1 blankline
+
+-- | Parses material enclosed between start and end parsers.
+enclosed :: GenParser Char st t -- ^ start parser
+ -> GenParser Char st end -- ^ end parser
+ -> GenParser Char st a -- ^ content parser (to be used repeatedly)
+ -> GenParser Char st [a]
+enclosed start end parser = try $
+ start >> notFollowedBy space >> many1Till parser end
+
+-- | Parse string, case insensitive.
+stringAnyCase :: [Char] -> CharParser st String
+stringAnyCase [] = string ""
+stringAnyCase (x:xs) = do
+ firstChar <- char (toUpper x) <|> char (toLower x)
+ rest <- stringAnyCase xs
+ return (firstChar:rest)
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromString parser str = do
+ oldPos <- getPosition
+ oldInput <- getInput
+ setInput str
+ result <- parser
+ setInput oldInput
+ setPosition oldPos
+ return result
+
+-- | Parse raw line block up to and including blank lines.
+lineClump :: GenParser Char st String
+lineClump = blanklines
+ <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
+
+-- | Parse a string of characters between an open character
+-- and a close character, including text between balanced
+-- pairs of open and close, which must be different. For example,
+-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
+-- and return "hello (there)". Stop if a blank line is
+-- encountered.
+charsInBalanced :: Char -> Char -> GenParser Char st String
+charsInBalanced open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close, '\n']))
+ <|> (do res <- charsInBalanced open close
+ return $ [open] ++ res ++ [close])
+ <|> try (string "\n" >>~ notFollowedBy' blanklines)
+ char close
+ return $ concat raw
+
+-- | Like @charsInBalanced@, but allow blank lines in the content.
+charsInBalanced' :: Char -> Char -> GenParser Char st String
+charsInBalanced' open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close]))
+ <|> (do res <- charsInBalanced' open close
+ return $ [open] ++ res ++ [close])
+ char close
+ return $ concat raw
+
+-- | Parses a roman numeral (uppercase or lowercase), returns number.
+romanNumeral :: Bool -- ^ Uppercase if true
+ -> GenParser Char st Int
+romanNumeral upperCase = do
+ let charAnyCase c = char (if upperCase then toUpper c else c)
+ let one = charAnyCase 'i'
+ let five = charAnyCase 'v'
+ let ten = charAnyCase 'x'
+ let fifty = charAnyCase 'l'
+ let hundred = charAnyCase 'c'
+ let fivehundred = charAnyCase 'd'
+ let thousand = charAnyCase 'm'
+ thousands <- many thousand >>= (return . (1000 *) . length)
+ ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
+ fivehundreds <- many fivehundred >>= (return . (500 *) . length)
+ fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
+ hundreds <- many hundred >>= (return . (100 *) . length)
+ nineties <- option 0 $ try $ ten >> hundred >> return 90
+ fifties <- many fifty >>= (return . (50 *) . length)
+ forties <- option 0 $ try $ ten >> fifty >> return 40
+ tens <- many ten >>= (return . (10 *) . length)
+ nines <- option 0 $ try $ one >> ten >> return 9
+ fives <- many five >>= (return . (5 *) . length)
+ fours <- option 0 $ try $ one >> five >> return 4
+ ones <- many one >>= (return . length)
+ let total = thousands + ninehundreds + fivehundreds + fourhundreds +
+ hundreds + nineties + fifties + forties + tens + nines +
+ fives + fours + ones
+ if total == 0
+ then fail "not a roman numeral"
+ else return total
+
+-- | Applies a parser, returns tuple of its results and its horizontal
+-- displacement (the difference between the source column at the end
+-- and the source column at the beginning). Vertical displacement
+-- (source row) is ignored.
+withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
+ -> GenParser Char st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement parser = do
+ pos1 <- getPosition
+ result <- parser
+ pos2 <- getPosition
+ return (result, sourceColumn pos2 - sourceColumn pos1)
+
+-- | Parses a character and returns 'Null' (so that the parser can move on
+-- if it gets stuck).
+nullBlock :: GenParser Char st Block
+nullBlock = anyChar >> return Null
+
+-- | Fail if reader is in strict markdown syntax mode.
+failIfStrict :: GenParser Char ParserState ()
+failIfStrict = do
+ state <- getState
+ if stateStrict state then fail "strict mode" else return ()
+
+-- | Parses backslash, then applies character parser.
+escaped :: GenParser Char st Char -- ^ Parser for character to escape
+ -> GenParser Char st Inline
+escaped parser = try $ do
+ char '\\'
+ result <- parser
+ return (Str [result])
+
+-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
+upperRoman :: GenParser Char st (ListNumberStyle, Int)
+upperRoman = do
+ num <- romanNumeral True
+ return (UpperRoman, num)
+
+-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
+lowerRoman :: GenParser Char st (ListNumberStyle, Int)
+lowerRoman = do
+ num <- romanNumeral False
+ return (LowerRoman, num)
+
+-- | Parses a decimal numeral and returns (Decimal, number).
+decimal :: GenParser Char st (ListNumberStyle, Int)
+decimal = do
+ num <- many1 digit
+ return (Decimal, read num)
+
+-- | Parses a '#' returns (DefaultStyle, 1).
+defaultNum :: GenParser Char st (ListNumberStyle, Int)
+defaultNum = do
+ char '#'
+ return (DefaultStyle, 1)
+
+-- | Parses a lowercase letter and returns (LowerAlpha, number).
+lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
+lowerAlpha = do
+ ch <- oneOf ['a'..'z']
+ return (LowerAlpha, ord ch - ord 'a' + 1)
+
+-- | Parses an uppercase letter and returns (UpperAlpha, number).
+upperAlpha :: GenParser Char st (ListNumberStyle, Int)
+upperAlpha = do
+ ch <- oneOf ['A'..'Z']
+ return (UpperAlpha, ord ch - ord 'A' + 1)
+
+-- | Parses a roman numeral i or I
+romanOne :: GenParser Char st (ListNumberStyle, Int)
+romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
+ (char 'I' >> return (UpperRoman, 1))
+
+-- | Parses an ordered list marker and returns list attributes.
+anyOrderedListMarker :: GenParser Char st ListAttributes
+anyOrderedListMarker = choice $
+ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
+ numParser <- [decimal, defaultNum, romanOne,
+ lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
+
+-- | Parses a list number (num) followed by a period, returns list attributes.
+inPeriod :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inPeriod num = try $ do
+ (style, start) <- num
+ char '.'
+ let delim = if style == DefaultStyle
+ then DefaultDelim
+ else Period
+ return (start, style, delim)
+
+-- | Parses a list number (num) followed by a paren, returns list attributes.
+inOneParen :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inOneParen num = try $ do
+ (style, start) <- num
+ char ')'
+ return (start, style, OneParen)
+
+-- | Parses a list number (num) enclosed in parens, returns list attributes.
+inTwoParens :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inTwoParens num = try $ do
+ char '('
+ (style, start) <- num
+ char ')'
+ return (start, style, TwoParens)
+
+-- | Parses an ordered list marker with a given style and delimiter,
+-- returns number.
+orderedListMarker :: ListNumberStyle
+ -> ListNumberDelim
+ -> GenParser Char st Int
+orderedListMarker style delim = do
+ let num = case style of
+ DefaultStyle -> decimal <|> defaultNum
+ Decimal -> decimal
+ UpperRoman -> upperRoman
+ LowerRoman -> lowerRoman
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ let context = case delim of
+ DefaultDelim -> inPeriod
+ Period -> inPeriod
+ OneParen -> inOneParen
+ TwoParens -> inTwoParens
+ (start, _, _) <- context num
+ return start
+
+-- | Parses a character reference and returns a Str element.
+charRef :: GenParser Char st Inline
+charRef = do
+ c <- characterReference
+ return $ Str [c]
+
+-- | Parse a string with a given parser and state.
+readWith :: GenParser Char ParserState a -- ^ parser
+ -> ParserState -- ^ initial state
+ -> String -- ^ input string
+ -> a
+readWith parser state input =
+ case runParser parser state "source" input of
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result -> result
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: (Show a) => GenParser Char ParserState a
+ -> String
+ -> IO ()
+testStringWith parser str = putStrLn $ show $
+ readWith parser defaultParserState str
+
+-- | Parsing options.
+data ParserState = ParserState
+ { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
+ stateParserContext :: ParserContext, -- ^ Inside list?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateKeys :: KeyTable, -- ^ List of reference keys
+ stateNotes :: NoteTable, -- ^ List of notes
+ stateTabStop :: Int, -- ^ Tab stop
+ stateStandalone :: Bool, -- ^ Parse bibliographic info?
+ stateTitle :: [Inline], -- ^ Title of document
+ stateAuthors :: [String], -- ^ Authors of document
+ stateDate :: String, -- ^ Date of document
+ stateStrict :: Bool, -- ^ Use strict markdown syntax?
+ stateSmart :: Bool, -- ^ Use smart typography?
+ stateColumns :: Int, -- ^ Number of columns in terminal
+ stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
+ }
+ deriving Show
+
+defaultParserState :: ParserState
+defaultParserState =
+ ParserState { stateParseRaw = False,
+ stateParserContext = NullState,
+ stateQuoteContext = NoQuote,
+ stateKeys = [],
+ stateNotes = [],
+ stateTabStop = 4,
+ stateStandalone = False,
+ stateTitle = [],
+ stateAuthors = [],
+ stateDate = [],
+ stateStrict = False,
+ stateSmart = False,
+ stateColumns = 80,
+ stateHeaderTable = [] }
+
+data HeaderType
+ = SingleHeader Char -- ^ Single line of characters underneath
+ | DoubleHeader Char -- ^ Lines of characters above and below
+ deriving (Eq, Show)
+
+data ParserContext
+ = ListItemState -- ^ Used when running parser on list item contents
+ | NullState -- ^ Default state
+ deriving (Eq, Show)
+
+data QuoteContext
+ = InSingleQuote -- ^ Used when parsing inside single quotes
+ | InDoubleQuote -- ^ Used when parsing inside double quotes
+ | NoQuote -- ^ Used when not parsing inside quotes
+ deriving (Eq, Show)
+
+type NoteTable = [(String, [Block])]
+
+type KeyTable = [([Inline], Target)]
+
+-- | Look up key in key table and return target object.
+lookupKeySrc :: KeyTable -- ^ Key table
+ -> [Inline] -- ^ Key
+ -> Maybe Target
+lookupKeySrc table key = case find (refsMatch key . fst) table of
+ Nothing -> Nothing
+ Just (_, src) -> Just src
+
+-- | Returns @True@ if keys match (case insensitive).
+refsMatch :: [Inline] -> [Inline] -> Bool
+refsMatch ((Str x):restx) ((Str y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Emph x):restx) ((Emph y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strong x):restx) ((Strong y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Superscript x):restx) ((Superscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Subscript x):restx) ((Subscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
+ t == u && refsMatch x y && refsMatch restx resty
+refsMatch ((Code x):restx) ((Code y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((TeX x):restx) ((TeX y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
+refsMatch [] x = null x
+refsMatch x [] = null x
+
+--
+-- Native format prettyprinting
+--
+
+-- | Indent string as a block.
+indentBy :: Int -- ^ Number of spaces to indent the block
+ -> Int -- ^ Number of spaces (rel to block) to indent first line
+ -> String -- ^ Contents of block to indent
+ -> String
+indentBy _ _ [] = ""
+indentBy num first str =
+ let (firstLine:restLines) = lines str
+ firstLineIndent = num + first
+ in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
+ (joinWithSep "\n" $ map ((replicate num ' ') ++ ) restLines)
+
+-- | Prettyprint list of Pandoc blocks elements.
+prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
+ -> [Block] -- ^ List of blocks
+ -> String
+prettyBlockList indent [] = indentBy indent 0 "[]"
+prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
+ (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]"
+
+-- | Prettyprint Pandoc block element.
+prettyBlock :: Block -> String
+prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
+ (prettyBlockList 2 blocks)
+prettyBlock (OrderedList attribs blockLists) =
+ "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
+ (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks)
+ blockLists)) ++ " ]"
+prettyBlock (BulletList blockLists) = "BulletList\n" ++
+ indentBy 2 0 ("[ " ++ (joinWithSep ", "
+ (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
+prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++
+ indentBy 2 0 ("[" ++ (joinWithSep ",\n"
+ (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++
+ indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]"
+prettyBlock (Table caption aligns widths header rows) =
+ "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
+ show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
+ (joinWithSep ",\n" (map prettyRow rows)) ++ " ]"
+ where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", "
+ (map (\blocks -> prettyBlockList 2 blocks)
+ cols))) ++ " ]"
+prettyBlock block = show block
+
+-- | Prettyprint Pandoc document.
+prettyPandoc :: Pandoc -> String
+prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
+ ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
+
+--
+-- Pandoc block and inline list processing
+--
+
+-- | Generate infinite lazy list of markers for an ordered list,
+-- depending on list attributes.
+orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
+orderedListMarkers (start, numstyle, numdelim) =
+ let singleton c = [c]
+ nums = case numstyle of
+ DefaultStyle -> map show [start..]
+ Decimal -> map show [start..]
+ UpperAlpha -> drop (start - 1) $ cycle $
+ map singleton ['A'..'Z']
+ LowerAlpha -> drop (start - 1) $ cycle $
+ map singleton ['a'..'z']
+ UpperRoman -> map toRomanNumeral [start..]
+ LowerRoman -> map (map toLower . toRomanNumeral) [start..]
+ inDelim str = case numdelim of
+ DefaultDelim -> str ++ "."
+ Period -> str ++ "."
+ OneParen -> str ++ ")"
+ TwoParens -> "(" ++ str ++ ")"
+ in map inDelim nums
+
+-- | Normalize a list of inline elements: remove leading and trailing
+-- @Space@ elements, collapse double @Space@s into singles, and
+-- remove empty Str elements.
+normalizeSpaces :: [Inline] -> [Inline]
+normalizeSpaces [] = []
+normalizeSpaces list =
+ let removeDoubles [] = []
+ removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
+ removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest)
+ removeDoubles ((Str ""):rest) = removeDoubles rest
+ removeDoubles (x:rest) = x:(removeDoubles rest)
+ removeLeading (Space:xs) = removeLeading xs
+ removeLeading x = x
+ removeTrailing [] = []
+ removeTrailing lst = if (last lst == Space)
+ then init lst
+ else lst
+ in removeLeading $ removeTrailing $ removeDoubles list
+
+-- | Change final list item from @Para@ to @Plain@ if the list should
+-- be compact.
+compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
+ -> [[Block]]
+compactify [] = []
+compactify items =
+ let final = last items
+ others = init items
+ in case final of
+ [Para a] -> if any containsPara others
+ then items
+ else others ++ [[Plain a]]
+ _ -> items
+
+containsPara :: [Block] -> Bool
+containsPara [] = False
+containsPara ((Para _):_) = True
+containsPara ((BulletList items):rest) = any containsPara items ||
+ containsPara rest
+containsPara ((OrderedList _ items):rest) = any containsPara items ||
+ containsPara rest
+containsPara ((DefinitionList items):rest) = any containsPara (map snd items) ||
+ containsPara rest
+containsPara (_:rest) = containsPara rest
+
+-- | Data structure for defining hierarchical Pandoc documents
+data Element = Blk Block
+ | Sec [Inline] [Element] deriving (Eq, Read, Show)
+
+-- | Returns @True@ on Header block with at least the specified level
+headerAtLeast :: Int -> Block -> Bool
+headerAtLeast level (Header x _) = x <= level
+headerAtLeast _ _ = False
+
+-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
+hierarchicalize :: [Block] -> [Element]
+hierarchicalize [] = []
+hierarchicalize (block:rest) =
+ case block of
+ (Header level title) ->
+ let (thisSection, rest') = break (headerAtLeast level) rest
+ in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
+ x -> (Blk x):(hierarchicalize rest)
+
+-- | True if block is a Header block.
+isHeaderBlock :: Block -> Bool
+isHeaderBlock (Header _ _) = True
+isHeaderBlock _ = False
+
+--
+-- Writer options
+--
+
+-- | Options for writers
+data WriterOptions = WriterOptions
+ { writerStandalone :: Bool -- ^ Include header and footer
+ , writerHeader :: String -- ^ Header for the document
+ , writerTitlePrefix :: String -- ^ Prefix for HTML titles
+ , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
+ , writerTableOfContents :: Bool -- ^ Include table of contents
+ , writerS5 :: Bool -- ^ We're writing S5
+ , writerUseASCIIMathML :: Bool -- ^ Use ASCIIMathML
+ , writerASCIIMathMLURL :: Maybe String -- ^ URL to asciiMathML.js
+ , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
+ , writerIncremental :: Bool -- ^ Incremental S5 lists
+ , writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerIncludeBefore :: String -- ^ String to include before the body
+ , writerIncludeAfter :: String -- ^ String to include after the body
+ , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , writerWrapText :: Bool -- ^ Wrap text to line length
+ } deriving Show
+
+-- | Default writer options.
+defaultWriterOptions :: WriterOptions
+defaultWriterOptions =
+ WriterOptions { writerStandalone = False
+ , writerHeader = ""
+ , writerTitlePrefix = ""
+ , writerTabStop = 4
+ , writerTableOfContents = False
+ , writerS5 = False
+ , writerUseASCIIMathML = False
+ , writerASCIIMathMLURL = Nothing
+ , writerIgnoreNotes = False
+ , writerIncremental = False
+ , writerNumberSections = False
+ , writerIncludeBefore = ""
+ , writerIncludeAfter = ""
+ , writerStrictMarkdown = False
+ , writerReferenceLinks = False
+ , writerWrapText = True
+ }
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
new file mode 100644
index 000000000..16bdb9218
--- /dev/null
+++ b/src/Text/Pandoc/UTF8.hs
@@ -0,0 +1,45 @@
+-- | Functions for converting Unicode strings to UTF-8 and vice versa.
+--
+-- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>.
+-- (c) 2003, OGI School of Science & Engineering, Oregon Health and
+-- Science University.
+--
+-- Modified by Martin Norbaeck
+-- to pass illegal UTF-8 sequences through unchanged.
+module Text.Pandoc.UTF8 (
+ fromUTF8,
+ toUTF8
+ ) where
+
+-- From the Char module supplied with HBC.
+
+-- | Take a UTF-8 string and decode it into a Unicode string.
+fromUTF8 :: String -> String
+fromUTF8 "" = ""
+fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker)
+fromUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
+ '\x80' <= c' && c' <= '\xbf' =
+ toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs
+fromUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' &&
+ '\x80' <= c' && c' <= '\xbf' &&
+ '\x80' <= c'' && c'' <= '\xbf' =
+ toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs
+fromUTF8 (c:cs) = c : fromUTF8 cs
+
+-- | Take a Unicode string and encode it as a UTF-8 string.
+toUTF8 :: String -> String
+toUTF8 "" = ""
+toUTF8 (c:cs) =
+ if c > '\x0000' && c < '\x0080' then
+ c : toUTF8 cs
+ else if c < toEnum 0x0800 then
+ let i = fromEnum c
+ in toEnum (0xc0 + i `div` 0x40) :
+ toEnum (0x80 + i `mod` 0x40) :
+ toUTF8 cs
+ else
+ let i = fromEnum c
+ in toEnum (0xe0 + i `div` 0x1000) :
+ toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) :
+ toEnum (0x80 + i `mod` 0x40) :
+ toUTF8 cs
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
new file mode 100644
index 000000000..13912a9f3
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -0,0 +1,248 @@
+{-
+Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.ConTeXt
+ Copyright : Copyright (C) 2007 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into ConTeXt.
+-}
+module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Printf ( printf )
+import Data.List ( (\\), intersperse )
+import Control.Monad.State
+
+type WriterState = Int -- number of next URL reference
+
+-- | Convert Pandoc to ConTeXt.
+writeConTeXt :: WriterOptions -> Pandoc -> String
+writeConTeXt options document = evalState (pandocToConTeXt options document) 1
+
+pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
+pandocToConTeXt options (Pandoc meta blocks) = do
+ main <- blockListToConTeXt blocks
+ let body = writerIncludeBefore options ++ main ++ writerIncludeAfter options
+ head <- if writerStandalone options
+ then contextHeader options meta
+ else return ""
+ let toc = if writerTableOfContents options
+ then "\\placecontent\n\n"
+ else ""
+ let foot = if writerStandalone options
+ then "\n\\stoptext\n"
+ else ""
+ return $ head ++ toc ++ body ++ foot
+
+-- | Insert bibliographic information into ConTeXt header.
+contextHeader :: WriterOptions -- ^ Options, including ConTeXt header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState String
+contextHeader options (Meta title authors date) = do
+ titletext <- if null title
+ then return ""
+ else inlineListToConTeXt title
+ let authorstext = if null authors
+ then ""
+ else if length authors == 1
+ then stringToConTeXt $ head authors
+ else stringToConTeXt $ (joinWithSep ", " $
+ init authors) ++ " & " ++ last authors
+ let datetext = if date == ""
+ then ""
+ else stringToConTeXt date
+ let titleblock = "\\doctitle{" ++ titletext ++ "}\n\
+ \ \\author{" ++ authorstext ++ "}\n\
+ \ \\date{" ++ datetext ++ "}\n\n"
+ let setupheads = if (writerNumberSections options)
+ then "\\setupheads[sectionnumber=yes, style=\\bf]\n"
+ else "\\setupheads[sectionnumber=no, style=\\bf]\n"
+ let header = writerHeader options
+ return $ header ++ setupheads ++ titleblock ++ "\\starttext\n\\maketitle\n\n"
+
+-- escape things as needed for ConTeXt
+
+escapeCharForConTeXt :: Char -> String
+escapeCharForConTeXt ch =
+ case ch of
+ '{' -> "\\letteropenbrace{}"
+ '}' -> "\\letterclosebrace{}"
+ '\\' -> "\\letterbackslash{}"
+ '$' -> "\\$"
+ '|' -> "\\letterbar{}"
+ '^' -> "\\letterhat{}"
+ '%' -> "\\%"
+ '~' -> "\\lettertilde{}"
+ '&' -> "\\&"
+ '#' -> "\\#"
+ '<' -> "\\letterless{}"
+ '>' -> "\\lettermore{}"
+ '_' -> "\\letterunderscore{}"
+ x -> [x]
+
+-- | Escape string for ConTeXt
+stringToConTeXt :: String -> String
+stringToConTeXt = concatMap escapeCharForConTeXt
+
+-- | Convert Pandoc block element to ConTeXt.
+blockToConTeXt :: Block -> State WriterState String
+blockToConTeXt Null = return ""
+blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n")
+blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n")
+blockToConTeXt (BlockQuote lst) = do
+ contents <- blockListToConTeXt lst
+ return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n"
+blockToConTeXt (CodeBlock str) =
+ return $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n"
+blockToConTeXt (RawHtml str) = return ""
+blockToConTeXt (BulletList lst) = do
+ contents <- mapM listItemToConTeXt lst
+ return $ "\\startltxitem\n" ++ concat contents ++ "\\stopltxitem\n"
+blockToConTeXt (OrderedList attribs lst) = case attribs of
+ (1, DefaultStyle, DefaultDelim) -> do
+ contents <- mapM listItemToConTeXt lst
+ return $ "\\startltxenum\n" ++ concat contents ++ "\\stopltxenum\n"
+ _ -> do
+ let markers = take (length lst) $ orderedListMarkers attribs
+ contents <- zipWithM orderedListItemToConTeXt markers lst
+ let markerWidth = maximum $ map length markers
+ let markerWidth' = if markerWidth < 3
+ then ""
+ else "[width=" ++
+ show ((markerWidth + 2) `div` 2) ++ "em]"
+ return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++
+ "\\stopitemize\n"
+blockToConTeXt (DefinitionList lst) =
+ mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat
+blockToConTeXt HorizontalRule = return "\\thinrule\n\n"
+blockToConTeXt (Header level lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ if level > 0 && level <= 3
+ then "\\" ++ concat (replicate (level - 1) "sub") ++
+ "section{" ++ contents ++ "}\n\n"
+ else contents ++ "\n\n"
+blockToConTeXt (Table caption aligns widths heads rows) = do
+ let colWidths = map printDecimal widths
+ let colDescriptor colWidth alignment = (case alignment of
+ AlignLeft -> 'l'
+ AlignRight -> 'r'
+ AlignCenter -> 'c'
+ AlignDefault -> 'l'):
+ "p(" ++ colWidth ++ "\\textwidth)|"
+ let colDescriptors = "|" ++ (concat $
+ zipWith colDescriptor colWidths aligns)
+ headers <- tableRowToConTeXt heads
+ captionText <- inlineListToConTeXt caption
+ let captionText' = if null caption then "none" else captionText
+ rows' <- mapM tableRowToConTeXt rows
+ return $ "\\placetable[here]{" ++ captionText' ++ "}\n\\starttable[" ++
+ colDescriptors ++ "]\n" ++ "\\HL\n" ++ headers ++ "\\HL\n" ++
+ concat rows' ++ "\\HL\n\\stoptable\n\n"
+
+printDecimal :: Float -> String
+printDecimal = printf "%.2f"
+
+tableRowToConTeXt cols = do
+ cols' <- mapM blockListToConTeXt cols
+ return $ "\\NC " ++ (concat $ intersperse "\\NC " cols') ++ "\\NC\\AR\n"
+
+listItemToConTeXt list = do
+ contents <- blockListToConTeXt list
+ return $ "\\item " ++ contents
+
+orderedListItemToConTeXt marker list = do
+ contents <- blockListToConTeXt list
+ return $ "\\sym{" ++ marker ++ "} " ++ contents
+
+defListItemToConTeXt (term, def) = do
+ term' <- inlineListToConTeXt term
+ def' <- blockListToConTeXt def
+ return $ "\\startdescr{" ++ term' ++ "}\n" ++
+ def' ++ "\n\\stopdescr\n"
+
+-- | Convert list of block elements to ConTeXt.
+blockListToConTeXt :: [Block] -> State WriterState String
+blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat
+
+-- | Convert list of inline elements to ConTeXt.
+inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
+ -> State WriterState String
+inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat
+
+isQuoted :: Inline -> Bool
+isQuoted (Quoted _ _) = True
+isQuoted Apostrophe = True
+isQuoted _ = False
+
+-- | Convert inline element to ConTeXt
+inlineToConTeXt :: Inline -- ^ Inline to convert
+ -> State WriterState String
+inlineToConTeXt (Emph lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "{\\em " ++ contents ++ "}"
+inlineToConTeXt (Strong lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "{\\bf " ++ contents ++ "}"
+inlineToConTeXt (Strikeout lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\overstrikes{" ++ contents ++ "}"
+inlineToConTeXt (Superscript lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\high{" ++ contents ++ "}"
+inlineToConTeXt (Subscript lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\low{" ++ contents ++ "}"
+inlineToConTeXt (Code str) = return $ "\\type{" ++ str ++ "}"
+inlineToConTeXt (Quoted SingleQuote lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\quote{" ++ contents ++ "}"
+inlineToConTeXt (Quoted DoubleQuote lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\quotation{" ++ contents ++ "}"
+inlineToConTeXt Apostrophe = return "'"
+inlineToConTeXt EmDash = return "---"
+inlineToConTeXt EnDash = return "--"
+inlineToConTeXt Ellipses = return "\\ldots{}"
+inlineToConTeXt (Str str) = return $ stringToConTeXt str
+inlineToConTeXt (TeX str) = return str
+inlineToConTeXt (HtmlInline str) = return ""
+inlineToConTeXt (LineBreak) = return "\\crlf\n"
+inlineToConTeXt Space = return " "
+inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own
+ inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links...
+inlineToConTeXt (Link text (src, _)) = do
+ next <- get
+ put (next + 1)
+ let ref = show next
+ label <- inlineListToConTeXt text
+ return $ "\\useurl[" ++ ref ++ "][" ++ src ++ "][][" ++ label ++
+ "]\\from[" ++ ref ++ "]"
+inlineToConTeXt (Image alternate (src, tit)) = do
+ alt <- inlineListToConTeXt alternate
+ return $ "\\placefigure\n[]\n[fig:" ++ alt ++ "]\n{" ++
+ tit ++ "}\n{\\externalfigure[" ++ src ++ "]}"
+inlineToConTeXt (Note contents) = do
+ contents' <- blockListToConTeXt contents
+ return $ "\\footnote{" ++ contents' ++ "}"
+
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
new file mode 100644
index 000000000..13dc8585d
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -0,0 +1,299 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Docbook
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Docbook XML.
+-}
+module Text.Pandoc.Writers.Docbook ( writeDocbook) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Data.List ( isPrefixOf, drop )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+--
+-- code to format XML
+--
+
+-- | Escape one character as needed for XML.
+escapeCharForXML :: Char -> String
+escapeCharForXML x = case x of
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ '\160' -> "&nbsp;"
+ c -> [c]
+
+-- | True if the character needs to be escaped.
+needsEscaping :: Char -> Bool
+needsEscaping c = c `elem` "&<>\"\160"
+
+-- | Escape string as needed for XML. Entity references are not preserved.
+escapeStringForXML :: String -> String
+escapeStringForXML "" = ""
+escapeStringForXML str =
+ case break needsEscaping str of
+ (okay, "") -> okay
+ (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
+
+-- | Return a text object with a string of formatted XML attributes.
+attributeList :: [(String, String)] -> Doc
+attributeList = text . concatMap
+ (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++
+ escapeStringForXML b ++ "\"")
+
+-- | Put the supplied contents between start and end tags of tagType,
+-- with specified attributes and (if specified) indentation.
+inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
+inTags isIndented tagType attribs contents =
+ let openTag = char '<' <> text tagType <> attributeList attribs <>
+ char '>'
+ closeTag = text "</" <> text tagType <> char '>'
+ in if isIndented
+ then openTag $$ nest 2 contents $$ closeTag
+ else openTag <> contents <> closeTag
+
+-- | Return a self-closing tag of tagType with specified attributes
+selfClosingTag :: String -> [(String, String)] -> Doc
+selfClosingTag tagType attribs =
+ char '<' <> text tagType <> attributeList attribs <> text " />"
+
+-- | Put the supplied contents between start and end tags of tagType.
+inTagsSimple :: String -> Doc -> Doc
+inTagsSimple tagType = inTags False tagType []
+
+-- | Put the supplied contents in indented block btw start and end tags.
+inTagsIndented :: String -> Doc -> Doc
+inTagsIndented tagType = inTags True tagType []
+
+--
+-- Docbook writer
+--
+
+-- | Convert list of authors to a docbook <author> section
+authorToDocbook :: [Char] -> Doc
+authorToDocbook name = inTagsIndented "author" $
+ if ',' `elem` name
+ then -- last name first
+ let (lastname, rest) = break (==',') name
+ firstname = removeLeadingSpace rest in
+ inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+ else -- last name last
+ let namewords = words name
+ lengthname = length namewords
+ (firstname, lastname) = case lengthname of
+ 0 -> ("","")
+ 1 -> ("", name)
+ n -> (joinWithSep " " (take (n-1) namewords), last namewords)
+ in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+
+-- | Convert Pandoc document to string in Docbook format.
+writeDocbook :: WriterOptions -> Pandoc -> String
+writeDocbook opts (Pandoc (Meta title authors date) blocks) =
+ let head = if writerStandalone opts
+ then text (writerHeader opts)
+ else empty
+ meta = if writerStandalone opts
+ then inTagsIndented "articleinfo" $
+ (inTagsSimple "title" (wrap opts title)) $$
+ (vcat (map authorToDocbook authors)) $$
+ (inTagsSimple "date" (text $ escapeStringForXML date))
+ else empty
+ elements = hierarchicalize blocks
+ before = writerIncludeBefore opts
+ after = writerIncludeAfter opts
+ body = (if null before then empty else text before) $$
+ vcat (map (elementToDocbook opts) elements) $$
+ (if null after then empty else text after)
+ body' = if writerStandalone opts
+ then inTagsIndented "article" (meta $$ body)
+ else body
+ in render $ head $$ body' $$ text ""
+
+-- | Convert an Element to Docbook.
+elementToDocbook :: WriterOptions -> Element -> Doc
+elementToDocbook opts (Blk block) = blockToDocbook opts block
+elementToDocbook opts (Sec title elements) =
+ -- Docbook doesn't allow sections with no content, so insert some if needed
+ let elements' = if null elements
+ then [Blk (Para [])]
+ else elements
+ in inTagsIndented "section" $
+ inTagsSimple "title" (wrap opts title) $$
+ vcat (map (elementToDocbook opts) elements')
+
+-- | Convert a list of Pandoc blocks to Docbook.
+blocksToDocbook :: WriterOptions -> [Block] -> Doc
+blocksToDocbook opts = vcat . map (blockToDocbook opts)
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+-- | Convert a list of pairs of terms and definitions into a list of
+-- Docbook varlistentrys.
+deflistItemsToDocbook :: WriterOptions -> [([Inline],[Block])] -> Doc
+deflistItemsToDocbook opts items =
+ vcat $ map (\(term, def) -> deflistItemToDocbook opts term def) items
+
+-- | Convert a term and a list of blocks into a Docbook varlistentry.
+deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc
+deflistItemToDocbook opts term def =
+ let def' = map plainToPara def
+ in inTagsIndented "varlistentry" $
+ inTagsIndented "term" (inlinesToDocbook opts term) $$
+ inTagsIndented "listitem" (blocksToDocbook opts def')
+
+-- | Convert a list of lists of blocks to a list of Docbook list items.
+listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
+listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items
+
+-- | Convert a list of blocks into a Docbook list item.
+listItemToDocbook :: WriterOptions -> [Block] -> Doc
+listItemToDocbook opts item =
+ inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item
+
+-- | Convert a Pandoc block element to Docbook.
+blockToDocbook :: WriterOptions -> Block -> Doc
+blockToDocbook opts Null = empty
+blockToDocbook opts (Plain lst) = wrap opts lst
+blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
+blockToDocbook opts (BlockQuote blocks) =
+ inTagsIndented "blockquote" $ blocksToDocbook opts blocks
+blockToDocbook opts (CodeBlock str) =
+ text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>"
+blockToDocbook opts (BulletList lst) =
+ inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
+blockToDocbook opts (OrderedList _ []) = empty
+blockToDocbook opts (OrderedList (start, numstyle, numdelim) (first:rest)) =
+ let attribs = case numstyle of
+ DefaultStyle -> []
+ Decimal -> [("numeration", "arabic")]
+ UpperAlpha -> [("numeration", "upperalpha")]
+ LowerAlpha -> [("numeration", "loweralpha")]
+ UpperRoman -> [("numeration", "upperroman")]
+ LowerRoman -> [("numeration", "lowerroman")]
+ items = if start == 1
+ then listItemsToDocbook opts (first:rest)
+ else (inTags True "listitem" [("override",show start)]
+ (blocksToDocbook opts $ map plainToPara first)) $$
+ listItemsToDocbook opts rest
+ in inTags True "orderedlist" attribs items
+blockToDocbook opts (DefinitionList lst) =
+ inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst
+blockToDocbook opts (RawHtml str) = text str -- raw XML block
+blockToDocbook opts HorizontalRule = empty -- not semantic
+blockToDocbook opts (Table caption aligns widths headers rows) =
+ let alignStrings = map alignmentToString aligns
+ captionDoc = if null caption
+ then empty
+ else inTagsIndented "caption"
+ (inlinesToDocbook opts caption)
+ tableType = if isEmpty captionDoc then "informaltable" else "table"
+ in inTagsIndented tableType $ captionDoc $$
+ (colHeadsToDocbook opts alignStrings widths headers) $$
+ (vcat $ map (tableRowToDocbook opts alignStrings) rows)
+
+colHeadsToDocbook opts alignStrings widths headers =
+ let heads = zipWith3 (\align width item ->
+ tableItemToDocbook opts "th" align width item)
+ alignStrings widths headers
+ in inTagsIndented "tr" $ vcat heads
+
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToDocbook opts aligns cols = inTagsIndented "tr" $
+ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
+
+tableItemToDocbook opts tag align width item =
+ let attrib = [("align", align)] ++
+ if width /= 0
+ then [("style", "{width: " ++
+ show (truncate (100*width)) ++ "%;}")]
+ else []
+ in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
+
+-- | Take list of inline elements and return wrapped doc.
+wrap :: WriterOptions -> [Inline] -> Doc
+wrap opts lst = if writerWrapText opts
+ then fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
+ else inlinesToDocbook opts lst
+
+-- | Convert a list of inline elements to Docbook.
+inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
+inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
+
+-- | Convert an inline element to Docbook.
+inlineToDocbook :: WriterOptions -> Inline -> Doc
+inlineToDocbook opts (Str str) = text $ escapeStringForXML str
+inlineToDocbook opts (Emph lst) =
+ inTagsSimple "emphasis" $ inlinesToDocbook opts lst
+inlineToDocbook opts (Strong lst) =
+ inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst
+inlineToDocbook opts (Strikeout lst) =
+ inTags False "emphasis" [("role", "strikethrough")] $
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Superscript lst) =
+ inTagsSimple "superscript" $ inlinesToDocbook opts lst
+inlineToDocbook opts (Subscript lst) =
+ inTagsSimple "subscript" $ inlinesToDocbook opts lst
+inlineToDocbook opts (Quoted _ lst) =
+ inTagsSimple "quote" $ inlinesToDocbook opts lst
+inlineToDocbook opts Apostrophe = char '\''
+inlineToDocbook opts Ellipses = text "&#8230;"
+inlineToDocbook opts EmDash = text "&#8212;"
+inlineToDocbook opts EnDash = text "&#8211;"
+inlineToDocbook opts (Code str) =
+ inTagsSimple "literal" $ text (escapeStringForXML str)
+inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
+inlineToDocbook opts (HtmlInline str) = empty
+inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>"
+inlineToDocbook opts Space = char ' '
+inlineToDocbook opts (Link txt (src, tit)) =
+ if isPrefixOf "mailto:" src
+ then let src' = drop 7 src
+ emailLink = inTagsSimple "email" $ text $
+ escapeStringForXML $ src'
+ in if txt == [Code src']
+ then emailLink
+ else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
+ char ')'
+ else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
+inlineToDocbook opts (Image alt (src, tit)) =
+ let titleDoc = if null tit
+ then empty
+ else inTagsIndented "objectinfo" $
+ inTagsIndented "title" (text $ escapeStringForXML tit)
+ in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
+ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
+inlineToDocbook opts (Note contents) =
+ inTagsIndented "footnote" $ blocksToDocbook opts contents
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
new file mode 100644
index 000000000..7ec95d8ef
--- /dev/null
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -0,0 +1,458 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.HTML
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to HTML.
+-}
+module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.ASCIIMathML
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.Pandoc.Shared
+import Text.Regex ( mkRegex, matchRegex )
+import Numeric ( showHex )
+import Data.Char ( ord, toLower )
+import Data.List ( isPrefixOf, intersperse )
+import qualified Data.Set as S
+import Control.Monad.State
+import Text.XHtml.Transitional
+
+data WriterState = WriterState
+ { stNotes :: [Html] -- ^ List of notes
+ , stIds :: [String] -- ^ List of header identifiers
+ , stMath :: Bool -- ^ Math is used in document
+ , stCSS :: S.Set String -- ^ CSS to include in header
+ } deriving Show
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState {stNotes= [], stIds = [],
+ stMath = False, stCSS = S.empty}
+
+-- Helpers to render HTML with the appropriate function.
+render opts = if writerWrapText opts then renderHtml else showHtml
+renderFragment opts = if writerWrapText opts
+ then renderHtmlFragment
+ else showHtmlFragment
+
+-- | Convert Pandoc document to Html string.
+writeHtmlString :: WriterOptions -> Pandoc -> String
+writeHtmlString opts =
+ if writerStandalone opts
+ then render opts . writeHtml opts
+ else renderFragment opts . writeHtml opts
+
+-- | Convert Pandoc document to Html structure.
+writeHtml :: WriterOptions -> Pandoc -> Html
+writeHtml opts (Pandoc (Meta tit authors date) blocks) =
+ let titlePrefix = writerTitlePrefix opts
+ topTitle = evalState (inlineListToHtml opts tit) defaultWriterState
+ topTitle' = if null titlePrefix
+ then topTitle
+ else titlePrefix +++ " - " +++ topTitle
+ metadata = thetitle topTitle' +++
+ meta ! [httpequiv "Content-Type",
+ content "text/html; charset=UTF-8"] +++
+ meta ! [name "generator", content "pandoc"] +++
+ (toHtmlFromList $
+ map (\a -> meta ! [name "author", content a]) authors) +++
+ (if null date
+ then noHtml
+ else meta ! [name "date", content date])
+ titleHeader = if writerStandalone opts && not (null tit) &&
+ not (writerS5 opts)
+ then h1 ! [theclass "title"] $ topTitle
+ else noHtml
+ headerBlocks = filter isHeaderBlock blocks
+ ids = uniqueIdentifiers $
+ map (\(Header _ lst) -> lst) headerBlocks
+ toc = if writerTableOfContents opts
+ then tableOfContents opts headerBlocks ids
+ else noHtml
+ (blocks', newstate) =
+ runState (blockListToHtml opts blocks)
+ (defaultWriterState {stIds = ids})
+ cssLines = stCSS newstate
+ css = if S.null cssLines
+ then noHtml
+ else style ! [thetype "text/css"] $ primHtml $
+ '\n':(unlines $ S.toList cssLines)
+ math = if stMath newstate
+ then case writerASCIIMathMLURL opts of
+ Just path -> script ! [src path,
+ thetype "text/javascript"] $
+ noHtml
+ Nothing -> primHtml asciiMathMLScript
+ else noHtml
+ head = header $ metadata +++ math +++ css +++
+ primHtml (writerHeader opts)
+ notes = reverse (stNotes newstate)
+ before = primHtml $ writerIncludeBefore opts
+ after = primHtml $ writerIncludeAfter opts
+ thebody = before +++ titleHeader +++ toc +++ blocks' +++
+ footnoteSection opts notes +++ after
+ in if writerStandalone opts
+ then head +++ body thebody
+ else thebody
+
+-- | Construct table of contents from list of header blocks and identifiers.
+-- Assumes there are as many identifiers as header blocks.
+tableOfContents :: WriterOptions -> [Block] -> [String] -> Html
+tableOfContents _ [] _ = noHtml
+tableOfContents opts headers ids =
+ let opts' = opts { writerIgnoreNotes = True }
+ contentsTree = hierarchicalize headers
+ contents = evalState (mapM (elementToListItem opts') contentsTree)
+ (defaultWriterState {stIds = ids})
+ in thediv ! [identifier "toc"] $ unordList contents
+
+-- | Converts an Element to a list item for a table of contents,
+-- retrieving the appropriate identifier from state.
+elementToListItem :: WriterOptions -> Element -> State WriterState Html
+elementToListItem opts (Blk _) = return noHtml
+elementToListItem opts (Sec headerText subsecs) = do
+ st <- get
+ let ids = stIds st
+ let (id, rest) = if null ids
+ then ("", [])
+ else (head ids, tail ids)
+ put $ st {stIds = rest}
+ txt <- inlineListToHtml opts headerText
+ subHeads <- mapM (elementToListItem opts) subsecs
+ let subList = if null subHeads
+ then noHtml
+ else unordList subHeads
+ return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++
+ subList
+
+-- | Convert list of Note blocks to a footnote <div>.
+-- Assumes notes are sorted.
+footnoteSection :: WriterOptions -> [Html] -> Html
+footnoteSection opts notes =
+ if null notes
+ then noHtml
+ else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes)
+
+-- | Obfuscate a "mailto:" link using Javascript.
+obfuscateLink :: WriterOptions -> String -> String -> Html
+obfuscateLink opts text src =
+ let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$"
+ src' = map toLower src
+ in case (matchRegex emailRegex src') of
+ (Just [name, domain]) ->
+ let domain' = substitute "." " dot " domain
+ at' = obfuscateChar '@'
+ (linkText, altText) =
+ if text == drop 7 src' -- autolink
+ then ("'<code>'+e+'</code>'", name ++ " at " ++ domain')
+ else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++
+ domain' ++ ")")
+ in if writerStrictMarkdown opts
+ then -- need to use primHtml or &'s are escaped to &amp; in URL
+ primHtml $ "<a href=\"" ++ (obfuscateString src')
+ ++ "\">" ++ (obfuscateString text) ++ "</a>"
+ else (script ! [thetype "text/javascript"] $
+ primHtml ("\n<!--\nh='" ++
+ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
+ obfuscateString name ++ "';e=n+a+h;\n" ++
+ "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
+ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
+ noscript (primHtml $ obfuscateString altText)
+ _ -> anchor ! [href src] $ primHtml text -- malformed email
+
+-- | Obfuscate character as entity.
+obfuscateChar :: Char -> String
+obfuscateChar char =
+ let num = ord char
+ numstr = if even num then show num else "x" ++ showHex num ""
+ in "&#" ++ numstr ++ ";"
+
+-- | Obfuscate string using entities.
+obfuscateString :: String -> String
+obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
+
+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+ let c' = ord c
+ in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
+ c' >= 0xE000 && c' <= 0xE0FF
+ then True
+ else False
+
+-- | Add CSS for document header.
+addToCSS :: String -> State WriterState ()
+addToCSS item = do
+ st <- get
+ let current = stCSS st
+ put $ st {stCSS = S.insert item current}
+
+-- | Convert Pandoc inline list to plain text identifier.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier [] = ""
+inlineListToIdentifier (x:xs) =
+ xAsText ++ inlineListToIdentifier xs
+ where xAsText = case x of
+ Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
+ concat $ intersperse "-" $ words $ map toLower s
+ Emph lst -> inlineListToIdentifier lst
+ Strikeout lst -> inlineListToIdentifier lst
+ Superscript lst -> inlineListToIdentifier lst
+ Subscript lst -> inlineListToIdentifier lst
+ Strong lst -> inlineListToIdentifier lst
+ Quoted _ lst -> inlineListToIdentifier lst
+ Code s -> s
+ Space -> "-"
+ EmDash -> "-"
+ EnDash -> "-"
+ Apostrophe -> ""
+ Ellipses -> ""
+ LineBreak -> "-"
+ TeX _ -> ""
+ HtmlInline _ -> ""
+ Link lst _ -> inlineListToIdentifier lst
+ Image lst _ -> inlineListToIdentifier lst
+ Note _ -> ""
+
+-- | Return unique identifiers for list of inline lists.
+uniqueIdentifiers :: [[Inline]] -> [String]
+uniqueIdentifiers ls =
+ let addIdentifier (nonuniqueIds, uniqueIds) l =
+ let new = inlineListToIdentifier l
+ matches = length $ filter (== new) nonuniqueIds
+ new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
+ in (new:nonuniqueIds, new':uniqueIds)
+ in reverse $ snd $ foldl addIdentifier ([],[]) ls
+
+-- | Convert Pandoc block element to HTML.
+blockToHtml :: WriterOptions -> Block -> State WriterState Html
+blockToHtml opts Null = return $ noHtml
+blockToHtml opts (Plain lst) = inlineListToHtml opts lst
+blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph)
+blockToHtml opts (RawHtml str) = return $ primHtml str
+blockToHtml opts (HorizontalRule) = return $ hr
+blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n")
+ -- the final \n for consistency with Markdown.pl
+blockToHtml opts (BlockQuote blocks) =
+ -- in S5, treat list in blockquote specially
+ -- if default is incremental, make it nonincremental;
+ -- otherwise incremental
+ if writerS5 opts
+ then let inc = not (writerIncremental opts) in
+ case blocks of
+ [BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
+ (BulletList lst)
+ [OrderedList attribs lst] ->
+ blockToHtml (opts {writerIncremental = inc})
+ (OrderedList attribs lst)
+ otherwise -> blockListToHtml opts blocks >>=
+ (return . blockquote)
+ else blockListToHtml opts blocks >>= (return . blockquote)
+blockToHtml opts (Header level lst) = do
+ contents <- inlineListToHtml opts lst
+ st <- get
+ let ids = stIds st
+ let (id, rest) = if null ids
+ then ("", [])
+ else (head ids, tail ids)
+ put $ st {stIds = rest}
+ let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts)
+ then []
+ else [identifier id]
+ let contents' = if writerTableOfContents opts
+ then anchor ! [href ("#TOC-" ++ id)] $ contents
+ else contents
+ return $ case level of
+ 1 -> h1 contents' ! attribs
+ 2 -> h2 contents' ! attribs
+ 3 -> h3 contents' ! attribs
+ 4 -> h4 contents' ! attribs
+ 5 -> h5 contents' ! attribs
+ 6 -> h6 contents' ! attribs
+ _ -> paragraph contents' ! attribs
+blockToHtml opts (BulletList lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ unordList ! attribs $ contents
+blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ let attribs = (if writerIncremental opts
+ then [theclass "incremental"]
+ else []) ++
+ (if startnum /= 1
+ then [start startnum]
+ else []) ++
+ (if numstyle /= DefaultStyle
+ then [theclass numstyle']
+ else [])
+ if numstyle /= DefaultStyle
+ then addToCSS $ "ol." ++ numstyle' ++
+ " { list-style-type: " ++
+ numstyle' ++ "; }"
+ else return ()
+ return $ ordList ! attribs $ contents
+blockToHtml opts (DefinitionList lst) = do
+ contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term
+ def' <- blockListToHtml opts def
+ return $ (term', def')) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ defList ! attribs $ contents
+blockToHtml opts (Table capt aligns widths headers rows) = do
+ let alignStrings = map alignmentToString aligns
+ captionDoc <- if null capt
+ then return noHtml
+ else inlineListToHtml opts capt >>= return . caption
+ colHeads <- colHeadsToHtml opts alignStrings
+ widths headers
+ rows' <- mapM (tableRowToHtml opts alignStrings) rows
+ return $ table $ captionDoc +++ colHeads +++ rows'
+
+colHeadsToHtml opts alignStrings widths headers = do
+ heads <- sequence $ zipWith3
+ (\align width item -> tableItemToHtml opts th align width item)
+ alignStrings widths headers
+ return $ tr $ toHtmlFromList heads
+
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToHtml opts aligns cols =
+ (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>=
+ return . tr . toHtmlFromList
+
+tableItemToHtml opts tag align' width item = do
+ contents <- blockListToHtml opts item
+ let attrib = [align align'] ++
+ if width /= 0
+ then [thestyle ("width: " ++ show (truncate (100*width)) ++
+ "%;")]
+ else []
+ return $ tag ! attrib $ contents
+
+blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
+blockListToHtml opts lst =
+ mapM (blockToHtml opts) lst >>= return . toHtmlFromList
+
+-- | Convert list of Pandoc inline elements to HTML.
+inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
+inlineListToHtml opts lst =
+ mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
+
+-- | Convert Pandoc inline element to HTML.
+inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
+inlineToHtml opts inline =
+ case inline of
+ (Str str) -> return $ stringToHtml str
+ (Space) -> return $ stringToHtml " "
+ (LineBreak) -> return $ br
+ (EmDash) -> return $ primHtmlChar "mdash"
+ (EnDash) -> return $ primHtmlChar "ndash"
+ (Ellipses) -> return $ primHtmlChar "hellip"
+ (Apostrophe) -> return $ primHtmlChar "rsquo"
+ (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize
+ (Strong lst) -> inlineListToHtml opts lst >>= return . strong
+ (Code str) -> return $ thecode << str
+ (Strikeout lst) -> addToCSS
+ ".strikeout { text-decoration: line-through; }" >>
+ inlineListToHtml opts lst >>=
+ return . (thespan ! [theclass "strikeout"])
+ (Superscript lst) -> inlineListToHtml opts lst >>= return . sup
+ (Subscript lst) -> inlineListToHtml opts lst >>= return . sub
+ (Quoted quoteType lst) ->
+ let (leftQuote, rightQuote) = case quoteType of
+ SingleQuote -> (primHtmlChar "lsquo",
+ primHtmlChar "rsquo")
+ DoubleQuote -> (primHtmlChar "ldquo",
+ primHtmlChar "rdquo")
+ in do contents <- inlineListToHtml opts lst
+ return $ leftQuote +++ contents +++ rightQuote
+ (TeX str) -> (if writerUseASCIIMathML opts
+ then modify (\st -> st {stMath = True})
+ else return ()) >> return (stringToHtml str)
+ (HtmlInline str) -> return $ primHtml str
+ (Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src ->
+ return $ obfuscateLink opts str src
+ (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do
+ linkText <- inlineListToHtml opts txt
+ return $ obfuscateLink opts (show linkText) src
+ (Link txt (src,tit)) -> do
+ linkText <- inlineListToHtml opts txt
+ return $ anchor ! ([href src] ++
+ if null tit then [] else [title tit]) $
+ linkText
+ (Image txt (source,tit)) -> do
+ alternate <- inlineListToHtml opts txt
+ let alternate' = renderFragment opts alternate
+ let attributes = [src source] ++
+ (if null tit
+ then []
+ else [title tit]) ++
+ if null txt
+ then []
+ else [alt alternate']
+ return $ image ! attributes
+ -- note: null title included, as in Markdown.pl
+ (Note contents) -> do
+ st <- get
+ let notes = stNotes st
+ let number = (length notes) + 1
+ let ref = show number
+ htmlContents <- blockListToNote opts ref contents
+ -- push contents onto front of notes
+ put $ st {stNotes = (htmlContents:notes)}
+ return $ anchor ! [href ("#fn" ++ ref),
+ theclass "footnoteRef",
+ identifier ("fnref" ++ ref)] <<
+ sup << ref
+
+blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
+blockListToNote opts ref blocks =
+ -- If last block is Para or Plain, include the backlink at the end of
+ -- that block. Otherwise, insert a new Plain block with the backlink.
+ let backlink = [HtmlInline $ " <a href=\"#fnref" ++ ref ++
+ "\" class=\"footnoteBackLink\"" ++
+ " title=\"Jump back to footnote " ++ ref ++ "\">&#8617;</a>"]
+ blocks' = if null blocks
+ then []
+ else let lastBlock = last blocks
+ otherBlocks = init blocks
+ in case lastBlock of
+ (Para lst) -> otherBlocks ++
+ [Para (lst ++ backlink)]
+ (Plain lst) -> otherBlocks ++
+ [Plain (lst ++ backlink)]
+ _ -> otherBlocks ++ [lastBlock,
+ Plain backlink]
+ in do contents <- blockListToHtml opts blocks'
+ return $ li ! [identifier ("fn" ++ ref)] $ contents
+
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
new file mode 100644
index 000000000..f64e06e24
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -0,0 +1,310 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.LaTeX
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into LaTeX.
+-}
+module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Printf ( printf )
+import Data.List ( (\\), isInfixOf, isSuffixOf, intersperse )
+import Data.Char ( toLower )
+import qualified Data.Set as S
+import Control.Monad.State
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+data WriterState =
+ WriterState { stIncludes :: S.Set String -- strings to include in header
+ , stInNote :: Bool -- @True@ if we're in a note
+ , stOLLevel :: Int } -- level of ordered list nesting
+
+-- | Add line to header.
+addToHeader :: String -> State WriterState ()
+addToHeader str = do
+ st <- get
+ let includes = stIncludes st
+ put st {stIncludes = S.insert str includes}
+
+-- | Convert Pandoc to LaTeX.
+writeLaTeX :: WriterOptions -> Pandoc -> String
+writeLaTeX options document =
+ render $ evalState (pandocToLaTeX options document) $
+ WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1 }
+
+pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToLaTeX options (Pandoc meta blocks) = do
+ main <- blockListToLaTeX blocks
+ head <- if writerStandalone options
+ then latexHeader options meta
+ else return empty
+ let before = if null (writerIncludeBefore options)
+ then empty
+ else text (writerIncludeBefore options)
+ let after = if null (writerIncludeAfter options)
+ then empty
+ else text (writerIncludeAfter options)
+ let body = before $$ main $$ after
+ let toc = if writerTableOfContents options
+ then text "\\tableofcontents\n"
+ else empty
+ let foot = if writerStandalone options
+ then text "\\end{document}"
+ else empty
+ return $ head $$ toc $$ body $$ foot
+
+-- | Insert bibliographic information into LaTeX header.
+latexHeader :: WriterOptions -- ^ Options, including LaTeX header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState Doc
+latexHeader options (Meta title authors date) = do
+ titletext <- if null title
+ then return empty
+ else inlineListToLaTeX title >>= return . inCmd "title"
+ headerIncludes <- get >>= return . S.toList . stIncludes
+ let extras = text $ unlines headerIncludes
+ let verbatim = if "\\usepackage{fancyvrb}" `elem` headerIncludes
+ then text "\\VerbatimFootnotes % allows verbatim text in footnotes"
+ else empty
+ let authorstext = text $ "\\author{" ++
+ joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}"
+ let datetext = if date == ""
+ then empty
+ else text $ "\\date{" ++ stringToLaTeX date ++ "}"
+ let maketitle = if null title then empty else text "\\maketitle"
+ let secnumline = if (writerNumberSections options)
+ then empty
+ else text "\\setcounter{secnumdepth}{0}"
+ let baseHeader = text $ writerHeader options
+ let header = baseHeader $$ extras
+ return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$
+ datetext $$ text "\\begin{document}" $$ maketitle $$ text ""
+
+-- escape things as needed for LaTeX
+
+stringToLaTeX :: String -> String
+stringToLaTeX = escapeStringUsing latexEscapes
+ where latexEscapes = backslashEscapes "{}$%&_#" ++
+ [ ('^', "\\^{}")
+ , ('\\', "\\textbackslash{}")
+ , ('~', "\\ensuremath{\\sim}")
+ , ('|', "\\textbar{}")
+ , ('<', "\\textless{}")
+ , ('>', "\\textgreater{}")
+ ]
+
+-- | Puts contents into LaTeX command.
+inCmd :: String -> Doc -> Doc
+inCmd cmd contents = char '\\' <> text cmd <> braces contents
+
+-- | Remove all code elements from list of inline elements
+-- (because it's illegal to have verbatim inside some command arguments)
+deVerb :: [Inline] -> [Inline]
+deVerb [] = []
+deVerb ((Code str):rest) =
+ (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest)
+deVerb (other:rest) = other:(deVerb rest)
+
+-- | Convert Pandoc block element to LaTeX.
+blockToLaTeX :: Block -- ^ Block to convert
+ -> State WriterState Doc
+blockToLaTeX Null = return empty
+blockToLaTeX (Plain lst) = wrapped inlineListToLaTeX lst >>= return
+blockToLaTeX (Para lst) =
+ wrapped inlineListToLaTeX lst >>= return . (<> char '\n')
+blockToLaTeX (BlockQuote lst) = do
+ contents <- blockListToLaTeX lst
+ return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}"
+blockToLaTeX (CodeBlock str) = do
+ st <- get
+ env <- if stInNote st
+ then do addToHeader "\\usepackage{fancyvrb}"
+ return "Verbatim"
+ else return "verbatim"
+ return $ text ("\\begin{" ++ env ++ "}\n") <> text str <>
+ text ("\n\\end{" ++ env ++ "}")
+blockToLaTeX (RawHtml str) = return empty
+blockToLaTeX (BulletList lst) = do
+ items <- mapM listItemToLaTeX lst
+ return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}"
+blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
+ st <- get
+ let oldlevel = stOLLevel st
+ put $ st {stOLLevel = oldlevel + 1}
+ items <- mapM listItemToLaTeX lst
+ modify (\st -> st {stOLLevel = oldlevel})
+ exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim
+ then do addToHeader "\\usepackage{enumerate}"
+ return $ char '[' <>
+ text (head (orderedListMarkers (1, numstyle,
+ numdelim))) <> char ']'
+ else return empty
+ let resetcounter = if start /= 1 && oldlevel <= 4
+ then text $ "\\setcounter{enum" ++
+ map toLower (toRomanNumeral oldlevel) ++
+ "}{" ++ show (start - 1) ++ "}"
+ else empty
+ return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$
+ vcat items $$ text "\\end{enumerate}"
+blockToLaTeX (DefinitionList lst) = do
+ items <- mapM defListItemToLaTeX lst
+ return $ text "\\begin{description}" $$ vcat items $$
+ text "\\end{description}"
+blockToLaTeX HorizontalRule = return $ text $
+ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n"
+blockToLaTeX (Header level lst) = do
+ txt <- inlineListToLaTeX (deVerb lst)
+ return $ if (level > 0) && (level <= 3)
+ then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++
+ "section{") <> txt <> text "}\n"
+ else txt <> char '\n'
+blockToLaTeX (Table caption aligns widths heads rows) = do
+ headers <- tableRowToLaTeX heads
+ captionText <- inlineListToLaTeX caption
+ rows' <- mapM tableRowToLaTeX rows
+ let colWidths = map (printf "%.2f") widths
+ let colDescriptors = concat $ zipWith
+ (\width align -> ">{\\PBS" ++
+ (case align of
+ AlignLeft -> "\\raggedright"
+ AlignRight -> "\\raggedleft"
+ AlignCenter -> "\\centering"
+ AlignDefault -> "\\raggedright") ++
+ "\\hspace{0pt}}p{" ++ width ++
+ "\\columnwidth}")
+ colWidths aligns
+ let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$
+ headers $$ text "\\hline" $$ vcat rows' $$
+ text "\\end{tabular}"
+ let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}"
+ addToHeader "\\usepackage{array}\n\
+ \% This is needed because raggedright in table elements redefines \\\\:\n\
+ \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\
+ \\\let\\PBS=\\PreserveBackslash"
+ return $ if isEmpty captionText
+ then centered tableBody <> char '\n'
+ else text "\\begin{table}[h]" $$ centered tableBody $$
+ inCmd "caption" captionText $$ text "\\end{table}\n"
+
+blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat
+
+tableRowToLaTeX cols = mapM blockListToLaTeX cols >>=
+ return . ($$ text "\\\\") . foldl (\row item -> row $$
+ (if isEmpty row then empty else text " & ") <> item) empty
+
+listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item " $$) .
+ (nest 2)
+
+defListItemToLaTeX (term, def) = do
+ term' <- inlineListToLaTeX $ deVerb term
+ def' <- blockListToLaTeX def
+ return $ text "\\item[" <> term' <> text "]" $$ def'
+
+-- | Convert list of inline elements to LaTeX.
+inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
+ -> State WriterState Doc
+inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat
+
+isQuoted :: Inline -> Bool
+isQuoted (Quoted _ _) = True
+isQuoted Apostrophe = True
+isQuoted _ = False
+
+-- | Convert inline element to LaTeX
+inlineToLaTeX :: Inline -- ^ Inline to convert
+ -> State WriterState Doc
+inlineToLaTeX (Emph lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph"
+inlineToLaTeX (Strong lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf"
+inlineToLaTeX (Strikeout lst) = do
+ contents <- inlineListToLaTeX $ deVerb lst
+ addToHeader "\\usepackage[normalem]{ulem}"
+ return $ inCmd "sout" contents
+inlineToLaTeX (Superscript lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript"
+inlineToLaTeX (Subscript lst) = do
+ contents <- inlineListToLaTeX $ deVerb lst
+ -- oddly, latex includes \textsuperscript but not \textsubscript
+ -- so we have to define it:
+ addToHeader "\\newcommand{\\textsubscript}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}"
+ return $ inCmd "textsubscript" contents
+inlineToLaTeX (Code str) = do
+ st <- get
+ if stInNote st
+ then do addToHeader "\\usepackage{fancyvrb}"
+ else return ()
+ let chr = ((enumFromTo '!' '~') \\ str) !! 0
+ return $ text $ "\\verb" ++ [chr] ++ str ++ [chr]
+inlineToLaTeX (Quoted SingleQuote lst) = do
+ contents <- inlineListToLaTeX lst
+ let s1 = if (not (null lst)) && (isQuoted (head lst))
+ then text "\\,"
+ else empty
+ let s2 = if (not (null lst)) && (isQuoted (last lst))
+ then text "\\,"
+ else empty
+ return $ char '`' <> s1 <> contents <> s2 <> char '\''
+inlineToLaTeX (Quoted DoubleQuote lst) = do
+ contents <- inlineListToLaTeX lst
+ let s1 = if (not (null lst)) && (isQuoted (head lst))
+ then text "\\,"
+ else empty
+ let s2 = if (not (null lst)) && (isQuoted (last lst))
+ then text "\\,"
+ else empty
+ return $ text "``" <> s1 <> contents <> s2 <> text "''"
+inlineToLaTeX Apostrophe = return $ char '\''
+inlineToLaTeX EmDash = return $ text "---"
+inlineToLaTeX EnDash = return $ text "--"
+inlineToLaTeX Ellipses = return $ text "\\ldots{}"
+inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str
+inlineToLaTeX (TeX str) = return $ text str
+inlineToLaTeX (HtmlInline str) = return empty
+inlineToLaTeX (LineBreak) = return $ text "\\\\"
+inlineToLaTeX Space = return $ char ' '
+inlineToLaTeX (Link txt (src, _)) = do
+ addToHeader "\\usepackage[breaklinks=true]{hyperref}"
+ case txt of
+ [Code x] | x == src -> -- autolink
+ do addToHeader "\\usepackage{url}"
+ return $ text $ "\\url{" ++ x ++ "}"
+ _ -> do contents <- inlineListToLaTeX $ deVerb txt
+ return $ text ("\\href{" ++ src ++ "}{") <> contents <>
+ char '}'
+inlineToLaTeX (Image alternate (source, tit)) = do
+ addToHeader "\\usepackage{graphicx}"
+ return $ text $ "\\includegraphics{" ++ source ++ "}"
+inlineToLaTeX (Note contents) = do
+ st <- get
+ put (st {stInNote = True})
+ contents' <- blockListToLaTeX contents
+ modify (\st -> st {stInNote = False})
+ let rawnote = stripTrailingNewlines $ render contents'
+ -- note: a \n before } is needed when note ends with a Verbatim environment
+ let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote
+ return $ text "%\n\\footnote{" <>
+ text rawnote <> (if optNewline then char '\n' else empty) <> char '}'
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
new file mode 100644
index 000000000..8e14c2bf0
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -0,0 +1,293 @@
+{-
+Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Man
+ Copyright : Copyright (C) 2007 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to groff man page format.
+
+-}
+module Text.Pandoc.Writers.Man ( writeMan) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Printf ( printf )
+import Data.List ( isPrefixOf, drop, nub, intersperse )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+
+type Notes = [[Block]]
+type Preprocessors = [String] -- e.g. "t" for tbl
+type WriterState = (Notes, Preprocessors)
+
+-- | Convert Pandoc to Man.
+writeMan :: WriterOptions -> Pandoc -> String
+writeMan opts document = render $ evalState (pandocToMan opts document) ([],[])
+
+-- | Return groff man representation of document.
+pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToMan opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ let before' = if null before then empty else text before
+ let after' = if null after then empty else text after
+ (head, foot) <- metaToMan opts meta
+ body <- blockListToMan opts blocks
+ (notes, preprocessors) <- get
+ let preamble = if null preprocessors || not (writerStandalone opts)
+ then empty
+ else text $ ".\\\" " ++ concat (nub preprocessors)
+ notes' <- notesToMan opts (reverse notes)
+ return $ preamble $$ head $$ before' $$ body $$ notes' $$ foot $$ after'
+
+-- | Insert bibliographic information into Man header and footer.
+metaToMan :: WriterOptions -- ^ Options, including Man header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState (Doc, Doc)
+metaToMan options (Meta title authors date) = do
+ titleText <- inlineListToMan options title
+ let (cmdName, rest) = break (== ' ') $ render titleText
+ let (title', section) = case reverse cmdName of
+ (')':d:'(':xs) | d `elem` ['0'..'9'] ->
+ (text (reverse xs), char d)
+ xs -> (text (reverse xs), doubleQuotes empty)
+ let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $
+ splitBy '|' rest
+ let head = (text ".TH") <+> title' <+> section <+>
+ doubleQuotes (text date) <+> hsep extras
+ let foot = case length authors of
+ 0 -> empty
+ 1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors)
+ 2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors)
+ return $ if writerStandalone options
+ then (head, foot)
+ else (empty, empty)
+
+-- | Return man representation of notes.
+notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToMan opts notes =
+ if null notes
+ then return empty
+ else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
+ return . (text ".SH NOTES" $$) . vcat
+
+-- | Return man representation of a note.
+noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToMan opts num note = do
+ contents <- blockListToMan opts note
+ let marker = text "\n.SS [" <> text (show num) <> char ']'
+ return $ marker $$ contents
+
+-- | Association list of characters to escape.
+manEscapes :: [(Char, String)]
+manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\"
+
+-- | Escape special characters for Man.
+escapeString :: String -> String
+escapeString = escapeStringUsing manEscapes
+
+-- | Escape a literal (code) section for Man.
+escapeCode :: String -> String
+escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ")
+
+-- | Convert Pandoc block element to man.
+blockToMan :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToMan opts Null = return empty
+blockToMan opts (Plain inlines) =
+ wrapIfNeeded opts (inlineListToMan opts) inlines
+blockToMan opts (Para inlines) = do
+ contents <- wrapIfNeeded opts (inlineListToMan opts) inlines
+ return $ text ".PP" $$ contents
+blockToMan opts (RawHtml str) = return $ text str
+blockToMan opts HorizontalRule = return $ text $ ".PP\n * * * * *"
+blockToMan opts (Header level inlines) = do
+ contents <- inlineListToMan opts inlines
+ let heading = case level of
+ 1 -> ".SH "
+ _ -> ".SS "
+ return $ text heading <> contents
+blockToMan opts (CodeBlock str) = return $
+ text ".PP" $$ text "\\f[CR]" $$
+ text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]"
+blockToMan opts (BlockQuote blocks) = do
+ contents <- blockListToMan opts blocks
+ return $ text ".RS" $$ contents $$ text ".RE"
+blockToMan opts (Table caption alignments widths headers rows) =
+ let aligncode AlignLeft = "l"
+ aligncode AlignRight = "r"
+ aligncode AlignCenter = "c"
+ aligncode AlignDefault = "l"
+ in do
+ caption' <- inlineListToMan opts caption
+ modify (\(notes, preprocessors) -> (notes, "t":preprocessors))
+ let iwidths = map (printf "w(%0.2fn)" . (70 *)) widths
+ -- 78n default width - 8n indent = 70n
+ let coldescriptions = text $ joinWithSep " "
+ (zipWith (\align width -> aligncode align ++ width)
+ alignments iwidths) ++ "."
+ colheadings <- mapM (blockListToMan opts) headers
+ let makeRow cols = text "T{" $$
+ (vcat $ intersperse (text "T}@T{") cols) $$
+ text "T}"
+ let colheadings' = makeRow colheadings
+ body <- mapM (\row -> do
+ cols <- mapM (blockListToMan opts) row
+ return $ makeRow cols) rows
+ return $ text ".PP" $$ caption' $$
+ text ".TS" $$ text "tab(@);" $$ coldescriptions $$
+ colheadings' $$ char '_' $$ vcat body $$ text ".TE"
+
+blockToMan opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMan opts) items
+ return (vcat contents)
+blockToMan opts (OrderedList attribs items) = do
+ let markers = take (length items) $ orderedListMarkers attribs
+ let indent = 1 + (maximum $ map length markers)
+ contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
+ zip markers items
+ return (vcat contents)
+blockToMan opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToMan opts) items
+ return (vcat contents)
+
+-- | Convert bullet list item (list of blocks) to man.
+bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToMan opts [] = return empty
+bulletListItemToMan opts ((Para first):rest) =
+ bulletListItemToMan opts ((Plain first):rest)
+bulletListItemToMan opts ((Plain first):rest) = do
+ first' <- blockToMan opts (Plain first)
+ rest' <- blockListToMan opts rest
+ let first'' = text ".IP \\[bu] 2" $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS 2" $$ rest' $$ text ".RE"
+ return (first'' $$ rest'')
+bulletListItemToMan opts (first:rest) = do
+ first' <- blockToMan opts first
+ rest' <- blockListToMan opts rest
+ return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE"
+
+-- | Convert ordered list item (a list of blocks) to man.
+orderedListItemToMan :: WriterOptions -- ^ options
+ -> String -- ^ order marker for list item
+ -> Int -- ^ number of spaces to indent
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToMan _ _ _ [] = return empty
+orderedListItemToMan opts num indent ((Para first):rest) =
+ orderedListItemToMan opts num indent ((Plain first):rest)
+orderedListItemToMan opts num indent (first:rest) = do
+ first' <- blockToMan opts first
+ rest' <- blockListToMan opts rest
+ let num' = printf ("%" ++ show (indent - 1) ++ "s") num
+ let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS 4" $$ rest' $$ text ".RE"
+ return $ first'' $$ rest''
+
+-- | Convert definition list item (label, list of blocks) to man.
+definitionListItemToMan :: WriterOptions
+ -> ([Inline],[Block])
+ -> State WriterState Doc
+definitionListItemToMan opts (label, items) = do
+ labelText <- inlineListToMan opts label
+ contents <- if null items
+ then return empty
+ else do
+ let (first, rest) = case items of
+ ((Para x):y) -> (Plain x,y)
+ (x:y) -> (x,y)
+ rest' <- mapM (\item -> blockToMan opts item)
+ rest >>= (return . vcat)
+ first' <- blockToMan opts first
+ return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
+ return $ text ".TP\n.B " <> labelText $+$ contents
+
+-- | Convert list of Pandoc block elements to man.
+blockListToMan :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToMan opts blocks =
+ mapM (blockToMan opts) blocks >>= (return . vcat)
+
+-- | Convert list of Pandoc inline elements to man.
+inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
+
+-- | Convert Pandoc inline element to man.
+inlineToMan :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMan opts (Emph lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\f[I]" <> contents <> text "\\f[]"
+inlineToMan opts (Strong lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\f[B]" <> contents <> text "\\f[]"
+inlineToMan opts (Strikeout lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "[STRIKEOUT:" <> contents <> char ']'
+inlineToMan opts (Superscript lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '^' <> contents <> char '^'
+inlineToMan opts (Subscript lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '~' <> contents <> char '~'
+inlineToMan opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '`' <> contents <> char '\''
+inlineToMan opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\[lq]" <> contents <> text "\\[rq]"
+inlineToMan opts EmDash = return $ text "\\[em]"
+inlineToMan opts EnDash = return $ text "\\[en]"
+inlineToMan opts Apostrophe = return $ char '\''
+inlineToMan opts Ellipses = return $ text "\\&..."
+inlineToMan opts (Code str) =
+ return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]"
+inlineToMan opts (Str str) = return $ text $ escapeString str
+inlineToMan opts (TeX str) = return $ text $ escapeCode str
+inlineToMan opts (HtmlInline str) = return $ text $ escapeCode str
+inlineToMan opts (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n"
+inlineToMan opts Space = return $ char ' '
+inlineToMan opts (Link txt (src, _)) = do
+ linktext <- inlineListToMan opts txt
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ return $ if txt == [Code srcSuffix]
+ then char '<' <> text srcSuffix <> char '>'
+ else linktext <> text " (" <> text src <> char ')'
+inlineToMan opts (Image alternate (source, tit)) = do
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
+ (alternate == [Str source]) -- to prevent autolinks
+ then [Str "image"]
+ else alternate
+ linkPart <- inlineToMan opts (Link txt (source, tit))
+ return $ char '[' <> text "IMAGE: " <> linkPart <> char ']'
+inlineToMan opts (Note contents) = do
+ modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state
+ (notes, _) <- get
+ let ref = show $ (length notes)
+ return $ char '[' <> text ref <> char ']'
+
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
new file mode 100644
index 000000000..4cecaae5d
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -0,0 +1,373 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Markdown
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to markdown-formatted plain text.
+
+Markdown: <http://daringfireball.net/projects/markdown/>
+-}
+module Text.Pandoc.Writers.Markdown ( writeMarkdown) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Blocks
+import Text.ParserCombinators.Parsec ( parse, (<|>), GenParser )
+import Data.List ( group, isPrefixOf, drop, find, intersperse )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+
+type Notes = [[Block]]
+type Refs = KeyTable
+type WriterState = (Notes, Refs)
+
+-- | Convert Pandoc to Markdown.
+writeMarkdown :: WriterOptions -> Pandoc -> String
+writeMarkdown opts document =
+ render $ evalState (pandocToMarkdown opts document) ([],[])
+
+-- | Return markdown representation of document.
+pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToMarkdown opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ let before' = if null before then empty else text before
+ let after' = if null after then empty else text after
+ metaBlock <- metaToMarkdown opts meta
+ let head = if writerStandalone opts
+ then metaBlock $+$ text (writerHeader opts)
+ else empty
+ let headerBlocks = filter isHeaderBlock blocks
+ let toc = if writerTableOfContents opts
+ then tableOfContents opts headerBlocks
+ else empty
+ body <- blockListToMarkdown opts blocks
+ (notes, _) <- get
+ notes' <- notesToMarkdown opts (reverse notes)
+ (_, refs) <- get -- note that the notes may contain refs
+ refs' <- keyTableToMarkdown opts (reverse refs)
+ return $ head $+$ before' $+$ toc $+$ body $+$ text "" $+$
+ notes' $+$ text "" $+$ refs' $+$ after'
+
+-- | Return markdown representation of reference key table.
+keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
+keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
+
+-- | Return markdown representation of a reference key.
+keyToMarkdown :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+keyToMarkdown opts (label, (src, tit)) = do
+ label' <- inlineListToMarkdown opts label
+ let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\""
+ return $ text " " <> char '[' <> label' <> char ']' <> text ": " <>
+ text src <> tit'
+
+-- | Return markdown representation of notes.
+notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToMarkdown opts notes =
+ mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
+ return . vcat
+
+-- | Return markdown representation of a note.
+noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToMarkdown opts num blocks = do
+ contents <- blockListToMarkdown opts blocks
+ let marker = text "[^" <> text (show num) <> text "]:"
+ return $ hang marker (writerTabStop opts) contents
+
+-- | Escape special characters for Markdown.
+escapeString :: String -> String
+escapeString = escapeStringUsing markdownEscapes
+ where markdownEscapes = ('\160', "&nbsp;"):(backslashEscapes "`<\\*_^~")
+
+-- | Convert bibliographic information into Markdown header.
+metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc
+metaToMarkdown opts (Meta title authors date) = do
+ title' <- titleToMarkdown opts title
+ authors' <- authorsToMarkdown authors
+ date' <- dateToMarkdown date
+ return $ title' $+$ authors' $+$ date'
+
+titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+titleToMarkdown opts [] = return empty
+titleToMarkdown opts lst = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "% " <> contents
+
+authorsToMarkdown :: [String] -> State WriterState Doc
+authorsToMarkdown [] = return empty
+authorsToMarkdown lst = return $
+ text "% " <> text (joinWithSep ", " (map escapeString lst))
+
+dateToMarkdown :: String -> State WriterState Doc
+dateToMarkdown [] = return empty
+dateToMarkdown str = return $ text "% " <> text (escapeString str)
+
+-- | Construct table of contents from list of header blocks.
+tableOfContents :: WriterOptions -> [Block] -> Doc
+tableOfContents opts headers =
+ let opts' = opts { writerIgnoreNotes = True }
+ contents = BulletList $ map elementToListItem $ hierarchicalize headers
+ in evalState (blockToMarkdown opts' contents) ([],[])
+
+-- | Converts an Element to a list item for a table of contents,
+elementToListItem :: Element -> [Block]
+elementToListItem (Blk _) = []
+elementToListItem (Sec headerText subsecs) = [Plain headerText] ++
+ if null subsecs
+ then []
+ else [BulletList $ map elementToListItem subsecs]
+
+-- | Ordered list start parser for use in Para below.
+olMarker :: GenParser Char st Char
+olMarker = do (start, style, delim) <- anyOrderedListMarker
+ if delim == Period &&
+ (style == UpperAlpha || (style == UpperRoman &&
+ start `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then spaceChar >> spaceChar
+ else spaceChar
+
+-- | True if string begins with an ordered list marker
+beginsWithOrderedListMarker :: String -> Bool
+beginsWithOrderedListMarker str =
+ case parse olMarker "para start" str of
+ Left _ -> False
+ Right _ -> True
+
+wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+wrappedMarkdown opts inlines = do
+ let chunks = splitBy LineBreak inlines
+ let chunks' = if null chunks
+ then []
+ else (map (++ [Str " "]) $ init chunks) ++ [last chunks]
+ lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks'
+ return $ vcat lns
+
+-- | Convert Pandoc block element to markdown.
+blockToMarkdown :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToMarkdown opts Null = return empty
+blockToMarkdown opts (Plain inlines) =
+ wrappedMarkdown opts inlines
+blockToMarkdown opts (Para inlines) = do
+ contents <- wrappedMarkdown opts inlines
+ -- escape if para starts with ordered list marker
+ let esc = if (not (writerStrictMarkdown opts)) &&
+ beginsWithOrderedListMarker (render contents)
+ then char '\\'
+ else empty
+ return $ esc <> contents <> text "\n"
+blockToMarkdown opts (RawHtml str) = return $ text str
+blockToMarkdown opts HorizontalRule = return $ text "\n* * * * *\n"
+blockToMarkdown opts (Header level inlines) = do
+ contents <- inlineListToMarkdown opts inlines
+ return $ text ((replicate level '#') ++ " ") <> contents <> text "\n"
+blockToMarkdown opts (CodeBlock str) = return $
+ (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
+blockToMarkdown opts (BlockQuote blocks) = do
+ contents <- blockListToMarkdown opts blocks
+ return $ (vcat $ map (text . ("> " ++)) $ lines $ render contents) <>
+ text "\n"
+blockToMarkdown opts (Table caption aligns widths headers rows) = do
+ caption' <- inlineListToMarkdown opts caption
+ let caption'' = if null caption
+ then empty
+ else text "" $+$ (text "Table: " <> caption')
+ headers' <- mapM (blockListToMarkdown opts) headers
+ let widthsInChars = map (floor . (78 *)) widths
+ let alignHeader alignment = case alignment of
+ AlignLeft -> leftAlignBlock
+ AlignCenter -> centerAlignBlock
+ AlignRight -> rightAlignBlock
+ AlignDefault -> leftAlignBlock
+ let makeRow = hsepBlocks . (zipWith alignHeader aligns) .
+ (zipWith docToBlock widthsInChars)
+ let head = makeRow headers'
+ rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row
+ return $ makeRow cols) rows
+ let tableWidth = sum widthsInChars
+ let maxRowHeight = maximum $ map heightOfBlock (head:rows')
+ let isMultilineTable = maxRowHeight > 1
+ let underline = hsep $
+ map (\width -> text $ replicate width '-') widthsInChars
+ let border = if isMultilineTable
+ then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-'
+ else empty
+ let spacer = if isMultilineTable
+ then text ""
+ else empty
+ let body = vcat $ intersperse spacer $ map blockToDoc rows'
+ return $ (nest 2 $ border $+$ (blockToDoc head) $+$ underline $+$ body $+$
+ border $+$ caption'') <> text "\n"
+blockToMarkdown opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMarkdown opts) items
+ return $ (vcat contents) <> text "\n"
+blockToMarkdown opts (OrderedList attribs items) = do
+ let markers = orderedListMarkers attribs
+ let markers' = map (\m -> if length m < 3
+ then m ++ replicate (3 - length m) ' '
+ else m) markers
+ contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
+ zip markers' items
+ return $ (vcat contents) <> text "\n"
+blockToMarkdown opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToMarkdown opts) items
+ return $ (vcat contents) <> text "\n"
+
+-- | Convert bullet list item (list of blocks) to markdown.
+bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToMarkdown opts items = do
+ contents <- blockListToMarkdown opts items
+ return $ hang (text "- ") (writerTabStop opts) contents
+
+-- | Convert ordered list item (a list of blocks) to markdown.
+orderedListItemToMarkdown :: WriterOptions -- ^ options
+ -> String -- ^ list item marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToMarkdown opts marker items = do
+ contents <- blockListToMarkdown opts items
+ -- The complexities here are needed to ensure that if the list
+ -- marker is 4 characters or longer, the second and following
+ -- lines are indented 4 spaces but the list item begins after the marker.
+ return $ sep [nest (min (3 - length marker) 0) (text marker),
+ nest (writerTabStop opts) contents]
+
+-- | Convert definition list item (label, list of blocks) to markdown.
+definitionListItemToMarkdown :: WriterOptions
+ -> ([Inline],[Block])
+ -> State WriterState Doc
+definitionListItemToMarkdown opts (label, items) = do
+ labelText <- inlineListToMarkdown opts label
+ let tabStop = writerTabStop opts
+ let leader = char ':'
+ contents <- mapM (\item -> blockToMarkdown opts item >>=
+ (\txt -> return (leader $$ nest tabStop txt)))
+ items >>= return . vcat
+ return $ labelText $+$ contents
+
+-- | Convert list of Pandoc block elements to markdown.
+blockListToMarkdown :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToMarkdown opts blocks =
+ mapM (blockToMarkdown opts) blocks >>= return . vcat
+
+-- | Get reference for target; if none exists, create unique one and return.
+-- Prefer label if possible; otherwise, generate a unique key.
+getReference :: [Inline] -> Target -> State WriterState [Inline]
+getReference label (src, tit) = do
+ (_,refs) <- get
+ case find ((== (src, tit)) . snd) refs of
+ Just (ref, _) -> return ref
+ Nothing -> do
+ let label' = case find ((== label) . fst) refs of
+ Just _ -> -- label is used; generate numerical label
+ case find (\n -> not (any (== [Str (show n)])
+ (map fst refs))) [1..10000] of
+ Just x -> [Str (show x)]
+ Nothing -> error "no unique label"
+ Nothing -> label
+ modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
+ return label'
+
+-- | Convert list of Pandoc inline elements to markdown.
+inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToMarkdown opts lst =
+ mapM (inlineToMarkdown opts) lst >>= return . hcat
+
+-- | Convert Pandoc inline element to markdown.
+inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMarkdown opts (Emph lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '*' <> contents <> char '*'
+inlineToMarkdown opts (Strong lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "**" <> contents <> text "**"
+inlineToMarkdown opts (Strikeout lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "~~" <> contents <> text "~~"
+inlineToMarkdown opts (Superscript lst) = do
+ contents <- inlineListToMarkdown opts lst
+ let contents' = text $ substitute " " "\\ " $ render contents
+ return $ char '^' <> contents' <> char '^'
+inlineToMarkdown opts (Subscript lst) = do
+ contents <- inlineListToMarkdown opts lst
+ let contents' = text $ substitute " " "\\ " $ render contents
+ return $ char '~' <> contents' <> char '~'
+inlineToMarkdown opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '\'' <> contents <> char '\''
+inlineToMarkdown opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '"' <> contents <> char '"'
+inlineToMarkdown opts EmDash = return $ text "--"
+inlineToMarkdown opts EnDash = return $ char '-'
+inlineToMarkdown opts Apostrophe = return $ char '\''
+inlineToMarkdown opts Ellipses = return $ text "..."
+inlineToMarkdown opts (Code str) =
+ let tickGroups = filter (\s -> '`' `elem` s) $ group str
+ longest = if null tickGroups
+ then 0
+ else maximum $ map length tickGroups
+ marker = replicate (longest + 1) '`'
+ spacer = if (longest == 0) then "" else " " in
+ return $ text (marker ++ spacer ++ str ++ spacer ++ marker)
+inlineToMarkdown opts (Str str) = return $ text $ escapeString str
+inlineToMarkdown opts (TeX str) = return $ text str
+inlineToMarkdown opts (HtmlInline str) = return $ text str
+inlineToMarkdown opts (LineBreak) = return $ text " \n"
+inlineToMarkdown opts Space = return $ char ' '
+inlineToMarkdown opts (Link txt (src, tit)) = do
+ linktext <- inlineListToMarkdown opts txt
+ let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\""
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let useRefLinks = writerReferenceLinks opts
+ let useAuto = null tit && txt == [Code srcSuffix]
+ ref <- if useRefLinks then getReference txt (src, tit) else return []
+ reftext <- inlineListToMarkdown opts ref
+ return $ if useAuto
+ then char '<' <> text srcSuffix <> char '>'
+ else if useRefLinks
+ then let first = char '[' <> linktext <> char ']'
+ second = if txt == ref
+ then text "[]"
+ else char '[' <> reftext <> char ']'
+ in first <> second
+ else char '[' <> linktext <> char ']' <>
+ char '(' <> text src <> linktitle <> char ')'
+inlineToMarkdown opts (Image alternate (source, tit)) = do
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
+ (alternate == [Str source]) -- to prevent autolinks
+ then [Str "image"]
+ else alternate
+ linkPart <- inlineToMarkdown opts (Link txt (source, tit))
+ return $ char '!' <> linkPart
+inlineToMarkdown opts (Note contents) = do
+ modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state
+ (notes, _) <- get
+ let ref = show $ (length notes)
+ return $ text "[^" <> text ref <> char ']'
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
new file mode 100644
index 000000000..ddcbf95c0
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -0,0 +1,325 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.RST
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to reStructuredText.
+
+reStructuredText: <http://docutils.sourceforge.net/rst.html>
+-}
+module Text.Pandoc.Writers.RST ( writeRST) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Blocks
+import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+
+type Notes = [[Block]]
+type Refs = KeyTable
+type WriterState = (Notes, Refs, Refs) -- first Refs is links, second pictures
+
+-- | Convert Pandoc to RST.
+writeRST :: WriterOptions -> Pandoc -> String
+writeRST opts document =
+ render $ evalState (pandocToRST opts document) ([],[],[])
+
+-- | Return RST representation of document.
+pandocToRST :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToRST opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ before' = if null before then empty else text before
+ after' = if null after then empty else text after
+ metaBlock <- metaToRST opts meta
+ let head = if (writerStandalone opts)
+ then metaBlock $+$ text (writerHeader opts)
+ else empty
+ body <- blockListToRST opts blocks
+ (notes, _, _) <- get
+ notes' <- notesToRST opts (reverse notes)
+ (_, refs, pics) <- get -- note that the notes may contain refs
+ refs' <- keyTableToRST opts (reverse refs)
+ pics' <- pictTableToRST opts (reverse pics)
+ return $ head $+$ before' $+$ body $+$ notes' $+$ text "" $+$ refs' $+$
+ pics' $+$ after'
+
+-- | Return RST representation of reference key table.
+keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
+keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat
+
+-- | Return RST representation of a reference key.
+keyToRST :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+keyToRST opts (label, (src, tit)) = do
+ label' <- inlineListToRST opts label
+ let label'' = if ':' `elem` (render label')
+ then char '`' <> label' <> char '`'
+ else label'
+ return $ text ".. _" <> label'' <> text ": " <> text src
+
+-- | Return RST representation of notes.
+notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToRST opts notes =
+ mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>=
+ return . vcat
+
+-- | Return RST representation of a note.
+noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToRST opts num note = do
+ contents <- blockListToRST opts note
+ let marker = text ".. [" <> text (show num) <> text "] "
+ return $ hang marker 3 contents
+
+-- | Return RST representation of picture reference table.
+pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
+pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat
+
+-- | Return RST representation of a picture substitution reference.
+pictToRST :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+pictToRST opts (label, (src, _)) = do
+ label' <- inlineListToRST opts label
+ return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <>
+ text src
+
+-- | Take list of inline elements and return wrapped doc.
+wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc
+wrappedRST opts inlines = mapM (wrapIfNeeded opts (inlineListToRST opts))
+ (splitBy LineBreak inlines) >>= return . vcat
+
+-- | Escape special characters for RST.
+escapeString :: String -> String
+escapeString = escapeStringUsing (backslashEscapes "`\\|*_")
+
+-- | Convert bibliographic information into RST header.
+metaToRST :: WriterOptions -> Meta -> State WriterState Doc
+metaToRST opts (Meta title authors date) = do
+ title' <- titleToRST opts title
+ authors' <- authorsToRST authors
+ date' <- dateToRST date
+ let toc = if writerTableOfContents opts
+ then text "" $+$ text ".. contents::"
+ else empty
+ return $ title' $+$ authors' $+$ date' $+$ toc
+
+titleToRST :: WriterOptions -> [Inline] -> State WriterState Doc
+titleToRST opts [] = return empty
+titleToRST opts lst = do
+ contents <- inlineListToRST opts lst
+ let titleLength = length $ render contents
+ let border = text (replicate titleLength '=')
+ return $ border $+$ contents $+$ border <> text "\n"
+
+authorsToRST :: [String] -> State WriterState Doc
+authorsToRST [] = return empty
+authorsToRST (first:rest) = do
+ rest' <- authorsToRST rest
+ return $ (text ":Author: " <> text first) $+$ rest'
+
+dateToRST :: String -> State WriterState Doc
+dateToRST [] = return empty
+dateToRST str = return $ text ":Date: " <> text (escapeString str)
+
+-- | Convert Pandoc block element to RST.
+blockToRST :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToRST opts Null = return empty
+blockToRST opts (Plain inlines) = wrappedRST opts inlines
+blockToRST opts (Para [TeX str]) =
+ let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
+ return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str')
+blockToRST opts (Para inlines) = do
+ contents <- wrappedRST opts inlines
+ return $ contents <> text "\n"
+blockToRST opts (RawHtml str) =
+ let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
+ return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str')
+blockToRST opts HorizontalRule = return $ text "--------------\n"
+blockToRST opts (Header level inlines) = do
+ contents <- inlineListToRST opts inlines
+ let headerLength = length $ render contents
+ let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
+ let border = text $ replicate headerLength headerChar
+ return $ contents $+$ border <> text "\n"
+blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$
+ (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
+blockToRST opts (BlockQuote blocks) = do
+ contents <- blockListToRST opts blocks
+ return $ (nest (writerTabStop opts) contents) <> text "\n"
+blockToRST opts (Table caption aligns widths headers rows) = do
+ caption' <- inlineListToRST opts caption
+ let caption'' = if null caption
+ then empty
+ else text "" $+$ (text "Table: " <> caption')
+ headers' <- mapM (blockListToRST opts) headers
+ let widthsInChars = map (floor . (78 *)) widths
+ let alignHeader alignment = case alignment of
+ AlignLeft -> leftAlignBlock
+ AlignCenter -> centerAlignBlock
+ AlignRight -> rightAlignBlock
+ AlignDefault -> leftAlignBlock
+ let hpipeBlocks blocks = hcatBlocks [beg, middle, end]
+ where height = maximum (map heightOfBlock blocks)
+ sep = TextBlock 3 height (replicate height " | ")
+ beg = TextBlock 2 height (replicate height "| ")
+ end = TextBlock 2 height (replicate height " |")
+ middle = hcatBlocks $ intersperse sep blocks
+ let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars
+ let head = makeRow headers'
+ rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row
+ return $ makeRow cols) rows
+ let tableWidth = sum widthsInChars
+ let maxRowHeight = maximum $ map heightOfBlock (head:rows')
+ let border ch = char '+' <> char ch <>
+ (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ map (\l -> text $ replicate l ch) widthsInChars) <>
+ char ch <> char '+'
+ let body = vcat $ intersperse (border '-') $ map blockToDoc rows'
+ return $ border '-' $+$ blockToDoc head $+$ border '=' $+$ body $+$
+ border '-' $$ caption'' $$ text ""
+blockToRST opts (BulletList items) = do
+ contents <- mapM (bulletListItemToRST opts) items
+ -- ensure that sublists have preceding blank line
+ return $ text "" $+$ vcat contents <> text "\n"
+blockToRST opts (OrderedList (start, style, delim) items) = do
+ let markers = if start == 1 && style == DefaultStyle && delim == DefaultDelim
+ then take (length items) $ repeat "#."
+ else take (length items) $ orderedListMarkers
+ (start, style, delim)
+ let maxMarkerLength = maximum $ map length markers
+ let markers' = map (\m -> let s = maxMarkerLength - length m
+ in m ++ replicate s ' ') markers
+ contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $
+ zip markers' items
+ -- ensure that sublists have preceding blank line
+ return $ text "" $+$ vcat contents <> text "\n"
+blockToRST opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToRST opts) items
+ return $ (vcat contents) <> text "\n"
+
+-- | Convert bullet list item (list of blocks) to RST.
+bulletListItemToRST :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToRST opts items = do
+ contents <- blockListToRST opts items
+ return $ hang (text "- ") 3 contents
+
+-- | Convert ordered list item (a list of blocks) to RST.
+orderedListItemToRST :: WriterOptions -- ^ options
+ -> String -- ^ marker for list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToRST opts marker items = do
+ contents <- blockListToRST opts items
+ return $ hang (text marker) (length marker + 1) contents
+
+-- | Convert defintion list item (label, list of blocks) to RST.
+definitionListItemToRST :: WriterOptions -> ([Inline], [Block]) -> State WriterState Doc
+definitionListItemToRST opts (label, items) = do
+ label <- inlineListToRST opts label
+ contents <- blockListToRST opts items
+ return $ label $+$ nest (writerTabStop opts) contents
+
+-- | Convert list of Pandoc block elements to RST.
+blockListToRST :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToRST opts blocks =
+ mapM (blockToRST opts) blocks >>= return . vcat
+
+-- | Convert list of Pandoc inline elements to RST.
+inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat
+
+-- | Convert Pandoc inline element to RST.
+inlineToRST :: WriterOptions -> Inline -> State WriterState Doc
+inlineToRST opts (Emph lst) = do
+ contents <- inlineListToRST opts lst
+ return $ char '*' <> contents <> char '*'
+inlineToRST opts (Strong lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "**" <> contents <> text "**"
+inlineToRST opts (Strikeout lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "[STRIKEOUT:" <> contents <> char ']'
+inlineToRST opts (Superscript lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "\\ :sup:`" <> contents <> text "`\\ "
+inlineToRST opts (Subscript lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "\\ :sub:`" <> contents <> text "`\\ "
+inlineToRST opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToRST opts lst
+ return $ char '\'' <> contents <> char '\''
+inlineToRST opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToRST opts lst
+ return $ char '"' <> contents <> char '"'
+inlineToRST opts EmDash = return $ text "--"
+inlineToRST opts EnDash = return $ char '-'
+inlineToRST opts Apostrophe = return $ char '\''
+inlineToRST opts Ellipses = return $ text "..."
+inlineToRST opts (Code str) = return $ text $ "``" ++ str ++ "``"
+inlineToRST opts (Str str) = return $ text $ escapeString str
+inlineToRST opts (TeX str) = return $ text str
+inlineToRST opts (HtmlInline str) = return empty
+inlineToRST opts (LineBreak) = return $ char ' ' -- RST doesn't have linebreaks
+inlineToRST opts Space = return $ char ' '
+inlineToRST opts (Link [Code str] (src, tit)) | src == str ||
+ src == "mailto:" ++ str = do
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ return $ text srcSuffix
+inlineToRST opts (Link txt (src, tit)) = do
+ let useReferenceLinks = writerReferenceLinks opts
+ linktext <- inlineListToRST opts $ normalizeSpaces txt
+ if useReferenceLinks
+ then do (notes, refs, pics) <- get
+ let refs' = if (txt, (src, tit)) `elem` refs
+ then refs
+ else (txt, (src, tit)):refs
+ put (notes, refs', pics)
+ return $ char '`' <> linktext <> text "`_"
+ else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_"
+inlineToRST opts (Image alternate (source, tit)) = do
+ (notes, refs, pics) <- get
+ let labelsUsed = map fst pics
+ let txt = if null alternate || alternate == [Str ""] ||
+ alternate `elem` labelsUsed
+ then [Str $ "image" ++ show (length refs)]
+ else alternate
+ let pics' = if (txt, (source, tit)) `elem` pics
+ then pics
+ else (txt, (source, tit)):pics
+ put (notes, refs, pics')
+ label <- inlineListToRST opts txt
+ return $ char '|' <> label <> char '|'
+inlineToRST opts (Note contents) = do
+ -- add to notes in state
+ modify (\(notes, refs, pics) -> (contents:notes, refs, pics))
+ (notes, _, _) <- get
+ let ref = show $ (length notes)
+ return $ text " [" <> text ref <> text "]_"
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
new file mode 100644
index 000000000..3bd5c63b2
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -0,0 +1,286 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.RTF
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to RTF (rich text format).
+-}
+module Text.Pandoc.Writers.RTF ( writeRTF ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Regex ( matchRegexAll, mkRegex )
+import Data.List ( isSuffixOf )
+import Data.Char ( ord )
+
+-- | Convert Pandoc to a string in rich text format.
+writeRTF :: WriterOptions -> Pandoc -> String
+writeRTF options (Pandoc meta blocks) =
+ let head = if writerStandalone options
+ then rtfHeader (writerHeader options) meta
+ else ""
+ toc = if writerTableOfContents options
+ then tableOfContents $ filter isHeaderBlock blocks
+ else ""
+ foot = if writerStandalone options then "\n}\n" else ""
+ body = writerIncludeBefore options ++
+ concatMap (blockToRTF 0 AlignDefault) blocks ++
+ writerIncludeAfter options
+ in head ++ toc ++ body ++ foot
+
+-- | Construct table of contents from list of header blocks.
+tableOfContents :: [Block] -> String
+tableOfContents headers =
+ let contentsTree = hierarchicalize headers
+ in concatMap (blockToRTF 0 AlignDefault) $
+ [Header 1 [Str "Contents"],
+ BulletList (map elementToListItem contentsTree)]
+
+elementToListItem :: Element -> [Block]
+elementToListItem (Blk _) = []
+elementToListItem (Sec sectext subsecs) = [Plain sectext] ++
+ if null subsecs
+ then []
+ else [BulletList (map elementToListItem subsecs)]
+
+-- | Convert unicode characters (> 127) into rich text format representation.
+handleUnicode :: String -> String
+handleUnicode [] = []
+handleUnicode (c:cs) =
+ if ord c > 127
+ then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs
+ else c:(handleUnicode cs)
+
+-- | Escape special characters.
+escapeSpecial :: String -> String
+escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}"))
+
+-- | Escape strings as needed for rich text format.
+stringToRTF :: String -> String
+stringToRTF = handleUnicode . escapeSpecial
+
+-- | Escape things as needed for code block in RTF.
+codeStringToRTF :: String -> String
+codeStringToRTF str = joinWithSep "\\line\n" $ lines (stringToRTF str)
+
+-- | Deal with raw LaTeX.
+latexToRTF :: String -> String
+latexToRTF str = "{\\cf1 " ++ (stringToRTF str) ++ "\\cf0 } "
+
+-- | Make a paragraph with first-line indent, block indent, and space after.
+rtfParSpaced :: Int -- ^ space after (in twips)
+ -> Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfParSpaced spaceAfter indent firstLineIndent alignment content =
+ let alignString = case alignment of
+ AlignLeft -> "\\ql "
+ AlignRight -> "\\qr "
+ AlignCenter -> "\\qc "
+ AlignDefault -> "\\ql "
+ in "{\\pard " ++ alignString ++
+ "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
+ " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n"
+
+-- | Default paragraph.
+rtfPar :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfPar = rtfParSpaced 180
+
+-- | Compact paragraph (e.g. for compact list items).
+rtfCompact :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfCompact = rtfParSpaced 0
+
+-- number of twips to indent
+indentIncrement = 720
+listIncrement = 360
+
+-- | Returns appropriate bullet list marker for indent level.
+bulletMarker :: Int -> String
+bulletMarker indent = case indent `mod` 720 of
+ 0 -> "\\bullet "
+ otherwise -> "\\endash "
+
+-- | Returns appropriate (list of) ordered list markers for indent level.
+orderedMarkers :: Int -> ListAttributes -> [String]
+orderedMarkers indent (start, style, delim) =
+ if style == DefaultStyle && delim == DefaultDelim
+ then case indent `mod` 720 of
+ 0 -> orderedListMarkers (start, Decimal, Period)
+ otherwise -> orderedListMarkers (start, LowerAlpha, Period)
+ else orderedListMarkers (start, style, delim)
+
+-- | Returns RTF header.
+rtfHeader :: String -- ^ header text
+ -> Meta -- ^ bibliographic information
+ -> String
+rtfHeader headerText (Meta title authors date) =
+ let titletext = if null title
+ then ""
+ else rtfPar 0 0 AlignCenter $
+ "\\b \\fs36 " ++ inlineListToRTF title
+ authorstext = if null authors
+ then ""
+ else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $
+ map stringToRTF authors))
+ datetext = if date == ""
+ then ""
+ else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
+ let spacer = if null (titletext ++ authorstext ++ datetext)
+ then ""
+ else rtfPar 0 0 AlignDefault "" in
+ headerText ++ titletext ++ authorstext ++ datetext ++ spacer
+
+-- | Convert Pandoc block element to RTF.
+blockToRTF :: Int -- ^ indent level
+ -> Alignment -- ^ alignment
+ -> Block -- ^ block to convert
+ -> String
+blockToRTF _ _ Null = ""
+blockToRTF indent alignment (Plain lst) =
+ rtfCompact indent 0 alignment $ inlineListToRTF lst
+blockToRTF indent alignment (Para lst) =
+ rtfPar indent 0 alignment $ inlineListToRTF lst
+blockToRTF indent alignment (BlockQuote lst) =
+ concatMap (blockToRTF (indent + indentIncrement) alignment) lst
+blockToRTF indent _ (CodeBlock str) =
+ rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
+blockToRTF _ _ (RawHtml str) = ""
+blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
+ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
+blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
+ zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
+blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
+ concatMap (definitionListItemToRTF alignment indent) lst
+blockToRTF indent _ HorizontalRule =
+ rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
+blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $
+ "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst
+blockToRTF indent alignment (Table caption aligns sizes headers rows) =
+ tableRowToRTF True indent aligns sizes headers ++
+ concatMap (tableRowToRTF False indent aligns sizes) rows ++
+ rtfPar indent 0 alignment (inlineListToRTF caption)
+
+tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String
+tableRowToRTF header indent aligns sizes cols =
+ let columns = concat $ zipWith (tableItemToRTF indent) aligns cols
+ totalTwips = 6 * 1440 -- 6 inches
+ rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
+ 0 sizes
+ cellDefs = map (\edge -> (if header
+ then "\\clbrdrb\\brdrs"
+ else "") ++ "\\cellx" ++ show edge)
+ rightEdges
+ start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
+ "\\trkeep\\intbl\n{\n"
+ end = "}\n\\intbl\\row}\n"
+ in start ++ columns ++ end
+
+tableItemToRTF :: Int -> Alignment -> [Block] -> String
+tableItemToRTF indent alignment item =
+ let contents = concatMap (blockToRTF indent alignment) item
+ in "{\\intbl " ++ contents ++ "\\cell}\n"
+
+-- | Ensure that there's the same amount of space after compact
+-- lists as after regular lists.
+spaceAtEnd :: String -> String
+spaceAtEnd str =
+ if isSuffixOf "\\par}\n" str
+ then (take ((length str) - 6) str) ++ "\\sa180\\par}\n"
+ else str
+
+-- | Convert list item (list of blocks) to RTF.
+listItemToRTF :: Alignment -- ^ alignment
+ -> Int -- ^ indent level
+ -> String -- ^ list start marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> [Char]
+listItemToRTF alignment indent marker [] =
+ rtfCompact (indent + listIncrement) (0 - listIncrement) alignment
+ (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
+listItemToRTF alignment indent marker list =
+ let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in
+ -- insert the list marker into the (processed) first block
+ let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of
+ Just (before, matched, after, _) ->
+ before ++ "\\fi" ++ show (0 - listIncrement) ++
+ " " ++ marker ++ "\\tx" ++
+ show listIncrement ++ "\\tab" ++ after
+ Nothing -> first in
+ modFirst ++ concat rest
+
+-- | Convert definition list item (label, list of blocks) to RTF.
+definitionListItemToRTF :: Alignment -- ^ alignment
+ -> Int -- ^ indent level
+ -> ([Inline],[Block]) -- ^ list item (list of blocks)
+ -> [Char]
+definitionListItemToRTF alignment indent (label, items) =
+ let labelText = blockToRTF indent alignment (Plain label)
+ itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) items
+ in labelText ++ itemsText
+
+-- | Convert list of inline items to RTF.
+inlineListToRTF :: [Inline] -- ^ list of inlines to convert
+ -> String
+inlineListToRTF lst = concatMap inlineToRTF lst
+
+-- | Convert inline item to RTF.
+inlineToRTF :: Inline -- ^ inline to convert
+ -> String
+inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Quoted SingleQuote lst) =
+ "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'"
+inlineToRTF (Quoted DoubleQuote lst) =
+ "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
+inlineToRTF Apostrophe = "\\u8217'"
+inlineToRTF Ellipses = "\\u8230?"
+inlineToRTF EmDash = "\\u8212-"
+inlineToRTF EnDash = "\\u8211-"
+inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} "
+inlineToRTF (Str str) = stringToRTF str
+inlineToRTF (TeX str) = latexToRTF str
+inlineToRTF (HtmlInline str) = ""
+inlineToRTF (LineBreak) = "\\line "
+inlineToRTF Space = " "
+inlineToRTF (Link text (src, tit)) =
+ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
+ "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n"
+inlineToRTF (Image alternate (source, tit)) =
+ "{\\cf1 [image: " ++ source ++ "]\\cf0}"
+inlineToRTF (Note contents) =
+ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
+ (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"
diff --git a/src/headers/ConTeXtHeader b/src/headers/ConTeXtHeader
new file mode 100644
index 000000000..41648081c
--- /dev/null
+++ b/src/headers/ConTeXtHeader
@@ -0,0 +1,61 @@
+\enableregime[utf] % use UTF-8
+
+\setupcolors[state=start]
+\setupinteraction[state=start, color=middlered] % needed for hyperlinks
+
+\setuppapersize[letter][letter] % use letter paper
+\setuplayout[width=middle, backspace=1.5in, cutspace=1.5in,
+ height=middle, header=0.75in, footer=0.75in] % page layout
+\setuppagenumbering[location={footer,center}] % number pages
+\setupbodyfont[11pt] % 11pt font
+\setupwhitespace[medium] % inter-paragraph spacing
+
+\setuphead[section][style=\tfc]
+\setuphead[subsection][style=\tfb]
+\setuphead[subsubsection][style=\bf]
+
+% define title block commands
+\unprotect
+\def\doctitle#1{\gdef\@title{#1}}
+\def\author#1{\gdef\@author{#1}}
+\def\date#1{\gdef\@date{#1}}
+\date{\currentdate} % Default to today unless specified otherwise.
+\def\maketitle{%
+ \startalignment[center]
+ \blank[2*big]
+ {\tfd \@title}
+ \blank[3*medium]
+ {\tfa \@author}
+ \blank[2*medium]
+ {\tfa \@date}
+ \blank[3*medium]
+ \stopalignment}
+\protect
+
+% define descr (for definition lists)
+\definedescription[descr][
+ headstyle=bold,style=normal,align=left,location=hanging,
+ width=broad,margin=1cm]
+
+% define ltxitem (for bulleted lists)
+\defineitemgroup[ltxitem][levels=4]
+\setupitemgroup[ltxitem][1][1]
+\setupitemgroup[ltxitem][2][2]
+\setupitemgroup[ltxitem][3][3]
+\setupitemgroup[ltxitem][4][4,packed]
+
+% define ltxenum (for enumerated lists)
+\defineitemgroup[ltxenum][levels=4]
+\setupitemgroup[ltxenum][1][n]
+\setupitemgroup[ltxenum][2][a]
+\setupitemgroup[ltxenum][3][r]
+\setupitemgroup[ltxenum][4][A,packed]
+
+\setupthinrules[width=15em] % width of horizontal rules
+
+% for block quotations
+\definestartstop [blockquote]
+ [before={\startnarrower\switchtobodyfont[11pt]
+ \whitespace\setupindenting[no]},
+ after={\stopnarrower\whitespace}]
+
diff --git a/src/headers/DocbookHeader b/src/headers/DocbookHeader
new file mode 100644
index 000000000..7b26b2c73
--- /dev/null
+++ b/src/headers/DocbookHeader
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+ "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
diff --git a/src/headers/LaTeXHeader b/src/headers/LaTeXHeader
new file mode 100644
index 000000000..d891b5f63
--- /dev/null
+++ b/src/headers/LaTeXHeader
@@ -0,0 +1,5 @@
+\documentclass{article}
+\usepackage[mathletters]{ucs}
+\usepackage[utf8x]{inputenc}
+\setlength{\parindent}{0pt}
+\setlength{\parskip}{6pt plus 2pt minus 1pt}
diff --git a/src/headers/RTFHeader b/src/headers/RTFHeader
new file mode 100644
index 000000000..b4368694b
--- /dev/null
+++ b/src/headers/RTFHeader
@@ -0,0 +1,4 @@
+{\rtf1\ansi\deff0{\fonttbl{\f0 \fswiss Helvetica;}{\f1 Courier;}}
+{\colortbl;\red255\green0\blue0;\red0\green0\blue255;}
+\widowctrl\hyphauto
+
diff --git a/src/headers/S5Header b/src/headers/S5Header
new file mode 100644
index 000000000..ebb24ebe2
--- /dev/null
+++ b/src/headers/S5Header
@@ -0,0 +1,3 @@
+<!-- configuration parameters -->
+<meta name="defaultView" content="slideshow" />
+<meta name="controlVis" content="hidden" />
diff --git a/src/templates/ASCIIMathML.hs b/src/templates/ASCIIMathML.hs
new file mode 100644
index 000000000..1d04c6ff7
--- /dev/null
+++ b/src/templates/ASCIIMathML.hs
@@ -0,0 +1,7 @@
+-- | Definitions for use of ASCIIMathML in HTML.
+-- (See <http://www1.chapman.edu/~jipsen/mathml/asciimath.html>.)
+module Text.Pandoc.ASCIIMathML ( asciiMathMLScript ) where
+
+-- | String containing ASCIIMathML javascript.
+asciiMathMLScript :: String
+asciiMathMLScript = "<script type=\"text/javascript\">\n@ASCIIMathML.js@</script>\n"
diff --git a/src/templates/DefaultHeaders.hs b/src/templates/DefaultHeaders.hs
new file mode 100644
index 000000000..1bd9fe1d2
--- /dev/null
+++ b/src/templates/DefaultHeaders.hs
@@ -0,0 +1,52 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.DefaultHeaders
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Default headers for Pandoc writers.
+-}
+module Text.Pandoc.Writers.DefaultHeaders (
+ defaultLaTeXHeader,
+ defaultConTeXtHeader,
+ defaultDocbookHeader,
+ defaultS5Header,
+ defaultRTFHeader
+ ) where
+import Text.Pandoc.Writers.S5
+
+defaultLaTeXHeader :: String
+defaultLaTeXHeader = "@LaTeXHeader@"
+
+defaultConTeXtHeader :: String
+defaultConTeXtHeader = "@ConTeXtHeader@"
+
+defaultDocbookHeader :: String
+defaultDocbookHeader = "@DocbookHeader@"
+
+defaultS5Header :: String
+defaultS5Header = s5Meta ++ s5CSS ++ s5Javascript
+
+defaultRTFHeader :: String
+defaultRTFHeader = "@RTFHeader@"
diff --git a/src/templates/Makefile b/src/templates/Makefile
new file mode 100644
index 000000000..9522666c5
--- /dev/null
+++ b/src/templates/Makefile
@@ -0,0 +1,20 @@
+VPATH := ..
+PROCESSOR := ./fillTemplates.pl
+TARGETS := Text/Pandoc/ASCIIMathML.hs \
+ Text/Pandoc/Writers/S5.hs \
+ Text/Pandoc/Writers/DefaultHeaders.hs
+
+all: $(TARGETS)
+
+Text/Pandoc/ASCIIMathML.hs: ASCIIMathML.hs $(PROCESSOR) $(VPATH)/ASCIIMathML.js
+ perl $(PROCESSOR) $@ $(VPATH)
+
+Text/Pandoc/Writers/S5.hs: S5.hs $(PROCESSOR) $(VPATH)/ui/default/*
+ perl $(PROCESSOR) $@ $(VPATH)
+
+Text/Pandoc/Writers/DefaultHeaders.hs: DefaultHeaders.hs $(PROCESSOR) $(VPATH)/headers/*
+ perl $(PROCESSOR) $@ $(VPATH)
+
+.PHONY: clean
+clean:
+ cd $(VPATH); rm -f $(TARGETS)
diff --git a/src/templates/S5.hs b/src/templates/S5.hs
new file mode 100644
index 000000000..a0b69b132
--- /dev/null
+++ b/src/templates/S5.hs
@@ -0,0 +1,133 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.S5
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definitions for creation of S5 powerpoint-like HTML.
+(See <http://meyerweb.com/eric/tools/s5/>.)
+-}
+module Text.Pandoc.Writers.S5 (
+ -- * Strings
+ s5Meta,
+ s5Javascript,
+ s5CSS,
+ s5Links,
+ -- * Functions
+ writeS5,
+ writeS5String,
+ insertS5Structure
+ ) where
+import Text.Pandoc.Shared ( joinWithSep, WriterOptions )
+import Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString )
+import Text.Pandoc.Definition
+import Text.XHtml.Strict
+
+s5Meta :: String
+s5Meta = "<!-- configuration parameters -->\n<meta name=\"defaultView\" content=\"slideshow\" />\n<meta name=\"controlVis\" content=\"hidden\" />\n"
+
+s5Javascript :: String
+s5Javascript = "<script type=\"text/javascript\">\n@slides.js@</script>\n"
+
+s5CoreCSS :: String
+s5CoreCSS = "@s5-core.css@"
+
+s5FramingCSS :: String
+s5FramingCSS = "@framing.css@"
+
+s5PrettyCSS :: String
+s5PrettyCSS = "@pretty.css@"
+
+s5OperaCSS :: String
+s5OperaCSS = "@opera.css@"
+
+s5OutlineCSS :: String
+s5OutlineCSS = "@outline.css@"
+
+s5PrintCSS :: String
+s5PrintCSS = "@print.css@"
+
+s5CSS :: String
+s5CSS = "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n"
+
+s5Links :: String
+s5Links = "<!-- style sheet links -->\n<link rel=\"stylesheet\" href=\"ui/default/slides.css\" type=\"text/css\" media=\"projection\" id=\"slideProj\" />\n<link rel=\"stylesheet\" href=\"ui/default/outline.css\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" />\n<link rel=\"stylesheet\" href=\"ui/default/print.css\" type=\"text/css\" media=\"print\" id=\"slidePrint\" />\n<link rel=\"stylesheet\" href=\"ui/default/opera.css\" type=\"text/css\" media=\"projection\" id=\"operaFix\" />\n<!-- S5 JS -->\n<script src=\"ui/default/slides.js\" type=\"text/javascript\"></script>\n"
+
+-- | Converts Pandoc document to an S5 HTML presentation (Html structure).
+writeS5 :: WriterOptions -> Pandoc -> Html
+writeS5 options = (writeHtml options) . insertS5Structure
+
+-- | Converts Pandoc document to an S5 HTML presentation (string).
+writeS5String :: WriterOptions -> Pandoc -> String
+writeS5String options = (writeHtmlString options) . insertS5Structure
+
+-- | Inserts HTML needed for an S5 presentation (e.g. around slides).
+layoutDiv :: [Inline] -- ^ Title of document (for header or footer)
+ -> String -- ^ Date of document (for header or footer)
+ -> [Block] -- ^ List of block elements returned
+layoutDiv title date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 [Str date]), (Header 2 title), (RawHtml "</div>\n</div>\n")]
+
+presentationStart = RawHtml "<div class=\"presentation\">\n\n"
+
+presentationEnd = RawHtml "</div>\n"
+
+slideStart = RawHtml "<div class=\"slide\">\n"
+
+slideEnd = RawHtml "</div>\n"
+
+-- | Returns 'True' if block is a Header 1.
+isH1 :: Block -> Bool
+isH1 (Header 1 _) = True
+isH1 _ = False
+
+-- | Insert HTML around sections to make individual slides.
+insertSlides :: Bool -> [Block] -> [Block]
+insertSlides beginning blocks =
+ let (beforeHead, rest) = break isH1 blocks in
+ if (null rest) then
+ if beginning then
+ beforeHead
+ else
+ beforeHead ++ [slideEnd]
+ else
+ if beginning then
+ beforeHead ++
+ slideStart:(head rest):(insertSlides False (tail rest))
+ else
+ beforeHead ++
+ slideEnd:slideStart:(head rest):(insertSlides False (tail rest))
+
+-- | Insert blocks into 'Pandoc' for slide structure.
+insertS5Structure :: Pandoc -> Pandoc
+insertS5Structure (Pandoc meta []) = Pandoc meta []
+insertS5Structure (Pandoc (Meta title authors date) blocks) =
+ let slides = insertSlides True blocks
+ firstSlide = if not (null title)
+ then [slideStart, (Header 1 title),
+ (Header 3 [Str (joinWithSep ", " authors)]),
+ (Header 4 [Str date]), slideEnd]
+ else []
+ newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++
+ slides ++ [presentationEnd]
+ in Pandoc (Meta title authors date) newBlocks
diff --git a/src/templates/fillTemplates.pl b/src/templates/fillTemplates.pl
new file mode 100755
index 000000000..85e6eaa18
--- /dev/null
+++ b/src/templates/fillTemplates.pl
@@ -0,0 +1,131 @@
+#!/usr/bin/env perl
+# Fills templates with haskell-escaped strings slurped from input files.
+# Takes two arguments, the first specifying the pathname of the target
+# relative to the root directory, the second specifying the root directory
+# (defaulting to ..). The template is assumed to have the same base name
+# as the target and to reside in the templates subdirectory of the root
+# directory.
+
+use strict;
+use warnings;
+
+# Utility routines:
+
+sub slurp {
+ open FILE, $_[0] or die "couldn't open file '$_[0]': $!";
+ my $contents = do { local $/; <FILE>;};
+ close FILE;
+ return $contents;
+}
+
+sub escape_for_haskell {
+ my ($contents) = @_;
+
+ $contents =~ s/\\/\\\\/g;
+ $contents =~ s/\t/\\t/g;
+ $contents =~ s/"/\\"/g;
+ $contents =~ s/\n/\\n/g;
+ return $contents;
+}
+
+# Template processors.
+
+my %processor = (
+ # --------------------------------------------------------------------------
+ 'Text/Pandoc/Writers/S5.hs' => {
+ # --------------------------------------------------------------------------
+ proc => sub {
+ my ($template) = @_;
+
+ my (@files) = qw(slides.js s5-core.css framing.css pretty.css
+ opera.css outline.css print.css);
+
+ foreach my $file (@files) {
+ my $replacement = escape_for_haskell(slurp "ui/default/$file");
+ my $escapedfile = $file;
+ $escapedfile =~ s/\./\\./g;
+ $template =~ s/\@$escapedfile\@/$replacement/;
+ }
+ return $template;
+ },
+ },
+ # --------------------------------------------------------------------------
+ 'Text/Pandoc/ASCIIMathML.hs' => {
+ # --------------------------------------------------------------------------
+ proc => sub {
+ my ($template) = @_;
+
+ my $script = escape_for_haskell(slurp "ASCIIMathML.js");
+ my $acknowledgements =
+ " ASCIIMathML.js - copyright Peter Jipsen,".
+ " released under the GPL\\nSee ".
+ "http://www1.chapman.edu/~jipsen/mathml/asciimath.html/ ";
+ $script =~ s/\/\*.*?\*\//\/\*$acknowledgements\*\//g; # strip comments
+ $template =~ s/\@ASCIIMathML\.js@/$script/;
+
+ return $template;
+ },
+ },
+ # --------------------------------------------------------------------------
+ 'Text/Pandoc/Writers/DefaultHeaders.hs' => {
+ # --------------------------------------------------------------------------
+ proc => sub {
+ my ($template) = @_;
+
+ my (@headers) = split(/\s/,`ls headers`);
+ foreach my $header (@headers) {
+ my ($replacement) = escape_for_haskell(slurp "headers/$header");
+ $template =~ s/\@$header\@/$replacement/;
+ }
+
+ return $template;
+ },
+ },
+ # --------------------------------------------------------------------------
+ # 'foo/bar/baz' => {
+ # --------------------------------------------------------------------------
+ # template => 'optional-template-filename-defaults-to-baz'
+ # proc => sub {
+ # my ($template) = @_;
+ # # Process.
+ # return $template;
+ # },
+ #},
+);
+
+# Main.
+
+my $target = shift @ARGV;
+if (!defined $target || !length $target) {
+ print STDERR "Available targets:\n\n" . join "\n", keys %processor;
+ die "\n\nYou must supply a target!\n";
+}
+
+die "No processor exists for '$target'!\n" if ! exists $processor{$target};
+
+my $rootdir = shift @ARGV || '..';
+chdir $rootdir or die "Couldn't chdir to '$rootdir': $!";
+
+my $template;
+if (exists $processor{$target}->{template}) {
+ $template = $processor{$target}->{template};
+}
+else {
+ ($template = $target) =~ s!.*/+!!;
+}
+$template = "templates/$template";
+die "No template exists for '$target'!\n" if ! -f "$template";
+
+open OUTFILE, ">$target" or die "couldn't open file '$target': $!";
+print OUTFILE <<END;
+----------------------------------------------------
+-- Do not edit this file by hand. Edit
+-- '$template'
+-- and run $0 $target
+----------------------------------------------------
+
+END
+
+print OUTFILE $processor{$target}->{proc}->(slurp($template));
+print OUTFILE "\n";
+close OUTFILE;
diff --git a/src/ui/default/blank.gif b/src/ui/default/blank.gif
new file mode 100644
index 000000000..75b945d25
--- /dev/null
+++ b/src/ui/default/blank.gif
Binary files differ
diff --git a/src/ui/default/bodybg.gif b/src/ui/default/bodybg.gif
new file mode 100644
index 000000000..5f448a16f
--- /dev/null
+++ b/src/ui/default/bodybg.gif
Binary files differ
diff --git a/src/ui/default/framing.css b/src/ui/default/framing.css
new file mode 100644
index 000000000..14d8509e9
--- /dev/null
+++ b/src/ui/default/framing.css
@@ -0,0 +1,23 @@
+/* The following styles size, place, and layer the slide components.
+ Edit these if you want to change the overall slide layout.
+ The commented lines can be uncommented (and modified, if necessary)
+ to help you with the rearrangement process. */
+
+/* target = 1024x768 */
+
+div#header, div#footer, .slide {width: 100%; top: 0; left: 0;}
+div#header {top: 0; height: 3em; z-index: 1;}
+div#footer {top: auto; bottom: 0; height: 2.5em; z-index: 5;}
+.slide {top: 0; width: 92%; padding: 3.5em 4% 4%; z-index: 2; list-style: none;}
+div#controls {left: 50%; bottom: 0; width: 50%; z-index: 100;}
+div#controls form {position: absolute; bottom: 0; right: 0; width: 100%;
+ margin: 0;}
+#currentSlide {position: absolute; width: 10%; left: 45%; bottom: 1em; z-index: 10;}
+html>body #currentSlide {position: fixed;}
+
+/*
+div#header {background: #FCC;}
+div#footer {background: #CCF;}
+div#controls {background: #BBD;}
+div#currentSlide {background: #FFC;}
+*/
diff --git a/src/ui/default/iepngfix.htc b/src/ui/default/iepngfix.htc
new file mode 100644
index 000000000..bba2db756
--- /dev/null
+++ b/src/ui/default/iepngfix.htc
@@ -0,0 +1,42 @@
+<public:component>
+<public:attach event="onpropertychange" onevent="doFix()" />
+
+<script>
+
+// IE5.5+ PNG Alpha Fix v1.0 by Angus Turnbull http://www.twinhelix.com
+// Free usage permitted as long as this notice remains intact.
+
+// This must be a path to a blank image. That's all the configuration you need here.
+var blankImg = 'ui/default/blank.gif';
+
+var f = 'DXImageTransform.Microsoft.AlphaImageLoader';
+
+function filt(s, m) {
+ if (filters[f]) {
+ filters[f].enabled = s ? true : false;
+ if (s) with (filters[f]) { src = s; sizingMethod = m }
+ } else if (s) style.filter = 'progid:'+f+'(src="'+s+'",sizingMethod="'+m+'")';
+}
+
+function doFix() {
+ if ((parseFloat(navigator.userAgent.match(/MSIE (\S+)/)[1]) < 5.5) ||
+ (event && !/(background|src)/.test(event.propertyName))) return;
+
+ if (tagName == 'IMG') {
+ if ((/\.png$/i).test(src)) {
+ filt(src, 'image'); // was 'scale'
+ src = blankImg;
+ } else if (src.indexOf(blankImg) < 0) filt();
+ } else if (style.backgroundImage) {
+ if (style.backgroundImage.match(/^url[("']+(.*\.png)[)"']+$/i)) {
+ var s = RegExp.$1;
+ style.backgroundImage = '';
+ filt(s, 'crop');
+ } else filt();
+ }
+}
+
+doFix();
+
+</script>
+</public:component> \ No newline at end of file
diff --git a/src/ui/default/opera.css b/src/ui/default/opera.css
new file mode 100644
index 000000000..9e9d2a3c5
--- /dev/null
+++ b/src/ui/default/opera.css
@@ -0,0 +1,7 @@
+/* DO NOT CHANGE THESE unless you really want to break Opera Show */
+.slide {
+ visibility: visible !important;
+ position: static !important;
+ page-break-before: always;
+}
+#slide0 {page-break-before: avoid;}
diff --git a/src/ui/default/outline.css b/src/ui/default/outline.css
new file mode 100644
index 000000000..62db519ed
--- /dev/null
+++ b/src/ui/default/outline.css
@@ -0,0 +1,15 @@
+/* don't change this unless you want the layout stuff to show up in the outline view! */
+
+.layout div, #footer *, #controlForm * {display: none;}
+#footer, #controls, #controlForm, #navLinks, #toggle {
+ display: block; visibility: visible; margin: 0; padding: 0;}
+#toggle {float: right; padding: 0.5em;}
+html>body #toggle {position: fixed; top: 0; right: 0;}
+
+/* making the outline look pretty-ish */
+
+#slide0 h1, #slide0 h2, #slide0 h3, #slide0 h4 {border: none; margin: 0;}
+#slide0 h1 {padding-top: 1.5em;}
+.slide h1 {margin: 1.5em 0 0; padding-top: 0.25em;
+ border-top: 1px solid #888; border-bottom: 1px solid #AAA;}
+#toggle {border: 1px solid; border-width: 0 0 1px 1px; background: #FFF;}
diff --git a/src/ui/default/pretty.css b/src/ui/default/pretty.css
new file mode 100644
index 000000000..3d3acefff
--- /dev/null
+++ b/src/ui/default/pretty.css
@@ -0,0 +1,86 @@
+/* Following are the presentation styles -- edit away! */
+
+body {background: #FFF url(bodybg.gif) -16px 0 no-repeat; color: #000; font-size: 2em;}
+:link, :visited {text-decoration: none; color: #00C;}
+#controls :active {color: #88A !important;}
+#controls :focus {outline: 1px dotted #227;}
+h1, h2, h3, h4 {font-size: 100%; margin: 0; padding: 0; font-weight: inherit;}
+ul, pre {margin: 0; line-height: 1em;}
+html, body {margin: 0; padding: 0;}
+
+blockquote, q {font-style: italic;}
+blockquote {padding: 0 2em 0.5em; margin: 0 1.5em 0.5em; text-align: center; font-size: 1em;}
+blockquote p {margin: 0;}
+blockquote i {font-style: normal;}
+blockquote b {display: block; margin-top: 0.5em; font-weight: normal; font-size: smaller; font-style: normal;}
+blockquote b i {font-style: italic;}
+
+kbd {font-weight: bold; font-size: 1em;}
+sup {font-size: smaller; line-height: 1px;}
+
+.slide code {padding: 2px 0.25em; font-weight: bold; color: #533;}
+.slide code.bad, code del {color: red;}
+.slide code.old {color: silver;}
+.slide pre {padding: 0; margin: 0.25em 0 0.5em 0.5em; color: #533; font-size: 90%;}
+.slide pre code {display: block;}
+.slide ul {margin-left: 5%; margin-right: 7%; list-style: disc;}
+.slide li {margin-top: 0.75em; margin-right: 0;}
+.slide ul ul {line-height: 1;}
+.slide ul ul li {margin: .2em; font-size: 85%; list-style: square;}
+.slide img.leader {display: block; margin: 0 auto;}
+
+div#header, div#footer {background: #005; color: #AAB;
+ font-family: Verdana, Helvetica, sans-serif;}
+div#header {background: #005 url(bodybg.gif) -16px 0 no-repeat;
+ line-height: 1px;}
+div#footer {font-size: 0.5em; font-weight: bold; padding: 1em 0;}
+#footer h1, #footer h2 {display: block; padding: 0 1em;}
+#footer h2 {font-style: italic;}
+
+div.long {font-size: 0.75em;}
+.slide h1 {position: absolute; top: 0.7em; left: 87px; z-index: 1;
+ margin: 0; padding: 0.3em 0 0 50px; white-space: nowrap;
+ font: bold 150%/1em Helvetica, sans-serif; text-transform: capitalize;
+ color: #DDE; background: #005;}
+.slide h3 {font-size: 130%;}
+h1 abbr {font-variant: small-caps;}
+
+div#controls {position: absolute; left: 50%; bottom: 0;
+ width: 50%;
+ text-align: right; font: bold 0.9em Verdana, Helvetica, sans-serif;}
+html>body div#controls {position: fixed; padding: 0 0 1em 0;
+ top: auto;}
+div#controls form {position: absolute; bottom: 0; right: 0; width: 100%;
+ margin: 0; padding: 0;}
+#controls #navLinks a {padding: 0; margin: 0 0.5em;
+ background: #005; border: none; color: #779;
+ cursor: pointer;}
+#controls #navList {height: 1em;}
+#controls #navList #jumplist {position: absolute; bottom: 0; right: 0; background: #DDD; color: #227;}
+
+#currentSlide {text-align: center; font-size: 0.5em; color: #449;}
+
+#slide0 {padding-top: 3.5em; font-size: 90%;}
+#slide0 h1 {position: static; margin: 1em 0 0; padding: 0;
+ font: bold 2em Helvetica, sans-serif; white-space: normal;
+ color: #000; background: transparent;}
+#slide0 h2 {font: bold italic 1em Helvetica, sans-serif; margin: 0.25em;}
+#slide0 h3 {margin-top: 1.5em; font-size: 1.5em;}
+#slide0 h4 {margin-top: 0; font-size: 1em;}
+
+ul.urls {list-style: none; display: inline; margin: 0;}
+.urls li {display: inline; margin: 0;}
+.note {display: none;}
+.external {border-bottom: 1px dotted gray;}
+html>body .external {border-bottom: none;}
+.external:after {content: " \274F"; font-size: smaller; color: #77B;}
+
+.incremental, .incremental *, .incremental *:after {color: #DDE; visibility: visible;}
+img.incremental {visibility: hidden;}
+.slide .current {color: #B02;}
+
+
+/* diagnostics
+
+li:after {content: " [" attr(class) "]"; color: #F88;}
+ */ \ No newline at end of file
diff --git a/src/ui/default/print.css b/src/ui/default/print.css
new file mode 100644
index 000000000..4a3554ddd
--- /dev/null
+++ b/src/ui/default/print.css
@@ -0,0 +1,24 @@
+/* The following rule is necessary to have all slides appear in print! DO NOT REMOVE IT! */
+.slide, ul {page-break-inside: avoid; visibility: visible !important;}
+h1 {page-break-after: avoid;}
+
+body {font-size: 12pt; background: white;}
+* {color: black;}
+
+#slide0 h1 {font-size: 200%; border: none; margin: 0.5em 0 0.25em;}
+#slide0 h3 {margin: 0; padding: 0;}
+#slide0 h4 {margin: 0 0 0.5em; padding: 0;}
+#slide0 {margin-bottom: 3em;}
+
+h1 {border-top: 2pt solid gray; border-bottom: 1px dotted silver;}
+.extra {background: transparent !important;}
+div.extra, pre.extra, .example {font-size: 10pt; color: #333;}
+ul.extra a {font-weight: bold;}
+p.example {display: none;}
+
+#header {display: none;}
+#footer h1 {margin: 0; border-bottom: 1px solid; color: gray; font-style: italic;}
+#footer h2, #controls {display: none;}
+
+/* The following rule keeps the layout stuff out of print. Remove at your own risk! */
+.layout, .layout * {display: none !important;}
diff --git a/src/ui/default/s5-core.css b/src/ui/default/s5-core.css
new file mode 100644
index 000000000..86444e041
--- /dev/null
+++ b/src/ui/default/s5-core.css
@@ -0,0 +1,9 @@
+/* Do not edit or override these styles! The system will likely break if you do. */
+
+div#header, div#footer, div#controls, .slide {position: absolute;}
+html>body div#header, html>body div#footer,
+ html>body div#controls, html>body .slide {position: fixed;}
+.handout {display: none;}
+.layout {display: block;}
+.slide, .hideme, .incremental {visibility: hidden;}
+#slide0 {visibility: visible;}
diff --git a/src/ui/default/slides.css b/src/ui/default/slides.css
new file mode 100644
index 000000000..0786d7dbd
--- /dev/null
+++ b/src/ui/default/slides.css
@@ -0,0 +1,3 @@
+@import url(s5-core.css); /* required to make the slide show run at all */
+@import url(framing.css); /* sets basic placement and size of slide components */
+@import url(pretty.css); /* stuff that makes the slides look better than blah */ \ No newline at end of file
diff --git a/src/ui/default/slides.js b/src/ui/default/slides.js
new file mode 100644
index 000000000..38fe8531c
--- /dev/null
+++ b/src/ui/default/slides.js
@@ -0,0 +1,553 @@
+// S5 v1.1 slides.js -- released into the Public Domain
+//
+// Please see http://www.meyerweb.com/eric/tools/s5/credits.html for information
+// about all the wonderful and talented contributors to this code!
+
+var undef;
+var slideCSS = '';
+var snum = 0;
+var smax = 1;
+var incpos = 0;
+var number = undef;
+var s5mode = true;
+var defaultView = 'slideshow';
+var controlVis = 'visible';
+
+var isIE = navigator.appName == 'Microsoft Internet Explorer' && navigator.userAgent.indexOf('Opera') < 1 ? 1 : 0;
+var isOp = navigator.userAgent.indexOf('Opera') > -1 ? 1 : 0;
+var isGe = navigator.userAgent.indexOf('Gecko') > -1 && navigator.userAgent.indexOf('Safari') < 1 ? 1 : 0;
+
+function hasClass(object, className) {
+ if (!object.className) return false;
+ return (object.className.search('(^|\\s)' + className + '(\\s|$)') != -1);
+}
+
+function hasValue(object, value) {
+ if (!object) return false;
+ return (object.search('(^|\\s)' + value + '(\\s|$)') != -1);
+}
+
+function removeClass(object,className) {
+ if (!object) return;
+ object.className = object.className.replace(new RegExp('(^|\\s)'+className+'(\\s|$)'), RegExp.$1+RegExp.$2);
+}
+
+function addClass(object,className) {
+ if (!object || hasClass(object, className)) return;
+ if (object.className) {
+ object.className += ' '+className;
+ } else {
+ object.className = className;
+ }
+}
+
+function GetElementsWithClassName(elementName,className) {
+ var allElements = document.getElementsByTagName(elementName);
+ var elemColl = new Array();
+ for (var i = 0; i< allElements.length; i++) {
+ if (hasClass(allElements[i], className)) {
+ elemColl[elemColl.length] = allElements[i];
+ }
+ }
+ return elemColl;
+}
+
+function isParentOrSelf(element, id) {
+ if (element == null || element.nodeName=='BODY') return false;
+ else if (element.id == id) return true;
+ else return isParentOrSelf(element.parentNode, id);
+}
+
+function nodeValue(node) {
+ var result = "";
+ if (node.nodeType == 1) {
+ var children = node.childNodes;
+ for (var i = 0; i < children.length; ++i) {
+ result += nodeValue(children[i]);
+ }
+ }
+ else if (node.nodeType == 3) {
+ result = node.nodeValue;
+ }
+ return(result);
+}
+
+function slideLabel() {
+ var slideColl = GetElementsWithClassName('*','slide');
+ var list = document.getElementById('jumplist');
+ smax = slideColl.length;
+ for (var n = 0; n < smax; n++) {
+ var obj = slideColl[n];
+
+ var did = 'slide' + n.toString();
+ obj.setAttribute('id',did);
+ if (isOp) continue;
+
+ var otext = '';
+ var menu = obj.firstChild;
+ if (!menu) continue; // to cope with empty slides
+ while (menu && menu.nodeType == 3) {
+ menu = menu.nextSibling;
+ }
+ if (!menu) continue; // to cope with slides with only text nodes
+
+ var menunodes = menu.childNodes;
+ for (var o = 0; o < menunodes.length; o++) {
+ otext += nodeValue(menunodes[o]);
+ }
+ list.options[list.length] = new Option(n + ' : ' + otext, n);
+ }
+}
+
+function currentSlide() {
+ var cs;
+ if (document.getElementById) {
+ cs = document.getElementById('currentSlide');
+ } else {
+ cs = document.currentSlide;
+ }
+ cs.innerHTML = '<span id="csHere">' + snum + '<\/span> ' +
+ '<span id="csSep">\/<\/span> ' +
+ '<span id="csTotal">' + (smax-1) + '<\/span>';
+ if (snum == 0) {
+ cs.style.visibility = 'hidden';
+ } else {
+ cs.style.visibility = 'visible';
+ }
+}
+
+function go(step) {
+ if (document.getElementById('slideProj').disabled || step == 0) return;
+ var jl = document.getElementById('jumplist');
+ var cid = 'slide' + snum;
+ var ce = document.getElementById(cid);
+ if (incrementals[snum].length > 0) {
+ for (var i = 0; i < incrementals[snum].length; i++) {
+ removeClass(incrementals[snum][i], 'current');
+ removeClass(incrementals[snum][i], 'incremental');
+ }
+ }
+ if (step != 'j') {
+ snum += step;
+ lmax = smax - 1;
+ if (snum > lmax) snum = lmax;
+ if (snum < 0) snum = 0;
+ } else
+ snum = parseInt(jl.value);
+ var nid = 'slide' + snum;
+ var ne = document.getElementById(nid);
+ if (!ne) {
+ ne = document.getElementById('slide0');
+ snum = 0;
+ }
+ if (step < 0) {incpos = incrementals[snum].length} else {incpos = 0;}
+ if (incrementals[snum].length > 0 && incpos == 0) {
+ for (var i = 0; i < incrementals[snum].length; i++) {
+ if (hasClass(incrementals[snum][i], 'current'))
+ incpos = i + 1;
+ else
+ addClass(incrementals[snum][i], 'incremental');
+ }
+ }
+ if (incrementals[snum].length > 0 && incpos > 0)
+ addClass(incrementals[snum][incpos - 1], 'current');
+ ce.style.visibility = 'hidden';
+ ne.style.visibility = 'visible';
+ jl.selectedIndex = snum;
+ currentSlide();
+ number = 0;
+}
+
+function goTo(target) {
+ if (target >= smax || target == snum) return;
+ go(target - snum);
+}
+
+function subgo(step) {
+ if (step > 0) {
+ removeClass(incrementals[snum][incpos - 1],'current');
+ removeClass(incrementals[snum][incpos], 'incremental');
+ addClass(incrementals[snum][incpos],'current');
+ incpos++;
+ } else {
+ incpos--;
+ removeClass(incrementals[snum][incpos],'current');
+ addClass(incrementals[snum][incpos], 'incremental');
+ addClass(incrementals[snum][incpos - 1],'current');
+ }
+}
+
+function toggle() {
+ var slideColl = GetElementsWithClassName('*','slide');
+ var slides = document.getElementById('slideProj');
+ var outline = document.getElementById('outlineStyle');
+ if (!slides.disabled) {
+ slides.disabled = true;
+ outline.disabled = false;
+ s5mode = false;
+ fontSize('1em');
+ for (var n = 0; n < smax; n++) {
+ var slide = slideColl[n];
+ slide.style.visibility = 'visible';
+ }
+ } else {
+ slides.disabled = false;
+ outline.disabled = true;
+ s5mode = true;
+ fontScale();
+ for (var n = 0; n < smax; n++) {
+ var slide = slideColl[n];
+ slide.style.visibility = 'hidden';
+ }
+ slideColl[snum].style.visibility = 'visible';
+ }
+}
+
+function showHide(action) {
+ var obj = GetElementsWithClassName('*','hideme')[0];
+ switch (action) {
+ case 's': obj.style.visibility = 'visible'; break;
+ case 'h': obj.style.visibility = 'hidden'; break;
+ case 'k':
+ if (obj.style.visibility != 'visible') {
+ obj.style.visibility = 'visible';
+ } else {
+ obj.style.visibility = 'hidden';
+ }
+ break;
+ }
+}
+
+// 'keys' code adapted from MozPoint (http://mozpoint.mozdev.org/)
+function keys(key) {
+ if (!key) {
+ key = event;
+ key.which = key.keyCode;
+ }
+ if (key.which == 84) {
+ toggle();
+ return;
+ }
+ if (s5mode) {
+ switch (key.which) {
+ case 10: // return
+ case 13: // enter
+ if (window.event && isParentOrSelf(window.event.srcElement, 'controls')) return;
+ if (key.target && isParentOrSelf(key.target, 'controls')) return;
+ if(number != undef) {
+ goTo(number);
+ break;
+ }
+ case 32: // spacebar
+ case 34: // page down
+ case 39: // rightkey
+ case 40: // downkey
+ if(number != undef) {
+ go(number);
+ } else if (!incrementals[snum] || incpos >= incrementals[snum].length) {
+ go(1);
+ } else {
+ subgo(1);
+ }
+ break;
+ case 33: // page up
+ case 37: // leftkey
+ case 38: // upkey
+ if(number != undef) {
+ go(-1 * number);
+ } else if (!incrementals[snum] || incpos <= 0) {
+ go(-1);
+ } else {
+ subgo(-1);
+ }
+ break;
+ case 36: // home
+ goTo(0);
+ break;
+ case 35: // end
+ goTo(smax-1);
+ break;
+ case 67: // c
+ showHide('k');
+ break;
+ }
+ if (key.which < 48 || key.which > 57) {
+ number = undef;
+ } else {
+ if (window.event && isParentOrSelf(window.event.srcElement, 'controls')) return;
+ if (key.target && isParentOrSelf(key.target, 'controls')) return;
+ number = (((number != undef) ? number : 0) * 10) + (key.which - 48);
+ }
+ }
+ return false;
+}
+
+function clicker(e) {
+ number = undef;
+ var target;
+ if (window.event) {
+ target = window.event.srcElement;
+ e = window.event;
+ } else target = e.target;
+ if (target.getAttribute('href') != null || hasValue(target.rel, 'external') || isParentOrSelf(target, 'controls') || isParentOrSelf(target,'embed') || isParentOrSelf(target,'object')) return true;
+ if (!e.which || e.which == 1) {
+ if (!incrementals[snum] || incpos >= incrementals[snum].length) {
+ go(1);
+ } else {
+ subgo(1);
+ }
+ }
+}
+
+function findSlide(hash) {
+ var target = null;
+ var slides = GetElementsWithClassName('*','slide');
+ for (var i = 0; i < slides.length; i++) {
+ var targetSlide = slides[i];
+ if ( (targetSlide.name && targetSlide.name == hash)
+ || (targetSlide.id && targetSlide.id == hash) ) {
+ target = targetSlide;
+ break;
+ }
+ }
+ while(target != null && target.nodeName != 'BODY') {
+ if (hasClass(target, 'slide')) {
+ return parseInt(target.id.slice(5));
+ }
+ target = target.parentNode;
+ }
+ return null;
+}
+
+function slideJump() {
+ if (window.location.hash == null) return;
+ var sregex = /^#slide(\d+)$/;
+ var matches = sregex.exec(window.location.hash);
+ var dest = null;
+ if (matches != null) {
+ dest = parseInt(matches[1]);
+ } else {
+ dest = findSlide(window.location.hash.slice(1));
+ }
+ if (dest != null)
+ go(dest - snum);
+}
+
+function fixLinks() {
+ var thisUri = window.location.href;
+ thisUri = thisUri.slice(0, thisUri.length - window.location.hash.length);
+ var aelements = document.getElementsByTagName('A');
+ for (var i = 0; i < aelements.length; i++) {
+ var a = aelements[i].href;
+ var slideID = a.match('\#slide[0-9]{1,2}');
+ if ((slideID) && (slideID[0].slice(0,1) == '#')) {
+ var dest = findSlide(slideID[0].slice(1));
+ if (dest != null) {
+ if (aelements[i].addEventListener) {
+ aelements[i].addEventListener("click", new Function("e",
+ "if (document.getElementById('slideProj').disabled) return;" +
+ "go("+dest+" - snum); " +
+ "if (e.preventDefault) e.preventDefault();"), true);
+ } else if (aelements[i].attachEvent) {
+ aelements[i].attachEvent("onclick", new Function("",
+ "if (document.getElementById('slideProj').disabled) return;" +
+ "go("+dest+" - snum); " +
+ "event.returnValue = false;"));
+ }
+ }
+ }
+ }
+}
+
+function externalLinks() {
+ if (!document.getElementsByTagName) return;
+ var anchors = document.getElementsByTagName('a');
+ for (var i=0; i<anchors.length; i++) {
+ var anchor = anchors[i];
+ if (anchor.getAttribute('href') && hasValue(anchor.rel, 'external')) {
+ anchor.target = '_blank';
+ addClass(anchor,'external');
+ }
+ }
+}
+
+function createControls() {
+ var controlsDiv = document.getElementById("controls");
+ if (!controlsDiv) return;
+ var hider = ' onmouseover="showHide(\'s\');" onmouseout="showHide(\'h\');"';
+ var hideDiv, hideList = '';
+ if (controlVis == 'hidden') {
+ hideDiv = hider;
+ } else {
+ hideList = hider;
+ }
+ controlsDiv.innerHTML = '<form action="#" id="controlForm"' + hideDiv + '>' +
+ '<div id="navLinks">' +
+ '<a accesskey="t" id="toggle" href="javascript:toggle();">&#216;<\/a>' +
+ '<a accesskey="z" id="prev" href="javascript:go(-1);">&laquo;<\/a>' +
+ '<a accesskey="x" id="next" href="javascript:go(1);">&raquo;<\/a>' +
+ '<div id="navList"' + hideList + '><select id="jumplist" onchange="go(\'j\');"><\/select><\/div>' +
+ '<\/div><\/form>';
+ if (controlVis == 'hidden') {
+ var hidden = document.getElementById('navLinks');
+ } else {
+ var hidden = document.getElementById('jumplist');
+ }
+ addClass(hidden,'hideme');
+}
+
+function fontScale() { // causes layout problems in FireFox that get fixed if browser's Reload is used; same may be true of other Gecko-based browsers
+ if (!s5mode) return false;
+ var vScale = 22; // both yield 32 (after rounding) at 1024x768
+ var hScale = 32; // perhaps should auto-calculate based on theme's declared value?
+ if (window.innerHeight) {
+ var vSize = window.innerHeight;
+ var hSize = window.innerWidth;
+ } else if (document.documentElement.clientHeight) {
+ var vSize = document.documentElement.clientHeight;
+ var hSize = document.documentElement.clientWidth;
+ } else if (document.body.clientHeight) {
+ var vSize = document.body.clientHeight;
+ var hSize = document.body.clientWidth;
+ } else {
+ var vSize = 700; // assuming 1024x768, minus chrome and such
+ var hSize = 1024; // these do not account for kiosk mode or Opera Show
+ }
+ var newSize = Math.min(Math.round(vSize/vScale),Math.round(hSize/hScale));
+ fontSize(newSize + 'px');
+ if (isGe) { // hack to counter incremental reflow bugs
+ var obj = document.getElementsByTagName('body')[0];
+ obj.style.display = 'none';
+ obj.style.display = 'block';
+ }
+}
+
+function fontSize(value) {
+ if (!(s5ss = document.getElementById('s5ss'))) {
+ if (!isIE) {
+ document.getElementsByTagName('head')[0].appendChild(s5ss = document.createElement('style'));
+ s5ss.setAttribute('media','screen, projection');
+ s5ss.setAttribute('id','s5ss');
+ } else {
+ document.createStyleSheet();
+ document.s5ss = document.styleSheets[document.styleSheets.length - 1];
+ }
+ }
+ if (!isIE) {
+ while (s5ss.lastChild) s5ss.removeChild(s5ss.lastChild);
+ s5ss.appendChild(document.createTextNode('body {font-size: ' + value + ' !important;}'));
+ } else {
+ document.s5ss.addRule('body','font-size: ' + value + ' !important;');
+ }
+}
+
+function notOperaFix() {
+ slideCSS = document.getElementById('slideProj').href;
+ var slides = document.getElementById('slideProj');
+ var outline = document.getElementById('outlineStyle');
+ slides.setAttribute('media','screen');
+ outline.disabled = true;
+ if (isGe) {
+ slides.setAttribute('href','null'); // Gecko fix
+ slides.setAttribute('href',slideCSS); // Gecko fix
+ }
+ if (isIE && document.styleSheets && document.styleSheets[0]) {
+ document.styleSheets[0].addRule('img', 'behavior: url(ui/default/iepngfix.htc)');
+ document.styleSheets[0].addRule('div', 'behavior: url(ui/default/iepngfix.htc)');
+ document.styleSheets[0].addRule('.slide', 'behavior: url(ui/default/iepngfix.htc)');
+ }
+}
+
+function getIncrementals(obj) {
+ var incrementals = new Array();
+ if (!obj)
+ return incrementals;
+ var children = obj.childNodes;
+ for (var i = 0; i < children.length; i++) {
+ var child = children[i];
+ if (hasClass(child, 'incremental')) {
+ if (child.nodeName == 'OL' || child.nodeName == 'UL') {
+ removeClass(child, 'incremental');
+ for (var j = 0; j < child.childNodes.length; j++) {
+ if (child.childNodes[j].nodeType == 1) {
+ addClass(child.childNodes[j], 'incremental');
+ }
+ }
+ } else {
+ incrementals[incrementals.length] = child;
+ removeClass(child,'incremental');
+ }
+ }
+ if (hasClass(child, 'show-first')) {
+ if (child.nodeName == 'OL' || child.nodeName == 'UL') {
+ removeClass(child, 'show-first');
+ if (child.childNodes[isGe].nodeType == 1) {
+ removeClass(child.childNodes[isGe], 'incremental');
+ }
+ } else {
+ incrementals[incrementals.length] = child;
+ }
+ }
+ incrementals = incrementals.concat(getIncrementals(child));
+ }
+ return incrementals;
+}
+
+function createIncrementals() {
+ var incrementals = new Array();
+ for (var i = 0; i < smax; i++) {
+ incrementals[i] = getIncrementals(document.getElementById('slide'+i));
+ }
+ return incrementals;
+}
+
+function defaultCheck() {
+ var allMetas = document.getElementsByTagName('meta');
+ for (var i = 0; i< allMetas.length; i++) {
+ if (allMetas[i].name == 'defaultView') {
+ defaultView = allMetas[i].content;
+ }
+ if (allMetas[i].name == 'controlVis') {
+ controlVis = allMetas[i].content;
+ }
+ }
+}
+
+// Key trap fix, new function body for trap()
+function trap(e) {
+ if (!e) {
+ e = event;
+ e.which = e.keyCode;
+ }
+ try {
+ modifierKey = e.ctrlKey || e.altKey || e.metaKey;
+ }
+ catch(e) {
+ modifierKey = false;
+ }
+ return modifierKey || e.which == 0;
+}
+
+function startup() {
+ defaultCheck();
+ if (!isOp)
+ createControls();
+ slideLabel();
+ fixLinks();
+ externalLinks();
+ fontScale();
+ if (!isOp) {
+ notOperaFix();
+ incrementals = createIncrementals();
+ slideJump();
+ if (defaultView == 'outline') {
+ toggle();
+ }
+ document.onkeyup = keys;
+ document.onkeypress = trap;
+ document.onclick = clicker;
+ }
+}
+
+window.onload = startup;
+window.onresize = function(){setTimeout('fontScale()', 50);} \ No newline at end of file
diff --git a/src/wrappers/common.sh b/src/wrappers/common.sh
new file mode 100644
index 000000000..9605f5940
--- /dev/null
+++ b/src/wrappers/common.sh
@@ -0,0 +1,43 @@
+THIS=${0##*/}
+
+NEWLINE='
+'
+
+err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; }
+errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; }
+
+usage () {
+ err "$1 - $2" # short description
+ err "See the $1(1) man page for usage."
+}
+
+# Portable which(1).
+pathfind () {
+ oldifs="$IFS"; IFS=':'
+ for _p in $PATH; do
+ if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then
+ IFS="$oldifs"
+ return 0
+ fi
+ done
+ IFS="$oldifs"
+ return 1
+}
+
+for p in pandoc $REQUIRED; do
+ pathfind $p || {
+ err "You need '$p' to use this program!"
+ exit 1
+ }
+done
+
+CONF=$(pandoc --dump-args "$@" 2>&1) || {
+ errcode=$?
+ echo "$CONF" | sed -e '/^pandoc \[OPTIONS\] \[FILES\]/,$d' >&2
+ [ $errcode -eq 2 ] && usage "$THIS" "$SYNOPSIS"
+ exit $errcode
+}
+
+OUTPUT=$(echo "$CONF" | sed -ne '1p')
+ARGS=$(echo "$CONF" | sed -e '1d')
+
diff --git a/src/wrappers/hsmarkdown.in b/src/wrappers/hsmarkdown.in
new file mode 100644
index 000000000..17f970234
--- /dev/null
+++ b/src/wrappers/hsmarkdown.in
@@ -0,0 +1,5 @@
+#!/bin/sh
+# hsmarkdown - intended as a drop-in replacement for Markdown.pl.
+# Uses pandoc to convert from markdown to HTML, using --strict mode
+# for maximum compatibility with official markdown syntax.
+exec pandoc --from markdown --to html --strict -- "$@"
diff --git a/src/wrappers/html2markdown.in b/src/wrappers/html2markdown.in
new file mode 100644
index 000000000..0f4297128
--- /dev/null
+++ b/src/wrappers/html2markdown.in
@@ -0,0 +1,162 @@
+#!/bin/sh -e
+# converts HTML from a URL, file, or stdin to markdown
+# uses an available program to fetch URL and tidy to normalize it first
+
+REQUIRED="tidy"
+SYNOPSIS="converts HTML from a URL, file, or STDIN to markdown-formatted text."
+
+### common.sh
+
+grab_url_with () {
+ url="${1:?internal error: grab_url_with: url required}"
+
+ shift
+ cmdline="$@"
+
+ prog=
+ prog_opts=
+ if [ -n "$cmdline" ]; then
+ eval "set -- $cmdline"
+ prog=$1
+ shift
+ prog_opts="$@"
+ fi
+
+ if [ -z "$prog" ]; then
+ # Locate a sensible web grabber (note the order).
+ for p in wget lynx w3m curl links w3c; do
+ if pathfind $p; then
+ prog=$p
+ break
+ fi
+ done
+
+ [ -n "$prog" ] || {
+ errn "$THIS: Couldn't find a program to fetch the file from URL "
+ err "(e.g. wget, w3m, lynx, w3c, or curl)."
+ return 1
+ }
+ else
+ pathfind "$prog" || {
+ err "$THIS: No such web grabber '$prog' found; aborting."
+ return 1
+ }
+ fi
+
+ # Setup proper base options for known grabbers.
+ base_opts=
+ case "$prog" in
+ wget) base_opts="-O-" ;;
+ lynx) base_opts="-source" ;;
+ w3m) base_opts="-dump_source" ;;
+ curl) base_opts="" ;;
+ links) base_opts="-source" ;;
+ w3c) base_opts="-n -get" ;;
+ *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds."
+ esac
+
+ err "$THIS: invoking '$prog $base_opts $prog_opts $url'..."
+ eval "set -- $base_opts $prog_opts"
+ $prog "$@" "$url"
+}
+
+# Parse command-line arguments
+parse_arguments () {
+ while [ $# -gt 0 ]; do
+ case "$1" in
+ --encoding=*)
+ wholeopt="$1"
+ # extract encoding from after =
+ encoding="${wholeopt#*=}" ;;
+ -e|--encoding|-encoding)
+ shift
+ encoding="$1" ;;
+ --grabber=*)
+ wholeopt="$1"
+ # extract encoding from after =
+ grabber="\"${wholeopt#*=}\"" ;;
+ -g|--grabber|-grabber)
+ shift
+ grabber="$1" ;;
+ *)
+ if [ -z "$argument" ]; then
+ argument="$1"
+ else
+ err "Warning: extra argument '$1' will be ignored."
+ fi ;;
+ esac
+ shift
+ done
+}
+
+argument=
+encoding=
+grabber=
+
+oldifs="$IFS"
+IFS=$NEWLINE
+parse_arguments $ARGS
+IFS="$oldifs"
+
+inurl=
+if [ -n "$argument" ] && ! [ -f "$argument" ]; then
+ # Treat given argument as an URL.
+ inurl="$argument"
+fi
+
+### tempdir.sh
+
+if [ -n "$inurl" ]; then
+ err "Attempting to fetch file from '$inurl'..."
+
+ grabber_out=$THIS_TEMPDIR/grabber.out
+ grabber_log=$THIS_TEMPDIR/grabber.log
+ if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then
+ errn "grab_url_with failed"
+ if [ -f $grabber_log ]; then
+ err " with the following error log."
+ err
+ cat >&2 $grabber_log
+ else
+ err .
+ fi
+ exit 1
+ fi
+
+ argument="$grabber_out"
+fi
+
+if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then
+ # Try to determine character encoding if not specified
+ # and input is not STDIN.
+ encoding=$(
+ head "$argument" |
+ LC_ALL=C tr 'A-Z' 'a-z' |
+ sed -ne '/<meta .*content-type.*charset=/ {
+ s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p
+ }'
+ )
+fi
+
+if [ -n "$encoding" ] && pathfind iconv; then
+ alias to_utf8='iconv -f "$encoding" -t utf-8'
+else # assume UTF-8
+ alias to_utf8='cat'
+fi
+
+htmlinput=$THIS_TEMPDIR/htmlinput
+
+if [ -z "$argument" ]; then
+ to_utf8 > $htmlinput # read from STDIN
+elif [ -f "$argument" ]; then
+ to_utf8 "$argument" > $htmlinput # read from file
+else
+ err "File '$argument' not found."
+ exit 1
+fi
+
+if ! cat $htmlinput | pandoc --ignore-args -r html -w markdown "$@" ; then
+ err "Failed to parse HTML. Trying again with tidy..."
+ tidy -q -asxhtml -utf8 $htmlinput | \
+ pandoc --ignore-args -r html -w markdown "$@"
+fi
diff --git a/src/wrappers/markdown2pdf.in b/src/wrappers/markdown2pdf.in
new file mode 100644
index 000000000..37be69469
--- /dev/null
+++ b/src/wrappers/markdown2pdf.in
@@ -0,0 +1,81 @@
+#!/bin/sh -e
+
+REQUIRED="pdflatex"
+SYNOPSIS="converts markdown-formatted text to PDF, using pdflatex."
+
+### common.sh
+
+### tempdir.sh
+
+texname=output
+logfile=$THIS_TEMPDIR/log
+
+pandoc -s -r markdown -w latex "$@" -o $THIS_TEMPDIR/$texname.tex
+
+if [ "$OUTPUT" = "-" ]; then
+ firstinfile="$(echo $ARGS | sed -ne '1p')"
+ firstinfilebase="${firstinfile%.*}"
+ destname="${firstinfilebase:-stdin}.pdf"
+else
+ destname="$OUTPUT"
+fi
+
+(
+ origdir=$(pwd)
+ cd $THIS_TEMPDIR
+ TEXINPUTS=$origdir:$TEXINPUTS:
+ export TEXINPUTS
+ finished=no
+ runs=0
+ while [ $finished = "no" ]; do
+ pdflatex -interaction=batchmode $texname.tex >/dev/null || {
+ errcode=$?
+ err "${THIS}: pdfLaTeX failed with error code $errcode"
+ [ -f $texname.log ] && {
+ err "${THIS}: error context:"
+ sed -ne '/^!/,/^[[:space:]]*$/p' \
+ -ne '/^[Ll]a[Tt]e[Xx] [Ww]arning/,/^[[:space:]]*$/p' \
+ -ne '/^[Ee]rror/,/^[[:space:]]*$/p' $texname.log >&2
+ if grep -q "File \`ucs.sty' not found" $texname.log; then
+ err "${THIS}: Please install the 'unicode' package from CTAN:"
+ err " http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/"
+ fi
+ if grep -q "File \`ulem.sty' not found" $texname.log; then
+ err "${THIS}: Please install the 'ulem' package from CTAN:"
+ err " http://www.ctan.org/tex-archive/macros/latex/contrib/misc/ulem.sty"
+ fi
+ }
+ exit $errcode
+ }
+ if [ $runs -lt 3 ] &&
+ ((grep -q "LaTeX Warning: There were undefined references." $texname.log) ||
+ (echo "$@" | grep -q -- "--toc\|--table-of-contents")); then
+ runs=$(($runs + 1))
+ if grep -q "LaTeX Warning:.*[Cc]itation" $texname.log; then
+ bibtex $texname 2>&1 >bibtex.err
+ if [ $runs -gt 2 ]; then
+ if grep -q "error message" bibtex.err ||
+ grep -q "Warning" bibtex.err; then
+ cat bibtex.err >&2
+ fi
+ fi
+ fi
+ else
+ finished=yes
+ fi
+ done
+) || exit $?
+
+is_target_exists=
+if [ -f "$destname" ]; then
+ is_target_exists=1
+ mv "$destname" "$destname~"
+fi
+
+mv -f $THIS_TEMPDIR/$texname.pdf "$destname"
+
+errn "Created $destname"
+[ -z "$is_target_exists" ] || {
+ errn " (previous file has been backed up as $destname~)"
+}
+err .
diff --git a/src/wrappers/tempdir.sh b/src/wrappers/tempdir.sh
new file mode 100644
index 000000000..f25ae7f51
--- /dev/null
+++ b/src/wrappers/tempdir.sh
@@ -0,0 +1,18 @@
+# As a security measure refuse to proceed if mktemp is not available.
+pathfind mktemp || { err "Couldn't find 'mktemp'; aborting."; exit 1; }
+
+# Avoid issues with /tmp directory on Windows/Cygwin
+cygwin=
+cygwin=$(uname | sed -ne '/^CYGWIN/p')
+if [ -n "$cygwin" ]; then
+ TMPDIR=.
+ export TMPDIR
+fi
+
+THIS_TEMPDIR=
+THIS_TEMPDIR="$(mktemp -d -t $THIS.XXXXXXXX)" || exit 1
+readonly THIS_TEMPDIR
+
+trap 'exitcode=$?
+ [ -z "$THIS_TEMPDIR" ] || rm -rf "$THIS_TEMPDIR"
+ exit $exitcode' 0 1 2 3 13 15