diff options
Diffstat (limited to 'src')
46 files changed, 9770 insertions, 0 deletions
diff --git a/src/ASCIIMathML.js b/src/ASCIIMathML.js new file mode 100644 index 000000000..282cc15fb --- /dev/null +++ b/src/ASCIIMathML.js @@ -0,0 +1,945 @@ +/* +ASCIIMathML.js +============== +This file contains JavaScript functions to convert ASCII math notation +to Presentation MathML. The conversion is done while the (X)HTML page +loads, and should work with Firefox/Mozilla/Netscape 7+ and Internet +Explorer 6+MathPlayer (http://www.dessci.com/en/products/mathplayer/). +Just add the next line to your (X)HTML page with this file in the same folder: +<script type="text/javascript" src="ASCIIMathML.js"></script> +This is a convenient and inexpensive solution for authoring MathML. + +Version 1.4.7 Dec 15, 2005, (c) Peter Jipsen http://www.chapman.edu/~jipsen +Latest version at http://www.chapman.edu/~jipsen/mathml/ASCIIMathML.js +For changes see http://www.chapman.edu/~jipsen/mathml/asciimathchanges.txt +If you use it on a webpage, please send the URL to jipsen@chapman.edu + +Modified July 2006 by John MacFarlane (added CODE to list of contexts +in which replacement does not occur, modified AMisMathMLAvailable +to better identify Safari browser, changed mathcolor to ""). + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License (at http://www.gnu.org/copyleft/gpl.html) +for more details. +*/ + +var checkForMathML = true; // check if browser can display MathML +var notifyIfNoMathML = true; // display note if no MathML capability +var alertIfNoMathML = false; // show alert box if no MathML capability +var mathcolor = ""; // change it to "" (to inherit) or any other color +var mathfontfamily = "serif"; // change to "" to inherit (works in IE) + // or another family (e.g. "arial") +var displaystyle = true; // puts limits above and below large operators +var showasciiformulaonhover = true; // helps students learn ASCIIMath +var decimalsign = "."; // change to "," if you like, beware of `(1,2)`! +var AMdelimiter1 = "`", AMescape1 = "\\\\`"; // can use other characters +var AMdelimiter2 = "$", AMescape2 = "\\\\\\$", AMdelimiter2regexp = "\\$"; +var doubleblankmathdelimiter = false; // if true, x+1 is equal to `x+1` + // for IE this works only in <!-- --> +//var separatetokens;// has been removed (email me if this is a problem) +var isIE = document.createElementNS==null; + +if (document.getElementById==null) + alert("This webpage requires a recent browser such as\ +\nMozilla/Netscape 7+ or Internet Explorer 6+MathPlayer") + +// all further global variables start with "AM" + +function AMcreateElementXHTML(t) { + if (isIE) return document.createElement(t); + else return document.createElementNS("http://www.w3.org/1999/xhtml",t); +} + +function AMnoMathMLNote() { + var nd = AMcreateElementXHTML("h3"); + nd.setAttribute("align","center") + nd.appendChild(AMcreateElementXHTML("p")); + nd.appendChild(document.createTextNode("To view the ")); + var an = AMcreateElementXHTML("a"); + an.appendChild(document.createTextNode("ASCIIMathML")); + an.setAttribute("href","http://www.chapman.edu/~jipsen/asciimath.html"); + nd.appendChild(an); + nd.appendChild(document.createTextNode(" notation use Internet Explorer 6+")); + an = AMcreateElementXHTML("a"); + an.appendChild(document.createTextNode("MathPlayer")); + an.setAttribute("href","http://www.dessci.com/en/products/mathplayer/download.htm"); + nd.appendChild(an); + nd.appendChild(document.createTextNode(" or Netscape/Mozilla/Firefox")); + nd.appendChild(AMcreateElementXHTML("p")); + return nd; +} + +function AMisMathMLavailable() { + var regex = /KHTML/; /* This line and the next two modified by JM for better Safari detection */ + if (navigator.appName.slice(0,8)=="Netscape") + if (navigator.appVersion.slice(0,1)>="5" && !regex.test(navigator.userAgent)) return null; + else return AMnoMathMLNote(); + else if (navigator.appName.slice(0,9)=="Microsoft") + try { + var ActiveX = new ActiveXObject("MathPlayer.Factory.1"); + return null; + } catch (e) { + return AMnoMathMLNote(); + } + else return AMnoMathMLNote(); +} + +// character lists for Mozilla/Netscape fonts +var AMcal = [0xEF35,0x212C,0xEF36,0xEF37,0x2130,0x2131,0xEF38,0x210B,0x2110,0xEF39,0xEF3A,0x2112,0x2133,0xEF3B,0xEF3C,0xEF3D,0xEF3E,0x211B,0xEF3F,0xEF40,0xEF41,0xEF42,0xEF43,0xEF44,0xEF45,0xEF46]; +var AMfrk = [0xEF5D,0xEF5E,0x212D,0xEF5F,0xEF60,0xEF61,0xEF62,0x210C,0x2111,0xEF63,0xEF64,0xEF65,0xEF66,0xEF67,0xEF68,0xEF69,0xEF6A,0x211C,0xEF6B,0xEF6C,0xEF6D,0xEF6E,0xEF6F,0xEF70,0xEF71,0x2128]; +var AMbbb = [0xEF8C,0xEF8D,0x2102,0xEF8E,0xEF8F,0xEF90,0xEF91,0x210D,0xEF92,0xEF93,0xEF94,0xEF95,0xEF96,0x2115,0xEF97,0x2119,0x211A,0x211D,0xEF98,0xEF99,0xEF9A,0xEF9B,0xEF9C,0xEF9D,0xEF9E,0x2124]; + +var CONST = 0, UNARY = 1, BINARY = 2, INFIX = 3, LEFTBRACKET = 4, + RIGHTBRACKET = 5, SPACE = 6, UNDEROVER = 7, DEFINITION = 8, + LEFTRIGHT = 9, TEXT = 10; // token types + +var AMsqrt = {input:"sqrt", tag:"msqrt", output:"sqrt", tex:null, ttype:UNARY}, + AMroot = {input:"root", tag:"mroot", output:"root", tex:null, ttype:BINARY}, + AMfrac = {input:"frac", tag:"mfrac", output:"/", tex:null, ttype:BINARY}, + AMdiv = {input:"/", tag:"mfrac", output:"/", tex:null, ttype:INFIX}, + AMover = {input:"stackrel", tag:"mover", output:"stackrel", tex:null, ttype:BINARY}, + AMsub = {input:"_", tag:"msub", output:"_", tex:null, ttype:INFIX}, + AMsup = {input:"^", tag:"msup", output:"^", tex:null, ttype:INFIX}, + AMtext = {input:"text", tag:"mtext", output:"text", tex:null, ttype:TEXT}, + AMmbox = {input:"mbox", tag:"mtext", output:"mbox", tex:null, ttype:TEXT}, + AMquote = {input:"\"", tag:"mtext", output:"mbox", tex:null, ttype:TEXT}; + +var AMsymbols = [ +//some greek symbols +{input:"alpha", tag:"mi", output:"\u03B1", tex:null, ttype:CONST}, +{input:"beta", tag:"mi", output:"\u03B2", tex:null, ttype:CONST}, +{input:"chi", tag:"mi", output:"\u03C7", tex:null, ttype:CONST}, +{input:"delta", tag:"mi", output:"\u03B4", tex:null, ttype:CONST}, +{input:"Delta", tag:"mo", output:"\u0394", tex:null, ttype:CONST}, +{input:"epsi", tag:"mi", output:"\u03B5", tex:"epsilon", ttype:CONST}, +{input:"varepsilon", tag:"mi", output:"\u025B", tex:null, ttype:CONST}, +{input:"eta", tag:"mi", output:"\u03B7", tex:null, ttype:CONST}, +{input:"gamma", tag:"mi", output:"\u03B3", tex:null, ttype:CONST}, +{input:"Gamma", tag:"mo", output:"\u0393", tex:null, ttype:CONST}, +{input:"iota", tag:"mi", output:"\u03B9", tex:null, ttype:CONST}, +{input:"kappa", tag:"mi", output:"\u03BA", tex:null, ttype:CONST}, +{input:"lambda", tag:"mi", output:"\u03BB", tex:null, ttype:CONST}, +{input:"Lambda", tag:"mo", output:"\u039B", tex:null, ttype:CONST}, +{input:"mu", tag:"mi", output:"\u03BC", tex:null, ttype:CONST}, +{input:"nu", tag:"mi", output:"\u03BD", tex:null, ttype:CONST}, +{input:"omega", tag:"mi", output:"\u03C9", tex:null, ttype:CONST}, +{input:"Omega", tag:"mo", output:"\u03A9", tex:null, ttype:CONST}, +{input:"phi", tag:"mi", output:"\u03C6", tex:null, ttype:CONST}, +{input:"varphi", tag:"mi", output:"\u03D5", tex:null, ttype:CONST}, +{input:"Phi", tag:"mo", output:"\u03A6", tex:null, ttype:CONST}, +{input:"pi", tag:"mi", output:"\u03C0", tex:null, ttype:CONST}, +{input:"Pi", tag:"mo", output:"\u03A0", tex:null, ttype:CONST}, +{input:"psi", tag:"mi", output:"\u03C8", tex:null, ttype:CONST}, +{input:"Psi", tag:"mi", output:"\u03A8", tex:null, ttype:CONST}, +{input:"rho", tag:"mi", output:"\u03C1", tex:null, ttype:CONST}, +{input:"sigma", tag:"mi", output:"\u03C3", tex:null, ttype:CONST}, +{input:"Sigma", tag:"mo", output:"\u03A3", tex:null, ttype:CONST}, +{input:"tau", tag:"mi", output:"\u03C4", tex:null, ttype:CONST}, +{input:"theta", tag:"mi", output:"\u03B8", tex:null, ttype:CONST}, +{input:"vartheta", tag:"mi", output:"\u03D1", tex:null, ttype:CONST}, +{input:"Theta", tag:"mo", output:"\u0398", tex:null, ttype:CONST}, +{input:"upsilon", tag:"mi", output:"\u03C5", tex:null, ttype:CONST}, +{input:"xi", tag:"mi", output:"\u03BE", tex:null, ttype:CONST}, +{input:"Xi", tag:"mo", output:"\u039E", tex:null, ttype:CONST}, +{input:"zeta", tag:"mi", output:"\u03B6", tex:null, ttype:CONST}, + +//binary operation symbols +{input:"*", tag:"mo", output:"\u22C5", tex:"cdot", ttype:CONST}, +{input:"**", tag:"mo", output:"\u22C6", tex:"star", ttype:CONST}, +{input:"//", tag:"mo", output:"/", tex:null, ttype:CONST}, +{input:"\\\\", tag:"mo", output:"\\", tex:"backslash", ttype:CONST}, +{input:"setminus", tag:"mo", output:"\\", tex:null, ttype:CONST}, +{input:"xx", tag:"mo", output:"\u00D7", tex:"times", ttype:CONST}, +{input:"-:", tag:"mo", output:"\u00F7", tex:"divide", ttype:CONST}, +{input:"@", tag:"mo", output:"\u2218", tex:"circ", ttype:CONST}, +{input:"o+", tag:"mo", output:"\u2295", tex:"oplus", ttype:CONST}, +{input:"ox", tag:"mo", output:"\u2297", tex:"otimes", ttype:CONST}, +{input:"o.", tag:"mo", output:"\u2299", tex:"odot", ttype:CONST}, +{input:"sum", tag:"mo", output:"\u2211", tex:null, ttype:UNDEROVER}, +{input:"prod", tag:"mo", output:"\u220F", tex:null, ttype:UNDEROVER}, +{input:"^^", tag:"mo", output:"\u2227", tex:"wedge", ttype:CONST}, +{input:"^^^", tag:"mo", output:"\u22C0", tex:"bigwedge", ttype:UNDEROVER}, +{input:"vv", tag:"mo", output:"\u2228", tex:"vee", ttype:CONST}, +{input:"vvv", tag:"mo", output:"\u22C1", tex:"bigvee", ttype:UNDEROVER}, +{input:"nn", tag:"mo", output:"\u2229", tex:"cap", ttype:CONST}, +{input:"nnn", tag:"mo", output:"\u22C2", tex:"bigcap", ttype:UNDEROVER}, +{input:"uu", tag:"mo", output:"\u222A", tex:"cup", ttype:CONST}, +{input:"uuu", tag:"mo", output:"\u22C3", tex:"bigcup", ttype:UNDEROVER}, + +//binary relation symbols +{input:"!=", tag:"mo", output:"\u2260", tex:"ne", ttype:CONST}, +{input:":=", tag:"mo", output:":=", tex:null, ttype:CONST}, +{input:"lt", tag:"mo", output:"<", tex:null, ttype:CONST}, +{input:"<=", tag:"mo", output:"\u2264", tex:"le", ttype:CONST}, +{input:"lt=", tag:"mo", output:"\u2264", tex:"leq", ttype:CONST}, +{input:">=", tag:"mo", output:"\u2265", tex:"ge", ttype:CONST}, +{input:"geq", tag:"mo", output:"\u2265", tex:null, ttype:CONST}, +{input:"-<", tag:"mo", output:"\u227A", tex:"prec", ttype:CONST}, +{input:"-lt", tag:"mo", output:"\u227A", tex:null, ttype:CONST}, +{input:">-", tag:"mo", output:"\u227B", tex:"succ", ttype:CONST}, +{input:"-<=", tag:"mo", output:"\u2AAF", tex:"preceq", ttype:CONST}, +{input:">-=", tag:"mo", output:"\u2AB0", tex:"succeq", ttype:CONST}, +{input:"in", tag:"mo", output:"\u2208", tex:null, ttype:CONST}, +{input:"!in", tag:"mo", output:"\u2209", tex:"notin", ttype:CONST}, +{input:"sub", tag:"mo", output:"\u2282", tex:"subset", ttype:CONST}, +{input:"sup", tag:"mo", output:"\u2283", tex:"supset", ttype:CONST}, +{input:"sube", tag:"mo", output:"\u2286", tex:"subseteq", ttype:CONST}, +{input:"supe", tag:"mo", output:"\u2287", tex:"supseteq", ttype:CONST}, +{input:"-=", tag:"mo", output:"\u2261", tex:"equiv", ttype:CONST}, +{input:"~=", tag:"mo", output:"\u2245", tex:"cong", ttype:CONST}, +{input:"~~", tag:"mo", output:"\u2248", tex:"approx", ttype:CONST}, +{input:"prop", tag:"mo", output:"\u221D", tex:"propto", ttype:CONST}, + +//logical symbols +{input:"and", tag:"mtext", output:"and", tex:null, ttype:SPACE}, +{input:"or", tag:"mtext", output:"or", tex:null, ttype:SPACE}, +{input:"not", tag:"mo", output:"\u00AC", tex:"neg", ttype:CONST}, +{input:"=>", tag:"mo", output:"\u21D2", tex:"implies", ttype:CONST}, +{input:"if", tag:"mo", output:"if", tex:null, ttype:SPACE}, +{input:"<=>", tag:"mo", output:"\u21D4", tex:"iff", ttype:CONST}, +{input:"AA", tag:"mo", output:"\u2200", tex:"forall", ttype:CONST}, +{input:"EE", tag:"mo", output:"\u2203", tex:"exists", ttype:CONST}, +{input:"_|_", tag:"mo", output:"\u22A5", tex:"bot", ttype:CONST}, +{input:"TT", tag:"mo", output:"\u22A4", tex:"top", ttype:CONST}, +{input:"|--", tag:"mo", output:"\u22A2", tex:"vdash", ttype:CONST}, +{input:"|==", tag:"mo", output:"\u22A8", tex:"models", ttype:CONST}, + +//grouping brackets +{input:"(", tag:"mo", output:"(", tex:null, ttype:LEFTBRACKET}, +{input:")", tag:"mo", output:")", tex:null, ttype:RIGHTBRACKET}, +{input:"[", tag:"mo", output:"[", tex:null, ttype:LEFTBRACKET}, +{input:"]", tag:"mo", output:"]", tex:null, ttype:RIGHTBRACKET}, +{input:"{", tag:"mo", output:"{", tex:null, ttype:LEFTBRACKET}, +{input:"}", tag:"mo", output:"}", tex:null, ttype:RIGHTBRACKET}, +{input:"|", tag:"mo", output:"|", tex:null, ttype:LEFTRIGHT}, +//{input:"||", tag:"mo", output:"||", tex:null, ttype:LEFTRIGHT}, +{input:"(:", tag:"mo", output:"\u2329", tex:"langle", ttype:LEFTBRACKET}, +{input:":)", tag:"mo", output:"\u232A", tex:"rangle", ttype:RIGHTBRACKET}, +{input:"<<", tag:"mo", output:"\u2329", tex:null, ttype:LEFTBRACKET}, +{input:">>", tag:"mo", output:"\u232A", tex:null, ttype:RIGHTBRACKET}, +{input:"{:", tag:"mo", output:"{:", tex:null, ttype:LEFTBRACKET, invisible:true}, +{input:":}", tag:"mo", output:":}", tex:null, ttype:RIGHTBRACKET, invisible:true}, + +//miscellaneous symbols +{input:"int", tag:"mo", output:"\u222B", tex:null, ttype:CONST}, +{input:"dx", tag:"mi", output:"{:d x:}", tex:null, ttype:DEFINITION}, +{input:"dy", tag:"mi", output:"{:d y:}", tex:null, ttype:DEFINITION}, +{input:"dz", tag:"mi", output:"{:d z:}", tex:null, ttype:DEFINITION}, +{input:"dt", tag:"mi", output:"{:d t:}", tex:null, ttype:DEFINITION}, +{input:"oint", tag:"mo", output:"\u222E", tex:null, ttype:CONST}, +{input:"del", tag:"mo", output:"\u2202", tex:"partial", ttype:CONST}, +{input:"grad", tag:"mo", output:"\u2207", tex:"nabla", ttype:CONST}, +{input:"+-", tag:"mo", output:"\u00B1", tex:"pm", ttype:CONST}, +{input:"O/", tag:"mo", output:"\u2205", tex:"emptyset", ttype:CONST}, +{input:"oo", tag:"mo", output:"\u221E", tex:"infty", ttype:CONST}, +{input:"aleph", tag:"mo", output:"\u2135", tex:null, ttype:CONST}, +{input:"...", tag:"mo", output:"...", tex:"ldots", ttype:CONST}, +{input:":.", tag:"mo", output:"\u2234", tex:"therefore", ttype:CONST}, +{input:"/_", tag:"mo", output:"\u2220", tex:"angle", ttype:CONST}, +{input:"\\ ", tag:"mo", output:"\u00A0", tex:null, ttype:CONST}, +{input:"quad", tag:"mo", output:"\u00A0\u00A0", tex:null, ttype:CONST}, +{input:"qquad", tag:"mo", output:"\u00A0\u00A0\u00A0\u00A0", tex:null, ttype:CONST}, +{input:"cdots", tag:"mo", output:"\u22EF", tex:null, ttype:CONST}, +{input:"vdots", tag:"mo", output:"\u22EE", tex:null, ttype:CONST}, +{input:"ddots", tag:"mo", output:"\u22F1", tex:null, ttype:CONST}, +{input:"diamond", tag:"mo", output:"\u22C4", tex:null, ttype:CONST}, +{input:"square", tag:"mo", output:"\u25A1", tex:null, ttype:CONST}, +{input:"|__", tag:"mo", output:"\u230A", tex:"lfloor", ttype:CONST}, +{input:"__|", tag:"mo", output:"\u230B", tex:"rfloor", ttype:CONST}, +{input:"|~", tag:"mo", output:"\u2308", tex:"lceiling", ttype:CONST}, +{input:"~|", tag:"mo", output:"\u2309", tex:"rceiling", ttype:CONST}, +{input:"CC", tag:"mo", output:"\u2102", tex:null, ttype:CONST}, +{input:"NN", tag:"mo", output:"\u2115", tex:null, ttype:CONST}, +{input:"QQ", tag:"mo", output:"\u211A", tex:null, ttype:CONST}, +{input:"RR", tag:"mo", output:"\u211D", tex:null, ttype:CONST}, +{input:"ZZ", tag:"mo", output:"\u2124", tex:null, ttype:CONST}, +{input:"f", tag:"mi", output:"f", tex:null, ttype:UNARY, func:true}, +{input:"g", tag:"mi", output:"g", tex:null, ttype:UNARY, func:true}, + +//standard functions +{input:"lim", tag:"mo", output:"lim", tex:null, ttype:UNDEROVER}, +{input:"Lim", tag:"mo", output:"Lim", tex:null, ttype:UNDEROVER}, +{input:"sin", tag:"mo", output:"sin", tex:null, ttype:UNARY, func:true}, +{input:"cos", tag:"mo", output:"cos", tex:null, ttype:UNARY, func:true}, +{input:"tan", tag:"mo", output:"tan", tex:null, ttype:UNARY, func:true}, +{input:"sinh", tag:"mo", output:"sinh", tex:null, ttype:UNARY, func:true}, +{input:"cosh", tag:"mo", output:"cosh", tex:null, ttype:UNARY, func:true}, +{input:"tanh", tag:"mo", output:"tanh", tex:null, ttype:UNARY, func:true}, +{input:"cot", tag:"mo", output:"cot", tex:null, ttype:UNARY, func:true}, +{input:"sec", tag:"mo", output:"sec", tex:null, ttype:UNARY, func:true}, +{input:"csc", tag:"mo", output:"csc", tex:null, ttype:UNARY, func:true}, +{input:"log", tag:"mo", output:"log", tex:null, ttype:UNARY, func:true}, +{input:"ln", tag:"mo", output:"ln", tex:null, ttype:UNARY, func:true}, +{input:"det", tag:"mo", output:"det", tex:null, ttype:UNARY, func:true}, +{input:"dim", tag:"mo", output:"dim", tex:null, ttype:CONST}, +{input:"mod", tag:"mo", output:"mod", tex:null, ttype:CONST}, +{input:"gcd", tag:"mo", output:"gcd", tex:null, ttype:UNARY, func:true}, +{input:"lcm", tag:"mo", output:"lcm", tex:null, ttype:UNARY, func:true}, +{input:"lub", tag:"mo", output:"lub", tex:null, ttype:CONST}, +{input:"glb", tag:"mo", output:"glb", tex:null, ttype:CONST}, +{input:"min", tag:"mo", output:"min", tex:null, ttype:UNDEROVER}, +{input:"max", tag:"mo", output:"max", tex:null, ttype:UNDEROVER}, + +//arrows +{input:"uarr", tag:"mo", output:"\u2191", tex:"uparrow", ttype:CONST}, +{input:"darr", tag:"mo", output:"\u2193", tex:"downarrow", ttype:CONST}, +{input:"rarr", tag:"mo", output:"\u2192", tex:"rightarrow", ttype:CONST}, +{input:"->", tag:"mo", output:"\u2192", tex:"to", ttype:CONST}, +{input:"|->", tag:"mo", output:"\u21A6", tex:"mapsto", ttype:CONST}, +{input:"larr", tag:"mo", output:"\u2190", tex:"leftarrow", ttype:CONST}, +{input:"harr", tag:"mo", output:"\u2194", tex:"leftrightarrow", ttype:CONST}, +{input:"rArr", tag:"mo", output:"\u21D2", tex:"Rightarrow", ttype:CONST}, +{input:"lArr", tag:"mo", output:"\u21D0", tex:"Leftarrow", ttype:CONST}, +{input:"hArr", tag:"mo", output:"\u21D4", tex:"Leftrightarrow", ttype:CONST}, + +//commands with argument +AMsqrt, AMroot, AMfrac, AMdiv, AMover, AMsub, AMsup, +{input:"hat", tag:"mover", output:"\u005E", tex:null, ttype:UNARY, acc:true}, +{input:"bar", tag:"mover", output:"\u00AF", tex:"overline", ttype:UNARY, acc:true}, +{input:"vec", tag:"mover", output:"\u2192", tex:null, ttype:UNARY, acc:true}, +{input:"dot", tag:"mover", output:".", tex:null, ttype:UNARY, acc:true}, +{input:"ddot", tag:"mover", output:"..", tex:null, ttype:UNARY, acc:true}, +{input:"ul", tag:"munder", output:"\u0332", tex:"underline", ttype:UNARY, acc:true}, +AMtext, AMmbox, AMquote, +{input:"bb", tag:"mstyle", atname:"fontweight", atval:"bold", output:"bb", tex:null, ttype:UNARY}, +{input:"mathbf", tag:"mstyle", atname:"fontweight", atval:"bold", output:"mathbf", tex:null, ttype:UNARY}, +{input:"sf", tag:"mstyle", atname:"fontfamily", atval:"sans-serif", output:"sf", tex:null, ttype:UNARY}, +{input:"mathsf", tag:"mstyle", atname:"fontfamily", atval:"sans-serif", output:"mathsf", tex:null, ttype:UNARY}, +{input:"bbb", tag:"mstyle", atname:"mathvariant", atval:"double-struck", output:"bbb", tex:null, ttype:UNARY, codes:AMbbb}, +{input:"mathbb", tag:"mstyle", atname:"mathvariant", atval:"double-struck", output:"mathbb", tex:null, ttype:UNARY, codes:AMbbb}, +{input:"cc", tag:"mstyle", atname:"mathvariant", atval:"script", output:"cc", tex:null, ttype:UNARY, codes:AMcal}, +{input:"mathcal", tag:"mstyle", atname:"mathvariant", atval:"script", output:"mathcal", tex:null, ttype:UNARY, codes:AMcal}, +{input:"tt", tag:"mstyle", atname:"fontfamily", atval:"monospace", output:"tt", tex:null, ttype:UNARY}, +{input:"mathtt", tag:"mstyle", atname:"fontfamily", atval:"monospace", output:"mathtt", tex:null, ttype:UNARY}, +{input:"fr", tag:"mstyle", atname:"mathvariant", atval:"fraktur", output:"fr", tex:null, ttype:UNARY, codes:AMfrk}, +{input:"mathfrak", tag:"mstyle", atname:"mathvariant", atval:"fraktur", output:"mathfrak", tex:null, ttype:UNARY, codes:AMfrk} +]; + +function compareNames(s1,s2) { + if (s1.input > s2.input) return 1 + else return -1; +} + +var AMnames = []; //list of input symbols + +function AMinitSymbols() { + var texsymbols = [], i; + for (i=0; i<AMsymbols.length; i++) + if (AMsymbols[i].tex) + texsymbols[texsymbols.length] = {input:AMsymbols[i].tex, + tag:AMsymbols[i].tag, output:AMsymbols[i].output, ttype:AMsymbols[i].ttype}; + AMsymbols = AMsymbols.concat(texsymbols); + AMsymbols.sort(compareNames); + for (i=0; i<AMsymbols.length; i++) AMnames[i] = AMsymbols[i].input; +} + +var AMmathml = "http://www.w3.org/1998/Math/MathML"; + +function AMcreateElementMathML(t) { + if (isIE) return document.createElement("m:"+t); + else return document.createElementNS(AMmathml,t); +} + +function AMcreateMmlNode(t,frag) { +// var node = AMcreateElementMathML(name); + if (isIE) var node = document.createElement("m:"+t); + else var node = document.createElementNS(AMmathml,t); + node.appendChild(frag); + return node; +} + +function newcommand(oldstr,newstr) { + AMsymbols = AMsymbols.concat([{input:oldstr, tag:"mo", output:newstr, + tex:null, ttype:DEFINITION}]); +} + +function AMremoveCharsAndBlanks(str,n) { +//remove n characters and any following blanks + var st; + if (str.charAt(n)=="\\" && str.charAt(n+1)!="\\" && str.charAt(n+1)!=" ") + st = str.slice(n+1); + else st = str.slice(n); + for (var i=0; i<st.length && st.charCodeAt(i)<=32; i=i+1); + return st.slice(i); +} + +function AMposition(arr, str, n) { +// return position >=n where str appears or would be inserted +// assumes arr is sorted + if (n==0) { + var h,m; + n = -1; + h = arr.length; + while (n+1<h) { + m = (n+h) >> 1; + if (arr[m]<str) n = m; else h = m; + } + return h; + } else + for (var i=n; i<arr.length && arr[i]<str; i++); + return i; // i=arr.length || arr[i]>=str +} + +function AMgetSymbol(str) { +//return maximal initial substring of str that appears in names +//return null if there is none + var k = 0; //new pos + var j = 0; //old pos + var mk; //match pos + var st; + var tagst; + var match = ""; + var more = true; + for (var i=1; i<=str.length && more; i++) { + st = str.slice(0,i); //initial substring of length i + j = k; + k = AMposition(AMnames, st, j); + if (k<AMnames.length && str.slice(0,AMnames[k].length)==AMnames[k]){ + match = AMnames[k]; + mk = k; + i = match.length; + } + more = k<AMnames.length && str.slice(0,AMnames[k].length)>=AMnames[k]; + } + AMpreviousSymbol=AMcurrentSymbol; + if (match!=""){ + AMcurrentSymbol=AMsymbols[mk].ttype; + return AMsymbols[mk]; + } +// if str[0] is a digit or - return maxsubstring of digits.digits + AMcurrentSymbol=CONST; + k = 1; + st = str.slice(0,1); + var integ = true; + while ("0"<=st && st<="9" && k<=str.length) { + st = str.slice(k,k+1); + k++; + } + if (st == decimalsign) { + st = str.slice(k,k+1); + if ("0"<=st && st<="9") { + integ = false; + k++; + while ("0"<=st && st<="9" && k<=str.length) { + st = str.slice(k,k+1); + k++; + } + } + } + if ((integ && k>1) || k>2) { + st = str.slice(0,k-1); + tagst = "mn"; + } else { + k = 2; + st = str.slice(0,1); //take 1 character + tagst = (("A">st || st>"Z") && ("a">st || st>"z")?"mo":"mi"); + } + if (st=="-" && AMpreviousSymbol==INFIX) { + AMcurrentSymbol = INFIX; //trick "/" into recognizing "-" on second parse + return {input:st, tag:tagst, output:st, ttype:UNARY, func:true}; + } + return {input:st, tag:tagst, output:st, ttype:CONST}; +} + +function AMremoveBrackets(node) { + var st; + if (node.nodeName=="mrow") { + st = node.firstChild.firstChild.nodeValue; + if (st=="(" || st=="[" || st=="{") node.removeChild(node.firstChild); + } + if (node.nodeName=="mrow") { + st = node.lastChild.firstChild.nodeValue; + if (st==")" || st=="]" || st=="}") node.removeChild(node.lastChild); + } +} + +/*Parsing ASCII math expressions with the following grammar +v ::= [A-Za-z] | greek letters | numbers | other constant symbols +u ::= sqrt | text | bb | other unary symbols for font commands +b ::= frac | root | stackrel binary symbols +l ::= ( | [ | { | (: | {: left brackets +r ::= ) | ] | } | :) | :} right brackets +S ::= v | lEr | uS | bSS Simple expression +I ::= S_S | S^S | S_S^S | S Intermediate expression +E ::= IE | I/I Expression +Each terminal symbol is translated into a corresponding mathml node.*/ + +var AMnestingDepth,AMpreviousSymbol,AMcurrentSymbol; + +function AMparseSexpr(str) { //parses str and returns [node,tailstr] + var symbol, node, result, i, st,// rightvert = false, + newFrag = document.createDocumentFragment(); + str = AMremoveCharsAndBlanks(str,0); + symbol = AMgetSymbol(str); //either a token or a bracket or empty + if (symbol == null || symbol.ttype == RIGHTBRACKET && AMnestingDepth > 0) { + return [null,str]; + } + if (symbol.ttype == DEFINITION) { + str = symbol.output+AMremoveCharsAndBlanks(str,symbol.input.length); + symbol = AMgetSymbol(str); + } + switch (symbol.ttype) { + case UNDEROVER: + case CONST: + str = AMremoveCharsAndBlanks(str,symbol.input.length); + return [AMcreateMmlNode(symbol.tag, //its a constant + document.createTextNode(symbol.output)),str]; + case LEFTBRACKET: //read (expr+) + AMnestingDepth++; + str = AMremoveCharsAndBlanks(str,symbol.input.length); + result = AMparseExpr(str,true); + AMnestingDepth--; + if (typeof symbol.invisible == "boolean" && symbol.invisible) + node = AMcreateMmlNode("mrow",result[0]); + else { + node = AMcreateMmlNode("mo",document.createTextNode(symbol.output)); + node = AMcreateMmlNode("mrow",node); + node.appendChild(result[0]); + } + return [node,result[1]]; + case TEXT: + if (symbol!=AMquote) str = AMremoveCharsAndBlanks(str,symbol.input.length); + if (str.charAt(0)=="{") i=str.indexOf("}"); + else if (str.charAt(0)=="(") i=str.indexOf(")"); + else if (str.charAt(0)=="[") i=str.indexOf("]"); + else if (symbol==AMquote) i=str.slice(1).indexOf("\"")+1; + else i = 0; + if (i==-1) i = str.length; + st = str.slice(1,i); + if (st.charAt(0) == " ") { + node = AMcreateElementMathML("mspace"); + node.setAttribute("width","1ex"); + newFrag.appendChild(node); + } + newFrag.appendChild( + AMcreateMmlNode(symbol.tag,document.createTextNode(st))); + if (st.charAt(st.length-1) == " ") { + node = AMcreateElementMathML("mspace"); + node.setAttribute("width","1ex"); + newFrag.appendChild(node); + } + str = AMremoveCharsAndBlanks(str,i+1); + return [AMcreateMmlNode("mrow",newFrag),str]; + case UNARY: + str = AMremoveCharsAndBlanks(str,symbol.input.length); + result = AMparseSexpr(str); + if (result[0]==null) return [AMcreateMmlNode(symbol.tag, + document.createTextNode(symbol.output)),str]; + if (typeof symbol.func == "boolean" && symbol.func) { // functions hack + st = str.charAt(0); + if (st=="^" || st=="_" || st=="/" || st=="|" || st==",") { + return [AMcreateMmlNode(symbol.tag, + document.createTextNode(symbol.output)),str]; + } else { + node = AMcreateMmlNode("mrow", + AMcreateMmlNode(symbol.tag,document.createTextNode(symbol.output))); + node.appendChild(result[0]); + return [node,result[1]]; + } + } + AMremoveBrackets(result[0]); + if (symbol.input == "sqrt") { // sqrt + return [AMcreateMmlNode(symbol.tag,result[0]),result[1]]; + } else if (typeof symbol.acc == "boolean" && symbol.acc) { // accent + node = AMcreateMmlNode(symbol.tag,result[0]); + node.appendChild(AMcreateMmlNode("mo",document.createTextNode(symbol.output))); + return [node,result[1]]; + } else { // font change command + if (!isIE && typeof symbol.codes != "undefined") { + for (i=0; i<result[0].childNodes.length; i++) + if (result[0].childNodes[i].nodeName=="mi" || result[0].nodeName=="mi") { + st = (result[0].nodeName=="mi"?result[0].firstChild.nodeValue: + result[0].childNodes[i].firstChild.nodeValue); + var newst = []; + for (var j=0; j<st.length; j++) + if (st.charCodeAt(j)>64 && st.charCodeAt(j)<91) newst = newst + + String.fromCharCode(symbol.codes[st.charCodeAt(j)-65]); + else newst = newst + st.charAt(j); + if (result[0].nodeName=="mi") + result[0]=AMcreateElementMathML("mo"). + appendChild(document.createTextNode(newst)); + else result[0].replaceChild(AMcreateElementMathML("mo"). + appendChild(document.createTextNode(newst)),result[0].childNodes[i]); + } + } + node = AMcreateMmlNode(symbol.tag,result[0]); + node.setAttribute(symbol.atname,symbol.atval); + return [node,result[1]]; + } + case BINARY: + str = AMremoveCharsAndBlanks(str,symbol.input.length); + result = AMparseSexpr(str); + if (result[0]==null) return [AMcreateMmlNode("mo", + document.createTextNode(symbol.input)),str]; + AMremoveBrackets(result[0]); + var result2 = AMparseSexpr(result[1]); + if (result2[0]==null) return [AMcreateMmlNode("mo", + document.createTextNode(symbol.input)),str]; + AMremoveBrackets(result2[0]); + if (symbol.input=="root" || symbol.input=="stackrel") + newFrag.appendChild(result2[0]); + newFrag.appendChild(result[0]); + if (symbol.input=="frac") newFrag.appendChild(result2[0]); + return [AMcreateMmlNode(symbol.tag,newFrag),result2[1]]; + case INFIX: + str = AMremoveCharsAndBlanks(str,symbol.input.length); + return [AMcreateMmlNode("mo",document.createTextNode(symbol.output)),str]; + case SPACE: + str = AMremoveCharsAndBlanks(str,symbol.input.length); + node = AMcreateElementMathML("mspace"); + node.setAttribute("width","1ex"); + newFrag.appendChild(node); + newFrag.appendChild( + AMcreateMmlNode(symbol.tag,document.createTextNode(symbol.output))); + node = AMcreateElementMathML("mspace"); + node.setAttribute("width","1ex"); + newFrag.appendChild(node); + return [AMcreateMmlNode("mrow",newFrag),str]; + case LEFTRIGHT: +// if (rightvert) return [null,str]; else rightvert = true; + AMnestingDepth++; + str = AMremoveCharsAndBlanks(str,symbol.input.length); + result = AMparseExpr(str,false); + AMnestingDepth--; + var st = ""; + if (result[0].lastChild!=null) + st = result[0].lastChild.firstChild.nodeValue; + if (st == "|") { // its an absolute value subterm + node = AMcreateMmlNode("mo",document.createTextNode(symbol.output)); + node = AMcreateMmlNode("mrow",node); + node.appendChild(result[0]); + return [node,result[1]]; + } else { // the "|" is a \mid + node = AMcreateMmlNode("mo",document.createTextNode(symbol.output)); + node = AMcreateMmlNode("mrow",node); + return [node,str]; + } + default: +//alert("default"); + str = AMremoveCharsAndBlanks(str,symbol.input.length); + return [AMcreateMmlNode(symbol.tag, //its a constant + document.createTextNode(symbol.output)),str]; + } +} + +function AMparseIexpr(str) { + var symbol, sym1, sym2, node, result, underover; + str = AMremoveCharsAndBlanks(str,0); + sym1 = AMgetSymbol(str); + result = AMparseSexpr(str); + node = result[0]; + str = result[1]; + symbol = AMgetSymbol(str); + if (symbol.ttype == INFIX && symbol.input != "/") { + str = AMremoveCharsAndBlanks(str,symbol.input.length); +// if (symbol.input == "/") result = AMparseIexpr(str); else ... + result = AMparseSexpr(str); + if (result[0] == null) // show box in place of missing argument + result[0] = AMcreateMmlNode("mo",document.createTextNode("\u25A1")); + else AMremoveBrackets(result[0]); + str = result[1]; +// if (symbol.input == "/") AMremoveBrackets(node); + if (symbol.input == "_") { + sym2 = AMgetSymbol(str); + underover = (sym1.ttype == UNDEROVER); + if (sym2.input == "^") { + str = AMremoveCharsAndBlanks(str,sym2.input.length); + var res2 = AMparseSexpr(str); + AMremoveBrackets(res2[0]); + str = res2[1]; + node = AMcreateMmlNode((underover?"munderover":"msubsup"),node); + node.appendChild(result[0]); + node.appendChild(res2[0]); + node = AMcreateMmlNode("mrow",node); // so sum does not stretch + } else { + node = AMcreateMmlNode((underover?"munder":"msub"),node); + node.appendChild(result[0]); + } + } else { + node = AMcreateMmlNode(symbol.tag,node); + node.appendChild(result[0]); + } + } + return [node,str]; +} + +function AMparseExpr(str,rightbracket) { + var symbol, node, result, i, nodeList = [], + newFrag = document.createDocumentFragment(); + do { + str = AMremoveCharsAndBlanks(str,0); + result = AMparseIexpr(str); + node = result[0]; + str = result[1]; + symbol = AMgetSymbol(str); + if (symbol.ttype == INFIX && symbol.input == "/") { + str = AMremoveCharsAndBlanks(str,symbol.input.length); + result = AMparseIexpr(str); + if (result[0] == null) // show box in place of missing argument + result[0] = AMcreateMmlNode("mo",document.createTextNode("\u25A1")); + else AMremoveBrackets(result[0]); + str = result[1]; + AMremoveBrackets(node); + node = AMcreateMmlNode(symbol.tag,node); + node.appendChild(result[0]); + newFrag.appendChild(node); + symbol = AMgetSymbol(str); + } + else if (node!=undefined) newFrag.appendChild(node); + } while ((symbol.ttype != RIGHTBRACKET && + (symbol.ttype != LEFTRIGHT || rightbracket) + || AMnestingDepth == 0) && symbol!=null && symbol.output!=""); + if (symbol.ttype == RIGHTBRACKET || symbol.ttype == LEFTRIGHT) { +// if (AMnestingDepth > 0) AMnestingDepth--; + var len = newFrag.childNodes.length; + if (len>0 && newFrag.childNodes[len-1].nodeName == "mrow" && len>1 && + newFrag.childNodes[len-2].nodeName == "mo" && + newFrag.childNodes[len-2].firstChild.nodeValue == ",") { //matrix + var right = newFrag.childNodes[len-1].lastChild.firstChild.nodeValue; + if (right==")" || right=="]") { + var left = newFrag.childNodes[len-1].firstChild.firstChild.nodeValue; + if (left=="(" && right==")" && symbol.output != "}" || + left=="[" && right=="]") { + var pos = []; // positions of commas + var matrix = true; + var m = newFrag.childNodes.length; + for (i=0; matrix && i<m; i=i+2) { + pos[i] = []; + node = newFrag.childNodes[i]; + if (matrix) matrix = node.nodeName=="mrow" && + (i==m-1 || node.nextSibling.nodeName=="mo" && + node.nextSibling.firstChild.nodeValue==",")&& + node.firstChild.firstChild.nodeValue==left && + node.lastChild.firstChild.nodeValue==right; + if (matrix) + for (var j=0; j<node.childNodes.length; j++) + if (node.childNodes[j].firstChild.nodeValue==",") + pos[i][pos[i].length]=j; + if (matrix && i>1) matrix = pos[i].length == pos[i-2].length; + } + if (matrix) { + var row, frag, n, k, table = document.createDocumentFragment(); + for (i=0; i<m; i=i+2) { + row = document.createDocumentFragment(); + frag = document.createDocumentFragment(); + node = newFrag.firstChild; // <mrow>(-,-,...,-,-)</mrow> + n = node.childNodes.length; + k = 0; + node.removeChild(node.firstChild); //remove ( + for (j=1; j<n-1; j++) { + if (typeof pos[i][k] != "undefined" && j==pos[i][k]){ + node.removeChild(node.firstChild); //remove , + row.appendChild(AMcreateMmlNode("mtd",frag)); + k++; + } else frag.appendChild(node.firstChild); + } + row.appendChild(AMcreateMmlNode("mtd",frag)); + if (newFrag.childNodes.length>2) { + newFrag.removeChild(newFrag.firstChild); //remove <mrow>)</mrow> + newFrag.removeChild(newFrag.firstChild); //remove <mo>,</mo> + } + table.appendChild(AMcreateMmlNode("mtr",row)); + } + node = AMcreateMmlNode("mtable",table); + if (typeof symbol.invisible == "boolean" && symbol.invisible) node.setAttribute("columnalign","left"); + newFrag.replaceChild(node,newFrag.firstChild); + } + } + } + } + str = AMremoveCharsAndBlanks(str,symbol.input.length); + if (typeof symbol.invisible != "boolean" || !symbol.invisible) { + node = AMcreateMmlNode("mo",document.createTextNode(symbol.output)); + newFrag.appendChild(node); + } + } + return [newFrag,str]; +} + +function AMparseMath(str) { + var result, node = AMcreateElementMathML("mstyle"); + if (mathcolor != "") node.setAttribute("mathcolor",mathcolor); + if (displaystyle) node.setAttribute("displaystyle","true"); + if (mathfontfamily != "") node.setAttribute("fontfamily",mathfontfamily); + AMnestingDepth = 0; + node.appendChild(AMparseExpr(str.replace(/^\s+/g,""),false)[0]); + node = AMcreateMmlNode("math",node); + if (showasciiformulaonhover) //fixed by djhsu so newline + node.setAttribute("title",str.replace(/\s+/g," "));//does not show in Gecko + if (mathfontfamily != "" && (isIE || mathfontfamily != "serif")) { + var fnode = AMcreateElementXHTML("font"); + fnode.setAttribute("face",mathfontfamily); + fnode.appendChild(node); + return fnode; + } + return node; +} + +function AMstrarr2docFrag(arr, linebreaks) { + var newFrag=document.createDocumentFragment(); + var expr = false; + for (var i=0; i<arr.length; i++) { + if (expr) newFrag.appendChild(AMparseMath(arr[i])); + else { + var arri = (linebreaks ? arr[i].split("\n\n") : [arr[i]]); + newFrag.appendChild(AMcreateElementXHTML("span"). + appendChild(document.createTextNode(arri[0]))); + for (var j=1; j<arri.length; j++) { + newFrag.appendChild(AMcreateElementXHTML("p")); + newFrag.appendChild(AMcreateElementXHTML("span"). + appendChild(document.createTextNode(arri[j]))); + } + } + expr = !expr; + } + return newFrag; +} + +function AMprocessNodeR(n, linebreaks) { + var mtch, str, arr, frg, i; + if (n.childNodes.length == 0) { + if ((n.nodeType!=8 || linebreaks) && + n.parentNode.nodeName!="form" && n.parentNode.nodeName!="FORM" && + n.parentNode.nodeName!="textarea" && n.parentNode.nodeName!="TEXTAREA" && + n.parentNode.nodeName!="code" && n.parentNode.nodeName!="CODE" && /* added by JM */ + n.parentNode.nodeName!="pre" && n.parentNode.nodeName!="PRE") { + str = n.nodeValue; + if (!(str == null)) { + str = str.replace(/\r\n\r\n/g,"\n\n"); + if (doubleblankmathdelimiter) { + str = str.replace(/\x20\x20\./g," "+AMdelimiter1+"."); + str = str.replace(/\x20\x20,/g," "+AMdelimiter1+","); + str = str.replace(/\x20\x20/g," "+AMdelimiter1+" "); + } + str = str.replace(/\x20+/g," "); + str = str.replace(/\s*\r\n/g," "); + mtch = false; + str = str.replace(new RegExp(AMescape2, "g"), + function(st){mtch=true;return "AMescape2"}); + str = str.replace(new RegExp(AMescape1, "g"), + function(st){mtch=true;return "AMescape1"}); + str = str.replace(new RegExp(AMdelimiter2regexp, "g"),AMdelimiter1); + arr = str.split(AMdelimiter1); + for (i=0; i<arr.length; i++) + arr[i]=arr[i].replace(/AMescape2/g,AMdelimiter2). + replace(/AMescape1/g,AMdelimiter1); + if (arr.length>1 || mtch) { + if (checkForMathML) { + checkForMathML = false; + var nd = AMisMathMLavailable(); + AMnoMathML = nd != null; + if (AMnoMathML && notifyIfNoMathML) + if (alertIfNoMathML) + alert("To view the ASCIIMathML notation use Internet Explorer 6 +\nMathPlayer (free from www.dessci.com)\n\ + or Firefox/Mozilla/Netscape"); + else AMbody.insertBefore(nd,AMbody.childNodes[0]); + } + if (!AMnoMathML) { + frg = AMstrarr2docFrag(arr,n.nodeType==8); + var len = frg.childNodes.length; + n.parentNode.replaceChild(frg,n); + return len-1; + } else return 0; + } + } + } else return 0; + } else if (n.nodeName!="math") { + for (i=0; i<n.childNodes.length; i++) + i += AMprocessNodeR(n.childNodes[i], linebreaks); + } + return 0; +} + +function AMprocessNode(n, linebreaks, spanclassAM) { + var frag,st; + if (spanclassAM!=null) { + frag = document.getElementsByTagName("span") + for (var i=0;i<frag.length;i++) + if (frag[i].className == "AM") + AMprocessNodeR(frag[i],linebreaks); + } else { + try { + st = n.innerHTML; + } catch(err) {} + if (st==null || + st.indexOf(AMdelimiter1)!=-1 || st.indexOf(AMdelimiter2)!=-1) + AMprocessNodeR(n,linebreaks); + } + if (isIE) { //needed to match size and font of formula to surrounding text + frag = document.getElementsByTagName('math'); + for (var i=0;i<frag.length;i++) frag[i].update() + } +} + +var AMbody; +var AMnoMathML = false, AMtranslated = false; + +function translate(spanclassAM) { + if (!AMtranslated) { // run this only once + AMtranslated = true; + AMinitSymbols(); + AMbody = document.getElementsByTagName("body")[0]; + AMprocessNode(AMbody, false, spanclassAM); + } +} + +if (isIE) { // avoid adding MathPlayer info explicitly to each webpage + document.write("<object id=\"mathplayer\"\ + classid=\"clsid:32F66A20-7614-11D4-BD11-00104BD3F987\"></object>"); + document.write("<?import namespace=\"m\" implementation=\"#mathplayer\"?>"); +} + +// GO1.1 Generic onload by Brothercake +// http://www.brothercake.com/ +//onload function (replaces the onload="translate()" in the <body> tag) +function generic() +{ + translate(); +}; +//setup onload function +if(typeof window.addEventListener != 'undefined') +{ + //.. gecko, safari, konqueror and standard + window.addEventListener('load', generic, false); +} +else if(typeof document.addEventListener != 'undefined') +{ + //.. opera 7 + document.addEventListener('load', generic, false); +} +else if(typeof window.attachEvent != 'undefined') +{ + //.. win/ie + window.attachEvent('onload', generic); +} +//** remove this condition to degrade older browsers +else +{ + //.. mac/ie5 and anything else that gets this far + //if there's an existing onload function + if(typeof window.onload == 'function') + { + //store it + var existing = onload; + //add new onload handler + window.onload = function() + { + //call existing onload function + existing(); + //call generic onload function + generic(); + }; + } + else + { + //setup onload function + window.onload = generic; + } +} diff --git a/src/Main.hs b/src/Main.hs new file mode 100644 index 000000000..ae9f61f7f --- /dev/null +++ b/src/Main.hs @@ -0,0 +1,512 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Main + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley@edu> + Stability : alpha + Portability : portable + +Parses command-line options and calls the appropriate readers and +writers. +-} +module Main where +import Text.Pandoc +import Text.Pandoc.UTF8 +import Text.Pandoc.Shared ( joinWithSep ) +import Text.Regex ( mkRegex, matchRegex ) +import System.Environment ( getArgs, getProgName, getEnvironment ) +import System.Exit ( exitWith, ExitCode (..) ) +import System.Console.GetOpt +import System.IO +import Data.Maybe ( fromMaybe ) +import Data.List ( isPrefixOf ) +import Data.Char ( toLower ) +import Control.Monad ( (>>=) ) + +copyrightMessage :: String +copyrightMessage = "\nCopyright (C) 2006-7 John MacFarlane\n\ + \Web: http://johnmacfarlane.net/pandoc\n\ + \This is free software; see the source for copying conditions. There is no\n\ + \warranty, not even for merchantability or fitness for a particular purpose." + +-- | Association list of formats and readers. +readers :: [(String, ParserState -> String -> Pandoc)] +readers = [("native" , readPandoc) + ,("markdown" , readMarkdown) + ,("rst" , readRST) + ,("html" , readHtml) + ,("latex" , readLaTeX) + ] + +-- | Reader for native Pandoc format. +readPandoc :: ParserState -> String -> Pandoc +readPandoc state input = read input + +-- | Association list of formats and pairs of writers and default headers. +writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ] +writers = [("native" , (writeDoc, "")) + ,("html" , (writeHtmlString, "")) + ,("s5" , (writeS5String, defaultS5Header)) + ,("docbook" , (writeDocbook, defaultDocbookHeader)) + ,("latex" , (writeLaTeX, defaultLaTeXHeader)) + ,("context" , (writeConTeXt, defaultConTeXtHeader)) + ,("man" , (writeMan, "")) + ,("markdown" , (writeMarkdown, "")) + ,("rst" , (writeRST, "")) + ,("rtf" , (writeRTF, defaultRTFHeader)) + ] + +-- | Writer for Pandoc native format. +writeDoc :: WriterOptions -> Pandoc -> String +writeDoc options = prettyPandoc + +-- | Data structure for command line options. +data Opt = Opt + { optPreserveTabs :: Bool -- ^ Convert tabs to spaces + , optTabStop :: Int -- ^ Number of spaces per tab + , optStandalone :: Bool -- ^ Include header, footer + , optReader :: String -- ^ Reader format + , optWriter :: String -- ^ Writer format + , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX + , optCSS :: String -- ^ CSS file to link to + , optTableOfContents :: Bool -- ^ Include table of contents + , optIncludeInHeader :: String -- ^ File to include in header + , optIncludeBeforeBody :: String -- ^ File to include at top of body + , optIncludeAfterBody :: String -- ^ File to include at end of body + , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" + , optTitlePrefix :: String -- ^ Optional prefix for HTML title + , optOutputFile :: String -- ^ Name of output file + , optNumberSections :: Bool -- ^ Number sections in LaTeX + , optIncremental :: Bool -- ^ Use incremental lists in S5 + , optSmart :: Bool -- ^ Use smart typography + , optUseASCIIMathML :: Bool -- ^ Use ASCIIMathML + , optASCIIMathMLURL :: Maybe String -- ^ URL to ASCIIMathML.js + , optDumpArgs :: Bool -- ^ Output command-line arguments + , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments + , optStrict :: Bool -- ^ Use strict markdown syntax + , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , optWrapText :: Bool -- ^ Wrap text + } + +-- | Defaults for command-line options. +defaultOpts :: Opt +defaultOpts = Opt + { optPreserveTabs = False + , optTabStop = 4 + , optStandalone = False + , optReader = "" -- null for default reader + , optWriter = "" -- null for default writer + , optParseRaw = False + , optCSS = "" + , optTableOfContents = False + , optIncludeInHeader = "" + , optIncludeBeforeBody = "" + , optIncludeAfterBody = "" + , optCustomHeader = "DEFAULT" + , optTitlePrefix = "" + , optOutputFile = "-" -- "-" means stdout + , optNumberSections = False + , optIncremental = False + , optSmart = False + , optUseASCIIMathML = False + , optASCIIMathMLURL = Nothing + , optDumpArgs = False + , optIgnoreArgs = False + , optStrict = False + , optReferenceLinks = False + , optWrapText = True + } + +-- | A list of functions, each transforming the options data structure +-- in response to a command-line option. +options :: [OptDescr (Opt -> IO Opt)] +options = + [ Option "fr" ["from","read"] + (ReqArg + (\arg opt -> return opt { optReader = map toLower arg }) + "FORMAT") + "" -- ("(" ++ (joinWithSep ", " $ map fst readers) ++ ")") + + , Option "tw" ["to","write"] + (ReqArg + (\arg opt -> return opt { optWriter = map toLower arg }) + "FORMAT") + "" -- ("(" ++ (joinWithSep ", " $ map fst writers) ++ ")") + + , Option "s" ["standalone"] + (NoArg + (\opt -> return opt { optStandalone = True })) + "" -- "Include needed header and footer on output" + + , Option "o" ["output"] + (ReqArg + (\arg opt -> return opt { optOutputFile = arg }) + "FILENAME") + "" -- "Name of output file" + + , Option "p" ["preserve-tabs"] + (NoArg + (\opt -> return opt { optPreserveTabs = True })) + "" -- "Preserve tabs instead of converting to spaces" + + , Option "" ["tab-stop"] + (ReqArg + (\arg opt -> return opt { optTabStop = (read arg) } ) + "TABSTOP") + "" -- "Tab stop (default 4)" + + , Option "" ["strict"] + (NoArg + (\opt -> return opt { optStrict = True } )) + "" -- "Disable markdown syntax extensions" + + , Option "" ["reference-links"] + (NoArg + (\opt -> return opt { optReferenceLinks = True } )) + "" -- "Use reference links in parsing HTML" + + , Option "R" ["parse-raw"] + (NoArg + (\opt -> return opt { optParseRaw = True })) + "" -- "Parse untranslatable HTML codes and LaTeX environments as raw" + + , Option "S" ["smart"] + (NoArg + (\opt -> return opt { optSmart = True })) + "" -- "Use smart quotes, dashes, and ellipses" + + , Option "m" ["asciimathml"] + (OptArg + (\arg opt -> return opt { optUseASCIIMathML = True, + optASCIIMathMLURL = arg, + optStandalone = True }) + "URL") + "" -- "Use ASCIIMathML script in html output" + + , Option "i" ["incremental"] + (NoArg + (\opt -> return opt { optIncremental = True })) + "" -- "Make list items display incrementally in S5" + + , Option "N" ["number-sections"] + (NoArg + (\opt -> return opt { optNumberSections = True })) + "" -- "Number sections in LaTeX" + + , Option "" ["no-wrap"] + (NoArg + (\opt -> return opt { optWrapText = False })) + "" -- "Do not wrap text in output" + + , Option "" ["toc", "table-of-contents"] + (NoArg + (\opt -> return opt { optTableOfContents = True })) + "" -- "Include table of contents" + + , Option "c" ["css"] + (ReqArg + (\arg opt -> return opt { optCSS = arg, + optStandalone = True }) + "CSS") + "" -- "Link to CSS style sheet" + + , Option "H" ["include-in-header"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt { optIncludeInHeader = fromUTF8 text, + optStandalone = True }) + "FILENAME") + "" -- "File to include at end of header (implies -s)" + + , Option "B" ["include-before-body"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt { optIncludeBeforeBody = fromUTF8 text }) + "FILENAME") + "" -- "File to include before document body" + + , Option "A" ["include-after-body"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt { optIncludeAfterBody = fromUTF8 text }) + "FILENAME") + "" -- "File to include after document body" + + , Option "C" ["custom-header"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt { optCustomHeader = fromUTF8 text, + optStandalone = True }) + "FILENAME") + "" -- "File to use for custom header (implies -s)" + + , Option "T" ["title-prefix"] + (ReqArg + (\arg opt -> return opt { optTitlePrefix = arg, + optStandalone = True }) + "STRING") + "" -- "String to prefix to HTML window title" + + , Option "D" ["print-default-header"] + (ReqArg + (\arg opt -> do + let header = case (lookup arg writers) of + Just (writer, head) -> head + Nothing -> error ("Unknown reader: " ++ arg) + hPutStr stdout header + exitWith ExitSuccess) + "FORMAT") + "" -- "Print default header for FORMAT" + + , Option "" ["dump-args"] + (NoArg + (\opt -> return opt { optDumpArgs = True })) + "" -- "Print output filename and arguments to stdout." + + , Option "" ["ignore-args"] + (NoArg + (\opt -> return opt { optIgnoreArgs = True })) + "" -- "Ignore command-line arguments." + + , Option "v" ["version"] + (NoArg + (\_ -> do + prg <- getProgName + hPutStrLn stderr (prg ++ " " ++ pandocVersion ++ + copyrightMessage) + exitWith $ ExitFailure 4)) + "" -- "Print version" + + , Option "h" ["help"] + (NoArg + (\_ -> do + prg <- getProgName + hPutStr stderr (usageMessage prg options) + exitWith $ ExitFailure 2)) + "" -- "Show help" + ] + +-- Returns usage message +usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String +usageMessage programName options = usageInfo + (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ + (joinWithSep ", " $ map fst readers) ++ "\nOutput formats: " ++ + (joinWithSep ", " $ map fst writers) ++ "\nOptions:") + options + +-- Determine default reader based on source file extensions +defaultReaderName :: [String] -> String +defaultReaderName [] = "markdown" +defaultReaderName (x:xs) = + let x' = map toLower x in + case (matchRegex (mkRegex ".*\\.(.*)") x') of + Nothing -> defaultReaderName xs -- no extension + Just ["xhtml"] -> "html" + Just ["html"] -> "html" + Just ["htm"] -> "html" + Just ["tex"] -> "latex" + Just ["latex"] -> "latex" + Just ["ltx"] -> "latex" + Just ["rst"] -> "rst" + Just ["native"] -> "native" + Just _ -> "markdown" + +-- Determine default writer based on output file extension +defaultWriterName :: String -> String +defaultWriterName "-" = "html" -- no output file +defaultWriterName x = + let x' = map toLower x in + case (matchRegex (mkRegex ".*\\.(.*)") x') of + Nothing -> "markdown" -- no extension + Just [""] -> "markdown" -- empty extension + Just ["tex"] -> "latex" + Just ["latex"] -> "latex" + Just ["ltx"] -> "latex" + Just ["context"] -> "context" + Just ["ctx"] -> "context" + Just ["rtf"] -> "rtf" + Just ["rst"] -> "rst" + Just ["s5"] -> "s5" + Just ["native"] -> "native" + Just ["txt"] -> "markdown" + Just ["text"] -> "markdown" + Just ["md"] -> "markdown" + Just ["markdown"] -> "markdown" + Just ["db"] -> "docbook" + Just ["xml"] -> "docbook" + Just ["sgml"] -> "docbook" + Just [[x]] | x `elem` ['1'..'9'] -> "man" + Just _ -> "html" + +main = do + + rawArgs <- getArgs + prg <- getProgName + let compatMode = (prg == "hsmarkdown") + + let (actions, args, errors) = if compatMode + then ([], rawArgs, []) + else getOpt Permute options rawArgs + + if (not (null errors)) + then do + name <- getProgName + mapM (\e -> hPutStrLn stderr e) errors + hPutStr stderr (usageMessage name options) + exitWith $ ExitFailure 2 + else + return () + + let defaultOpts' = if compatMode + then defaultOpts { optReader = "markdown" + , optWriter = "html" + , optStrict = True } + else defaultOpts + + -- thread option data structure through all supplied option actions + opts <- foldl (>>=) (return defaultOpts') actions + + let Opt { optPreserveTabs = preserveTabs + , optTabStop = tabStop + , optStandalone = standalone + , optReader = readerName + , optWriter = writerName + , optParseRaw = parseRaw + , optCSS = css + , optTableOfContents = toc + , optIncludeInHeader = includeHeader + , optIncludeBeforeBody = includeBefore + , optIncludeAfterBody = includeAfter + , optCustomHeader = customHeader + , optTitlePrefix = titlePrefix + , optOutputFile = outputFile + , optNumberSections = numberSections + , optIncremental = incremental + , optSmart = smart + , optUseASCIIMathML = useAsciiMathML + , optASCIIMathMLURL = asciiMathMLURL + , optDumpArgs = dumpArgs + , optIgnoreArgs = ignoreArgs + , optStrict = strict + , optReferenceLinks = referenceLinks + , optWrapText = wrap + } = opts + + if dumpArgs + then do + hPutStrLn stdout outputFile + mapM (\arg -> hPutStrLn stdout arg) args + exitWith $ ExitSuccess + else return () + + let sources = if ignoreArgs then [] else args + + -- assign reader and writer based on options and filenames + let readerName' = if null readerName + then defaultReaderName sources + else readerName + + let writerName' = if null writerName + then defaultWriterName outputFile + else writerName + + reader <- case (lookup readerName' readers) of + Just r -> return r + Nothing -> error ("Unknown reader: " ++ readerName') + + (writer, defaultHeader) <- case (lookup writerName' writers) of + Just (w,h) -> return (w, h) + Nothing -> error ("Unknown writer: " ++ writerName') + + output <- if (outputFile == "-") + then return stdout + else openFile outputFile WriteMode + + environment <- getEnvironment + let columns = case lookup "COLUMNS" environment of + Just cols -> read cols + Nothing -> stateColumns defaultParserState + + let tabFilter _ [] = "" + tabFilter _ ('\n':xs) = '\n':(tabFilter tabStop xs) + -- remove DOS line endings + tabFilter _ ('\r':'\n':xs) = '\n':(tabFilter tabStop xs) + tabFilter _ ('\r':xs) = '\n':(tabFilter tabStop xs) + tabFilter spsToNextStop ('\t':xs) = + if preserveTabs + then '\t':(tabFilter tabStop xs) + else replicate spsToNextStop ' ' ++ tabFilter tabStop xs + tabFilter 1 (x:xs) = + x:(tabFilter tabStop xs) + tabFilter spsToNextStop (x:xs) = + x:(tabFilter (spsToNextStop - 1) xs) + + let startParserState = + defaultParserState { stateParseRaw = parseRaw, + stateTabStop = tabStop, + stateStandalone = standalone && (not strict), + stateSmart = smart || writerName' `elem` + ["latex", "context"], + stateColumns = columns, + stateStrict = strict } + let csslink = if (css == "") + then "" + else "<link rel=\"stylesheet\" href=\"" ++ css ++ + "\" type=\"text/css\" media=\"all\" />\n" + let header = (if (customHeader == "DEFAULT") + then defaultHeader + else customHeader) ++ csslink ++ includeHeader + let writerOptions = WriterOptions { writerStandalone = standalone && + (not strict), + writerHeader = header, + writerTitlePrefix = titlePrefix, + writerTabStop = tabStop, + writerTableOfContents = toc && + (not strict) && + writerName/="s5", + writerUseASCIIMathML = useAsciiMathML, + writerASCIIMathMLURL = asciiMathMLURL, + writerS5 = (writerName=="s5"), + writerIgnoreNotes = False, + writerIncremental = incremental, + writerNumberSections = numberSections, + writerIncludeBefore = includeBefore, + writerIncludeAfter = includeAfter, + writerStrictMarkdown = strict, + writerReferenceLinks = referenceLinks, + writerWrapText = wrap } + + (readSources sources) >>= (hPutStrLn output . toUTF8 . + (writer writerOptions) . + (reader startParserState) . tabFilter tabStop . + fromUTF8 . (joinWithSep "\n")) >> + hClose output + + where + readSources [] = mapM readSource ["-"] + readSources sources = mapM readSource sources + readSource "-" = getContents + readSource source = readFile source diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs new file mode 100644 index 000000000..7633bf7ef --- /dev/null +++ b/src/Text/Pandoc.hs @@ -0,0 +1,110 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +This helper module exports the main writers, readers, and data +structure definitions from the Pandoc libraries. + +A typical application will chain together a reader and a writer +to convert strings from one format to another. For example, the +following simple program will act as a filter converting markdown +fragments to reStructuredText, using reference-style links instead of +inline links: + +> module Main where +> import Text.Pandoc +> +> markdownToRST :: String -> String +> markdownToRST = toUTF8 . +> (writeRST defaultWriterOptions {writerReferenceLinks = True}) . +> (readMarkdown defaultParserState) . fromUTF8 +> +> main = interact markdownToRST + +-} + +module Text.Pandoc + ( + -- * Definitions + module Text.Pandoc.Definition + -- * Readers: converting /to/ Pandoc format + , readMarkdown + , readRST + , readLaTeX + , readHtml + -- * Parser state used in readers + , ParserState (..) + , defaultParserState + , ParserContext (..) + , QuoteContext (..) + , KeyTable + , NoteTable + , HeaderType (..) + -- * Writers: converting /from/ Pandoc format + , writeMarkdown + , writeRST + , writeLaTeX + , writeConTeXt + , writeHtml + , writeHtmlString + , writeS5 + , writeS5String + , writeDocbook + , writeMan + , writeRTF + , prettyPandoc + -- * Writer options used in writers + , WriterOptions (..) + , defaultWriterOptions + -- * Default headers for various output formats + , module Text.Pandoc.Writers.DefaultHeaders + -- * Functions for converting to and from UTF-8 + , module Text.Pandoc.UTF8 + -- * Version + , pandocVersion + ) where + +import Text.Pandoc.Definition +import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Readers.RST +import Text.Pandoc.Readers.LaTeX +import Text.Pandoc.Readers.HTML +import Text.Pandoc.Writers.Markdown +import Text.Pandoc.Writers.RST +import Text.Pandoc.Writers.LaTeX +import Text.Pandoc.Writers.ConTeXt +import Text.Pandoc.Writers.HTML +import Text.Pandoc.Writers.S5 +import Text.Pandoc.Writers.Docbook +import Text.Pandoc.Writers.Man +import Text.Pandoc.Writers.RTF +import Text.Pandoc.Writers.DefaultHeaders +import Text.Pandoc.UTF8 +import Text.Pandoc.Shared + +-- | Version number of pandoc library. +pandocVersion :: String +pandocVersion = "0.45" diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs new file mode 100644 index 000000000..cfc22cb3e --- /dev/null +++ b/src/Text/Pandoc/Blocks.hs @@ -0,0 +1,145 @@ +{- +Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Blocks + Copyright : Copyright (C) 2007 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions for the manipulation of fixed-width blocks of text. +These are used in the construction of plain-text tables. +-} + +module Text.Pandoc.Blocks + ( + TextBlock (..), + docToBlock, + blockToDoc, + widthOfBlock, + heightOfBlock, + hcatBlocks, + hsepBlocks, + centerAlignBlock, + leftAlignBlock, + rightAlignBlock + ) +where +import Text.PrettyPrint +import Data.List ( intersperse ) + +-- | A fixed-width block of text. Parameters are width of block, +-- height of block, and list of lines. +data TextBlock = TextBlock Int Int [String] +instance Show TextBlock where + show x = show $ blockToDoc x + +-- | Break lines in a list of lines so that none are greater than +-- a given width. +breakLines :: Int -- ^ Maximum length of lines. + -> [String] -- ^ List of lines. + -> [String] +breakLines width [] = [] +breakLines width (l:ls) = + if length l > width + then (take width l):(breakLines width ((drop width l):ls)) + else l:(breakLines width ls) + +-- | Convert a @Doc@ element into a @TextBlock@ with a specified width. +docToBlock :: Int -- ^ Width of text block. + -> Doc -- ^ @Doc@ to convert. + -> TextBlock +docToBlock width doc = + let rendered = renderStyle (style {lineLength = width, + ribbonsPerLine = 1}) doc + lns = breakLines width $ lines rendered + in TextBlock width (length lns) lns + +-- | Convert a @TextBlock@ to a @Doc@ element. +blockToDoc :: TextBlock -> Doc +blockToDoc (TextBlock _ _ lns) = + if null lns + then empty + else vcat $ map text lns + +-- | Returns width of a @TextBlock@ (number of columns). +widthOfBlock :: TextBlock -> Int +widthOfBlock (TextBlock width _ _) = width + +-- | Returns height of a @TextBlock@ (number of rows). +heightOfBlock :: TextBlock -> Int +heightOfBlock (TextBlock _ height _) = height + +-- | Pads a string out to a given width using spaces. +hPad :: Int -- ^ Desired width. + -> String -- ^ String to pad. + -> String +hPad width line = + let lineLength = length line + in if lineLength <= width + then line ++ replicate (width - lineLength) ' ' + else take width line + +-- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in +-- which they appear side by side. +hcatBlocks :: [TextBlock] -> TextBlock +hcatBlocks [] = TextBlock 0 0 [] +hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd. +hcatBlocks ((TextBlock width1 height1 lns1):xs) = + let (TextBlock width2 height2 lns2) = hcatBlocks xs + height = max height1 height2 + width = width1 + width2 + lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) "" + lns2' = lns2 ++ replicate (height - height2) "" + lns = zipWith (++) lns1' lns2' + in TextBlock width height lns + +-- | Like @hcatBlocks@, but inserts space between the @TextBlock@s. +hsepBlocks :: [TextBlock] -> TextBlock +hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "])) + +isWhitespace x = x `elem` " \t" + +-- | Left-aligns the contents of a @TextBlock@ within the block. +leftAlignBlock :: TextBlock -> TextBlock +leftAlignBlock (TextBlock width height lns) = + TextBlock width height $ map (dropWhile isWhitespace) lns + +-- | Right-aligns the contents of a @TextBlock@ within the block. +rightAlignBlock :: TextBlock -> TextBlock +rightAlignBlock (TextBlock width height lns) = + let rightAlignLine ln = + let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln + in reverse (rest ++ spaces) + in TextBlock width height $ map rightAlignLine lns + +-- | Centers the contents of a @TextBlock@ within the block. +centerAlignBlock :: TextBlock -> TextBlock +centerAlignBlock (TextBlock width height lns) = + let centerAlignLine ln = + let ln' = hPad width ln + (startSpaces, rest) = span isWhitespace ln' + endSpaces = takeWhile isWhitespace (reverse ln') + numSpaces = length (startSpaces ++ endSpaces) + startSpaces' = replicate (quot numSpaces 2) ' ' + in startSpaces' ++ rest + in TextBlock width height $ map centerAlignLine lns + diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs new file mode 100644 index 000000000..466f5d8f4 --- /dev/null +++ b/src/Text/Pandoc/CharacterReferences.hs @@ -0,0 +1,327 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.CharacterReferences + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions for parsing character references. +-} +module Text.Pandoc.CharacterReferences ( + characterReference, + decodeCharacterReferences, + ) where +import Data.Char ( chr ) +import Text.ParserCombinators.Parsec +import qualified Data.Map as Map + +-- | Parse character entity. +characterReference :: GenParser Char st Char +characterReference = try $ do + st <- char '&' + character <- numRef <|> entity + end <- char ';' + return character + +numRef :: GenParser Char st Char +numRef = do + char '#' + num <- hexNum <|> decNum + return $ chr $ num + +hexNum :: GenParser Char st Int +hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . ("0x" ++) + +decNum :: GenParser Char st Int +decNum = many1 digit >>= return . read + +entity :: GenParser Char st Char +entity = do + body <- many1 alphaNum + return $ Map.findWithDefault '?' body entityTable + +-- | Convert entities in a string to characters. +decodeCharacterReferences :: String -> String +decodeCharacterReferences str = + case parse (many (characterReference <|> anyChar)) str str of + Left err -> error $ "\nError: " ++ show err + Right result -> result + +entityTable :: Map.Map String Char +entityTable = Map.fromList entityTableList + +entityTableList :: [(String, Char)] +entityTableList = [ + ("quot", chr 34), + ("amp", chr 38), + ("lt", chr 60), + ("gt", chr 62), + ("nbsp", chr 160), + ("iexcl", chr 161), + ("cent", chr 162), + ("pound", chr 163), + ("curren", chr 164), + ("yen", chr 165), + ("brvbar", chr 166), + ("sect", chr 167), + ("uml", chr 168), + ("copy", chr 169), + ("ordf", chr 170), + ("laquo", chr 171), + ("not", chr 172), + ("shy", chr 173), + ("reg", chr 174), + ("macr", chr 175), + ("deg", chr 176), + ("plusmn", chr 177), + ("sup2", chr 178), + ("sup3", chr 179), + ("acute", chr 180), + ("micro", chr 181), + ("para", chr 182), + ("middot", chr 183), + ("cedil", chr 184), + ("sup1", chr 185), + ("ordm", chr 186), + ("raquo", chr 187), + ("frac14", chr 188), + ("frac12", chr 189), + ("frac34", chr 190), + ("iquest", chr 191), + ("Agrave", chr 192), + ("Aacute", chr 193), + ("Acirc", chr 194), + ("Atilde", chr 195), + ("Auml", chr 196), + ("Aring", chr 197), + ("AElig", chr 198), + ("Ccedil", chr 199), + ("Egrave", chr 200), + ("Eacute", chr 201), + ("Ecirc", chr 202), + ("Euml", chr 203), + ("Igrave", chr 204), + ("Iacute", chr 205), + ("Icirc", chr 206), + ("Iuml", chr 207), + ("ETH", chr 208), + ("Ntilde", chr 209), + ("Ograve", chr 210), + ("Oacute", chr 211), + ("Ocirc", chr 212), + ("Otilde", chr 213), + ("Ouml", chr 214), + ("times", chr 215), + ("Oslash", chr 216), + ("Ugrave", chr 217), + ("Uacute", chr 218), + ("Ucirc", chr 219), + ("Uuml", chr 220), + ("Yacute", chr 221), + ("THORN", chr 222), + ("szlig", chr 223), + ("agrave", chr 224), + ("aacute", chr 225), + ("acirc", chr 226), + ("atilde", chr 227), + ("auml", chr 228), + ("aring", chr 229), + ("aelig", chr 230), + ("ccedil", chr 231), + ("egrave", chr 232), + ("eacute", chr 233), + ("ecirc", chr 234), + ("euml", chr 235), + ("igrave", chr 236), + ("iacute", chr 237), + ("icirc", chr 238), + ("iuml", chr 239), + ("eth", chr 240), + ("ntilde", chr 241), + ("ograve", chr 242), + ("oacute", chr 243), + ("ocirc", chr 244), + ("otilde", chr 245), + ("ouml", chr 246), + ("divide", chr 247), + ("oslash", chr 248), + ("ugrave", chr 249), + ("uacute", chr 250), + ("ucirc", chr 251), + ("uuml", chr 252), + ("yacute", chr 253), + ("thorn", chr 254), + ("yuml", chr 255), + ("OElig", chr 338), + ("oelig", chr 339), + ("Scaron", chr 352), + ("scaron", chr 353), + ("Yuml", chr 376), + ("fnof", chr 402), + ("circ", chr 710), + ("tilde", chr 732), + ("Alpha", chr 913), + ("Beta", chr 914), + ("Gamma", chr 915), + ("Delta", chr 916), + ("Epsilon", chr 917), + ("Zeta", chr 918), + ("Eta", chr 919), + ("Theta", chr 920), + ("Iota", chr 921), + ("Kappa", chr 922), + ("Lambda", chr 923), + ("Mu", chr 924), + ("Nu", chr 925), + ("Xi", chr 926), + ("Omicron", chr 927), + ("Pi", chr 928), + ("Rho", chr 929), + ("Sigma", chr 931), + ("Tau", chr 932), + ("Upsilon", chr 933), + ("Phi", chr 934), + ("Chi", chr 935), + ("Psi", chr 936), + ("Omega", chr 937), + ("alpha", chr 945), + ("beta", chr 946), + ("gamma", chr 947), + ("delta", chr 948), + ("epsilon", chr 949), + ("zeta", chr 950), + ("eta", chr 951), + ("theta", chr 952), + ("iota", chr 953), + ("kappa", chr 954), + ("lambda", chr 955), + ("mu", chr 956), + ("nu", chr 957), + ("xi", chr 958), + ("omicron", chr 959), + ("pi", chr 960), + ("rho", chr 961), + ("sigmaf", chr 962), + ("sigma", chr 963), + ("tau", chr 964), + ("upsilon", chr 965), + ("phi", chr 966), + ("chi", chr 967), + ("psi", chr 968), + ("omega", chr 969), + ("thetasym", chr 977), + ("upsih", chr 978), + ("piv", chr 982), + ("ensp", chr 8194), + ("emsp", chr 8195), + ("thinsp", chr 8201), + ("zwnj", chr 8204), + ("zwj", chr 8205), + ("lrm", chr 8206), + ("rlm", chr 8207), + ("ndash", chr 8211), + ("mdash", chr 8212), + ("lsquo", chr 8216), + ("rsquo", chr 8217), + ("sbquo", chr 8218), + ("ldquo", chr 8220), + ("rdquo", chr 8221), + ("bdquo", chr 8222), + ("dagger", chr 8224), + ("Dagger", chr 8225), + ("bull", chr 8226), + ("hellip", chr 8230), + ("permil", chr 8240), + ("prime", chr 8242), + ("Prime", chr 8243), + ("lsaquo", chr 8249), + ("rsaquo", chr 8250), + ("oline", chr 8254), + ("frasl", chr 8260), + ("euro", chr 8364), + ("image", chr 8465), + ("weierp", chr 8472), + ("real", chr 8476), + ("trade", chr 8482), + ("alefsym", chr 8501), + ("larr", chr 8592), + ("uarr", chr 8593), + ("rarr", chr 8594), + ("darr", chr 8595), + ("harr", chr 8596), + ("crarr", chr 8629), + ("lArr", chr 8656), + ("uArr", chr 8657), + ("rArr", chr 8658), + ("dArr", chr 8659), + ("hArr", chr 8660), + ("forall", chr 8704), + ("part", chr 8706), + ("exist", chr 8707), + ("empty", chr 8709), + ("nabla", chr 8711), + ("isin", chr 8712), + ("notin", chr 8713), + ("ni", chr 8715), + ("prod", chr 8719), + ("sum", chr 8721), + ("minus", chr 8722), + ("lowast", chr 8727), + ("radic", chr 8730), + ("prop", chr 8733), + ("infin", chr 8734), + ("ang", chr 8736), + ("and", chr 8743), + ("or", chr 8744), + ("cap", chr 8745), + ("cup", chr 8746), + ("int", chr 8747), + ("there4", chr 8756), + ("sim", chr 8764), + ("cong", chr 8773), + ("asymp", chr 8776), + ("ne", chr 8800), + ("equiv", chr 8801), + ("le", chr 8804), + ("ge", chr 8805), + ("sub", chr 8834), + ("sup", chr 8835), + ("nsub", chr 8836), + ("sube", chr 8838), + ("supe", chr 8839), + ("oplus", chr 8853), + ("otimes", chr 8855), + ("perp", chr 8869), + ("sdot", chr 8901), + ("lceil", chr 8968), + ("rceil", chr 8969), + ("lfloor", chr 8970), + ("rfloor", chr 8971), + ("lang", chr 9001), + ("rang", chr 9002), + ("loz", chr 9674), + ("spades", chr 9824), + ("clubs", chr 9827), + ("hearts", chr 9829), + ("diams", chr 9830) + ] diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs new file mode 100644 index 000000000..7d1125c5a --- /dev/null +++ b/src/Text/Pandoc/Definition.hs @@ -0,0 +1,116 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Definition + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Definition of 'Pandoc' data structure for format-neutral representation +of documents. +-} +module Text.Pandoc.Definition where + +data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show) + +-- | Bibliographic information for the document: title (list of 'Inline'), +-- authors (list of strings), date (string). +data Meta = Meta [Inline] -- title + [String] -- authors + String -- date + deriving (Eq, Show, Read) + +-- | Alignment of a table column. +data Alignment = AlignLeft + | AlignRight + | AlignCenter + | AlignDefault deriving (Eq, Show, Read) + +-- | List attributes. +type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) + +-- | Style of list numbers. +data ListNumberStyle = DefaultStyle + | Decimal + | LowerRoman + | UpperRoman + | LowerAlpha + | UpperAlpha deriving (Eq, Show, Read) + +-- | Delimiter of list numbers. +data ListNumberDelim = DefaultDelim + | Period + | OneParen + | TwoParens deriving (Eq, Show, Read) + +-- | Block element. +data Block + = Plain [Inline] -- ^ Plain text, not a paragraph + | Para [Inline] -- ^ Paragraph + | CodeBlock String -- ^ Code block (literal) + | RawHtml String -- ^ Raw HTML block (literal) + | BlockQuote [Block] -- ^ Block quote (list of blocks) + | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes + -- and a list of items, each a list of blocks) + | BulletList [[Block]] -- ^ Bullet list (list of items, each + -- a list of blocks) + | DefinitionList [([Inline],[Block])] -- ^ Definition list + -- (list of items, each a pair of an inline list, + -- the term, and a block list) + | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) + | HorizontalRule -- ^ Horizontal rule + | Table [Inline] [Alignment] [Float] [[Block]] [[[Block]]] -- ^ Table, + -- with caption, column alignments, + -- relative column widths, column headers + -- (each a list of blocks), and rows + -- (each a list of lists of blocks) + | Null -- ^ Nothing + deriving (Eq, Read, Show) + +-- | Type of quotation marks to use in Quoted inline. +data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read) + +type Target = (String, String) -- ^ Link target (URL, title) + +-- | Inline elements. +data Inline + = Str String -- ^ Text (string) + | Emph [Inline] -- ^ Emphasized text (list of inlines) + | Strong [Inline] -- ^ Strongly emphasized text (list of inlines) + | Strikeout [Inline] -- ^ Strikeout text (list of inlines) + | Superscript [Inline] -- ^ Superscripted text (list of inlines) + | Subscript [Inline] -- ^ Subscripted text (list of inlines) + | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines) + | Code String -- ^ Inline code (literal) + | Space -- ^ Inter-word space + | EmDash -- ^ Em dash + | EnDash -- ^ En dash + | Apostrophe -- ^ Apostrophe + | Ellipses -- ^ Ellipses + | LineBreak -- ^ Hard line break + | TeX String -- ^ LaTeX code (literal) + | HtmlInline String -- ^ HTML code (literal) + | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target + | Image [Inline] Target -- ^ Image: alt text (list of inlines), target + -- and target + | Note [Block] -- ^ Footnote or endnote + deriving (Show, Eq, Read) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs new file mode 100644 index 000000000..70a071152 --- /dev/null +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -0,0 +1,496 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.HTML + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of HTML to 'Pandoc' document. +-} +module Text.Pandoc.Readers.HTML ( + readHtml, + rawHtmlInline, + rawHtmlBlock, + anyHtmlBlockTag, + anyHtmlInlineTag, + anyHtmlTag, + anyHtmlEndTag, + htmlEndTag, + extractTagType, + htmlBlockElement + ) where + +import Text.ParserCombinators.Parsec +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.CharacterReferences ( characterReference, + decodeCharacterReferences ) +import Data.Maybe ( fromMaybe ) +import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf ) +import Data.Char ( toUpper, toLower, isAlphaNum ) + +-- | Convert HTML-formatted string to 'Pandoc' document. +readHtml :: ParserState -- ^ Parser state + -> String -- ^ String to parse + -> Pandoc +readHtml = readWith parseHtml + +-- +-- Constants +-- + +eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", + "map", "area", "object", "script"] + +inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", + "br", "cite", "code", "dfn", "em", "font", "i", "img", + "input", "kbd", "label", "q", "s", "samp", "select", + "small", "span", "strike", "strong", "sub", "sup", + "textarea", "tt", "u", "var"] ++ eitherBlockOrInline + +blockHtmlTags = ["address", "blockquote", "center", "dir", "div", + "dl", "fieldset", "form", "h1", "h2", "h3", "h4", + "h5", "h6", "hr", "isindex", "menu", "noframes", + "noscript", "ol", "p", "pre", "table", "ul", "dd", + "dt", "frameset", "li", "tbody", "td", "tfoot", + "th", "thead", "tr"] ++ eitherBlockOrInline + +-- +-- HTML utility functions +-- + +-- | Read blocks until end tag. +blocksTilEnd tag = do + blocks <- manyTill (block >>~ spaces) (htmlEndTag tag) + return $ filter (/= Null) blocks + +-- | Read inlines until end tag. +inlinesTilEnd tag = manyTill inline (htmlEndTag tag) + +-- | Parse blocks between open and close tag. +blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag + +-- | Parse inlines between open and close tag. +inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag + +-- | Extract type from a tag: e.g. @br@ from @\<br\>@ +extractTagType :: String -> String +extractTagType ('<':rest) = + let isSpaceOrSlash c = c `elem` "/ \n\t" in + map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest +extractTagType _ = "" + +-- | Parse any HTML tag (opening or self-closing) and return text of tag +anyHtmlTag = try $ do + char '<' + spaces + tag <- many1 alphaNum + attribs <- many htmlAttribute + spaces + ender <- option "" (string "/") + let ender' = if null ender then "" else " /" + spaces + char '>' + return $ "<" ++ tag ++ + concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">" + +anyHtmlEndTag = try $ do + char '<' + spaces + char '/' + spaces + tagType <- many1 alphaNum + spaces + char '>' + return $ "</" ++ tagType ++ ">" + +htmlTag :: String -> GenParser Char st (String, [(String, String)]) +htmlTag tag = try $ do + char '<' + spaces + stringAnyCase tag + attribs <- many htmlAttribute + spaces + optional (string "/") + spaces + char '>' + return (tag, (map (\(name, content, raw) -> (name, content)) attribs)) + +-- parses a quoted html attribute value +quoted quoteChar = do + result <- between (char quoteChar) (char quoteChar) + (many (noneOf [quoteChar])) + return (result, [quoteChar]) + +htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute + +-- minimized boolean attribute +htmlMinimizedAttribute = try $ do + many1 space + name <- many1 (choice [letter, oneOf ".-_:"]) + return (name, name, name) + +htmlRegularAttribute = try $ do + many1 space + name <- many1 (choice [letter, oneOf ".-_:"]) + spaces + char '=' + spaces + (content, quoteStr) <- choice [ (quoted '\''), + (quoted '"'), + (do + a <- many (alphaNum <|> (oneOf "-._:")) + return (a,"")) ] + return (name, content, + (name ++ "=" ++ quoteStr ++ content ++ quoteStr)) + +-- | Parse an end tag of type 'tag' +htmlEndTag tag = try $ do + char '<' + spaces + char '/' + spaces + stringAnyCase tag + spaces + char '>' + return $ "</" ++ tag ++ ">" + +-- | Returns @True@ if the tag is (or can be) an inline tag. +isInline tag = (extractTagType tag) `elem` inlineHtmlTags + +-- | Returns @True@ if the tag is (or can be) a block tag. +isBlock tag = (extractTagType tag) `elem` blockHtmlTags + +anyHtmlBlockTag = try $ do + tag <- anyHtmlTag <|> anyHtmlEndTag + if isBlock tag then return tag else fail "inline tag" + +anyHtmlInlineTag = try $ do + tag <- anyHtmlTag <|> anyHtmlEndTag + if isInline tag then return tag else fail "not an inline tag" + +-- | Parses material between script tags. +-- Scripts must be treated differently, because they can contain '<>' etc. +htmlScript = try $ do + open <- string "<script" + rest <- manyTill anyChar (htmlEndTag "script") + return $ open ++ rest ++ "</script>" + +htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ] + +rawHtmlBlock = try $ do + notFollowedBy' (htmlTag "/body" <|> htmlTag "/html") + body <- htmlBlockElement <|> anyHtmlTag <|> anyHtmlEndTag + sp <- many space + state <- getState + if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null + +-- | Parses an HTML comment. +htmlComment = try $ do + string "<!--" + comment <- manyTill anyChar (try (string "-->")) + return $ "<!--" ++ comment ++ "-->" + +-- +-- parsing documents +-- + +xmlDec = try $ do + string "<?" + rest <- manyTill anyChar (char '>') + return $ "<?" ++ rest ++ ">" + +definition = try $ do + string "<!" + rest <- manyTill anyChar (char '>') + return $ "<!" ++ rest ++ ">" + +nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >> + ((rawHtmlBlock >> return ' ') <|> anyChar) + +parseTitle = try $ do + (tag, _) <- htmlTag "title" + contents <- inlinesTilEnd tag + spaces + return contents + +-- parse header and return meta-information (for now, just title) +parseHead = try $ do + htmlTag "head" + spaces + skipMany nonTitleNonHead + contents <- option [] parseTitle + skipMany nonTitleNonHead + htmlTag "/head" + return (contents, [], "") + +skipHtmlTag tag = optional (htmlTag tag) + +-- h1 class="title" representation of title in body +bodyTitle = try $ do + (tag, attribs) <- htmlTag "h1" + cl <- case (extractAttribute "class" attribs) of + Just "title" -> return "" + otherwise -> fail "not title" + inlinesTilEnd "h1" + +parseHtml = do + sepEndBy (choice [xmlDec, definition, htmlComment]) spaces + skipHtmlTag "html" + spaces + (title, authors, date) <- option ([], [], "") parseHead + spaces + skipHtmlTag "body" + spaces + optional bodyTitle -- skip title in body, because it's represented in meta + blocks <- parseBlocks + spaces + optional (htmlEndTag "body") + spaces + optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html> + eof + return $ Pandoc (Meta title authors date) blocks + +-- +-- parsing blocks +-- + +parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null)) + +block = choice [ codeBlock + , header + , hrule + , list + , blockQuote + , para + , plain + , rawHtmlBlock ] <?> "block" + +-- +-- header blocks +-- + +header = choice (map headerLevel (enumFromTo 1 5)) <?> "header" + +headerLevel n = try $ do + let level = "h" ++ show n + (tag, attribs) <- htmlTag level + contents <- inlinesTilEnd level + return $ Header n (normalizeSpaces contents) + +-- +-- hrule block +-- + +hrule = try $ do + (tag, attribs) <- htmlTag "hr" + state <- getState + if not (null attribs) && stateParseRaw state + then unexpected "attributes in hr" -- parse as raw in this case + else return HorizontalRule + +-- +-- code blocks +-- + +-- Note: HTML tags in code blocks (e.g. for syntax highlighting) are +-- skipped, because they are not portable to output formats other than HTML. +codeBlock = try $ do + htmlTag "pre" + result <- manyTill + (many1 (satisfy (/= '<')) <|> + ((anyHtmlTag <|> anyHtmlEndTag) >> return "")) + (htmlEndTag "pre") + let result' = concat result + -- drop leading newline if any + let result'' = if "\n" `isPrefixOf` result' + then drop 1 result' + else result' + -- drop trailing newline if any + let result''' = if "\n" `isSuffixOf` result'' + then init result'' + else result'' + return $ CodeBlock $ decodeCharacterReferences result''' + +-- +-- block quotes +-- + +blockQuote = try $ htmlTag "blockquote" >> spaces >> + blocksTilEnd "blockquote" >>= (return . BlockQuote) + +-- +-- list blocks +-- + +list = choice [ bulletList, orderedList, definitionList ] <?> "list" + +orderedList = try $ do + (_, attribs) <- htmlTag "ol" + (start, style) <- option (1, DefaultStyle) $ + do failIfStrict + let sta = fromMaybe "1" $ + lookup "start" attribs + let sty = fromMaybe (fromMaybe "" $ + lookup "style" attribs) $ + lookup "class" attribs + let sty' = case sty of + "lower-roman" -> LowerRoman + "upper-roman" -> UpperRoman + "lower-alpha" -> LowerAlpha + "upper-alpha" -> UpperAlpha + "decimal" -> Decimal + _ -> DefaultStyle + return (read sta, sty') + spaces + items <- sepEndBy1 (blocksIn "li") spaces + htmlEndTag "ol" + return $ OrderedList (start, style, DefaultDelim) items + +bulletList = try $ do + htmlTag "ul" + spaces + items <- sepEndBy1 (blocksIn "li") spaces + htmlEndTag "ul" + return $ BulletList items + +definitionList = try $ do + failIfStrict -- def lists not part of standard markdown + tag <- htmlTag "dl" + spaces + items <- sepEndBy1 definitionListItem spaces + htmlEndTag "dl" + return $ DefinitionList items + +definitionListItem = try $ do + terms <- sepEndBy1 (inlinesIn "dt") spaces + defs <- sepEndBy1 (blocksIn "dd") spaces + let term = joinWithSep [LineBreak] terms + return (term, concat defs) + +-- +-- paragraph block +-- + +para = try $ htmlTag "p" >> inlinesTilEnd "p" >>= + return . Para . normalizeSpaces + +-- +-- plain block +-- + +plain = many1 inline >>= return . Plain . normalizeSpaces + +-- +-- inline +-- + +inline = choice [ charRef + , strong + , emph + , superscript + , subscript + , strikeout + , spanStrikeout + , code + , str + , linebreak + , whitespace + , link + , image + , rawHtmlInline + ] <?> "inline" + +code = try $ do + htmlTag "code" + result <- manyTill anyChar (htmlEndTag "code") + -- remove internal line breaks, leading and trailing space, + -- and decode character references + return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ + joinWithSep " " $ lines result + +rawHtmlInline = do + result <- htmlScript <|> htmlComment <|> anyHtmlInlineTag + state <- getState + if stateParseRaw state then return (HtmlInline result) else return (Str "") + +betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= + return . normalizeSpaces + +emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph + +strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong + +superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript + +subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript + +strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= + return . Strikeout + +spanStrikeout = try $ do + failIfStrict -- strict markdown has no strikeout, so treat as raw HTML + (tag, attributes) <- htmlTag "span" + result <- case (extractAttribute "class" attributes) of + Just "strikeout" -> inlinesTilEnd "span" + _ -> fail "not a strikeout" + return $ Strikeout result + +whitespace = many1 space >> return Space + +-- hard line break +linebreak = htmlTag "br" >> optional newline >> return LineBreak + +str = many1 (noneOf "<& \t\n") >>= return . Str + +-- +-- links and images +-- + +-- extract contents of attribute (attribute names are case-insensitive) +extractAttribute name [] = Nothing +extractAttribute name ((attrName, contents):rest) = + let name' = map toLower name + attrName' = map toLower attrName + in if attrName' == name' + then Just (decodeCharacterReferences contents) + else extractAttribute name rest + +link = try $ do + (tag, attributes) <- htmlTag "a" + url <- case (extractAttribute "href" attributes) of + Just url -> return url + Nothing -> fail "no href" + let title = fromMaybe "" $ extractAttribute "title" attributes + label <- inlinesTilEnd "a" + return $ Link (normalizeSpaces label) (url, title) + +image = try $ do + (tag, attributes) <- htmlTag "img" + url <- case (extractAttribute "src" attributes) of + Just url -> return url + Nothing -> fail "no src" + let title = fromMaybe "" $ extractAttribute "title" attributes + let alt = fromMaybe "" (extractAttribute "alt" attributes) + return $ Image [Str alt] (url, title) + diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs new file mode 100644 index 000000000..37cc2bfe4 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -0,0 +1,651 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.LaTeX + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of LaTeX to 'Pandoc' document. +-} +module Text.Pandoc.Readers.LaTeX ( + readLaTeX, + rawLaTeXInline, + rawLaTeXEnvironment + ) where + +import Text.ParserCombinators.Parsec +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Data.Maybe ( fromMaybe ) +import Data.Char ( chr ) +import Data.List ( isPrefixOf, isSuffixOf ) + +-- | Parse LaTeX from string and return 'Pandoc' document. +readLaTeX :: ParserState -- ^ Parser state, including options for parser + -> String -- ^ String to parse + -> Pandoc +readLaTeX = readWith parseLaTeX + +-- characters with special meaning +specialChars = "\\`$%^&_~#{}\n \t|<>'\"-" + +-- +-- utility functions +-- + +-- | Returns text between brackets and its matching pair. +bracketedText openB closeB = do + result <- charsInBalanced' openB closeB + return $ [openB] ++ result ++ [closeB] + +-- | Returns an option or argument of a LaTeX command. +optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']' + +-- | True if the string begins with '{'. +isArg ('{':rest) = True +isArg other = False + +-- | Returns list of options and arguments of a LaTeX command. +commandArgs = many optOrArg + +-- | Parses LaTeX command, returns (name, star, list of options or arguments). +command = do + char '\\' + name <- many1 letter + star <- option "" (string "*") -- some commands have starred versions + args <- commandArgs + return (name, star, args) + +begin name = try $ do + string $ "\\begin{" ++ name ++ "}" + optional commandArgs + spaces + return name + +end name = try $ do + string $ "\\end{" ++ name ++ "}" + spaces + return name + +-- | Returns a list of block elements containing the contents of an +-- environment. +environment name = try $ begin name >> spaces >> manyTill block (end name) + +anyEnvironment = try $ do + string "\\begin{" + name <- many letter + star <- option "" (string "*") -- some environments have starred variants + char '}' + optional commandArgs + spaces + contents <- manyTill block (end (name ++ star)) + return $ BlockQuote contents + +-- +-- parsing documents +-- + +-- | Process LaTeX preamble, extracting metadata. +processLaTeXPreamble = try $ manyTill + (choice [bibliographic, comment, unknownCommand, nullBlock]) + (try (string "\\begin{document}")) >> + spaces + +-- | Parse LaTeX and return 'Pandoc'. +parseLaTeX = do + optional processLaTeXPreamble -- preamble might not be present (fragment) + spaces + blocks <- parseBlocks + spaces + optional $ try (string "\\end{document}" >> many anyChar) + -- might not be present (fragment) + spaces + eof + state <- getState + let blocks' = filter (/= Null) blocks + let title' = stateTitle state + let authors' = stateAuthors state + let date' = stateDate state + return $ Pandoc (Meta title' authors' date') blocks' + +-- +-- parsing blocks +-- + +parseBlocks = spaces >> many block + +block = choice [ hrule + , codeBlock + , header + , list + , blockQuote + , mathBlock + , comment + , bibliographic + , para + , specialEnvironment + , itemBlock + , unknownEnvironment + , unknownCommand ] <?> "block" + +-- +-- header blocks +-- + +header = try $ do + char '\\' + subs <- many (try (string "sub")) + string "section" + optional (char '*') + char '{' + title <- manyTill inline (char '}') + spaces + return $ Header (length subs + 1) (normalizeSpaces title) + +-- +-- hrule block +-- + +hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", + "\\newpage" ] >> spaces >> return HorizontalRule + +-- +-- code blocks +-- + +codeBlock = codeBlock1 <|> codeBlock2 + +codeBlock1 = try $ do + string "\\begin{verbatim}" -- don't use begin function because it + -- gobbles whitespace + optional blanklines -- we want to gobble blank lines, but not + -- leading space + contents <- manyTill anyChar (try (string "\\end{verbatim}")) + spaces + return $ CodeBlock (stripTrailingNewlines contents) + +codeBlock2 = try $ do + string "\\begin{Verbatim}" -- used by fancyvrb package + option "" blanklines + contents <- manyTill anyChar (try (string "\\end{Verbatim}")) + spaces + return $ CodeBlock (stripTrailingNewlines contents) + +-- +-- block quotes +-- + +blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= + return . BlockQuote + +-- +-- math block +-- + +mathBlock = mathBlockWith (begin "equation") (end "equation") <|> + mathBlockWith (begin "displaymath") (end "displaymath") <|> + mathBlockWith (string "\\[") (string "\\]") <?> "math block" + +mathBlockWith start end = try $ do + start + spaces + result <- manyTill anyChar end + spaces + return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]] + +-- +-- list blocks +-- + +list = bulletList <|> orderedList <|> definitionList <?> "list" + +listItem = try $ do + ("item", _, args) <- command + spaces + state <- getState + let oldParserContext = stateParserContext state + updateState (\state -> state {stateParserContext = ListItemState}) + blocks <- many block + updateState (\state -> state {stateParserContext = oldParserContext}) + opt <- case args of + ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> + parseFromString (many inline) $ tail $ init x + _ -> return [] + return (opt, blocks) + +orderedList = try $ do + string "\\begin{enumerate}" + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ + try $ do failIfStrict + char '[' + res <- anyOrderedListMarker + char ']' + return res + spaces + option "" $ try $ do string "\\setlength{\\itemindent}" + char '{' + manyTill anyChar (char '}') + spaces + start <- option 1 $ try $ do failIfStrict + string "\\setcounter{enum" + many1 (oneOf "iv") + string "}{" + num <- many1 digit + char '}' + spaces + return $ (read num) + 1 + items <- many listItem + end "enumerate" + spaces + return $ OrderedList (start, style, delim) $ map snd items + +bulletList = try $ do + begin "itemize" + spaces + items <- many listItem + end "itemize" + spaces + return (BulletList $ map snd items) + +definitionList = try $ do + begin "description" + spaces + items <- many listItem + end "description" + spaces + return (DefinitionList items) + +-- +-- paragraph block +-- + +para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces + +-- +-- title authors date +-- + +bibliographic = choice [ maketitle, title, authors, date ] + +maketitle = try (string "\\maketitle") >> spaces >> return Null + +title = try $ do + string "\\title{" + tit <- manyTill inline (char '}') + spaces + updateState (\state -> state { stateTitle = tit }) + return Null + +authors = try $ do + string "\\author{" + authors <- manyTill anyChar (char '}') + spaces + let authors' = map removeLeadingTrailingSpace $ lines $ + substitute "\\\\" "\n" authors + updateState (\state -> state { stateAuthors = authors' }) + return Null + +date = try $ do + string "\\date{" + date' <- manyTill anyChar (char '}') + spaces + updateState (\state -> state { stateDate = date' }) + return Null + +-- +-- item block +-- for use in unknown environments that aren't being parsed as raw latex +-- + +-- this forces items to be parsed in different blocks +itemBlock = try $ do + ("item", _, args) <- command + state <- getState + if (stateParserContext state == ListItemState) + then fail "item should be handled by list block" + else if null args + then return Null + else return $ Plain [Str (stripFirstAndLast (head args))] + +-- +-- raw LaTeX +-- + +specialEnvironment = do -- these are always parsed as raw + lookAhead (choice (map (\name -> begin name) ["tabular", "figure", + "tabbing", "eqnarry", "picture", "table", "verse", "theorem"])) + rawLaTeXEnvironment + +-- | Parse any LaTeX environment and return a Para block containing +-- the whole literal environment as raw TeX. +rawLaTeXEnvironment :: GenParser Char st Block +rawLaTeXEnvironment = try $ do + string "\\begin{" + name <- many1 letter + star <- option "" (string "*") -- for starred variants + let name' = name ++ star + char '}' + args <- option [] commandArgs + let argStr = concat args + contents <- manyTill (choice [ (many1 (noneOf "\\")), + (do + (Para [TeX str]) <- rawLaTeXEnvironment + return str), + string "\\" ]) + (end name') + spaces + return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++ + concat contents ++ "\\end{" ++ name' ++ "}"] + +unknownEnvironment = try $ do + state <- getState + result <- if stateParseRaw state -- check whether we should include raw TeX + then rawLaTeXEnvironment -- if so, get whole raw environment + else anyEnvironment -- otherwise just the contents + return result + +unknownCommand = try $ do + notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", + "document"] + (name, star, args) <- command + spaces + let argStr = concat args + state <- getState + if name == "item" && (stateParserContext state) == ListItemState + then fail "should not be parsed as raw" + else string "" + if stateParseRaw state + then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)] + else return $ Plain [Str (joinWithSep " " args)] + +-- latex comment +comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null + +-- +-- inline +-- + +inline = choice [ str + , endline + , whitespace + , quoted + , apostrophe + , spacer + , strong + , math + , ellipses + , emDash + , enDash + , hyphen + , emph + , strikeout + , superscript + , subscript + , ref + , lab + , code + , url + , link + , image + , footnote + , linebreak + , accentedChar + , specialChar + , rawLaTeXInline + , escapedChar + , unescapedChar + ] <?> "inline" + +accentedChar = normalAccentedChar <|> specialAccentedChar + +normalAccentedChar = try $ do + char '\\' + accent <- oneOf "'`^\"~" + character <- (try $ char '{' >> letter >>~ char '}') <|> letter + let table = fromMaybe [] $ lookup character accentTable + let result = case lookup accent table of + Just num -> chr num + Nothing -> '?' + return $ Str [result] + +-- an association list of letters and association list of accents +-- and decimal character numbers. +accentTable = + [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]), + ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]), + ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]), + ('N', [('~', 209)]), + ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]), + ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]), + ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]), + ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]), + ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]), + ('n', [('~', 241)]), + ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]), + ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] + +specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, + oslash, pound, euro, copyright, sect ] + +ccedil = try $ do + char '\\' + letter <- oneOfStrings ["cc", "cC"] + let num = if letter == "cc" then 231 else 199 + return $ Str [chr num] + +aring = try $ do + char '\\' + letter <- oneOfStrings ["aa", "AA"] + let num = if letter == "aa" then 229 else 197 + return $ Str [chr num] + +iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> + return (Str [chr 239]) + +icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >> + return (Str [chr 238]) + +szlig = try (string "\\ss") >> return (Str [chr 223]) + +oslash = try $ do + char '\\' + letter <- choice [char 'o', char 'O'] + let num = if letter == 'o' then 248 else 216 + return $ Str [chr num] + +aelig = try $ do + char '\\' + letter <- oneOfStrings ["ae", "AE"] + let num = if letter == "ae" then 230 else 198 + return $ Str [chr num] + +pound = try (string "\\pounds") >> return (Str [chr 163]) + +euro = try (string "\\euro") >> return (Str [chr 8364]) + +copyright = try (string "\\copyright") >> return (Str [chr 169]) + +sect = try (string "\\S") >> return (Str [chr 167]) + +escapedChar = do + result <- escaped (oneOf " $%&_#{}\n") + return $ if result == Str "\n" then Str " " else result + +-- ignore standalone, nonescaped special characters +unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "") + +specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ] + +backslash = try (string "\\textbackslash") >> return (Str "\\") + +tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~") + +caret = try (string "\\^{}") >> return (Str "^") + +bar = try (string "\\textbar") >> return (Str "\\") + +lt = try (string "\\textless") >> return (Str "<") + +gt = try (string "\\textgreater") >> return (Str ">") + +doubleQuote = char '"' >> return (Str "\"") + +code = code1 <|> code2 + +code1 = try $ do + string "\\verb" + marker <- anyChar + result <- manyTill anyChar (char marker) + return $ Code $ removeLeadingTrailingSpace result + +code2 = try $ do + string "\\texttt{" + result <- manyTill (noneOf "\\\n~$%^&{}") (char '}') + return $ Code result + +emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >> + manyTill inline (char '}') >>= return . Emph + +strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>= + return . Strikeout + +superscript = try $ string "\\textsuperscript{" >> + manyTill inline (char '}') >>= return . Superscript + +-- note: \textsubscript isn't a standard latex command, but we use +-- a defined version in pandoc. +subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>= + return . Subscript + +apostrophe = char '\'' >> return Apostrophe + +quoted = doubleQuoted <|> singleQuoted + +singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>= + return . Quoted SingleQuote . normalizeSpaces + +doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>= + return . Quoted DoubleQuote . normalizeSpaces + +singleQuoteStart = char '`' + +singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum + +doubleQuoteStart = string "``" + +doubleQuoteEnd = try $ string "''" + +ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >> + return Ellipses + +enDash = try (string "--") >> return EnDash + +emDash = try (string "---") >> return EmDash + +hyphen = char '-' >> return (Str "-") + +lab = try $ do + string "\\label{" + result <- manyTill anyChar (char '}') + return $ Str $ "(" ++ result ++ ")" + +ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str + +strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= + return . Strong + +whitespace = many1 (oneOf "~ \t") >> return Space + +-- hard line break +linebreak = try (string "\\\\") >> return LineBreak + +spacer = try (string "\\,") >> return (Str "") + +str = many1 (noneOf specialChars) >>= return . Str + +-- endline internal to paragraph +endline = try $ newline >> notFollowedBy blankline >> return Space + +-- math +math = math1 <|> math2 <?> "math" + +math1 = try $ do + char '$' + result <- many (noneOf "$") + char '$' + return $ TeX ("$" ++ result ++ "$") + +math2 = try $ do + string "\\(" + result <- many (noneOf "$") + string "\\)" + return $ TeX ("$" ++ result ++ "$") + +-- +-- links and images +-- + +url = try $ do + string "\\url" + url <- charsInBalanced '{' '}' + return $ Link [Code url] (url, "") + +link = try $ do + string "\\href{" + url <- manyTill anyChar (char '}') + char '{' + label <- manyTill inline (char '}') + return $ Link (normalizeSpaces label) (url, "") + +image = try $ do + ("includegraphics", _, args) <- command + let args' = filter isArg args -- filter out options + let src = if null args' then + ("", "") + else + (stripFirstAndLast (head args'), "") + return $ Image [Str "image"] src + +footnote = try $ do + (name, _, (contents:[])) <- command + if ((name == "footnote") || (name == "thanks")) + then string "" + else fail "not a footnote or thanks command" + let contents' = stripFirstAndLast contents + -- parse the extracted block, which may contain various block elements: + rest <- getInput + setInput $ contents' + blocks <- parseBlocks + setInput rest + return $ Note blocks + +-- | Parse any LaTeX command and return it in a raw TeX inline element. +rawLaTeXInline :: GenParser Char ParserState Inline +rawLaTeXInline = try $ do + (name, star, args) <- command + state <- getState + if ((name == "begin") || (name == "end") || (name == "item")) + then fail "not an inline command" + else string "" + return $ TeX ("\\" ++ name ++ star ++ concat args) + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs new file mode 100644 index 000000000..df84c0ac7 --- /dev/null +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -0,0 +1,909 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Markdown + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of markdown-formatted plain text to 'Pandoc' document. +-} +module Text.Pandoc.Readers.Markdown ( + readMarkdown + ) where + +import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy ) +import Data.Ord ( comparing ) +import Data.Char ( isAlphaNum ) +import Network.URI ( isURI ) +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) +import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, + anyHtmlInlineTag, anyHtmlTag, + anyHtmlEndTag, htmlEndTag, extractTagType, + htmlBlockElement ) +import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) +import Text.ParserCombinators.Parsec + +-- | Read markdown from an input string and return a Pandoc document. +readMarkdown :: ParserState -> String -> Pandoc +readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n") + +-- +-- Constants and data structure definitions +-- + +spaceChars = " \t" +bulletListMarkers = "*+-" +hruleChars = "*-_" +setextHChars = "=-" + +-- treat these as potentially non-text when parsing inline: +specialChars = "\\[]*_~`<>$!^-.&'\"" + +-- +-- auxiliary functions +-- + +indentSpaces = try $ do + state <- getState + let tabStop = stateTabStop state + try (count tabStop (char ' ')) <|> + (many (char ' ') >> string "\t") <?> "indentation" + +nonindentSpaces = do + state <- getState + let tabStop = stateTabStop state + sps <- many (char ' ') + if length sps < tabStop + then return sps + else unexpected "indented line" + +-- | Fail unless we're at beginning of a line. +failUnlessBeginningOfLine = do + pos <- getPosition + if sourceColumn pos == 1 then return () else fail "not beginning of line" + +-- | Fail unless we're in "smart typography" mode. +failUnlessSmart = do + state <- getState + if stateSmart state then return () else fail "Smart typography feature" + +-- | Parse an inline Str element with a given content. +inlineString str = try $ do + (Str res) <- inline + if res == str then return res else fail $ "unexpected Str content" + +-- | Parse a sequence of inline elements between a string +-- @opener@ and a string @closer@, including inlines +-- between balanced pairs of @opener@ and a @closer@. +inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline] +inlinesInBalanced opener closer = try $ do + string opener + result <- manyTill ( (do lookAhead (inlineString opener) + -- because it might be a link... + bal <- inlinesInBalanced opener closer + return $ [Str opener] ++ bal ++ [Str closer]) + <|> (count 1 inline)) + (try (string closer)) + return $ concat result + +-- +-- document structure +-- + +titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline + +authorsLine = try $ do + char '%' + skipSpaces + authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") + newline + return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors + +dateLine = try $ do + char '%' + skipSpaces + date <- many (noneOf "\n") + newline + return $ decodeCharacterReferences $ removeTrailingSpace date + +titleBlock = try $ do + failIfStrict + title <- option [] titleLine + author <- option [] authorsLine + date <- option "" dateLine + optional blanklines + return (title, author, date) + +parseMarkdown = do + -- markdown allows raw HTML + updateState (\state -> state { stateParseRaw = True }) + startPos <- getPosition + -- go through once just to get list of reference keys + -- docMinusKeys is the raw document with blanks where the keys were... + docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= + return . concat + setInput docMinusKeys + setPosition startPos + st <- getState + -- go through again for notes unless strict... + if stateStrict st + then return () + else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= + return . concat + st <- getState + let reversedNotes = stateNotes st + updateState $ \st -> st { stateNotes = reverse reversedNotes } + setInput docMinusNotes + setPosition startPos + -- now parse it for real... + (title, author, date) <- option ([],[],"") titleBlock + blocks <- parseBlocks + return $ Pandoc (Meta title author date) $ filter (/= Null) blocks + +-- +-- initial pass for references and notes +-- + +referenceKey = try $ do + startPos <- getPosition + nonindentSpaces + label <- reference + char ':' + skipSpaces + optional (char '<') + src <- many (noneOf "> \n\t") + optional (char '>') + tit <- option "" referenceTitle + blanklines + endPos <- getPosition + let newkey = (label, (removeTrailingSpace src, tit)) + st <- getState + let oldkeys = stateKeys st + updateState $ \st -> st { stateKeys = newkey : oldkeys } + -- return blanks so line count isn't affected + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + +referenceTitle = try $ do + (many1 spaceChar >> option '\n' newline) <|> newline + skipSpaces + tit <- (charsInBalanced '(' ')' >>= return . unwords . words) + <|> do delim <- char '\'' <|> char '"' + manyTill anyChar (try (char delim >> skipSpaces >> + notFollowedBy (noneOf ")\n"))) + return $ decodeCharacterReferences tit + +noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']') + +rawLine = do + notFollowedBy blankline + notFollowedBy' noteMarker + contents <- many1 nonEndline + end <- option "" (newline >> optional indentSpaces >> return "\n") + return $ contents ++ end + +rawLines = many1 rawLine >>= return . concat + +noteBlock = try $ do + startPos <- getPosition + ref <- noteMarker + char ':' + optional blankline + optional indentSpaces + raw <- sepBy rawLines (try (blankline >> indentSpaces)) + optional blanklines + endPos <- getPosition + -- parse the extracted text, which may contain various block elements: + contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" + let newnote = (ref, contents) + st <- getState + let oldnotes = stateNotes st + updateState $ \st -> st { stateNotes = newnote : oldnotes } + -- return blanks so line count isn't affected + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + +-- +-- parsing blocks +-- + +parseBlocks = manyTill block eof + +block = choice [ header + , table + , codeBlock + , hrule + , list + , blockQuote + , htmlBlock + , rawLaTeXEnvironment' + , para + , plain + , nullBlock ] <?> "block" + +-- +-- header blocks +-- + +header = atxHeader <|> setextHeader <?> "header" + +atxHeader = try $ do + level <- many1 (char '#') >>= return . length + notFollowedBy (char '.' <|> char ')') -- this would be a list + skipSpaces + text <- manyTill inline atxClosing >>= return . normalizeSpaces + return $ Header level text + +atxClosing = try $ skipMany (char '#') >> blanklines + +setextHeader = try $ do + -- first, see if this block has any chance of being a setextHeader: + lookAhead (anyLine >> oneOf setextHChars) + text <- many1Till inline newline >>= return . normalizeSpaces + level <- choice $ zipWith + (\ch lev -> try (many1 $ char ch) >> blanklines >> return lev) + setextHChars [1..(length setextHChars)] + return $ Header level text + +-- +-- hrule block +-- + +hrule = try $ do + skipSpaces + start <- oneOf hruleChars + count 2 (skipSpaces >> char start) + skipMany (skipSpaces >> char start) + newline + optional blanklines + return HorizontalRule + +-- +-- code blocks +-- + +indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") + +codeBlock = do + contents <- many1 (indentedLine <|> + try (do b <- blanklines + l <- indentedLine + return $ b ++ l)) + optional blanklines + return $ CodeBlock $ stripTrailingNewlines $ concat contents + +-- +-- block quotes +-- + +emacsBoxQuote = try $ do + failIfStrict + string ",----" + manyTill anyChar newline + raw <- manyTill + (try (char '|' >> optional (char ' ') >> manyTill anyChar newline)) + (try (string "`----")) + blanklines + return raw + +emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') + +emailBlockQuote = try $ do + emailBlockQuoteStart + raw <- sepBy (many (nonEndline <|> + (try (endline >> notFollowedBy emailBlockQuoteStart >> + return '\n')))) + (try (newline >> emailBlockQuoteStart)) + newline <|> (eof >> return '\n') + optional blanklines + return raw + +blockQuote = do + raw <- emailBlockQuote <|> emacsBoxQuote + -- parse the extracted block, which may contain various block elements: + contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" + return $ BlockQuote contents + +-- +-- list blocks +-- + +list = choice [ bulletList, orderedList, definitionList ] <?> "list" + +bulletListStart = try $ do + optional newline -- if preceded by a Plain block in a list context + nonindentSpaces + notFollowedBy' hrule -- because hrules start out just like lists + oneOf bulletListMarkers + spaceChar + skipSpaces + +anyOrderedListStart = try $ do + optional newline -- if preceded by a Plain block in a list context + nonindentSpaces + notFollowedBy $ string "p." >> spaceChar >> digit -- page number + state <- getState + if stateStrict state + then do many1 digit + char '.' + spaceChar + return (1, DefaultStyle, DefaultDelim) + else anyOrderedListMarker >>~ spaceChar + +orderedListStart style delim = try $ do + optional newline -- if preceded by a Plain block in a list context + nonindentSpaces + state <- getState + num <- if stateStrict state + then do many1 digit + char '.' + return 1 + else orderedListMarker style delim + if delim == Period && (style == UpperAlpha || (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000])) + then char '\t' <|> (spaceChar >> spaceChar) + else spaceChar + skipSpaces + +-- parse a line of a list item (start = parser for beginning of list item) +listLine start = try $ do + notFollowedBy' start + notFollowedBy blankline + notFollowedBy' (do indentSpaces + many (spaceChar) + bulletListStart <|> (anyOrderedListStart >> return ())) + line <- manyTill anyChar newline + return $ line ++ "\n" + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem start = try $ do + start + result <- many1 (listLine start) + blanks <- many blankline + return $ concat result ++ blanks + +-- continuation of a list item - indented and separated by blankline +-- or (in compact lists) endline. +-- note: nested lists are parsed as continuations +listContinuation start = try $ do + lookAhead indentSpaces + result <- many1 (listContinuationLine start) + blanks <- many blankline + return $ concat result ++ blanks + +listContinuationLine start = try $ do + notFollowedBy blankline + notFollowedBy' start + optional indentSpaces + result <- manyTill anyChar newline + return $ result ++ "\n" + +listItem start = try $ do + first <- rawListItem start + continuations <- many (listContinuation start) + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let oldContext = stateParserContext state + setState $ state {stateParserContext = ListItemState} + -- parse the extracted block, which may contain various block elements: + let raw = concat (first:continuations) + contents <- parseFromString parseBlocks raw + updateState (\st -> st {stateParserContext = oldContext}) + return contents + +orderedList = try $ do + (start, style, delim) <- lookAhead anyOrderedListStart + items <- many1 (listItem (orderedListStart style delim)) + return $ OrderedList (start, style, delim) $ compactify items + +bulletList = many1 (listItem bulletListStart) >>= + return . BulletList . compactify + +-- definition lists + +definitionListItem = try $ do + notFollowedBy blankline + notFollowedBy' indentSpaces + -- first, see if this has any chance of being a definition list: + lookAhead (anyLine >> char ':') + term <- manyTill inline newline + raw <- many1 defRawBlock + state <- getState + let oldContext = stateParserContext state + -- parse the extracted block, which may contain various block elements: + contents <- parseFromString parseBlocks $ concat raw + updateState (\st -> st {stateParserContext = oldContext}) + return ((normalizeSpaces term), contents) + +defRawBlock = try $ do + char ':' + state <- getState + let tabStop = stateTabStop state + try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t") + firstline <- anyLine + rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine) + trailing <- option "" blanklines + return $ firstline ++ "\n" ++ unlines rawlines ++ trailing + +definitionList = do + failIfStrict + items <- many1 definitionListItem + let (terms, defs) = unzip items + let defs' = compactify defs + let items' = zip terms defs' + return $ DefinitionList items' + +-- +-- paragraph block +-- + +para = try $ do + result <- many1 inline + newline + blanklines <|> do st <- getState + if stateStrict st + then lookAhead (blockQuote <|> header) >> return "" + else lookAhead emacsBoxQuote >> return "" + return $ Para $ normalizeSpaces result + +plain = many1 inline >>= return . Plain . normalizeSpaces + +-- +-- raw html +-- + +htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element" + +htmlBlock = do + st <- getState + if stateStrict st + then try $ do failUnlessBeginningOfLine + first <- htmlElement + finalSpace <- many (oneOf spaceChars) + finalNewlines <- many newline + return $ RawHtml $ first ++ finalSpace ++ finalNewlines + else rawHtmlBlocks + +-- True if tag is self-closing +isSelfClosing tag = + isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag + +strictHtmlBlock = try $ do + tag <- anyHtmlBlockTag + let tag' = extractTagType tag + if isSelfClosing tag || tag' == "hr" + then return tag + else do contents <- many (notFollowedBy' (htmlEndTag tag') >> + (htmlElement <|> (count 1 anyChar))) + end <- htmlEndTag tag' + return $ tag ++ concat contents ++ end + +rawHtmlBlocks = do + htmlBlocks <- many1 rawHtmlBlock + let combined = concatMap (\(RawHtml str) -> str) htmlBlocks + let combined' = if not (null combined) && last combined == '\n' + then init combined -- strip extra newline + else combined + return $ RawHtml combined' + +-- +-- LaTeX +-- + +rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment + +-- +-- Tables +-- + +-- Parse a dashed line with optional trailing spaces; return its length +-- and the length including trailing space. +dashedLine ch = do + dashes <- many1 (char ch) + sp <- many spaceChar + return $ (length dashes, length $ dashes ++ sp) + +-- Parse a table header with dashed lines of '-' preceded by +-- one line of text. +simpleTableHeader = try $ do + rawContent <- anyLine + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeads = tail $ splitByIndices (init indices) rawContent + let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths + return (rawHeads, aligns, indices) + +-- Parse a table footer - dashed lines followed by blank line. +tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines + +-- Parse a table separator - dashed line. +tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" + +-- Parse a raw line and split it into chunks by indices. +rawTableLine indices = do + notFollowedBy' (blanklines <|> tableFooter) + line <- many1Till anyChar newline + return $ map removeLeadingTrailingSpace $ tail $ + splitByIndices (init indices) line + +-- Parse a table line and return a list of lists of blocks (columns). +tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) + +-- Parse a multiline table row and return a list of blocks (columns). +multilineRow indices = do + colLines <- many1 (rawTableLine indices) + optional blanklines + let cols = map unlines $ transpose colLines + mapM (parseFromString (many plain)) cols + +-- Calculate relative widths of table columns, based on indices +widthsFromIndices :: Int -- Number of columns on terminal + -> [Int] -- Indices + -> [Float] -- Fractional relative sizes of columns +widthsFromIndices _ [] = [] +widthsFromIndices numColumns indices = + let lengths = zipWith (-) indices (0:indices) + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs + +-- Parses a table caption: inlines beginning with 'Table:' +-- and followed by blank lines. +tableCaption = try $ do + nonindentSpaces + string "Table:" + result <- many1 inline + blanklines + return $ normalizeSpaces result + +-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. +tableWith headerParser lineParser footerParser = try $ do + (rawHeads, aligns, indices) <- headerParser + lines <- many1Till (lineParser indices) footerParser + caption <- option [] tableCaption + heads <- mapM (parseFromString (many plain)) rawHeads + state <- getState + let numColumns = stateColumns state + let widths = widthsFromIndices numColumns indices + return $ Table caption aligns widths heads lines + +-- Parse a simple table with '---' header and one line per row. +simpleTable = tableWith simpleTableHeader tableLine blanklines + +-- Parse a multiline table: starts with row of '-' on top, then header +-- (which may be multiline), then the rows, +-- which may be multiline, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). +multilineTable = tableWith multilineTableHeader multilineRow tableFooter + +multilineTableHeader = try $ do + tableSep + rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeadsList = transpose $ map + (\ln -> tail $ splitByIndices (init indices) ln) + rawContent + let rawHeads = map (joinWithSep " ") rawHeadsList + let aligns = zipWith alignType rawHeadsList lengths + return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) + +-- Returns an alignment type for a table, based on a list of strings +-- (the rows of the column header) and a number (the length of the +-- dashed line under the rows. +alignType :: [String] -> Int -> Alignment +alignType [] len = AlignDefault +alignType strLst len = + let str = head $ sortBy (comparing length) $ + map removeTrailingSpace strLst + leftSpace = if null str then False else (str !! 0) `elem` " \t" + rightSpace = length str < len || (str !! (len - 1)) `elem` " \t" + in case (leftSpace, rightSpace) of + (True, False) -> AlignRight + (False, True) -> AlignLeft + (True, True) -> AlignCenter + (False, False) -> AlignDefault + +table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table" + +-- +-- inline +-- + +inline = choice [ str + , smartPunctuation + , whitespace + , endline + , code + , charRef + , strong + , emph + , note + , inlineNote + , link + , image + , math + , strikeout + , superscript + , subscript + , autoLink + , rawHtmlInline' + , rawLaTeXInline' + , escapedChar + , symbol + , ltSign ] <?> "inline" + +escapedChar = do + char '\\' + state <- getState + result <- option '\\' $ if stateStrict state + then oneOf "\\`*_{}[]()>#+-.!~" + else satisfy (not . isAlphaNum) + return $ Str [result] + +ltSign = do + st <- getState + if stateStrict st + then char '<' + else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html + return $ Str ['<'] + +specialCharsMinusLt = filter (/= '<') specialChars + +symbol = do + result <- oneOf specialCharsMinusLt + return $ Str [result] + +-- parses inline code, between n `s and n `s +code = try $ do + starts <- many1 (char '`') + skipSpaces + result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + (char '\n' >> return " ")) + (try (skipSpaces >> count (length starts) (char '`') >> + notFollowedBy (char '`'))) + return $ Code $ removeLeadingTrailingSpace $ concat result + +mathWord = many1 ((noneOf " \t\n\\$") <|> + (try (char '\\') >>~ notFollowedBy (char '$'))) + +math = try $ do + failIfStrict + char '$' + notFollowedBy space + words <- sepBy1 mathWord (many1 space) + char '$' + return $ TeX ("$" ++ (joinWithSep " " words) ++ "$") + +emph = ((enclosed (char '*') (char '*') inline) <|> + (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>= + return . Emph . normalizeSpaces + +strong = ((enclosed (string "**") (try $ string "**") inline) <|> + (enclosed (string "__") (try $ string "__") inline)) >>= + return . Strong . normalizeSpaces + +strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= + return . Strikeout . normalizeSpaces + +superscript = failIfStrict >> enclosed (char '^') (char '^') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Superscript + +subscript = failIfStrict >> enclosed (char '~') (char '~') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Subscript + +smartPunctuation = failUnlessSmart >> + choice [ quoted, apostrophe, dash, ellipses ] + +apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe + +quoted = doubleQuoted <|> singleQuoted + +withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = stateQuoteContext oldState + setState oldState { stateQuoteContext = context } + result <- parser + newState <- getState + setState newState { stateQuoteContext = oldQuoteContext } + return result + +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= + return . Quoted SingleQuote . normalizeSpaces + +doubleQuoted = try $ do + doubleQuoteStart + withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= + return . Quoted DoubleQuote . normalizeSpaces + +failIfInQuoteContext context = do + st <- getState + if stateQuoteContext st == context + then fail "already inside quotes" + else return () + +singleQuoteStart = do + failIfInQuoteContext InSingleQuote + char '\8216' <|> + do char '\'' + notFollowedBy (oneOf ")!],.;:-? \t\n") + notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> + satisfy (not . isAlphaNum))) -- possess/contraction + return '\'' + +singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum + +doubleQuoteStart = failIfInQuoteContext InDoubleQuote >> + (char '"' <|> char '\8220') >> + notFollowedBy (oneOf " \t\n") + +doubleQuoteEnd = char '"' <|> char '\8221' + +ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses + +dash = enDash <|> emDash + +enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash + +emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >> + skipSpaces >> return EmDash + +whitespace = do + sps <- many1 (oneOf spaceChars) + if length sps >= 2 + then option Space (endline >> return LineBreak) + else return Space <?> "whitespace" + +nonEndline = satisfy (/='\n') + +strChar = noneOf (specialChars ++ spaceChars ++ "\n") + +str = many1 strChar >>= return . Str + +-- an endline character that can be treated as a space, not a structural break +endline = try $ do + newline + notFollowedBy blankline + st <- getState + if stateStrict st + then do notFollowedBy emailBlockQuoteStart + notFollowedBy (char '#') -- atx header + else return () + -- parse potential list-starts differently if in a list: + if stateParserContext st == ListItemState + then notFollowedBy' (bulletListStart <|> + (anyOrderedListStart >> return ())) + else return () + return Space + +-- +-- links +-- + +-- a reference label for a link +reference = notFollowedBy' (string "[^") >> -- footnote reference + inlinesInBalanced "[" "]" >>= (return . normalizeSpaces) + +-- source for a link, with optional title +source = try $ do + char '(' + optional (char '<') + src <- many (noneOf ")> \t\n") + optional (char '>') + tit <- option "" linkTitle + skipSpaces + char ')' + return (removeTrailingSpace src, tit) + +linkTitle = try $ do + (many1 spaceChar >> option '\n' newline) <|> newline + skipSpaces + delim <- char '\'' <|> char '"' + tit <- manyTill anyChar (try (char delim >> skipSpaces >> + notFollowedBy (noneOf ")\n"))) + return $ decodeCharacterReferences tit + +link = try $ do + label <- reference + src <- source <|> referenceLink label + return $ Link label src + +-- a link like [this][ref] or [this][] or [this] +referenceLink label = do + ref <- option [] (try (optional (char ' ') >> + optional (newline >> skipSpaces) >> reference)) + let ref' = if null ref then label else ref + state <- getState + case lookupKeySrc (stateKeys state) ref' of + Nothing -> fail "no corresponding key" + Just target -> return target + +emailAddress = try $ do + name <- many1 (alphaNum <|> char '+') + char '@' + first <- many1 alphaNum + rest <- many1 (char '.' >> many1 alphaNum) + return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest) + +uri = try $ do + str <- many1 (noneOf "\n\t >") + if isURI str + then return str + else fail "not a URI" + +autoLink = try $ do + char '<' + src <- uri <|> emailAddress + char '>' + let src' = if "mailto:" `isPrefixOf` src + then drop 7 src + else src + st <- getState + return $ if stateStrict st + then Link [Str src'] (src, "") + else Link [Code src'] (src, "") + +image = try $ do + char '!' + (Link label src) <- link + return $ Image label src + +note = try $ do + failIfStrict + ref <- noteMarker + state <- getState + let notes = stateNotes state + case lookup ref notes of + Nothing -> fail "note not found" + Just contents -> return $ Note contents + +inlineNote = try $ do + failIfStrict + char '^' + contents <- inlinesInBalanced "[" "]" + return $ Note [Para contents] + +rawLaTeXInline' = failIfStrict >> rawLaTeXInline + +rawHtmlInline' = do + st <- getState + result <- choice $ if stateStrict st + then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] + else [htmlBlockElement, anyHtmlInlineTag] + return $ HtmlInline result + diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs new file mode 100644 index 000000000..1239eb688 --- /dev/null +++ b/src/Text/Pandoc/Readers/RST.hs @@ -0,0 +1,640 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.RST + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion from reStructuredText to 'Pandoc' document. +-} +module Text.Pandoc.Readers.RST ( + readRST + ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.ParserCombinators.Parsec +import Data.List ( findIndex, delete ) + +-- | Parse reStructuredText string and return Pandoc document. +readRST :: ParserState -> String -> Pandoc +readRST state str = (readWith parseRST) state (str ++ "\n\n") + +-- +-- Constants and data structure definitions +--- + +bulletListMarkers = "*+-" +underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~" + +-- treat these as potentially non-text when parsing inline: +specialChars = "\\`|*_<>$:[-" + +-- +-- parsing documents +-- + +isAnonKey (ref, src) = ref == [Str "_"] + +isHeader :: Int -> Block -> Bool +isHeader n (Header x _) = x == n +isHeader _ _ = False + +-- | Promote all headers in a list of blocks. (Part of +-- title transformation for RST.) +promoteHeaders :: Int -> [Block] -> [Block] +promoteHeaders num ((Header level text):rest) = + (Header (level - num) text):(promoteHeaders num rest) +promoteHeaders num (other:rest) = other:(promoteHeaders num rest) +promoteHeaders num [] = [] + +-- | If list of blocks starts with a header (or a header and subheader) +-- of level that are not found elsewhere, return it as a title and +-- promote all the other headers. +titleTransform :: [Block] -- ^ list of blocks + -> ([Block], [Inline]) -- ^ modified list of blocks, title +titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle + if (any (isHeader 1) rest) || (any (isHeader 2) rest) + then ((Header 1 head1):(Header 2 head2):rest, []) + else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) +titleTransform ((Header 1 head1):rest) = -- title, no subtitle + if (any (isHeader 1) rest) + then ((Header 1 head1):rest, []) + else ((promoteHeaders 1 rest), head1) +titleTransform blocks = (blocks, []) + +parseRST = do + startPos <- getPosition + -- go through once just to get list of reference keys + -- docMinusKeys is the raw document with blanks where the keys were... + docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat + setInput docMinusKeys + setPosition startPos + st <- getState + let reversedKeys = stateKeys st + updateState $ \st -> st { stateKeys = reverse reversedKeys } + -- now parse it for real... + blocks <- parseBlocks + let blocks' = filter (/= Null) blocks + state <- getState + let (blocks'', title) = if stateStandalone state + then titleTransform blocks' + else (blocks', []) + let authors = stateAuthors state + let date = stateDate state + let title' = if (null title) then (stateTitle state) else title + return $ Pandoc (Meta title' authors date) blocks'' + +-- +-- parsing blocks +-- + +parseBlocks = manyTill block eof + +block = choice [ codeBlock + , rawHtmlBlock + , rawLaTeXBlock + , fieldList + , blockQuote + , imageBlock + , unknownDirective + , header + , hrule + , list + , lineBlock + , para + , plain + , nullBlock ] <?> "block" + +-- +-- field list +-- + +fieldListItem indent = try $ do + string indent + char ':' + name <- many1 alphaNum + string ": " + skipSpaces + first <- manyTill anyChar newline + rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >> + indentedBlock + return (name, joinWithSep " " (first:(lines rest))) + +fieldList = try $ do + indent <- lookAhead $ many (oneOf " \t") + items <- many1 $ fieldListItem indent + blanklines + let authors = case lookup "Authors" items of + Just auth -> [auth] + Nothing -> map snd (filter (\(x,y) -> x == "Author") items) + if null authors + then return () + else updateState $ \st -> st {stateAuthors = authors} + case (lookup "Date" items) of + Just dat -> updateState $ \st -> st {stateDate = dat} + Nothing -> return () + case (lookup "Title" items) of + Just tit -> parseFromString (many inline) tit >>= + \t -> updateState $ \st -> st {stateTitle = t} + Nothing -> return () + let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && + (x /= "Date") && (x /= "Title")) items + if null remaining + then return Null + else do terms <- mapM (return . (:[]) . Str . fst) remaining + defs <- mapM (parseFromString (many block) . snd) + remaining + return $ DefinitionList $ zip terms defs + +-- +-- line block +-- + +lineBlockLine = try $ do + string "| " + white <- many (oneOf " \t") + line <- manyTill inline newline + return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak] + +lineBlock = try $ do + lines <- many1 lineBlockLine + blanklines + return $ Para (concat lines) + +-- +-- paragraph block +-- + +para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" + +codeBlockStart = string "::" >> blankline >> blankline + +-- paragraph that ends in a :: starting a code block +paraBeforeCodeBlock = try $ do + result <- many1 (notFollowedBy' codeBlockStart >> inline) + lookAhead (string "::") + return $ Para $ if last result == Space + then normalizeSpaces result + else (normalizeSpaces result) ++ [Str ":"] + +-- regular paragraph +paraNormal = try $ do + result <- many1 inline + newline + blanklines + return $ Para $ normalizeSpaces result + +plain = many1 inline >>= return . Plain . normalizeSpaces + +-- +-- image block +-- + +imageBlock = try $ do + string ".. image:: " + src <- manyTill anyChar newline + fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t") + many1 $ fieldListItem indent + optional blanklines + case lookup "alt" fields of + Just alt -> return $ Plain [Image [Str alt] (src, alt)] + Nothing -> return $ Plain [Image [Str "image"] (src, "")] +-- +-- header blocks +-- + +header = doubleHeader <|> singleHeader <?> "header" + +-- a header with lines on top and bottom +doubleHeader = try $ do + c <- oneOf underlineChars + rest <- many (char c) -- the top line + let lenTop = length (c:rest) + skipSpaces + newline + txt <- many1 (notFollowedBy blankline >> inline) + pos <- getPosition + let len = (sourceColumn pos) - 1 + if (len > lenTop) then fail "title longer than border" else return () + blankline -- spaces and newline + count lenTop (char c) -- the bottom line + blanklines + -- check to see if we've had this kind of header before. + -- if so, get appropriate level. if not, add to list. + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return $ Header level (normalizeSpaces txt) + +-- a header with line on the bottom only +singleHeader = try $ do + notFollowedBy' whitespace + txt <- many1 (do {notFollowedBy blankline; inline}) + pos <- getPosition + let len = (sourceColumn pos) - 1 + blankline + c <- oneOf underlineChars + rest <- count (len - 1) (char c) + many (char c) + blanklines + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return $ Header level (normalizeSpaces txt) + +-- +-- hrule block +-- + +hrule = try $ do + chr <- oneOf underlineChars + count 3 (char chr) + skipMany (char chr) + blankline + blanklines + return HorizontalRule + +-- +-- code blocks +-- + +-- read a line indented by a given string +indentedLine indents = try $ do + string indents + result <- manyTill anyChar newline + return $ result ++ "\n" + +-- two or more indented lines, possibly separated by blank lines. +-- any amount of indentation will work. +indentedBlock = do + indents <- lookAhead $ many1 (oneOf " \t") + lns <- many $ choice $ [ indentedLine indents, + try $ do b <- blanklines + l <- indentedLine indents + return (b ++ l) ] + optional blanklines + return $ concat lns + +codeBlock = try $ do + codeBlockStart + result <- indentedBlock + return $ CodeBlock $ stripTrailingNewlines result + +-- +-- raw html +-- + +rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> + indentedBlock >>= return . RawHtml + +-- +-- raw latex +-- + +rawLaTeXBlock = try $ do + string ".. raw:: latex" + blanklines + result <- indentedBlock + return $ Para [(TeX result)] + +-- +-- block quotes +-- + +blockQuote = do + raw <- indentedBlock + -- parse the extracted block, which may contain various block elements: + contents <- parseFromString parseBlocks $ raw ++ "\n\n" + return $ BlockQuote contents + +-- +-- list blocks +-- + +list = choice [ bulletList, orderedList, definitionList ] <?> "list" + +definitionListItem = try $ do + term <- many1Till inline endline + raw <- indentedBlock + -- parse the extracted block, which may contain various block elements: + contents <- parseFromString parseBlocks $ raw ++ "\n\n" + return (normalizeSpaces term, contents) + +definitionList = many1 definitionListItem >>= return . DefinitionList + +-- parses bullet list start and returns its length (inc. following whitespace) +bulletListStart = try $ do + notFollowedBy' hrule -- because hrules start out just like lists + marker <- oneOf bulletListMarkers + white <- many1 spaceChar + return $ length (marker:white) + +-- parses ordered list start and returns its length (inc following whitespace) +orderedListStart style delim = try $ do + (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) + white <- many1 spaceChar + return $ markerLen + length white + +-- parse a line of a list item +listLine markerLength = try $ do + notFollowedBy blankline + indentWith markerLength + line <- manyTill anyChar newline + return $ line ++ "\n" + +-- indent by specified number of spaces (or equiv. tabs) +indentWith num = do + state <- getState + let tabStop = stateTabStop state + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')), + (try (char '\t' >> count (num - tabStop) (char ' '))) ] + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem start = do + markerLength <- start + firstLine <- manyTill anyChar newline + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + +-- continuation of a list item - indented and separated by blankline or +-- (in compact lists) endline. +-- Note: nested lists are parsed as continuations. +listContinuation markerLength = try $ do + blanks <- many1 blankline + result <- many1 (listLine markerLength) + return $ blanks ++ concat result + +listItem start = try $ do + (markerLength, first) <- rawListItem start + rest <- many (listContinuation markerLength) + blanks <- choice [ try (many blankline >>~ lookAhead start), + many1 blankline ] -- whole list must end with blank. + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let oldContext = stateParserContext state + setState $ state {stateParserContext = ListItemState} + -- parse the extracted block, which may itself contain block elements + parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks + updateState (\st -> st {stateParserContext = oldContext}) + return parsed + +orderedList = do + (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) + items <- many1 (listItem (orderedListStart style delim)) + let items' = compactify items + return $ OrderedList (start, style, delim) items' + +bulletList = many1 (listItem bulletListStart) >>= + return . BulletList . compactify + +-- +-- unknown directive (e.g. comment) +-- + +unknownDirective = try $ do + string ".. " + manyTill anyChar newline + many (string " :" >> many1 (noneOf "\n:") >> char ':' >> + many1 (noneOf "\n") >> newline) + optional blanklines + return Null + +-- +-- reference key +-- + +referenceKey = do + startPos <- getPosition + key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] + st <- getState + let oldkeys = stateKeys st + updateState $ \st -> st { stateKeys = key : oldkeys } + optional blanklines + endPos <- getPosition + -- return enough blanks to replace key + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + +targetURI = do + skipSpaces + optional newline + contents <- many1 (try (many spaceChar >> newline >> + many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") + blanklines + return contents + +imageKey = try $ do + string ".. |" + ref <- manyTill inline (char '|') + skipSpaces + string "image::" + src <- targetURI + return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) + +anonymousKey = try $ do + oneOfStrings [".. __:", "__"] + src <- targetURI + state <- getState + return ([Str "_"], (removeLeadingTrailingSpace src, "")) + +regularKeyQuoted = try $ do + string ".. _`" + ref <- manyTill inline (char '`') + char ':' + src <- targetURI + return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) + +regularKey = try $ do + string ".. _" + ref <- manyTill inline (char ':') + src <- targetURI + return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) + + -- + -- inline + -- + +inline = choice [ link + , str + , whitespace + , endline + , strong + , emph + , code + , image + , hyphens + , superscript + , subscript + , escapedChar + , symbol ] <?> "inline" + +hyphens = do + result <- many1 (char '-') + option Space endline + -- don't want to treat endline after hyphen or dash as a space + return $ Str result + +escapedChar = escaped anyChar + +symbol = do + result <- oneOf specialChars + return $ Str [result] + +-- parses inline code, between codeStart and codeEnd +code = try $ do + string "``" + result <- manyTill anyChar (try (string "``")) + return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result + +emph = enclosed (char '*') (char '*') inline >>= + return . Emph . normalizeSpaces + +strong = enclosed (string "**") (try $ string "**") inline >>= + return . Strong . normalizeSpaces + +interpreted role = try $ do + optional $ try $ string "\\ " + result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar + nextChar <- lookAhead anyChar + try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") + return [Str result] + +superscript = interpreted "sup" >>= (return . Superscript) + +subscript = interpreted "sub" >>= (return . Subscript) + +whitespace = many1 spaceChar >> return Space <?> "whitespace" + +str = notFollowedBy' oneWordReference >> + many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str + +-- an endline character that can be treated as a space, not a structural break +endline = try $ do + newline + notFollowedBy blankline + -- parse potential list-starts at beginning of line differently in a list: + st <- getState + if (stateParserContext st) == ListItemState + then notFollowedBy (anyOrderedListMarker >> spaceChar) >> + notFollowedBy' bulletListStart + else return () + return Space + +-- +-- links +-- + +link = choice [explicitLink, referenceLink, autoLink] <?> "link" + +explicitLink = try $ do + char '`' + notFollowedBy (char '`') -- `` is marks start of inline code + label <- manyTill inline (try (do {spaces; char '<'})) + src <- manyTill (noneOf ">\n ") (char '>') + skipSpaces + string "`_" + return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "") + +reference = try $ do + char '`' + notFollowedBy (char '`') + label <- many1Till inline (char '`') + char '_' + return label + +oneWordReference = do + raw <- many1 alphaNum + char '_' + notFollowedBy alphaNum -- because this_is_not a link + return [Str raw] + +referenceLink = try $ do + label <- reference <|> oneWordReference + key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link + state <- getState + let keyTable = stateKeys state + src <- case lookupKeySrc keyTable key of + Nothing -> fail "no corresponding key" + Just target -> return target + -- if anonymous link, remove first anon key so it won't be used again + let keyTable' = if (key == [Str "_"]) -- anonymous link? + then delete ([Str "_"], src) keyTable -- remove first anon key + else keyTable + setState $ state { stateKeys = keyTable' } + return $ Link (normalizeSpaces label) src + +uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", + "mailto:", "news:", "telnet:" ] + +uri = try $ do + scheme <- uriScheme + identifier <- many1 (noneOf " \t\n") + return $ scheme ++ identifier + +autoURI = do + src <- uri + return $ Link [Str src] (src, "") + +emailChar = alphaNum <|> oneOf "-+_." + +emailAddress = try $ do + firstLetter <- alphaNum + restAddr <- many emailChar + let addr = firstLetter:restAddr + char '@' + dom <- domain + return $ addr ++ '@':dom + +domainChar = alphaNum <|> char '-' + +domain = do + first <- many1 domainChar + dom <- many1 (try (do{ char '.'; many1 domainChar })) + return $ joinWithSep "." (first:dom) + +autoEmail = do + src <- emailAddress + return $ Link [Str src] ("mailto:" ++ src, "") + +autoLink = autoURI <|> autoEmail + +-- For now, we assume that all substitution references are for images. +image = try $ do + char '|' + ref <- manyTill inline (char '|') + state <- getState + let keyTable = stateKeys state + src <- case lookupKeySrc keyTable ref of + Nothing -> fail "no corresponding key" + Just target -> return target + return $ Image (normalizeSpaces ref) src + diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs new file mode 100644 index 000000000..f27c3ae75 --- /dev/null +++ b/src/Text/Pandoc/Shared.hs @@ -0,0 +1,792 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Shared + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Utility functions and definitions used by the various Pandoc modules. +-} +module Text.Pandoc.Shared ( + -- * List processing + splitBy, + splitByIndices, + substitute, + joinWithSep, + -- * Text processing + backslashEscapes, + escapeStringUsing, + stripTrailingNewlines, + removeLeadingTrailingSpace, + removeLeadingSpace, + removeTrailingSpace, + stripFirstAndLast, + camelCaseToHyphenated, + toRomanNumeral, + wrapped, + wrapIfNeeded, + -- * Parsing + (>>~), + anyLine, + many1Till, + notFollowedBy', + oneOfStrings, + spaceChar, + skipSpaces, + blankline, + blanklines, + enclosed, + stringAnyCase, + parseFromString, + lineClump, + charsInBalanced, + charsInBalanced', + romanNumeral, + withHorizDisplacement, + nullBlock, + failIfStrict, + escaped, + anyOrderedListMarker, + orderedListMarker, + charRef, + readWith, + testStringWith, + ParserState (..), + defaultParserState, + HeaderType (..), + ParserContext (..), + QuoteContext (..), + NoteTable, + KeyTable, + lookupKeySrc, + refsMatch, + -- * Native format prettyprinting + prettyPandoc, + -- * Pandoc block and inline list processing + orderedListMarkers, + normalizeSpaces, + compactify, + Element (..), + hierarchicalize, + isHeaderBlock, + -- * Writer options + WriterOptions (..), + defaultWriterOptions + ) where + +import Text.Pandoc.Definition +import Text.ParserCombinators.Parsec +import Text.PrettyPrint.HughesPJ ( Doc, fsep ) +import Text.Pandoc.CharacterReferences ( characterReference ) +import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) +import Data.List ( find, isPrefixOf ) +import Control.Monad ( join ) + +-- +-- List processing +-- + +-- | Split list by groups of one or more sep. +splitBy :: (Eq a) => a -> [a] -> [[a]] +splitBy _ [] = [] +splitBy sep lst = + let (first, rest) = break (== sep) lst + rest' = dropWhile (== sep) rest + in first:(splitBy sep rest') + +-- | Split list into chunks divided at specified indices. +splitByIndices :: [Int] -> [a] -> [[a]] +splitByIndices [] lst = [lst] +splitByIndices (x:xs) lst = + let (first, rest) = splitAt x lst in + first:(splitByIndices (map (\y -> y - x) xs) rest) + +-- | Replace each occurrence of one sublist in a list with another. +substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] +substitute _ _ [] = [] +substitute [] _ lst = lst +substitute target replacement lst = + if target `isPrefixOf` lst + then replacement ++ (substitute target replacement $ drop (length target) lst) + else (head lst):(substitute target replacement $ tail lst) + +-- | Joins a list of lists, separated by another list. +joinWithSep :: [a] -- ^ List to use as separator + -> [[a]] -- ^ Lists to join + -> [a] +joinWithSep _ [] = [] +joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst + +-- +-- Text processing +-- + +-- | Returns an association list of backslash escapes for the +-- designated characters. +backslashEscapes :: [Char] -- ^ list of special characters to escape + -> [(Char, String)] +backslashEscapes = map (\ch -> (ch, ['\\',ch])) + +-- | Escape a string of characters, using an association list of +-- characters and strings. +escapeStringUsing :: [(Char, String)] -> String -> String +escapeStringUsing _ [] = "" +escapeStringUsing escapeTable (x:xs) = + case (lookup x escapeTable) of + Just str -> str ++ rest + Nothing -> x:rest + where rest = escapeStringUsing escapeTable xs + +-- | Strip trailing newlines from string. +stripTrailingNewlines :: String -> String +stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse + +-- | Remove leading and trailing space (including newlines) from string. +removeLeadingTrailingSpace :: String -> String +removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace + +-- | Remove leading space (including newlines) from string. +removeLeadingSpace :: String -> String +removeLeadingSpace = dropWhile (`elem` " \n\t") + +-- | Remove trailing space (including newlines) from string. +removeTrailingSpace :: String -> String +removeTrailingSpace = reverse . removeLeadingSpace . reverse + +-- | Strip leading and trailing characters from string +stripFirstAndLast :: String -> String +stripFirstAndLast str = + drop 1 $ take ((length str) - 1) str + +-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). +camelCaseToHyphenated :: String -> String +camelCaseToHyphenated [] = "" +camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = + a:'-':(toLower b):(camelCaseToHyphenated rest) +camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) + +-- | Convert number < 4000 to uppercase roman numeral. +toRomanNumeral :: Int -> String +toRomanNumeral x = + if x >= 4000 || x < 0 + then "?" + else case x of + _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000) + _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900) + _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500) + _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400) + _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100) + _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90) + _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50) + _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40) + _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10) + _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5) + _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5) + _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4) + _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) + _ -> "" + +-- | Wrap inlines to line length. +wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc +wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>= + return . fsep + +wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) -> + [Inline] -> m Doc +wrapIfNeeded opts = if writerWrapText opts + then wrapped + else ($) + +-- +-- Parsing +-- + +-- | Like >>, but returns the operation on the left. +-- (Suggested by Tillmann Rendel on Haskell-cafe list.) +(>>~) :: (Monad m) => m a -> m b -> m a +a >>~ b = a >>= \x -> b >> return x + +-- | Parse any line of text +anyLine :: GenParser Char st [Char] +anyLine = manyTill anyChar newline + +-- | Like @manyTill@, but reads at least one item. +many1Till :: GenParser tok st a + -> GenParser tok st end + -> GenParser tok st [a] +many1Till p end = do + first <- p + rest <- manyTill p end + return (first:rest) + +-- | A more general form of @notFollowedBy@. This one allows any +-- type of parser to be specified, and succeeds only if that parser fails. +-- It does not consume any input. +notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () +notFollowedBy' p = try $ join $ do a <- try p + return (unexpected (show a)) + <|> + return (return ()) +-- (This version due to Andrew Pimlott on the Haskell mailing list.) + +-- | Parses one of a list of strings (tried in order). +oneOfStrings :: [String] -> GenParser Char st String +oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings + +-- | Parses a space or tab. +spaceChar :: CharParser st Char +spaceChar = char ' ' <|> char '\t' + +-- | Skips zero or more spaces or tabs. +skipSpaces :: GenParser Char st () +skipSpaces = skipMany spaceChar + +-- | Skips zero or more spaces or tabs, then reads a newline. +blankline :: GenParser Char st Char +blankline = try $ skipSpaces >> newline + +-- | Parses one or more blank lines and returns a string of newlines. +blanklines :: GenParser Char st [Char] +blanklines = many1 blankline + +-- | Parses material enclosed between start and end parsers. +enclosed :: GenParser Char st t -- ^ start parser + -> GenParser Char st end -- ^ end parser + -> GenParser Char st a -- ^ content parser (to be used repeatedly) + -> GenParser Char st [a] +enclosed start end parser = try $ + start >> notFollowedBy space >> many1Till parser end + +-- | Parse string, case insensitive. +stringAnyCase :: [Char] -> CharParser st String +stringAnyCase [] = string "" +stringAnyCase (x:xs) = do + firstChar <- char (toUpper x) <|> char (toLower x) + rest <- stringAnyCase xs + return (firstChar:rest) + +-- | Parse contents of 'str' using 'parser' and return result. +parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromString parser str = do + oldPos <- getPosition + oldInput <- getInput + setInput str + result <- parser + setInput oldInput + setPosition oldPos + return result + +-- | Parse raw line block up to and including blank lines. +lineClump :: GenParser Char st String +lineClump = blanklines + <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) + +-- | Parse a string of characters between an open character +-- and a close character, including text between balanced +-- pairs of open and close, which must be different. For example, +-- @charsInBalanced '(' ')'@ will parse "(hello (there))" +-- and return "hello (there)". Stop if a blank line is +-- encountered. +charsInBalanced :: Char -> Char -> GenParser Char st String +charsInBalanced open close = try $ do + char open + raw <- many $ (many1 (noneOf [open, close, '\n'])) + <|> (do res <- charsInBalanced open close + return $ [open] ++ res ++ [close]) + <|> try (string "\n" >>~ notFollowedBy' blanklines) + char close + return $ concat raw + +-- | Like @charsInBalanced@, but allow blank lines in the content. +charsInBalanced' :: Char -> Char -> GenParser Char st String +charsInBalanced' open close = try $ do + char open + raw <- many $ (many1 (noneOf [open, close])) + <|> (do res <- charsInBalanced' open close + return $ [open] ++ res ++ [close]) + char close + return $ concat raw + +-- | Parses a roman numeral (uppercase or lowercase), returns number. +romanNumeral :: Bool -- ^ Uppercase if true + -> GenParser Char st Int +romanNumeral upperCase = do + let charAnyCase c = char (if upperCase then toUpper c else c) + let one = charAnyCase 'i' + let five = charAnyCase 'v' + let ten = charAnyCase 'x' + let fifty = charAnyCase 'l' + let hundred = charAnyCase 'c' + let fivehundred = charAnyCase 'd' + let thousand = charAnyCase 'm' + thousands <- many thousand >>= (return . (1000 *) . length) + ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 + fivehundreds <- many fivehundred >>= (return . (500 *) . length) + fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 + hundreds <- many hundred >>= (return . (100 *) . length) + nineties <- option 0 $ try $ ten >> hundred >> return 90 + fifties <- many fifty >>= (return . (50 *) . length) + forties <- option 0 $ try $ ten >> fifty >> return 40 + tens <- many ten >>= (return . (10 *) . length) + nines <- option 0 $ try $ one >> ten >> return 9 + fives <- many five >>= (return . (5 *) . length) + fours <- option 0 $ try $ one >> five >> return 4 + ones <- many one >>= (return . length) + let total = thousands + ninehundreds + fivehundreds + fourhundreds + + hundreds + nineties + fifties + forties + tens + nines + + fives + fours + ones + if total == 0 + then fail "not a roman numeral" + else return total + +-- | Applies a parser, returns tuple of its results and its horizontal +-- displacement (the difference between the source column at the end +-- and the source column at the beginning). Vertical displacement +-- (source row) is ignored. +withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply + -> GenParser Char st (a, Int) -- ^ (result, displacement) +withHorizDisplacement parser = do + pos1 <- getPosition + result <- parser + pos2 <- getPosition + return (result, sourceColumn pos2 - sourceColumn pos1) + +-- | Parses a character and returns 'Null' (so that the parser can move on +-- if it gets stuck). +nullBlock :: GenParser Char st Block +nullBlock = anyChar >> return Null + +-- | Fail if reader is in strict markdown syntax mode. +failIfStrict :: GenParser Char ParserState () +failIfStrict = do + state <- getState + if stateStrict state then fail "strict mode" else return () + +-- | Parses backslash, then applies character parser. +escaped :: GenParser Char st Char -- ^ Parser for character to escape + -> GenParser Char st Inline +escaped parser = try $ do + char '\\' + result <- parser + return (Str [result]) + +-- | Parses an uppercase roman numeral and returns (UpperRoman, number). +upperRoman :: GenParser Char st (ListNumberStyle, Int) +upperRoman = do + num <- romanNumeral True + return (UpperRoman, num) + +-- | Parses a lowercase roman numeral and returns (LowerRoman, number). +lowerRoman :: GenParser Char st (ListNumberStyle, Int) +lowerRoman = do + num <- romanNumeral False + return (LowerRoman, num) + +-- | Parses a decimal numeral and returns (Decimal, number). +decimal :: GenParser Char st (ListNumberStyle, Int) +decimal = do + num <- many1 digit + return (Decimal, read num) + +-- | Parses a '#' returns (DefaultStyle, 1). +defaultNum :: GenParser Char st (ListNumberStyle, Int) +defaultNum = do + char '#' + return (DefaultStyle, 1) + +-- | Parses a lowercase letter and returns (LowerAlpha, number). +lowerAlpha :: GenParser Char st (ListNumberStyle, Int) +lowerAlpha = do + ch <- oneOf ['a'..'z'] + return (LowerAlpha, ord ch - ord 'a' + 1) + +-- | Parses an uppercase letter and returns (UpperAlpha, number). +upperAlpha :: GenParser Char st (ListNumberStyle, Int) +upperAlpha = do + ch <- oneOf ['A'..'Z'] + return (UpperAlpha, ord ch - ord 'A' + 1) + +-- | Parses a roman numeral i or I +romanOne :: GenParser Char st (ListNumberStyle, Int) +romanOne = (char 'i' >> return (LowerRoman, 1)) <|> + (char 'I' >> return (UpperRoman, 1)) + +-- | Parses an ordered list marker and returns list attributes. +anyOrderedListMarker :: GenParser Char st ListAttributes +anyOrderedListMarker = choice $ + [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], + numParser <- [decimal, defaultNum, romanOne, + lowerAlpha, lowerRoman, upperAlpha, upperRoman]] + +-- | Parses a list number (num) followed by a period, returns list attributes. +inPeriod :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inPeriod num = try $ do + (style, start) <- num + char '.' + let delim = if style == DefaultStyle + then DefaultDelim + else Period + return (start, style, delim) + +-- | Parses a list number (num) followed by a paren, returns list attributes. +inOneParen :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inOneParen num = try $ do + (style, start) <- num + char ')' + return (start, style, OneParen) + +-- | Parses a list number (num) enclosed in parens, returns list attributes. +inTwoParens :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inTwoParens num = try $ do + char '(' + (style, start) <- num + char ')' + return (start, style, TwoParens) + +-- | Parses an ordered list marker with a given style and delimiter, +-- returns number. +orderedListMarker :: ListNumberStyle + -> ListNumberDelim + -> GenParser Char st Int +orderedListMarker style delim = do + let num = case style of + DefaultStyle -> decimal <|> defaultNum + Decimal -> decimal + UpperRoman -> upperRoman + LowerRoman -> lowerRoman + UpperAlpha -> upperAlpha + LowerAlpha -> lowerAlpha + let context = case delim of + DefaultDelim -> inPeriod + Period -> inPeriod + OneParen -> inOneParen + TwoParens -> inTwoParens + (start, _, _) <- context num + return start + +-- | Parses a character reference and returns a Str element. +charRef :: GenParser Char st Inline +charRef = do + c <- characterReference + return $ Str [c] + +-- | Parse a string with a given parser and state. +readWith :: GenParser Char ParserState a -- ^ parser + -> ParserState -- ^ initial state + -> String -- ^ input string + -> a +readWith parser state input = + case runParser parser state "source" input of + Left err -> error $ "\nError:\n" ++ show err + Right result -> result + +-- | Parse a string with @parser@ (for testing). +testStringWith :: (Show a) => GenParser Char ParserState a + -> String + -> IO () +testStringWith parser str = putStrLn $ show $ + readWith parser defaultParserState str + +-- | Parsing options. +data ParserState = ParserState + { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? + stateParserContext :: ParserContext, -- ^ Inside list? + stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateKeys :: KeyTable, -- ^ List of reference keys + stateNotes :: NoteTable, -- ^ List of notes + stateTabStop :: Int, -- ^ Tab stop + stateStandalone :: Bool, -- ^ Parse bibliographic info? + stateTitle :: [Inline], -- ^ Title of document + stateAuthors :: [String], -- ^ Authors of document + stateDate :: String, -- ^ Date of document + stateStrict :: Bool, -- ^ Use strict markdown syntax? + stateSmart :: Bool, -- ^ Use smart typography? + stateColumns :: Int, -- ^ Number of columns in terminal + stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used + } + deriving Show + +defaultParserState :: ParserState +defaultParserState = + ParserState { stateParseRaw = False, + stateParserContext = NullState, + stateQuoteContext = NoQuote, + stateKeys = [], + stateNotes = [], + stateTabStop = 4, + stateStandalone = False, + stateTitle = [], + stateAuthors = [], + stateDate = [], + stateStrict = False, + stateSmart = False, + stateColumns = 80, + stateHeaderTable = [] } + +data HeaderType + = SingleHeader Char -- ^ Single line of characters underneath + | DoubleHeader Char -- ^ Lines of characters above and below + deriving (Eq, Show) + +data ParserContext + = ListItemState -- ^ Used when running parser on list item contents + | NullState -- ^ Default state + deriving (Eq, Show) + +data QuoteContext + = InSingleQuote -- ^ Used when parsing inside single quotes + | InDoubleQuote -- ^ Used when parsing inside double quotes + | NoQuote -- ^ Used when not parsing inside quotes + deriving (Eq, Show) + +type NoteTable = [(String, [Block])] + +type KeyTable = [([Inline], Target)] + +-- | Look up key in key table and return target object. +lookupKeySrc :: KeyTable -- ^ Key table + -> [Inline] -- ^ Key + -> Maybe Target +lookupKeySrc table key = case find (refsMatch key . fst) table of + Nothing -> Nothing + Just (_, src) -> Just src + +-- | Returns @True@ if keys match (case insensitive). +refsMatch :: [Inline] -> [Inline] -> Bool +refsMatch ((Str x):restx) ((Str y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Emph x):restx) ((Emph y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strong x):restx) ((Strong y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Superscript x):restx) ((Superscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Subscript x):restx) ((Subscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = + t == u && refsMatch x y && refsMatch restx resty +refsMatch ((Code x):restx) ((Code y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((TeX x):restx) ((TeX y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty +refsMatch [] x = null x +refsMatch x [] = null x + +-- +-- Native format prettyprinting +-- + +-- | Indent string as a block. +indentBy :: Int -- ^ Number of spaces to indent the block + -> Int -- ^ Number of spaces (rel to block) to indent first line + -> String -- ^ Contents of block to indent + -> String +indentBy _ _ [] = "" +indentBy num first str = + let (firstLine:restLines) = lines str + firstLineIndent = num + first + in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ + (joinWithSep "\n" $ map ((replicate num ' ') ++ ) restLines) + +-- | Prettyprint list of Pandoc blocks elements. +prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks + -> [Block] -- ^ List of blocks + -> String +prettyBlockList indent [] = indentBy indent 0 "[]" +prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ + (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" + +-- | Prettyprint Pandoc block element. +prettyBlock :: Block -> String +prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ + (prettyBlockList 2 blocks) +prettyBlock (OrderedList attribs blockLists) = + "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ + (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks) + blockLists)) ++ " ]" +prettyBlock (BulletList blockLists) = "BulletList\n" ++ + indentBy 2 0 ("[ " ++ (joinWithSep ", " + (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++ + indentBy 2 0 ("[" ++ (joinWithSep ",\n" + (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++ + indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]" +prettyBlock (Table caption aligns widths header rows) = + "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ + show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ + (joinWithSep ",\n" (map prettyRow rows)) ++ " ]" + where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", " + (map (\blocks -> prettyBlockList 2 blocks) + cols))) ++ " ]" +prettyBlock block = show block + +-- | Prettyprint Pandoc document. +prettyPandoc :: Pandoc -> String +prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ + ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" + +-- +-- Pandoc block and inline list processing +-- + +-- | Generate infinite lazy list of markers for an ordered list, +-- depending on list attributes. +orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] +orderedListMarkers (start, numstyle, numdelim) = + let singleton c = [c] + nums = case numstyle of + DefaultStyle -> map show [start..] + Decimal -> map show [start..] + UpperAlpha -> drop (start - 1) $ cycle $ + map singleton ['A'..'Z'] + LowerAlpha -> drop (start - 1) $ cycle $ + map singleton ['a'..'z'] + UpperRoman -> map toRomanNumeral [start..] + LowerRoman -> map (map toLower . toRomanNumeral) [start..] + inDelim str = case numdelim of + DefaultDelim -> str ++ "." + Period -> str ++ "." + OneParen -> str ++ ")" + TwoParens -> "(" ++ str ++ ")" + in map inDelim nums + +-- | Normalize a list of inline elements: remove leading and trailing +-- @Space@ elements, collapse double @Space@s into singles, and +-- remove empty Str elements. +normalizeSpaces :: [Inline] -> [Inline] +normalizeSpaces [] = [] +normalizeSpaces list = + let removeDoubles [] = [] + removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) + removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest) + removeDoubles ((Str ""):rest) = removeDoubles rest + removeDoubles (x:rest) = x:(removeDoubles rest) + removeLeading (Space:xs) = removeLeading xs + removeLeading x = x + removeTrailing [] = [] + removeTrailing lst = if (last lst == Space) + then init lst + else lst + in removeLeading $ removeTrailing $ removeDoubles list + +-- | Change final list item from @Para@ to @Plain@ if the list should +-- be compact. +compactify :: [[Block]] -- ^ List of list items (each a list of blocks) + -> [[Block]] +compactify [] = [] +compactify items = + let final = last items + others = init items + in case final of + [Para a] -> if any containsPara others + then items + else others ++ [[Plain a]] + _ -> items + +containsPara :: [Block] -> Bool +containsPara [] = False +containsPara ((Para _):_) = True +containsPara ((BulletList items):rest) = any containsPara items || + containsPara rest +containsPara ((OrderedList _ items):rest) = any containsPara items || + containsPara rest +containsPara ((DefinitionList items):rest) = any containsPara (map snd items) || + containsPara rest +containsPara (_:rest) = containsPara rest + +-- | Data structure for defining hierarchical Pandoc documents +data Element = Blk Block + | Sec [Inline] [Element] deriving (Eq, Read, Show) + +-- | Returns @True@ on Header block with at least the specified level +headerAtLeast :: Int -> Block -> Bool +headerAtLeast level (Header x _) = x <= level +headerAtLeast _ _ = False + +-- | Convert list of Pandoc blocks into (hierarchical) list of Elements +hierarchicalize :: [Block] -> [Element] +hierarchicalize [] = [] +hierarchicalize (block:rest) = + case block of + (Header level title) -> + let (thisSection, rest') = break (headerAtLeast level) rest + in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest') + x -> (Blk x):(hierarchicalize rest) + +-- | True if block is a Header block. +isHeaderBlock :: Block -> Bool +isHeaderBlock (Header _ _) = True +isHeaderBlock _ = False + +-- +-- Writer options +-- + +-- | Options for writers +data WriterOptions = WriterOptions + { writerStandalone :: Bool -- ^ Include header and footer + , writerHeader :: String -- ^ Header for the document + , writerTitlePrefix :: String -- ^ Prefix for HTML titles + , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs + , writerTableOfContents :: Bool -- ^ Include table of contents + , writerS5 :: Bool -- ^ We're writing S5 + , writerUseASCIIMathML :: Bool -- ^ Use ASCIIMathML + , writerASCIIMathMLURL :: Maybe String -- ^ URL to asciiMathML.js + , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) + , writerIncremental :: Bool -- ^ Incremental S5 lists + , writerNumberSections :: Bool -- ^ Number sections in LaTeX + , writerIncludeBefore :: String -- ^ String to include before the body + , writerIncludeAfter :: String -- ^ String to include after the body + , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax + , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , writerWrapText :: Bool -- ^ Wrap text to line length + } deriving Show + +-- | Default writer options. +defaultWriterOptions :: WriterOptions +defaultWriterOptions = + WriterOptions { writerStandalone = False + , writerHeader = "" + , writerTitlePrefix = "" + , writerTabStop = 4 + , writerTableOfContents = False + , writerS5 = False + , writerUseASCIIMathML = False + , writerASCIIMathMLURL = Nothing + , writerIgnoreNotes = False + , writerIncremental = False + , writerNumberSections = False + , writerIncludeBefore = "" + , writerIncludeAfter = "" + , writerStrictMarkdown = False + , writerReferenceLinks = False + , writerWrapText = True + } diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs new file mode 100644 index 000000000..16bdb9218 --- /dev/null +++ b/src/Text/Pandoc/UTF8.hs @@ -0,0 +1,45 @@ +-- | Functions for converting Unicode strings to UTF-8 and vice versa. +-- +-- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>. +-- (c) 2003, OGI School of Science & Engineering, Oregon Health and +-- Science University. +-- +-- Modified by Martin Norbaeck +-- to pass illegal UTF-8 sequences through unchanged. +module Text.Pandoc.UTF8 ( + fromUTF8, + toUTF8 + ) where + +-- From the Char module supplied with HBC. + +-- | Take a UTF-8 string and decode it into a Unicode string. +fromUTF8 :: String -> String +fromUTF8 "" = "" +fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker) +fromUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' && + '\x80' <= c' && c' <= '\xbf' = + toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs +fromUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' && + '\x80' <= c' && c' <= '\xbf' && + '\x80' <= c'' && c'' <= '\xbf' = + toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs +fromUTF8 (c:cs) = c : fromUTF8 cs + +-- | Take a Unicode string and encode it as a UTF-8 string. +toUTF8 :: String -> String +toUTF8 "" = "" +toUTF8 (c:cs) = + if c > '\x0000' && c < '\x0080' then + c : toUTF8 cs + else if c < toEnum 0x0800 then + let i = fromEnum c + in toEnum (0xc0 + i `div` 0x40) : + toEnum (0x80 + i `mod` 0x40) : + toUTF8 cs + else + let i = fromEnum c + in toEnum (0xe0 + i `div` 0x1000) : + toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : + toEnum (0x80 + i `mod` 0x40) : + toUTF8 cs diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs new file mode 100644 index 000000000..13912a9f3 --- /dev/null +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -0,0 +1,248 @@ +{- +Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.ConTeXt + Copyright : Copyright (C) 2007 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' format into ConTeXt. +-} +module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Printf ( printf ) +import Data.List ( (\\), intersperse ) +import Control.Monad.State + +type WriterState = Int -- number of next URL reference + +-- | Convert Pandoc to ConTeXt. +writeConTeXt :: WriterOptions -> Pandoc -> String +writeConTeXt options document = evalState (pandocToConTeXt options document) 1 + +pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String +pandocToConTeXt options (Pandoc meta blocks) = do + main <- blockListToConTeXt blocks + let body = writerIncludeBefore options ++ main ++ writerIncludeAfter options + head <- if writerStandalone options + then contextHeader options meta + else return "" + let toc = if writerTableOfContents options + then "\\placecontent\n\n" + else "" + let foot = if writerStandalone options + then "\n\\stoptext\n" + else "" + return $ head ++ toc ++ body ++ foot + +-- | Insert bibliographic information into ConTeXt header. +contextHeader :: WriterOptions -- ^ Options, including ConTeXt header + -> Meta -- ^ Meta with bibliographic information + -> State WriterState String +contextHeader options (Meta title authors date) = do + titletext <- if null title + then return "" + else inlineListToConTeXt title + let authorstext = if null authors + then "" + else if length authors == 1 + then stringToConTeXt $ head authors + else stringToConTeXt $ (joinWithSep ", " $ + init authors) ++ " & " ++ last authors + let datetext = if date == "" + then "" + else stringToConTeXt date + let titleblock = "\\doctitle{" ++ titletext ++ "}\n\ + \ \\author{" ++ authorstext ++ "}\n\ + \ \\date{" ++ datetext ++ "}\n\n" + let setupheads = if (writerNumberSections options) + then "\\setupheads[sectionnumber=yes, style=\\bf]\n" + else "\\setupheads[sectionnumber=no, style=\\bf]\n" + let header = writerHeader options + return $ header ++ setupheads ++ titleblock ++ "\\starttext\n\\maketitle\n\n" + +-- escape things as needed for ConTeXt + +escapeCharForConTeXt :: Char -> String +escapeCharForConTeXt ch = + case ch of + '{' -> "\\letteropenbrace{}" + '}' -> "\\letterclosebrace{}" + '\\' -> "\\letterbackslash{}" + '$' -> "\\$" + '|' -> "\\letterbar{}" + '^' -> "\\letterhat{}" + '%' -> "\\%" + '~' -> "\\lettertilde{}" + '&' -> "\\&" + '#' -> "\\#" + '<' -> "\\letterless{}" + '>' -> "\\lettermore{}" + '_' -> "\\letterunderscore{}" + x -> [x] + +-- | Escape string for ConTeXt +stringToConTeXt :: String -> String +stringToConTeXt = concatMap escapeCharForConTeXt + +-- | Convert Pandoc block element to ConTeXt. +blockToConTeXt :: Block -> State WriterState String +blockToConTeXt Null = return "" +blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n") +blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n") +blockToConTeXt (BlockQuote lst) = do + contents <- blockListToConTeXt lst + return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n" +blockToConTeXt (CodeBlock str) = + return $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n" +blockToConTeXt (RawHtml str) = return "" +blockToConTeXt (BulletList lst) = do + contents <- mapM listItemToConTeXt lst + return $ "\\startltxitem\n" ++ concat contents ++ "\\stopltxitem\n" +blockToConTeXt (OrderedList attribs lst) = case attribs of + (1, DefaultStyle, DefaultDelim) -> do + contents <- mapM listItemToConTeXt lst + return $ "\\startltxenum\n" ++ concat contents ++ "\\stopltxenum\n" + _ -> do + let markers = take (length lst) $ orderedListMarkers attribs + contents <- zipWithM orderedListItemToConTeXt markers lst + let markerWidth = maximum $ map length markers + let markerWidth' = if markerWidth < 3 + then "" + else "[width=" ++ + show ((markerWidth + 2) `div` 2) ++ "em]" + return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++ + "\\stopitemize\n" +blockToConTeXt (DefinitionList lst) = + mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat +blockToConTeXt HorizontalRule = return "\\thinrule\n\n" +blockToConTeXt (Header level lst) = do + contents <- inlineListToConTeXt lst + return $ if level > 0 && level <= 3 + then "\\" ++ concat (replicate (level - 1) "sub") ++ + "section{" ++ contents ++ "}\n\n" + else contents ++ "\n\n" +blockToConTeXt (Table caption aligns widths heads rows) = do + let colWidths = map printDecimal widths + let colDescriptor colWidth alignment = (case alignment of + AlignLeft -> 'l' + AlignRight -> 'r' + AlignCenter -> 'c' + AlignDefault -> 'l'): + "p(" ++ colWidth ++ "\\textwidth)|" + let colDescriptors = "|" ++ (concat $ + zipWith colDescriptor colWidths aligns) + headers <- tableRowToConTeXt heads + captionText <- inlineListToConTeXt caption + let captionText' = if null caption then "none" else captionText + rows' <- mapM tableRowToConTeXt rows + return $ "\\placetable[here]{" ++ captionText' ++ "}\n\\starttable[" ++ + colDescriptors ++ "]\n" ++ "\\HL\n" ++ headers ++ "\\HL\n" ++ + concat rows' ++ "\\HL\n\\stoptable\n\n" + +printDecimal :: Float -> String +printDecimal = printf "%.2f" + +tableRowToConTeXt cols = do + cols' <- mapM blockListToConTeXt cols + return $ "\\NC " ++ (concat $ intersperse "\\NC " cols') ++ "\\NC\\AR\n" + +listItemToConTeXt list = do + contents <- blockListToConTeXt list + return $ "\\item " ++ contents + +orderedListItemToConTeXt marker list = do + contents <- blockListToConTeXt list + return $ "\\sym{" ++ marker ++ "} " ++ contents + +defListItemToConTeXt (term, def) = do + term' <- inlineListToConTeXt term + def' <- blockListToConTeXt def + return $ "\\startdescr{" ++ term' ++ "}\n" ++ + def' ++ "\n\\stopdescr\n" + +-- | Convert list of block elements to ConTeXt. +blockListToConTeXt :: [Block] -> State WriterState String +blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat + +-- | Convert list of inline elements to ConTeXt. +inlineListToConTeXt :: [Inline] -- ^ Inlines to convert + -> State WriterState String +inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat + +isQuoted :: Inline -> Bool +isQuoted (Quoted _ _) = True +isQuoted Apostrophe = True +isQuoted _ = False + +-- | Convert inline element to ConTeXt +inlineToConTeXt :: Inline -- ^ Inline to convert + -> State WriterState String +inlineToConTeXt (Emph lst) = do + contents <- inlineListToConTeXt lst + return $ "{\\em " ++ contents ++ "}" +inlineToConTeXt (Strong lst) = do + contents <- inlineListToConTeXt lst + return $ "{\\bf " ++ contents ++ "}" +inlineToConTeXt (Strikeout lst) = do + contents <- inlineListToConTeXt lst + return $ "\\overstrikes{" ++ contents ++ "}" +inlineToConTeXt (Superscript lst) = do + contents <- inlineListToConTeXt lst + return $ "\\high{" ++ contents ++ "}" +inlineToConTeXt (Subscript lst) = do + contents <- inlineListToConTeXt lst + return $ "\\low{" ++ contents ++ "}" +inlineToConTeXt (Code str) = return $ "\\type{" ++ str ++ "}" +inlineToConTeXt (Quoted SingleQuote lst) = do + contents <- inlineListToConTeXt lst + return $ "\\quote{" ++ contents ++ "}" +inlineToConTeXt (Quoted DoubleQuote lst) = do + contents <- inlineListToConTeXt lst + return $ "\\quotation{" ++ contents ++ "}" +inlineToConTeXt Apostrophe = return "'" +inlineToConTeXt EmDash = return "---" +inlineToConTeXt EnDash = return "--" +inlineToConTeXt Ellipses = return "\\ldots{}" +inlineToConTeXt (Str str) = return $ stringToConTeXt str +inlineToConTeXt (TeX str) = return str +inlineToConTeXt (HtmlInline str) = return "" +inlineToConTeXt (LineBreak) = return "\\crlf\n" +inlineToConTeXt Space = return " " +inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own + inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links... +inlineToConTeXt (Link text (src, _)) = do + next <- get + put (next + 1) + let ref = show next + label <- inlineListToConTeXt text + return $ "\\useurl[" ++ ref ++ "][" ++ src ++ "][][" ++ label ++ + "]\\from[" ++ ref ++ "]" +inlineToConTeXt (Image alternate (src, tit)) = do + alt <- inlineListToConTeXt alternate + return $ "\\placefigure\n[]\n[fig:" ++ alt ++ "]\n{" ++ + tit ++ "}\n{\\externalfigure[" ++ src ++ "]}" +inlineToConTeXt (Note contents) = do + contents' <- blockListToConTeXt contents + return $ "\\footnote{" ++ contents' ++ "}" + diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs new file mode 100644 index 000000000..13dc8585d --- /dev/null +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -0,0 +1,299 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Docbook + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to Docbook XML. +-} +module Text.Pandoc.Writers.Docbook ( writeDocbook) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Data.List ( isPrefixOf, drop ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) + +-- +-- code to format XML +-- + +-- | Escape one character as needed for XML. +escapeCharForXML :: Char -> String +escapeCharForXML x = case x of + '&' -> "&" + '<' -> "<" + '>' -> ">" + '"' -> """ + '\160' -> " " + c -> [c] + +-- | True if the character needs to be escaped. +needsEscaping :: Char -> Bool +needsEscaping c = c `elem` "&<>\"\160" + +-- | Escape string as needed for XML. Entity references are not preserved. +escapeStringForXML :: String -> String +escapeStringForXML "" = "" +escapeStringForXML str = + case break needsEscaping str of + (okay, "") -> okay + (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs + +-- | Return a text object with a string of formatted XML attributes. +attributeList :: [(String, String)] -> Doc +attributeList = text . concatMap + (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++ + escapeStringForXML b ++ "\"") + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes and (if specified) indentation. +inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc +inTags isIndented tagType attribs contents = + let openTag = char '<' <> text tagType <> attributeList attribs <> + char '>' + closeTag = text "</" <> text tagType <> char '>' + in if isIndented + then openTag $$ nest 2 contents $$ closeTag + else openTag <> contents <> closeTag + +-- | Return a self-closing tag of tagType with specified attributes +selfClosingTag :: String -> [(String, String)] -> Doc +selfClosingTag tagType attribs = + char '<' <> text tagType <> attributeList attribs <> text " />" + +-- | Put the supplied contents between start and end tags of tagType. +inTagsSimple :: String -> Doc -> Doc +inTagsSimple tagType = inTags False tagType [] + +-- | Put the supplied contents in indented block btw start and end tags. +inTagsIndented :: String -> Doc -> Doc +inTagsIndented tagType = inTags True tagType [] + +-- +-- Docbook writer +-- + +-- | Convert list of authors to a docbook <author> section +authorToDocbook :: [Char] -> Doc +authorToDocbook name = inTagsIndented "author" $ + if ',' `elem` name + then -- last name first + let (lastname, rest) = break (==',') name + firstname = removeLeadingSpace rest in + inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> + inTagsSimple "surname" (text $ escapeStringForXML lastname) + else -- last name last + let namewords = words name + lengthname = length namewords + (firstname, lastname) = case lengthname of + 0 -> ("","") + 1 -> ("", name) + n -> (joinWithSep " " (take (n-1) namewords), last namewords) + in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ + inTagsSimple "surname" (text $ escapeStringForXML lastname) + +-- | Convert Pandoc document to string in Docbook format. +writeDocbook :: WriterOptions -> Pandoc -> String +writeDocbook opts (Pandoc (Meta title authors date) blocks) = + let head = if writerStandalone opts + then text (writerHeader opts) + else empty + meta = if writerStandalone opts + then inTagsIndented "articleinfo" $ + (inTagsSimple "title" (wrap opts title)) $$ + (vcat (map authorToDocbook authors)) $$ + (inTagsSimple "date" (text $ escapeStringForXML date)) + else empty + elements = hierarchicalize blocks + before = writerIncludeBefore opts + after = writerIncludeAfter opts + body = (if null before then empty else text before) $$ + vcat (map (elementToDocbook opts) elements) $$ + (if null after then empty else text after) + body' = if writerStandalone opts + then inTagsIndented "article" (meta $$ body) + else body + in render $ head $$ body' $$ text "" + +-- | Convert an Element to Docbook. +elementToDocbook :: WriterOptions -> Element -> Doc +elementToDocbook opts (Blk block) = blockToDocbook opts block +elementToDocbook opts (Sec title elements) = + -- Docbook doesn't allow sections with no content, so insert some if needed + let elements' = if null elements + then [Blk (Para [])] + else elements + in inTagsIndented "section" $ + inTagsSimple "title" (wrap opts title) $$ + vcat (map (elementToDocbook opts) elements') + +-- | Convert a list of Pandoc blocks to Docbook. +blocksToDocbook :: WriterOptions -> [Block] -> Doc +blocksToDocbook opts = vcat . map (blockToDocbook opts) + +-- | Auxiliary function to convert Plain block to Para. +plainToPara (Plain x) = Para x +plainToPara x = x + +-- | Convert a list of pairs of terms and definitions into a list of +-- Docbook varlistentrys. +deflistItemsToDocbook :: WriterOptions -> [([Inline],[Block])] -> Doc +deflistItemsToDocbook opts items = + vcat $ map (\(term, def) -> deflistItemToDocbook opts term def) items + +-- | Convert a term and a list of blocks into a Docbook varlistentry. +deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc +deflistItemToDocbook opts term def = + let def' = map plainToPara def + in inTagsIndented "varlistentry" $ + inTagsIndented "term" (inlinesToDocbook opts term) $$ + inTagsIndented "listitem" (blocksToDocbook opts def') + +-- | Convert a list of lists of blocks to a list of Docbook list items. +listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc +listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items + +-- | Convert a list of blocks into a Docbook list item. +listItemToDocbook :: WriterOptions -> [Block] -> Doc +listItemToDocbook opts item = + inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item + +-- | Convert a Pandoc block element to Docbook. +blockToDocbook :: WriterOptions -> Block -> Doc +blockToDocbook opts Null = empty +blockToDocbook opts (Plain lst) = wrap opts lst +blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst +blockToDocbook opts (BlockQuote blocks) = + inTagsIndented "blockquote" $ blocksToDocbook opts blocks +blockToDocbook opts (CodeBlock str) = + text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>" +blockToDocbook opts (BulletList lst) = + inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst +blockToDocbook opts (OrderedList _ []) = empty +blockToDocbook opts (OrderedList (start, numstyle, numdelim) (first:rest)) = + let attribs = case numstyle of + DefaultStyle -> [] + Decimal -> [("numeration", "arabic")] + UpperAlpha -> [("numeration", "upperalpha")] + LowerAlpha -> [("numeration", "loweralpha")] + UpperRoman -> [("numeration", "upperroman")] + LowerRoman -> [("numeration", "lowerroman")] + items = if start == 1 + then listItemsToDocbook opts (first:rest) + else (inTags True "listitem" [("override",show start)] + (blocksToDocbook opts $ map plainToPara first)) $$ + listItemsToDocbook opts rest + in inTags True "orderedlist" attribs items +blockToDocbook opts (DefinitionList lst) = + inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst +blockToDocbook opts (RawHtml str) = text str -- raw XML block +blockToDocbook opts HorizontalRule = empty -- not semantic +blockToDocbook opts (Table caption aligns widths headers rows) = + let alignStrings = map alignmentToString aligns + captionDoc = if null caption + then empty + else inTagsIndented "caption" + (inlinesToDocbook opts caption) + tableType = if isEmpty captionDoc then "informaltable" else "table" + in inTagsIndented tableType $ captionDoc $$ + (colHeadsToDocbook opts alignStrings widths headers) $$ + (vcat $ map (tableRowToDocbook opts alignStrings) rows) + +colHeadsToDocbook opts alignStrings widths headers = + let heads = zipWith3 (\align width item -> + tableItemToDocbook opts "th" align width item) + alignStrings widths headers + in inTagsIndented "tr" $ vcat heads + +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToDocbook opts aligns cols = inTagsIndented "tr" $ + vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols + +tableItemToDocbook opts tag align width item = + let attrib = [("align", align)] ++ + if width /= 0 + then [("style", "{width: " ++ + show (truncate (100*width)) ++ "%;}")] + else [] + in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item + +-- | Take list of inline elements and return wrapped doc. +wrap :: WriterOptions -> [Inline] -> Doc +wrap opts lst = if writerWrapText opts + then fsep $ map (inlinesToDocbook opts) (splitBy Space lst) + else inlinesToDocbook opts lst + +-- | Convert a list of inline elements to Docbook. +inlinesToDocbook :: WriterOptions -> [Inline] -> Doc +inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst + +-- | Convert an inline element to Docbook. +inlineToDocbook :: WriterOptions -> Inline -> Doc +inlineToDocbook opts (Str str) = text $ escapeStringForXML str +inlineToDocbook opts (Emph lst) = + inTagsSimple "emphasis" $ inlinesToDocbook opts lst +inlineToDocbook opts (Strong lst) = + inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst +inlineToDocbook opts (Strikeout lst) = + inTags False "emphasis" [("role", "strikethrough")] $ + inlinesToDocbook opts lst +inlineToDocbook opts (Superscript lst) = + inTagsSimple "superscript" $ inlinesToDocbook opts lst +inlineToDocbook opts (Subscript lst) = + inTagsSimple "subscript" $ inlinesToDocbook opts lst +inlineToDocbook opts (Quoted _ lst) = + inTagsSimple "quote" $ inlinesToDocbook opts lst +inlineToDocbook opts Apostrophe = char '\'' +inlineToDocbook opts Ellipses = text "…" +inlineToDocbook opts EmDash = text "—" +inlineToDocbook opts EnDash = text "–" +inlineToDocbook opts (Code str) = + inTagsSimple "literal" $ text (escapeStringForXML str) +inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str) +inlineToDocbook opts (HtmlInline str) = empty +inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>" +inlineToDocbook opts Space = char ' ' +inlineToDocbook opts (Link txt (src, tit)) = + if isPrefixOf "mailto:" src + then let src' = drop 7 src + emailLink = inTagsSimple "email" $ text $ + escapeStringForXML $ src' + in if txt == [Code src'] + then emailLink + else inlinesToDocbook opts txt <+> char '(' <> emailLink <> + char ')' + else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt +inlineToDocbook opts (Image alt (src, tit)) = + let titleDoc = if null tit + then empty + else inTagsIndented "objectinfo" $ + inTagsIndented "title" (text $ escapeStringForXML tit) + in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ + titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] +inlineToDocbook opts (Note contents) = + inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs new file mode 100644 index 000000000..7ec95d8ef --- /dev/null +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -0,0 +1,458 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.HTML + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to HTML. +-} +module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where +import Text.Pandoc.Definition +import Text.Pandoc.ASCIIMathML +import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) +import Text.Pandoc.Shared +import Text.Regex ( mkRegex, matchRegex ) +import Numeric ( showHex ) +import Data.Char ( ord, toLower ) +import Data.List ( isPrefixOf, intersperse ) +import qualified Data.Set as S +import Control.Monad.State +import Text.XHtml.Transitional + +data WriterState = WriterState + { stNotes :: [Html] -- ^ List of notes + , stIds :: [String] -- ^ List of header identifiers + , stMath :: Bool -- ^ Math is used in document + , stCSS :: S.Set String -- ^ CSS to include in header + } deriving Show + +defaultWriterState :: WriterState +defaultWriterState = WriterState {stNotes= [], stIds = [], + stMath = False, stCSS = S.empty} + +-- Helpers to render HTML with the appropriate function. +render opts = if writerWrapText opts then renderHtml else showHtml +renderFragment opts = if writerWrapText opts + then renderHtmlFragment + else showHtmlFragment + +-- | Convert Pandoc document to Html string. +writeHtmlString :: WriterOptions -> Pandoc -> String +writeHtmlString opts = + if writerStandalone opts + then render opts . writeHtml opts + else renderFragment opts . writeHtml opts + +-- | Convert Pandoc document to Html structure. +writeHtml :: WriterOptions -> Pandoc -> Html +writeHtml opts (Pandoc (Meta tit authors date) blocks) = + let titlePrefix = writerTitlePrefix opts + topTitle = evalState (inlineListToHtml opts tit) defaultWriterState + topTitle' = if null titlePrefix + then topTitle + else titlePrefix +++ " - " +++ topTitle + metadata = thetitle topTitle' +++ + meta ! [httpequiv "Content-Type", + content "text/html; charset=UTF-8"] +++ + meta ! [name "generator", content "pandoc"] +++ + (toHtmlFromList $ + map (\a -> meta ! [name "author", content a]) authors) +++ + (if null date + then noHtml + else meta ! [name "date", content date]) + titleHeader = if writerStandalone opts && not (null tit) && + not (writerS5 opts) + then h1 ! [theclass "title"] $ topTitle + else noHtml + headerBlocks = filter isHeaderBlock blocks + ids = uniqueIdentifiers $ + map (\(Header _ lst) -> lst) headerBlocks + toc = if writerTableOfContents opts + then tableOfContents opts headerBlocks ids + else noHtml + (blocks', newstate) = + runState (blockListToHtml opts blocks) + (defaultWriterState {stIds = ids}) + cssLines = stCSS newstate + css = if S.null cssLines + then noHtml + else style ! [thetype "text/css"] $ primHtml $ + '\n':(unlines $ S.toList cssLines) + math = if stMath newstate + then case writerASCIIMathMLURL opts of + Just path -> script ! [src path, + thetype "text/javascript"] $ + noHtml + Nothing -> primHtml asciiMathMLScript + else noHtml + head = header $ metadata +++ math +++ css +++ + primHtml (writerHeader opts) + notes = reverse (stNotes newstate) + before = primHtml $ writerIncludeBefore opts + after = primHtml $ writerIncludeAfter opts + thebody = before +++ titleHeader +++ toc +++ blocks' +++ + footnoteSection opts notes +++ after + in if writerStandalone opts + then head +++ body thebody + else thebody + +-- | Construct table of contents from list of header blocks and identifiers. +-- Assumes there are as many identifiers as header blocks. +tableOfContents :: WriterOptions -> [Block] -> [String] -> Html +tableOfContents _ [] _ = noHtml +tableOfContents opts headers ids = + let opts' = opts { writerIgnoreNotes = True } + contentsTree = hierarchicalize headers + contents = evalState (mapM (elementToListItem opts') contentsTree) + (defaultWriterState {stIds = ids}) + in thediv ! [identifier "toc"] $ unordList contents + +-- | Converts an Element to a list item for a table of contents, +-- retrieving the appropriate identifier from state. +elementToListItem :: WriterOptions -> Element -> State WriterState Html +elementToListItem opts (Blk _) = return noHtml +elementToListItem opts (Sec headerText subsecs) = do + st <- get + let ids = stIds st + let (id, rest) = if null ids + then ("", []) + else (head ids, tail ids) + put $ st {stIds = rest} + txt <- inlineListToHtml opts headerText + subHeads <- mapM (elementToListItem opts) subsecs + let subList = if null subHeads + then noHtml + else unordList subHeads + return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++ + subList + +-- | Convert list of Note blocks to a footnote <div>. +-- Assumes notes are sorted. +footnoteSection :: WriterOptions -> [Html] -> Html +footnoteSection opts notes = + if null notes + then noHtml + else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes) + +-- | Obfuscate a "mailto:" link using Javascript. +obfuscateLink :: WriterOptions -> String -> String -> Html +obfuscateLink opts text src = + let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$" + src' = map toLower src + in case (matchRegex emailRegex src') of + (Just [name, domain]) -> + let domain' = substitute "." " dot " domain + at' = obfuscateChar '@' + (linkText, altText) = + if text == drop 7 src' -- autolink + then ("'<code>'+e+'</code>'", name ++ " at " ++ domain') + else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++ + domain' ++ ")") + in if writerStrictMarkdown opts + then -- need to use primHtml or &'s are escaped to & in URL + primHtml $ "<a href=\"" ++ (obfuscateString src') + ++ "\">" ++ (obfuscateString text) ++ "</a>" + else (script ! [thetype "text/javascript"] $ + primHtml ("\n<!--\nh='" ++ + obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ + obfuscateString name ++ "';e=n+a+h;\n" ++ + "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++ + linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ + noscript (primHtml $ obfuscateString altText) + _ -> anchor ! [href src] $ primHtml text -- malformed email + +-- | Obfuscate character as entity. +obfuscateChar :: Char -> String +obfuscateChar char = + let num = ord char + numstr = if even num then show num else "x" ++ showHex num "" + in "&#" ++ numstr ++ ";" + +-- | Obfuscate string using entities. +obfuscateString :: String -> String +obfuscateString = concatMap obfuscateChar . decodeCharacterReferences + +-- | True if character is a punctuation character (unicode). +isPunctuation :: Char -> Bool +isPunctuation c = + let c' = ord c + in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || + c' >= 0xE000 && c' <= 0xE0FF + then True + else False + +-- | Add CSS for document header. +addToCSS :: String -> State WriterState () +addToCSS item = do + st <- get + let current = stCSS st + put $ st {stCSS = S.insert item current} + +-- | Convert Pandoc inline list to plain text identifier. +inlineListToIdentifier :: [Inline] -> String +inlineListToIdentifier [] = "" +inlineListToIdentifier (x:xs) = + xAsText ++ inlineListToIdentifier xs + where xAsText = case x of + Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ + concat $ intersperse "-" $ words $ map toLower s + Emph lst -> inlineListToIdentifier lst + Strikeout lst -> inlineListToIdentifier lst + Superscript lst -> inlineListToIdentifier lst + Subscript lst -> inlineListToIdentifier lst + Strong lst -> inlineListToIdentifier lst + Quoted _ lst -> inlineListToIdentifier lst + Code s -> s + Space -> "-" + EmDash -> "-" + EnDash -> "-" + Apostrophe -> "" + Ellipses -> "" + LineBreak -> "-" + TeX _ -> "" + HtmlInline _ -> "" + Link lst _ -> inlineListToIdentifier lst + Image lst _ -> inlineListToIdentifier lst + Note _ -> "" + +-- | Return unique identifiers for list of inline lists. +uniqueIdentifiers :: [[Inline]] -> [String] +uniqueIdentifiers ls = + let addIdentifier (nonuniqueIds, uniqueIds) l = + let new = inlineListToIdentifier l + matches = length $ filter (== new) nonuniqueIds + new' = new ++ if matches > 0 then ("-" ++ show matches) else "" + in (new:nonuniqueIds, new':uniqueIds) + in reverse $ snd $ foldl addIdentifier ([],[]) ls + +-- | Convert Pandoc block element to HTML. +blockToHtml :: WriterOptions -> Block -> State WriterState Html +blockToHtml opts Null = return $ noHtml +blockToHtml opts (Plain lst) = inlineListToHtml opts lst +blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) +blockToHtml opts (RawHtml str) = return $ primHtml str +blockToHtml opts (HorizontalRule) = return $ hr +blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n") + -- the final \n for consistency with Markdown.pl +blockToHtml opts (BlockQuote blocks) = + -- in S5, treat list in blockquote specially + -- if default is incremental, make it nonincremental; + -- otherwise incremental + if writerS5 opts + then let inc = not (writerIncremental opts) in + case blocks of + [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) + (BulletList lst) + [OrderedList attribs lst] -> + blockToHtml (opts {writerIncremental = inc}) + (OrderedList attribs lst) + otherwise -> blockListToHtml opts blocks >>= + (return . blockquote) + else blockListToHtml opts blocks >>= (return . blockquote) +blockToHtml opts (Header level lst) = do + contents <- inlineListToHtml opts lst + st <- get + let ids = stIds st + let (id, rest) = if null ids + then ("", []) + else (head ids, tail ids) + put $ st {stIds = rest} + let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts) + then [] + else [identifier id] + let contents' = if writerTableOfContents opts + then anchor ! [href ("#TOC-" ++ id)] $ contents + else contents + return $ case level of + 1 -> h1 contents' ! attribs + 2 -> h2 contents' ! attribs + 3 -> h3 contents' ! attribs + 4 -> h4 contents' ! attribs + 5 -> h5 contents' ! attribs + 6 -> h6 contents' ! attribs + _ -> paragraph contents' ! attribs +blockToHtml opts (BulletList lst) = do + contents <- mapM (blockListToHtml opts) lst + let attribs = if writerIncremental opts + then [theclass "incremental"] + else [] + return $ unordList ! attribs $ contents +blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do + contents <- mapM (blockListToHtml opts) lst + let numstyle' = camelCaseToHyphenated $ show numstyle + let attribs = (if writerIncremental opts + then [theclass "incremental"] + else []) ++ + (if startnum /= 1 + then [start startnum] + else []) ++ + (if numstyle /= DefaultStyle + then [theclass numstyle'] + else []) + if numstyle /= DefaultStyle + then addToCSS $ "ol." ++ numstyle' ++ + " { list-style-type: " ++ + numstyle' ++ "; }" + else return () + return $ ordList ! attribs $ contents +blockToHtml opts (DefinitionList lst) = do + contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term + def' <- blockListToHtml opts def + return $ (term', def')) lst + let attribs = if writerIncremental opts + then [theclass "incremental"] + else [] + return $ defList ! attribs $ contents +blockToHtml opts (Table capt aligns widths headers rows) = do + let alignStrings = map alignmentToString aligns + captionDoc <- if null capt + then return noHtml + else inlineListToHtml opts capt >>= return . caption + colHeads <- colHeadsToHtml opts alignStrings + widths headers + rows' <- mapM (tableRowToHtml opts alignStrings) rows + return $ table $ captionDoc +++ colHeads +++ rows' + +colHeadsToHtml opts alignStrings widths headers = do + heads <- sequence $ zipWith3 + (\align width item -> tableItemToHtml opts th align width item) + alignStrings widths headers + return $ tr $ toHtmlFromList heads + +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToHtml opts aligns cols = + (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>= + return . tr . toHtmlFromList + +tableItemToHtml opts tag align' width item = do + contents <- blockListToHtml opts item + let attrib = [align align'] ++ + if width /= 0 + then [thestyle ("width: " ++ show (truncate (100*width)) ++ + "%;")] + else [] + return $ tag ! attrib $ contents + +blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html +blockListToHtml opts lst = + mapM (blockToHtml opts) lst >>= return . toHtmlFromList + +-- | Convert list of Pandoc inline elements to HTML. +inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html +inlineListToHtml opts lst = + mapM (inlineToHtml opts) lst >>= return . toHtmlFromList + +-- | Convert Pandoc inline element to HTML. +inlineToHtml :: WriterOptions -> Inline -> State WriterState Html +inlineToHtml opts inline = + case inline of + (Str str) -> return $ stringToHtml str + (Space) -> return $ stringToHtml " " + (LineBreak) -> return $ br + (EmDash) -> return $ primHtmlChar "mdash" + (EnDash) -> return $ primHtmlChar "ndash" + (Ellipses) -> return $ primHtmlChar "hellip" + (Apostrophe) -> return $ primHtmlChar "rsquo" + (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize + (Strong lst) -> inlineListToHtml opts lst >>= return . strong + (Code str) -> return $ thecode << str + (Strikeout lst) -> addToCSS + ".strikeout { text-decoration: line-through; }" >> + inlineListToHtml opts lst >>= + return . (thespan ! [theclass "strikeout"]) + (Superscript lst) -> inlineListToHtml opts lst >>= return . sup + (Subscript lst) -> inlineListToHtml opts lst >>= return . sub + (Quoted quoteType lst) -> + let (leftQuote, rightQuote) = case quoteType of + SingleQuote -> (primHtmlChar "lsquo", + primHtmlChar "rsquo") + DoubleQuote -> (primHtmlChar "ldquo", + primHtmlChar "rdquo") + in do contents <- inlineListToHtml opts lst + return $ leftQuote +++ contents +++ rightQuote + (TeX str) -> (if writerUseASCIIMathML opts + then modify (\st -> st {stMath = True}) + else return ()) >> return (stringToHtml str) + (HtmlInline str) -> return $ primHtml str + (Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src -> + return $ obfuscateLink opts str src + (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do + linkText <- inlineListToHtml opts txt + return $ obfuscateLink opts (show linkText) src + (Link txt (src,tit)) -> do + linkText <- inlineListToHtml opts txt + return $ anchor ! ([href src] ++ + if null tit then [] else [title tit]) $ + linkText + (Image txt (source,tit)) -> do + alternate <- inlineListToHtml opts txt + let alternate' = renderFragment opts alternate + let attributes = [src source] ++ + (if null tit + then [] + else [title tit]) ++ + if null txt + then [] + else [alt alternate'] + return $ image ! attributes + -- note: null title included, as in Markdown.pl + (Note contents) -> do + st <- get + let notes = stNotes st + let number = (length notes) + 1 + let ref = show number + htmlContents <- blockListToNote opts ref contents + -- push contents onto front of notes + put $ st {stNotes = (htmlContents:notes)} + return $ anchor ! [href ("#fn" ++ ref), + theclass "footnoteRef", + identifier ("fnref" ++ ref)] << + sup << ref + +blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html +blockListToNote opts ref blocks = + -- If last block is Para or Plain, include the backlink at the end of + -- that block. Otherwise, insert a new Plain block with the backlink. + let backlink = [HtmlInline $ " <a href=\"#fnref" ++ ref ++ + "\" class=\"footnoteBackLink\"" ++ + " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] + blocks' = if null blocks + then [] + else let lastBlock = last blocks + otherBlocks = init blocks + in case lastBlock of + (Para lst) -> otherBlocks ++ + [Para (lst ++ backlink)] + (Plain lst) -> otherBlocks ++ + [Plain (lst ++ backlink)] + _ -> otherBlocks ++ [lastBlock, + Plain backlink] + in do contents <- blockListToHtml opts blocks' + return $ li ! [identifier ("fn" ++ ref)] $ contents + diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs new file mode 100644 index 000000000..f64e06e24 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -0,0 +1,310 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.LaTeX + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' format into LaTeX. +-} +module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Printf ( printf ) +import Data.List ( (\\), isInfixOf, isSuffixOf, intersperse ) +import Data.Char ( toLower ) +import qualified Data.Set as S +import Control.Monad.State +import Text.PrettyPrint.HughesPJ hiding ( Str ) + +data WriterState = + WriterState { stIncludes :: S.Set String -- strings to include in header + , stInNote :: Bool -- @True@ if we're in a note + , stOLLevel :: Int } -- level of ordered list nesting + +-- | Add line to header. +addToHeader :: String -> State WriterState () +addToHeader str = do + st <- get + let includes = stIncludes st + put st {stIncludes = S.insert str includes} + +-- | Convert Pandoc to LaTeX. +writeLaTeX :: WriterOptions -> Pandoc -> String +writeLaTeX options document = + render $ evalState (pandocToLaTeX options document) $ + WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1 } + +pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToLaTeX options (Pandoc meta blocks) = do + main <- blockListToLaTeX blocks + head <- if writerStandalone options + then latexHeader options meta + else return empty + let before = if null (writerIncludeBefore options) + then empty + else text (writerIncludeBefore options) + let after = if null (writerIncludeAfter options) + then empty + else text (writerIncludeAfter options) + let body = before $$ main $$ after + let toc = if writerTableOfContents options + then text "\\tableofcontents\n" + else empty + let foot = if writerStandalone options + then text "\\end{document}" + else empty + return $ head $$ toc $$ body $$ foot + +-- | Insert bibliographic information into LaTeX header. +latexHeader :: WriterOptions -- ^ Options, including LaTeX header + -> Meta -- ^ Meta with bibliographic information + -> State WriterState Doc +latexHeader options (Meta title authors date) = do + titletext <- if null title + then return empty + else inlineListToLaTeX title >>= return . inCmd "title" + headerIncludes <- get >>= return . S.toList . stIncludes + let extras = text $ unlines headerIncludes + let verbatim = if "\\usepackage{fancyvrb}" `elem` headerIncludes + then text "\\VerbatimFootnotes % allows verbatim text in footnotes" + else empty + let authorstext = text $ "\\author{" ++ + joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}" + let datetext = if date == "" + then empty + else text $ "\\date{" ++ stringToLaTeX date ++ "}" + let maketitle = if null title then empty else text "\\maketitle" + let secnumline = if (writerNumberSections options) + then empty + else text "\\setcounter{secnumdepth}{0}" + let baseHeader = text $ writerHeader options + let header = baseHeader $$ extras + return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$ + datetext $$ text "\\begin{document}" $$ maketitle $$ text "" + +-- escape things as needed for LaTeX + +stringToLaTeX :: String -> String +stringToLaTeX = escapeStringUsing latexEscapes + where latexEscapes = backslashEscapes "{}$%&_#" ++ + [ ('^', "\\^{}") + , ('\\', "\\textbackslash{}") + , ('~', "\\ensuremath{\\sim}") + , ('|', "\\textbar{}") + , ('<', "\\textless{}") + , ('>', "\\textgreater{}") + ] + +-- | Puts contents into LaTeX command. +inCmd :: String -> Doc -> Doc +inCmd cmd contents = char '\\' <> text cmd <> braces contents + +-- | Remove all code elements from list of inline elements +-- (because it's illegal to have verbatim inside some command arguments) +deVerb :: [Inline] -> [Inline] +deVerb [] = [] +deVerb ((Code str):rest) = + (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest) +deVerb (other:rest) = other:(deVerb rest) + +-- | Convert Pandoc block element to LaTeX. +blockToLaTeX :: Block -- ^ Block to convert + -> State WriterState Doc +blockToLaTeX Null = return empty +blockToLaTeX (Plain lst) = wrapped inlineListToLaTeX lst >>= return +blockToLaTeX (Para lst) = + wrapped inlineListToLaTeX lst >>= return . (<> char '\n') +blockToLaTeX (BlockQuote lst) = do + contents <- blockListToLaTeX lst + return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}" +blockToLaTeX (CodeBlock str) = do + st <- get + env <- if stInNote st + then do addToHeader "\\usepackage{fancyvrb}" + return "Verbatim" + else return "verbatim" + return $ text ("\\begin{" ++ env ++ "}\n") <> text str <> + text ("\n\\end{" ++ env ++ "}") +blockToLaTeX (RawHtml str) = return empty +blockToLaTeX (BulletList lst) = do + items <- mapM listItemToLaTeX lst + return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}" +blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do + st <- get + let oldlevel = stOLLevel st + put $ st {stOLLevel = oldlevel + 1} + items <- mapM listItemToLaTeX lst + modify (\st -> st {stOLLevel = oldlevel}) + exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim + then do addToHeader "\\usepackage{enumerate}" + return $ char '[' <> + text (head (orderedListMarkers (1, numstyle, + numdelim))) <> char ']' + else return empty + let resetcounter = if start /= 1 && oldlevel <= 4 + then text $ "\\setcounter{enum" ++ + map toLower (toRomanNumeral oldlevel) ++ + "}{" ++ show (start - 1) ++ "}" + else empty + return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$ + vcat items $$ text "\\end{enumerate}" +blockToLaTeX (DefinitionList lst) = do + items <- mapM defListItemToLaTeX lst + return $ text "\\begin{description}" $$ vcat items $$ + text "\\end{description}" +blockToLaTeX HorizontalRule = return $ text $ + "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n" +blockToLaTeX (Header level lst) = do + txt <- inlineListToLaTeX (deVerb lst) + return $ if (level > 0) && (level <= 3) + then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++ + "section{") <> txt <> text "}\n" + else txt <> char '\n' +blockToLaTeX (Table caption aligns widths heads rows) = do + headers <- tableRowToLaTeX heads + captionText <- inlineListToLaTeX caption + rows' <- mapM tableRowToLaTeX rows + let colWidths = map (printf "%.2f") widths + let colDescriptors = concat $ zipWith + (\width align -> ">{\\PBS" ++ + (case align of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright") ++ + "\\hspace{0pt}}p{" ++ width ++ + "\\columnwidth}") + colWidths aligns + let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$ + headers $$ text "\\hline" $$ vcat rows' $$ + text "\\end{tabular}" + let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}" + addToHeader "\\usepackage{array}\n\ + \% This is needed because raggedright in table elements redefines \\\\:\n\ + \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\ + \\\let\\PBS=\\PreserveBackslash" + return $ if isEmpty captionText + then centered tableBody <> char '\n' + else text "\\begin{table}[h]" $$ centered tableBody $$ + inCmd "caption" captionText $$ text "\\end{table}\n" + +blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat + +tableRowToLaTeX cols = mapM blockListToLaTeX cols >>= + return . ($$ text "\\\\") . foldl (\row item -> row $$ + (if isEmpty row then empty else text " & ") <> item) empty + +listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item " $$) . + (nest 2) + +defListItemToLaTeX (term, def) = do + term' <- inlineListToLaTeX $ deVerb term + def' <- blockListToLaTeX def + return $ text "\\item[" <> term' <> text "]" $$ def' + +-- | Convert list of inline elements to LaTeX. +inlineListToLaTeX :: [Inline] -- ^ Inlines to convert + -> State WriterState Doc +inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat + +isQuoted :: Inline -> Bool +isQuoted (Quoted _ _) = True +isQuoted Apostrophe = True +isQuoted _ = False + +-- | Convert inline element to LaTeX +inlineToLaTeX :: Inline -- ^ Inline to convert + -> State WriterState Doc +inlineToLaTeX (Emph lst) = + inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph" +inlineToLaTeX (Strong lst) = + inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf" +inlineToLaTeX (Strikeout lst) = do + contents <- inlineListToLaTeX $ deVerb lst + addToHeader "\\usepackage[normalem]{ulem}" + return $ inCmd "sout" contents +inlineToLaTeX (Superscript lst) = + inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript" +inlineToLaTeX (Subscript lst) = do + contents <- inlineListToLaTeX $ deVerb lst + -- oddly, latex includes \textsuperscript but not \textsubscript + -- so we have to define it: + addToHeader "\\newcommand{\\textsubscript}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}" + return $ inCmd "textsubscript" contents +inlineToLaTeX (Code str) = do + st <- get + if stInNote st + then do addToHeader "\\usepackage{fancyvrb}" + else return () + let chr = ((enumFromTo '!' '~') \\ str) !! 0 + return $ text $ "\\verb" ++ [chr] ++ str ++ [chr] +inlineToLaTeX (Quoted SingleQuote lst) = do + contents <- inlineListToLaTeX lst + let s1 = if (not (null lst)) && (isQuoted (head lst)) + then text "\\," + else empty + let s2 = if (not (null lst)) && (isQuoted (last lst)) + then text "\\," + else empty + return $ char '`' <> s1 <> contents <> s2 <> char '\'' +inlineToLaTeX (Quoted DoubleQuote lst) = do + contents <- inlineListToLaTeX lst + let s1 = if (not (null lst)) && (isQuoted (head lst)) + then text "\\," + else empty + let s2 = if (not (null lst)) && (isQuoted (last lst)) + then text "\\," + else empty + return $ text "``" <> s1 <> contents <> s2 <> text "''" +inlineToLaTeX Apostrophe = return $ char '\'' +inlineToLaTeX EmDash = return $ text "---" +inlineToLaTeX EnDash = return $ text "--" +inlineToLaTeX Ellipses = return $ text "\\ldots{}" +inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str +inlineToLaTeX (TeX str) = return $ text str +inlineToLaTeX (HtmlInline str) = return empty +inlineToLaTeX (LineBreak) = return $ text "\\\\" +inlineToLaTeX Space = return $ char ' ' +inlineToLaTeX (Link txt (src, _)) = do + addToHeader "\\usepackage[breaklinks=true]{hyperref}" + case txt of + [Code x] | x == src -> -- autolink + do addToHeader "\\usepackage{url}" + return $ text $ "\\url{" ++ x ++ "}" + _ -> do contents <- inlineListToLaTeX $ deVerb txt + return $ text ("\\href{" ++ src ++ "}{") <> contents <> + char '}' +inlineToLaTeX (Image alternate (source, tit)) = do + addToHeader "\\usepackage{graphicx}" + return $ text $ "\\includegraphics{" ++ source ++ "}" +inlineToLaTeX (Note contents) = do + st <- get + put (st {stInNote = True}) + contents' <- blockListToLaTeX contents + modify (\st -> st {stInNote = False}) + let rawnote = stripTrailingNewlines $ render contents' + -- note: a \n before } is needed when note ends with a Verbatim environment + let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote + return $ text "%\n\\footnote{" <> + text rawnote <> (if optNewline then char '\n' else empty) <> char '}' diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs new file mode 100644 index 000000000..8e14c2bf0 --- /dev/null +++ b/src/Text/Pandoc/Writers/Man.hs @@ -0,0 +1,293 @@ +{- +Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Man + Copyright : Copyright (C) 2007 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to groff man page format. + +-} +module Text.Pandoc.Writers.Man ( writeMan) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Printf ( printf ) +import Data.List ( isPrefixOf, drop, nub, intersperse ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Control.Monad.State + +type Notes = [[Block]] +type Preprocessors = [String] -- e.g. "t" for tbl +type WriterState = (Notes, Preprocessors) + +-- | Convert Pandoc to Man. +writeMan :: WriterOptions -> Pandoc -> String +writeMan opts document = render $ evalState (pandocToMan opts document) ([],[]) + +-- | Return groff man representation of document. +pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToMan opts (Pandoc meta blocks) = do + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + let before' = if null before then empty else text before + let after' = if null after then empty else text after + (head, foot) <- metaToMan opts meta + body <- blockListToMan opts blocks + (notes, preprocessors) <- get + let preamble = if null preprocessors || not (writerStandalone opts) + then empty + else text $ ".\\\" " ++ concat (nub preprocessors) + notes' <- notesToMan opts (reverse notes) + return $ preamble $$ head $$ before' $$ body $$ notes' $$ foot $$ after' + +-- | Insert bibliographic information into Man header and footer. +metaToMan :: WriterOptions -- ^ Options, including Man header + -> Meta -- ^ Meta with bibliographic information + -> State WriterState (Doc, Doc) +metaToMan options (Meta title authors date) = do + titleText <- inlineListToMan options title + let (cmdName, rest) = break (== ' ') $ render titleText + let (title', section) = case reverse cmdName of + (')':d:'(':xs) | d `elem` ['0'..'9'] -> + (text (reverse xs), char d) + xs -> (text (reverse xs), doubleQuotes empty) + let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $ + splitBy '|' rest + let head = (text ".TH") <+> title' <+> section <+> + doubleQuotes (text date) <+> hsep extras + let foot = case length authors of + 0 -> empty + 1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors) + 2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors) + return $ if writerStandalone options + then (head, foot) + else (empty, empty) + +-- | Return man representation of notes. +notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToMan opts notes = + if null notes + then return empty + else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= + return . (text ".SH NOTES" $$) . vcat + +-- | Return man representation of a note. +noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToMan opts num note = do + contents <- blockListToMan opts note + let marker = text "\n.SS [" <> text (show num) <> char ']' + return $ marker $$ contents + +-- | Association list of characters to escape. +manEscapes :: [(Char, String)] +manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\" + +-- | Escape special characters for Man. +escapeString :: String -> String +escapeString = escapeStringUsing manEscapes + +-- | Escape a literal (code) section for Man. +escapeCode :: String -> String +escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ") + +-- | Convert Pandoc block element to man. +blockToMan :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToMan opts Null = return empty +blockToMan opts (Plain inlines) = + wrapIfNeeded opts (inlineListToMan opts) inlines +blockToMan opts (Para inlines) = do + contents <- wrapIfNeeded opts (inlineListToMan opts) inlines + return $ text ".PP" $$ contents +blockToMan opts (RawHtml str) = return $ text str +blockToMan opts HorizontalRule = return $ text $ ".PP\n * * * * *" +blockToMan opts (Header level inlines) = do + contents <- inlineListToMan opts inlines + let heading = case level of + 1 -> ".SH " + _ -> ".SS " + return $ text heading <> contents +blockToMan opts (CodeBlock str) = return $ + text ".PP" $$ text "\\f[CR]" $$ + text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]" +blockToMan opts (BlockQuote blocks) = do + contents <- blockListToMan opts blocks + return $ text ".RS" $$ contents $$ text ".RE" +blockToMan opts (Table caption alignments widths headers rows) = + let aligncode AlignLeft = "l" + aligncode AlignRight = "r" + aligncode AlignCenter = "c" + aligncode AlignDefault = "l" + in do + caption' <- inlineListToMan opts caption + modify (\(notes, preprocessors) -> (notes, "t":preprocessors)) + let iwidths = map (printf "w(%0.2fn)" . (70 *)) widths + -- 78n default width - 8n indent = 70n + let coldescriptions = text $ joinWithSep " " + (zipWith (\align width -> aligncode align ++ width) + alignments iwidths) ++ "." + colheadings <- mapM (blockListToMan opts) headers + let makeRow cols = text "T{" $$ + (vcat $ intersperse (text "T}@T{") cols) $$ + text "T}" + let colheadings' = makeRow colheadings + body <- mapM (\row -> do + cols <- mapM (blockListToMan opts) row + return $ makeRow cols) rows + return $ text ".PP" $$ caption' $$ + text ".TS" $$ text "tab(@);" $$ coldescriptions $$ + colheadings' $$ char '_' $$ vcat body $$ text ".TE" + +blockToMan opts (BulletList items) = do + contents <- mapM (bulletListItemToMan opts) items + return (vcat contents) +blockToMan opts (OrderedList attribs items) = do + let markers = take (length items) $ orderedListMarkers attribs + let indent = 1 + (maximum $ map length markers) + contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ + zip markers items + return (vcat contents) +blockToMan opts (DefinitionList items) = do + contents <- mapM (definitionListItemToMan opts) items + return (vcat contents) + +-- | Convert bullet list item (list of blocks) to man. +bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToMan opts [] = return empty +bulletListItemToMan opts ((Para first):rest) = + bulletListItemToMan opts ((Plain first):rest) +bulletListItemToMan opts ((Plain first):rest) = do + first' <- blockToMan opts (Plain first) + rest' <- blockListToMan opts rest + let first'' = text ".IP \\[bu] 2" $$ first' + let rest'' = if null rest + then empty + else text ".RS 2" $$ rest' $$ text ".RE" + return (first'' $$ rest'') +bulletListItemToMan opts (first:rest) = do + first' <- blockToMan opts first + rest' <- blockListToMan opts rest + return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" + +-- | Convert ordered list item (a list of blocks) to man. +orderedListItemToMan :: WriterOptions -- ^ options + -> String -- ^ order marker for list item + -> Int -- ^ number of spaces to indent + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToMan _ _ _ [] = return empty +orderedListItemToMan opts num indent ((Para first):rest) = + orderedListItemToMan opts num indent ((Plain first):rest) +orderedListItemToMan opts num indent (first:rest) = do + first' <- blockToMan opts first + rest' <- blockListToMan opts rest + let num' = printf ("%" ++ show (indent - 1) ++ "s") num + let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' + let rest'' = if null rest + then empty + else text ".RS 4" $$ rest' $$ text ".RE" + return $ first'' $$ rest'' + +-- | Convert definition list item (label, list of blocks) to man. +definitionListItemToMan :: WriterOptions + -> ([Inline],[Block]) + -> State WriterState Doc +definitionListItemToMan opts (label, items) = do + labelText <- inlineListToMan opts label + contents <- if null items + then return empty + else do + let (first, rest) = case items of + ((Para x):y) -> (Plain x,y) + (x:y) -> (x,y) + rest' <- mapM (\item -> blockToMan opts item) + rest >>= (return . vcat) + first' <- blockToMan opts first + return $ first' $$ text ".RS" $$ rest' $$ text ".RE" + return $ text ".TP\n.B " <> labelText $+$ contents + +-- | Convert list of Pandoc block elements to man. +blockListToMan :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToMan opts blocks = + mapM (blockToMan opts) blocks >>= (return . vcat) + +-- | Convert list of Pandoc inline elements to man. +inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) + +-- | Convert Pandoc inline element to man. +inlineToMan :: WriterOptions -> Inline -> State WriterState Doc +inlineToMan opts (Emph lst) = do + contents <- inlineListToMan opts lst + return $ text "\\f[I]" <> contents <> text "\\f[]" +inlineToMan opts (Strong lst) = do + contents <- inlineListToMan opts lst + return $ text "\\f[B]" <> contents <> text "\\f[]" +inlineToMan opts (Strikeout lst) = do + contents <- inlineListToMan opts lst + return $ text "[STRIKEOUT:" <> contents <> char ']' +inlineToMan opts (Superscript lst) = do + contents <- inlineListToMan opts lst + return $ char '^' <> contents <> char '^' +inlineToMan opts (Subscript lst) = do + contents <- inlineListToMan opts lst + return $ char '~' <> contents <> char '~' +inlineToMan opts (Quoted SingleQuote lst) = do + contents <- inlineListToMan opts lst + return $ char '`' <> contents <> char '\'' +inlineToMan opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMan opts lst + return $ text "\\[lq]" <> contents <> text "\\[rq]" +inlineToMan opts EmDash = return $ text "\\[em]" +inlineToMan opts EnDash = return $ text "\\[en]" +inlineToMan opts Apostrophe = return $ char '\'' +inlineToMan opts Ellipses = return $ text "\\&..." +inlineToMan opts (Code str) = + return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]" +inlineToMan opts (Str str) = return $ text $ escapeString str +inlineToMan opts (TeX str) = return $ text $ escapeCode str +inlineToMan opts (HtmlInline str) = return $ text $ escapeCode str +inlineToMan opts (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n" +inlineToMan opts Space = return $ char ' ' +inlineToMan opts (Link txt (src, _)) = do + linktext <- inlineListToMan opts txt + let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + return $ if txt == [Code srcSuffix] + then char '<' <> text srcSuffix <> char '>' + else linktext <> text " (" <> text src <> char ')' +inlineToMan opts (Image alternate (source, tit)) = do + let txt = if (null alternate) || (alternate == [Str ""]) || + (alternate == [Str source]) -- to prevent autolinks + then [Str "image"] + else alternate + linkPart <- inlineToMan opts (Link txt (source, tit)) + return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' +inlineToMan opts (Note contents) = do + modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state + (notes, _) <- get + let ref = show $ (length notes) + return $ char '[' <> text ref <> char ']' + diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs new file mode 100644 index 000000000..4cecaae5d --- /dev/null +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -0,0 +1,373 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Markdown + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to markdown-formatted plain text. + +Markdown: <http://daringfireball.net/projects/markdown/> +-} +module Text.Pandoc.Writers.Markdown ( writeMarkdown) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Blocks +import Text.ParserCombinators.Parsec ( parse, (<|>), GenParser ) +import Data.List ( group, isPrefixOf, drop, find, intersperse ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Control.Monad.State + +type Notes = [[Block]] +type Refs = KeyTable +type WriterState = (Notes, Refs) + +-- | Convert Pandoc to Markdown. +writeMarkdown :: WriterOptions -> Pandoc -> String +writeMarkdown opts document = + render $ evalState (pandocToMarkdown opts document) ([],[]) + +-- | Return markdown representation of document. +pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToMarkdown opts (Pandoc meta blocks) = do + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + let before' = if null before then empty else text before + let after' = if null after then empty else text after + metaBlock <- metaToMarkdown opts meta + let head = if writerStandalone opts + then metaBlock $+$ text (writerHeader opts) + else empty + let headerBlocks = filter isHeaderBlock blocks + let toc = if writerTableOfContents opts + then tableOfContents opts headerBlocks + else empty + body <- blockListToMarkdown opts blocks + (notes, _) <- get + notes' <- notesToMarkdown opts (reverse notes) + (_, refs) <- get -- note that the notes may contain refs + refs' <- keyTableToMarkdown opts (reverse refs) + return $ head $+$ before' $+$ toc $+$ body $+$ text "" $+$ + notes' $+$ text "" $+$ refs' $+$ after' + +-- | Return markdown representation of reference key table. +keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc +keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat + +-- | Return markdown representation of a reference key. +keyToMarkdown :: WriterOptions + -> ([Inline], (String, String)) + -> State WriterState Doc +keyToMarkdown opts (label, (src, tit)) = do + label' <- inlineListToMarkdown opts label + let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\"" + return $ text " " <> char '[' <> label' <> char ']' <> text ": " <> + text src <> tit' + +-- | Return markdown representation of notes. +notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToMarkdown opts notes = + mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= + return . vcat + +-- | Return markdown representation of a note. +noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToMarkdown opts num blocks = do + contents <- blockListToMarkdown opts blocks + let marker = text "[^" <> text (show num) <> text "]:" + return $ hang marker (writerTabStop opts) contents + +-- | Escape special characters for Markdown. +escapeString :: String -> String +escapeString = escapeStringUsing markdownEscapes + where markdownEscapes = ('\160', " "):(backslashEscapes "`<\\*_^~") + +-- | Convert bibliographic information into Markdown header. +metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc +metaToMarkdown opts (Meta title authors date) = do + title' <- titleToMarkdown opts title + authors' <- authorsToMarkdown authors + date' <- dateToMarkdown date + return $ title' $+$ authors' $+$ date' + +titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc +titleToMarkdown opts [] = return empty +titleToMarkdown opts lst = do + contents <- inlineListToMarkdown opts lst + return $ text "% " <> contents + +authorsToMarkdown :: [String] -> State WriterState Doc +authorsToMarkdown [] = return empty +authorsToMarkdown lst = return $ + text "% " <> text (joinWithSep ", " (map escapeString lst)) + +dateToMarkdown :: String -> State WriterState Doc +dateToMarkdown [] = return empty +dateToMarkdown str = return $ text "% " <> text (escapeString str) + +-- | Construct table of contents from list of header blocks. +tableOfContents :: WriterOptions -> [Block] -> Doc +tableOfContents opts headers = + let opts' = opts { writerIgnoreNotes = True } + contents = BulletList $ map elementToListItem $ hierarchicalize headers + in evalState (blockToMarkdown opts' contents) ([],[]) + +-- | Converts an Element to a list item for a table of contents, +elementToListItem :: Element -> [Block] +elementToListItem (Blk _) = [] +elementToListItem (Sec headerText subsecs) = [Plain headerText] ++ + if null subsecs + then [] + else [BulletList $ map elementToListItem subsecs] + +-- | Ordered list start parser for use in Para below. +olMarker :: GenParser Char st Char +olMarker = do (start, style, delim) <- anyOrderedListMarker + if delim == Period && + (style == UpperAlpha || (style == UpperRoman && + start `elem` [1, 5, 10, 50, 100, 500, 1000])) + then spaceChar >> spaceChar + else spaceChar + +-- | True if string begins with an ordered list marker +beginsWithOrderedListMarker :: String -> Bool +beginsWithOrderedListMarker str = + case parse olMarker "para start" str of + Left _ -> False + Right _ -> True + +wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc +wrappedMarkdown opts inlines = do + let chunks = splitBy LineBreak inlines + let chunks' = if null chunks + then [] + else (map (++ [Str " "]) $ init chunks) ++ [last chunks] + lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks' + return $ vcat lns + +-- | Convert Pandoc block element to markdown. +blockToMarkdown :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToMarkdown opts Null = return empty +blockToMarkdown opts (Plain inlines) = + wrappedMarkdown opts inlines +blockToMarkdown opts (Para inlines) = do + contents <- wrappedMarkdown opts inlines + -- escape if para starts with ordered list marker + let esc = if (not (writerStrictMarkdown opts)) && + beginsWithOrderedListMarker (render contents) + then char '\\' + else empty + return $ esc <> contents <> text "\n" +blockToMarkdown opts (RawHtml str) = return $ text str +blockToMarkdown opts HorizontalRule = return $ text "\n* * * * *\n" +blockToMarkdown opts (Header level inlines) = do + contents <- inlineListToMarkdown opts inlines + return $ text ((replicate level '#') ++ " ") <> contents <> text "\n" +blockToMarkdown opts (CodeBlock str) = return $ + (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" +blockToMarkdown opts (BlockQuote blocks) = do + contents <- blockListToMarkdown opts blocks + return $ (vcat $ map (text . ("> " ++)) $ lines $ render contents) <> + text "\n" +blockToMarkdown opts (Table caption aligns widths headers rows) = do + caption' <- inlineListToMarkdown opts caption + let caption'' = if null caption + then empty + else text "" $+$ (text "Table: " <> caption') + headers' <- mapM (blockListToMarkdown opts) headers + let widthsInChars = map (floor . (78 *)) widths + let alignHeader alignment = case alignment of + AlignLeft -> leftAlignBlock + AlignCenter -> centerAlignBlock + AlignRight -> rightAlignBlock + AlignDefault -> leftAlignBlock + let makeRow = hsepBlocks . (zipWith alignHeader aligns) . + (zipWith docToBlock widthsInChars) + let head = makeRow headers' + rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row + return $ makeRow cols) rows + let tableWidth = sum widthsInChars + let maxRowHeight = maximum $ map heightOfBlock (head:rows') + let isMultilineTable = maxRowHeight > 1 + let underline = hsep $ + map (\width -> text $ replicate width '-') widthsInChars + let border = if isMultilineTable + then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-' + else empty + let spacer = if isMultilineTable + then text "" + else empty + let body = vcat $ intersperse spacer $ map blockToDoc rows' + return $ (nest 2 $ border $+$ (blockToDoc head) $+$ underline $+$ body $+$ + border $+$ caption'') <> text "\n" +blockToMarkdown opts (BulletList items) = do + contents <- mapM (bulletListItemToMarkdown opts) items + return $ (vcat contents) <> text "\n" +blockToMarkdown opts (OrderedList attribs items) = do + let markers = orderedListMarkers attribs + let markers' = map (\m -> if length m < 3 + then m ++ replicate (3 - length m) ' ' + else m) markers + contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + zip markers' items + return $ (vcat contents) <> text "\n" +blockToMarkdown opts (DefinitionList items) = do + contents <- mapM (definitionListItemToMarkdown opts) items + return $ (vcat contents) <> text "\n" + +-- | Convert bullet list item (list of blocks) to markdown. +bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToMarkdown opts items = do + contents <- blockListToMarkdown opts items + return $ hang (text "- ") (writerTabStop opts) contents + +-- | Convert ordered list item (a list of blocks) to markdown. +orderedListItemToMarkdown :: WriterOptions -- ^ options + -> String -- ^ list item marker + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToMarkdown opts marker items = do + contents <- blockListToMarkdown opts items + -- The complexities here are needed to ensure that if the list + -- marker is 4 characters or longer, the second and following + -- lines are indented 4 spaces but the list item begins after the marker. + return $ sep [nest (min (3 - length marker) 0) (text marker), + nest (writerTabStop opts) contents] + +-- | Convert definition list item (label, list of blocks) to markdown. +definitionListItemToMarkdown :: WriterOptions + -> ([Inline],[Block]) + -> State WriterState Doc +definitionListItemToMarkdown opts (label, items) = do + labelText <- inlineListToMarkdown opts label + let tabStop = writerTabStop opts + let leader = char ':' + contents <- mapM (\item -> blockToMarkdown opts item >>= + (\txt -> return (leader $$ nest tabStop txt))) + items >>= return . vcat + return $ labelText $+$ contents + +-- | Convert list of Pandoc block elements to markdown. +blockListToMarkdown :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToMarkdown opts blocks = + mapM (blockToMarkdown opts) blocks >>= return . vcat + +-- | Get reference for target; if none exists, create unique one and return. +-- Prefer label if possible; otherwise, generate a unique key. +getReference :: [Inline] -> Target -> State WriterState [Inline] +getReference label (src, tit) = do + (_,refs) <- get + case find ((== (src, tit)) . snd) refs of + Just (ref, _) -> return ref + Nothing -> do + let label' = case find ((== label) . fst) refs of + Just _ -> -- label is used; generate numerical label + case find (\n -> not (any (== [Str (show n)]) + (map fst refs))) [1..10000] of + Just x -> [Str (show x)] + Nothing -> error "no unique label" + Nothing -> label + modify (\(notes, refs) -> (notes, (label', (src,tit)):refs)) + return label' + +-- | Convert list of Pandoc inline elements to markdown. +inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToMarkdown opts lst = + mapM (inlineToMarkdown opts) lst >>= return . hcat + +-- | Convert Pandoc inline element to markdown. +inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc +inlineToMarkdown opts (Emph lst) = do + contents <- inlineListToMarkdown opts lst + return $ char '*' <> contents <> char '*' +inlineToMarkdown opts (Strong lst) = do + contents <- inlineListToMarkdown opts lst + return $ text "**" <> contents <> text "**" +inlineToMarkdown opts (Strikeout lst) = do + contents <- inlineListToMarkdown opts lst + return $ text "~~" <> contents <> text "~~" +inlineToMarkdown opts (Superscript lst) = do + contents <- inlineListToMarkdown opts lst + let contents' = text $ substitute " " "\\ " $ render contents + return $ char '^' <> contents' <> char '^' +inlineToMarkdown opts (Subscript lst) = do + contents <- inlineListToMarkdown opts lst + let contents' = text $ substitute " " "\\ " $ render contents + return $ char '~' <> contents' <> char '~' +inlineToMarkdown opts (Quoted SingleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ char '\'' <> contents <> char '\'' +inlineToMarkdown opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ char '"' <> contents <> char '"' +inlineToMarkdown opts EmDash = return $ text "--" +inlineToMarkdown opts EnDash = return $ char '-' +inlineToMarkdown opts Apostrophe = return $ char '\'' +inlineToMarkdown opts Ellipses = return $ text "..." +inlineToMarkdown opts (Code str) = + let tickGroups = filter (\s -> '`' `elem` s) $ group str + longest = if null tickGroups + then 0 + else maximum $ map length tickGroups + marker = replicate (longest + 1) '`' + spacer = if (longest == 0) then "" else " " in + return $ text (marker ++ spacer ++ str ++ spacer ++ marker) +inlineToMarkdown opts (Str str) = return $ text $ escapeString str +inlineToMarkdown opts (TeX str) = return $ text str +inlineToMarkdown opts (HtmlInline str) = return $ text str +inlineToMarkdown opts (LineBreak) = return $ text " \n" +inlineToMarkdown opts Space = return $ char ' ' +inlineToMarkdown opts (Link txt (src, tit)) = do + linktext <- inlineListToMarkdown opts txt + let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" + let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let useRefLinks = writerReferenceLinks opts + let useAuto = null tit && txt == [Code srcSuffix] + ref <- if useRefLinks then getReference txt (src, tit) else return [] + reftext <- inlineListToMarkdown opts ref + return $ if useAuto + then char '<' <> text srcSuffix <> char '>' + else if useRefLinks + then let first = char '[' <> linktext <> char ']' + second = if txt == ref + then text "[]" + else char '[' <> reftext <> char ']' + in first <> second + else char '[' <> linktext <> char ']' <> + char '(' <> text src <> linktitle <> char ')' +inlineToMarkdown opts (Image alternate (source, tit)) = do + let txt = if (null alternate) || (alternate == [Str ""]) || + (alternate == [Str source]) -- to prevent autolinks + then [Str "image"] + else alternate + linkPart <- inlineToMarkdown opts (Link txt (source, tit)) + return $ char '!' <> linkPart +inlineToMarkdown opts (Note contents) = do + modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state + (notes, _) <- get + let ref = show $ (length notes) + return $ text "[^" <> text ref <> char ']' diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs new file mode 100644 index 000000000..ddcbf95c0 --- /dev/null +++ b/src/Text/Pandoc/Writers/RST.hs @@ -0,0 +1,325 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.RST + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to reStructuredText. + +reStructuredText: <http://docutils.sourceforge.net/rst.html> +-} +module Text.Pandoc.Writers.RST ( writeRST) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Blocks +import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Control.Monad.State + +type Notes = [[Block]] +type Refs = KeyTable +type WriterState = (Notes, Refs, Refs) -- first Refs is links, second pictures + +-- | Convert Pandoc to RST. +writeRST :: WriterOptions -> Pandoc -> String +writeRST opts document = + render $ evalState (pandocToRST opts document) ([],[],[]) + +-- | Return RST representation of document. +pandocToRST :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToRST opts (Pandoc meta blocks) = do + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + before' = if null before then empty else text before + after' = if null after then empty else text after + metaBlock <- metaToRST opts meta + let head = if (writerStandalone opts) + then metaBlock $+$ text (writerHeader opts) + else empty + body <- blockListToRST opts blocks + (notes, _, _) <- get + notes' <- notesToRST opts (reverse notes) + (_, refs, pics) <- get -- note that the notes may contain refs + refs' <- keyTableToRST opts (reverse refs) + pics' <- pictTableToRST opts (reverse pics) + return $ head $+$ before' $+$ body $+$ notes' $+$ text "" $+$ refs' $+$ + pics' $+$ after' + +-- | Return RST representation of reference key table. +keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc +keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat + +-- | Return RST representation of a reference key. +keyToRST :: WriterOptions + -> ([Inline], (String, String)) + -> State WriterState Doc +keyToRST opts (label, (src, tit)) = do + label' <- inlineListToRST opts label + let label'' = if ':' `elem` (render label') + then char '`' <> label' <> char '`' + else label' + return $ text ".. _" <> label'' <> text ": " <> text src + +-- | Return RST representation of notes. +notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToRST opts notes = + mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>= + return . vcat + +-- | Return RST representation of a note. +noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToRST opts num note = do + contents <- blockListToRST opts note + let marker = text ".. [" <> text (show num) <> text "] " + return $ hang marker 3 contents + +-- | Return RST representation of picture reference table. +pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc +pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat + +-- | Return RST representation of a picture substitution reference. +pictToRST :: WriterOptions + -> ([Inline], (String, String)) + -> State WriterState Doc +pictToRST opts (label, (src, _)) = do + label' <- inlineListToRST opts label + return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <> + text src + +-- | Take list of inline elements and return wrapped doc. +wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc +wrappedRST opts inlines = mapM (wrapIfNeeded opts (inlineListToRST opts)) + (splitBy LineBreak inlines) >>= return . vcat + +-- | Escape special characters for RST. +escapeString :: String -> String +escapeString = escapeStringUsing (backslashEscapes "`\\|*_") + +-- | Convert bibliographic information into RST header. +metaToRST :: WriterOptions -> Meta -> State WriterState Doc +metaToRST opts (Meta title authors date) = do + title' <- titleToRST opts title + authors' <- authorsToRST authors + date' <- dateToRST date + let toc = if writerTableOfContents opts + then text "" $+$ text ".. contents::" + else empty + return $ title' $+$ authors' $+$ date' $+$ toc + +titleToRST :: WriterOptions -> [Inline] -> State WriterState Doc +titleToRST opts [] = return empty +titleToRST opts lst = do + contents <- inlineListToRST opts lst + let titleLength = length $ render contents + let border = text (replicate titleLength '=') + return $ border $+$ contents $+$ border <> text "\n" + +authorsToRST :: [String] -> State WriterState Doc +authorsToRST [] = return empty +authorsToRST (first:rest) = do + rest' <- authorsToRST rest + return $ (text ":Author: " <> text first) $+$ rest' + +dateToRST :: String -> State WriterState Doc +dateToRST [] = return empty +dateToRST str = return $ text ":Date: " <> text (escapeString str) + +-- | Convert Pandoc block element to RST. +blockToRST :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToRST opts Null = return empty +blockToRST opts (Plain inlines) = wrappedRST opts inlines +blockToRST opts (Para [TeX str]) = + let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in + return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str') +blockToRST opts (Para inlines) = do + contents <- wrappedRST opts inlines + return $ contents <> text "\n" +blockToRST opts (RawHtml str) = + let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in + return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str') +blockToRST opts HorizontalRule = return $ text "--------------\n" +blockToRST opts (Header level inlines) = do + contents <- inlineListToRST opts inlines + let headerLength = length $ render contents + let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) + let border = text $ replicate headerLength headerChar + return $ contents $+$ border <> text "\n" +blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$ + (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" +blockToRST opts (BlockQuote blocks) = do + contents <- blockListToRST opts blocks + return $ (nest (writerTabStop opts) contents) <> text "\n" +blockToRST opts (Table caption aligns widths headers rows) = do + caption' <- inlineListToRST opts caption + let caption'' = if null caption + then empty + else text "" $+$ (text "Table: " <> caption') + headers' <- mapM (blockListToRST opts) headers + let widthsInChars = map (floor . (78 *)) widths + let alignHeader alignment = case alignment of + AlignLeft -> leftAlignBlock + AlignCenter -> centerAlignBlock + AlignRight -> rightAlignBlock + AlignDefault -> leftAlignBlock + let hpipeBlocks blocks = hcatBlocks [beg, middle, end] + where height = maximum (map heightOfBlock blocks) + sep = TextBlock 3 height (replicate height " | ") + beg = TextBlock 2 height (replicate height "| ") + end = TextBlock 2 height (replicate height " |") + middle = hcatBlocks $ intersperse sep blocks + let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars + let head = makeRow headers' + rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row + return $ makeRow cols) rows + let tableWidth = sum widthsInChars + let maxRowHeight = maximum $ map heightOfBlock (head:rows') + let border ch = char '+' <> char ch <> + (hcat $ intersperse (char ch <> char '+' <> char ch) $ + map (\l -> text $ replicate l ch) widthsInChars) <> + char ch <> char '+' + let body = vcat $ intersperse (border '-') $ map blockToDoc rows' + return $ border '-' $+$ blockToDoc head $+$ border '=' $+$ body $+$ + border '-' $$ caption'' $$ text "" +blockToRST opts (BulletList items) = do + contents <- mapM (bulletListItemToRST opts) items + -- ensure that sublists have preceding blank line + return $ text "" $+$ vcat contents <> text "\n" +blockToRST opts (OrderedList (start, style, delim) items) = do + let markers = if start == 1 && style == DefaultStyle && delim == DefaultDelim + then take (length items) $ repeat "#." + else take (length items) $ orderedListMarkers + (start, style, delim) + let maxMarkerLength = maximum $ map length markers + let markers' = map (\m -> let s = maxMarkerLength - length m + in m ++ replicate s ' ') markers + contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $ + zip markers' items + -- ensure that sublists have preceding blank line + return $ text "" $+$ vcat contents <> text "\n" +blockToRST opts (DefinitionList items) = do + contents <- mapM (definitionListItemToRST opts) items + return $ (vcat contents) <> text "\n" + +-- | Convert bullet list item (list of blocks) to RST. +bulletListItemToRST :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToRST opts items = do + contents <- blockListToRST opts items + return $ hang (text "- ") 3 contents + +-- | Convert ordered list item (a list of blocks) to RST. +orderedListItemToRST :: WriterOptions -- ^ options + -> String -- ^ marker for list item + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToRST opts marker items = do + contents <- blockListToRST opts items + return $ hang (text marker) (length marker + 1) contents + +-- | Convert defintion list item (label, list of blocks) to RST. +definitionListItemToRST :: WriterOptions -> ([Inline], [Block]) -> State WriterState Doc +definitionListItemToRST opts (label, items) = do + label <- inlineListToRST opts label + contents <- blockListToRST opts items + return $ label $+$ nest (writerTabStop opts) contents + +-- | Convert list of Pandoc block elements to RST. +blockListToRST :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToRST opts blocks = + mapM (blockToRST opts) blocks >>= return . vcat + +-- | Convert list of Pandoc inline elements to RST. +inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat + +-- | Convert Pandoc inline element to RST. +inlineToRST :: WriterOptions -> Inline -> State WriterState Doc +inlineToRST opts (Emph lst) = do + contents <- inlineListToRST opts lst + return $ char '*' <> contents <> char '*' +inlineToRST opts (Strong lst) = do + contents <- inlineListToRST opts lst + return $ text "**" <> contents <> text "**" +inlineToRST opts (Strikeout lst) = do + contents <- inlineListToRST opts lst + return $ text "[STRIKEOUT:" <> contents <> char ']' +inlineToRST opts (Superscript lst) = do + contents <- inlineListToRST opts lst + return $ text "\\ :sup:`" <> contents <> text "`\\ " +inlineToRST opts (Subscript lst) = do + contents <- inlineListToRST opts lst + return $ text "\\ :sub:`" <> contents <> text "`\\ " +inlineToRST opts (Quoted SingleQuote lst) = do + contents <- inlineListToRST opts lst + return $ char '\'' <> contents <> char '\'' +inlineToRST opts (Quoted DoubleQuote lst) = do + contents <- inlineListToRST opts lst + return $ char '"' <> contents <> char '"' +inlineToRST opts EmDash = return $ text "--" +inlineToRST opts EnDash = return $ char '-' +inlineToRST opts Apostrophe = return $ char '\'' +inlineToRST opts Ellipses = return $ text "..." +inlineToRST opts (Code str) = return $ text $ "``" ++ str ++ "``" +inlineToRST opts (Str str) = return $ text $ escapeString str +inlineToRST opts (TeX str) = return $ text str +inlineToRST opts (HtmlInline str) = return empty +inlineToRST opts (LineBreak) = return $ char ' ' -- RST doesn't have linebreaks +inlineToRST opts Space = return $ char ' ' +inlineToRST opts (Link [Code str] (src, tit)) | src == str || + src == "mailto:" ++ str = do + let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + return $ text srcSuffix +inlineToRST opts (Link txt (src, tit)) = do + let useReferenceLinks = writerReferenceLinks opts + linktext <- inlineListToRST opts $ normalizeSpaces txt + if useReferenceLinks + then do (notes, refs, pics) <- get + let refs' = if (txt, (src, tit)) `elem` refs + then refs + else (txt, (src, tit)):refs + put (notes, refs', pics) + return $ char '`' <> linktext <> text "`_" + else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_" +inlineToRST opts (Image alternate (source, tit)) = do + (notes, refs, pics) <- get + let labelsUsed = map fst pics + let txt = if null alternate || alternate == [Str ""] || + alternate `elem` labelsUsed + then [Str $ "image" ++ show (length refs)] + else alternate + let pics' = if (txt, (source, tit)) `elem` pics + then pics + else (txt, (source, tit)):pics + put (notes, refs, pics') + label <- inlineListToRST opts txt + return $ char '|' <> label <> char '|' +inlineToRST opts (Note contents) = do + -- add to notes in state + modify (\(notes, refs, pics) -> (contents:notes, refs, pics)) + (notes, _, _) <- get + let ref = show $ (length notes) + return $ text " [" <> text ref <> text "]_" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs new file mode 100644 index 000000000..3bd5c63b2 --- /dev/null +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -0,0 +1,286 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.RTF + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to RTF (rich text format). +-} +module Text.Pandoc.Writers.RTF ( writeRTF ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Regex ( matchRegexAll, mkRegex ) +import Data.List ( isSuffixOf ) +import Data.Char ( ord ) + +-- | Convert Pandoc to a string in rich text format. +writeRTF :: WriterOptions -> Pandoc -> String +writeRTF options (Pandoc meta blocks) = + let head = if writerStandalone options + then rtfHeader (writerHeader options) meta + else "" + toc = if writerTableOfContents options + then tableOfContents $ filter isHeaderBlock blocks + else "" + foot = if writerStandalone options then "\n}\n" else "" + body = writerIncludeBefore options ++ + concatMap (blockToRTF 0 AlignDefault) blocks ++ + writerIncludeAfter options + in head ++ toc ++ body ++ foot + +-- | Construct table of contents from list of header blocks. +tableOfContents :: [Block] -> String +tableOfContents headers = + let contentsTree = hierarchicalize headers + in concatMap (blockToRTF 0 AlignDefault) $ + [Header 1 [Str "Contents"], + BulletList (map elementToListItem contentsTree)] + +elementToListItem :: Element -> [Block] +elementToListItem (Blk _) = [] +elementToListItem (Sec sectext subsecs) = [Plain sectext] ++ + if null subsecs + then [] + else [BulletList (map elementToListItem subsecs)] + +-- | Convert unicode characters (> 127) into rich text format representation. +handleUnicode :: String -> String +handleUnicode [] = [] +handleUnicode (c:cs) = + if ord c > 127 + then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs + else c:(handleUnicode cs) + +-- | Escape special characters. +escapeSpecial :: String -> String +escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}")) + +-- | Escape strings as needed for rich text format. +stringToRTF :: String -> String +stringToRTF = handleUnicode . escapeSpecial + +-- | Escape things as needed for code block in RTF. +codeStringToRTF :: String -> String +codeStringToRTF str = joinWithSep "\\line\n" $ lines (stringToRTF str) + +-- | Deal with raw LaTeX. +latexToRTF :: String -> String +latexToRTF str = "{\\cf1 " ++ (stringToRTF str) ++ "\\cf0 } " + +-- | Make a paragraph with first-line indent, block indent, and space after. +rtfParSpaced :: Int -- ^ space after (in twips) + -> Int -- ^ block indent (in twips) + -> Int -- ^ first line indent (relative to block) (in twips) + -> Alignment -- ^ alignment + -> String -- ^ string with content + -> String +rtfParSpaced spaceAfter indent firstLineIndent alignment content = + let alignString = case alignment of + AlignLeft -> "\\ql " + AlignRight -> "\\qr " + AlignCenter -> "\\qc " + AlignDefault -> "\\ql " + in "{\\pard " ++ alignString ++ + "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ + " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" + +-- | Default paragraph. +rtfPar :: Int -- ^ block indent (in twips) + -> Int -- ^ first line indent (relative to block) (in twips) + -> Alignment -- ^ alignment + -> String -- ^ string with content + -> String +rtfPar = rtfParSpaced 180 + +-- | Compact paragraph (e.g. for compact list items). +rtfCompact :: Int -- ^ block indent (in twips) + -> Int -- ^ first line indent (relative to block) (in twips) + -> Alignment -- ^ alignment + -> String -- ^ string with content + -> String +rtfCompact = rtfParSpaced 0 + +-- number of twips to indent +indentIncrement = 720 +listIncrement = 360 + +-- | Returns appropriate bullet list marker for indent level. +bulletMarker :: Int -> String +bulletMarker indent = case indent `mod` 720 of + 0 -> "\\bullet " + otherwise -> "\\endash " + +-- | Returns appropriate (list of) ordered list markers for indent level. +orderedMarkers :: Int -> ListAttributes -> [String] +orderedMarkers indent (start, style, delim) = + if style == DefaultStyle && delim == DefaultDelim + then case indent `mod` 720 of + 0 -> orderedListMarkers (start, Decimal, Period) + otherwise -> orderedListMarkers (start, LowerAlpha, Period) + else orderedListMarkers (start, style, delim) + +-- | Returns RTF header. +rtfHeader :: String -- ^ header text + -> Meta -- ^ bibliographic information + -> String +rtfHeader headerText (Meta title authors date) = + let titletext = if null title + then "" + else rtfPar 0 0 AlignCenter $ + "\\b \\fs36 " ++ inlineListToRTF title + authorstext = if null authors + then "" + else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $ + map stringToRTF authors)) + datetext = if date == "" + then "" + else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in + let spacer = if null (titletext ++ authorstext ++ datetext) + then "" + else rtfPar 0 0 AlignDefault "" in + headerText ++ titletext ++ authorstext ++ datetext ++ spacer + +-- | Convert Pandoc block element to RTF. +blockToRTF :: Int -- ^ indent level + -> Alignment -- ^ alignment + -> Block -- ^ block to convert + -> String +blockToRTF _ _ Null = "" +blockToRTF indent alignment (Plain lst) = + rtfCompact indent 0 alignment $ inlineListToRTF lst +blockToRTF indent alignment (Para lst) = + rtfPar indent 0 alignment $ inlineListToRTF lst +blockToRTF indent alignment (BlockQuote lst) = + concatMap (blockToRTF (indent + indentIncrement) alignment) lst +blockToRTF indent _ (CodeBlock str) = + rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) +blockToRTF _ _ (RawHtml str) = "" +blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ + concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst +blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ + zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst +blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ + concatMap (definitionListItemToRTF alignment indent) lst +blockToRTF indent _ HorizontalRule = + rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" +blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $ + "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst +blockToRTF indent alignment (Table caption aligns sizes headers rows) = + tableRowToRTF True indent aligns sizes headers ++ + concatMap (tableRowToRTF False indent aligns sizes) rows ++ + rtfPar indent 0 alignment (inlineListToRTF caption) + +tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String +tableRowToRTF header indent aligns sizes cols = + let columns = concat $ zipWith (tableItemToRTF indent) aligns cols + totalTwips = 6 * 1440 -- 6 inches + rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) + 0 sizes + cellDefs = map (\edge -> (if header + then "\\clbrdrb\\brdrs" + else "") ++ "\\cellx" ++ show edge) + rightEdges + start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ + "\\trkeep\\intbl\n{\n" + end = "}\n\\intbl\\row}\n" + in start ++ columns ++ end + +tableItemToRTF :: Int -> Alignment -> [Block] -> String +tableItemToRTF indent alignment item = + let contents = concatMap (blockToRTF indent alignment) item + in "{\\intbl " ++ contents ++ "\\cell}\n" + +-- | Ensure that there's the same amount of space after compact +-- lists as after regular lists. +spaceAtEnd :: String -> String +spaceAtEnd str = + if isSuffixOf "\\par}\n" str + then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" + else str + +-- | Convert list item (list of blocks) to RTF. +listItemToRTF :: Alignment -- ^ alignment + -> Int -- ^ indent level + -> String -- ^ list start marker + -> [Block] -- ^ list item (list of blocks) + -> [Char] +listItemToRTF alignment indent marker [] = + rtfCompact (indent + listIncrement) (0 - listIncrement) alignment + (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") +listItemToRTF alignment indent marker list = + let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in + -- insert the list marker into the (processed) first block + let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of + Just (before, matched, after, _) -> + before ++ "\\fi" ++ show (0 - listIncrement) ++ + " " ++ marker ++ "\\tx" ++ + show listIncrement ++ "\\tab" ++ after + Nothing -> first in + modFirst ++ concat rest + +-- | Convert definition list item (label, list of blocks) to RTF. +definitionListItemToRTF :: Alignment -- ^ alignment + -> Int -- ^ indent level + -> ([Inline],[Block]) -- ^ list item (list of blocks) + -> [Char] +definitionListItemToRTF alignment indent (label, items) = + let labelText = blockToRTF indent alignment (Plain label) + itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) items + in labelText ++ itemsText + +-- | Convert list of inline items to RTF. +inlineListToRTF :: [Inline] -- ^ list of inlines to convert + -> String +inlineListToRTF lst = concatMap inlineToRTF lst + +-- | Convert inline item to RTF. +inlineToRTF :: Inline -- ^ inline to convert + -> String +inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "} " +inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "} " +inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "} " +inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "} " +inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "} " +inlineToRTF (Quoted SingleQuote lst) = + "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" +inlineToRTF (Quoted DoubleQuote lst) = + "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" +inlineToRTF Apostrophe = "\\u8217'" +inlineToRTF Ellipses = "\\u8230?" +inlineToRTF EmDash = "\\u8212-" +inlineToRTF EnDash = "\\u8211-" +inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} " +inlineToRTF (Str str) = stringToRTF str +inlineToRTF (TeX str) = latexToRTF str +inlineToRTF (HtmlInline str) = "" +inlineToRTF (LineBreak) = "\\line " +inlineToRTF Space = " " +inlineToRTF (Link text (src, tit)) = + "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ + "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" +inlineToRTF (Image alternate (source, tit)) = + "{\\cf1 [image: " ++ source ++ "]\\cf0}" +inlineToRTF (Note contents) = + "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" diff --git a/src/headers/ConTeXtHeader b/src/headers/ConTeXtHeader new file mode 100644 index 000000000..41648081c --- /dev/null +++ b/src/headers/ConTeXtHeader @@ -0,0 +1,61 @@ +\enableregime[utf] % use UTF-8 + +\setupcolors[state=start] +\setupinteraction[state=start, color=middlered] % needed for hyperlinks + +\setuppapersize[letter][letter] % use letter paper +\setuplayout[width=middle, backspace=1.5in, cutspace=1.5in, + height=middle, header=0.75in, footer=0.75in] % page layout +\setuppagenumbering[location={footer,center}] % number pages +\setupbodyfont[11pt] % 11pt font +\setupwhitespace[medium] % inter-paragraph spacing + +\setuphead[section][style=\tfc] +\setuphead[subsection][style=\tfb] +\setuphead[subsubsection][style=\bf] + +% define title block commands +\unprotect +\def\doctitle#1{\gdef\@title{#1}} +\def\author#1{\gdef\@author{#1}} +\def\date#1{\gdef\@date{#1}} +\date{\currentdate} % Default to today unless specified otherwise. +\def\maketitle{% + \startalignment[center] + \blank[2*big] + {\tfd \@title} + \blank[3*medium] + {\tfa \@author} + \blank[2*medium] + {\tfa \@date} + \blank[3*medium] + \stopalignment} +\protect + +% define descr (for definition lists) +\definedescription[descr][ + headstyle=bold,style=normal,align=left,location=hanging, + width=broad,margin=1cm] + +% define ltxitem (for bulleted lists) +\defineitemgroup[ltxitem][levels=4] +\setupitemgroup[ltxitem][1][1] +\setupitemgroup[ltxitem][2][2] +\setupitemgroup[ltxitem][3][3] +\setupitemgroup[ltxitem][4][4,packed] + +% define ltxenum (for enumerated lists) +\defineitemgroup[ltxenum][levels=4] +\setupitemgroup[ltxenum][1][n] +\setupitemgroup[ltxenum][2][a] +\setupitemgroup[ltxenum][3][r] +\setupitemgroup[ltxenum][4][A,packed] + +\setupthinrules[width=15em] % width of horizontal rules + +% for block quotations +\definestartstop [blockquote] + [before={\startnarrower\switchtobodyfont[11pt] + \whitespace\setupindenting[no]}, + after={\stopnarrower\whitespace}] + diff --git a/src/headers/DocbookHeader b/src/headers/DocbookHeader new file mode 100644 index 000000000..7b26b2c73 --- /dev/null +++ b/src/headers/DocbookHeader @@ -0,0 +1,3 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN" + "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"> diff --git a/src/headers/LaTeXHeader b/src/headers/LaTeXHeader new file mode 100644 index 000000000..d891b5f63 --- /dev/null +++ b/src/headers/LaTeXHeader @@ -0,0 +1,5 @@ +\documentclass{article} +\usepackage[mathletters]{ucs} +\usepackage[utf8x]{inputenc} +\setlength{\parindent}{0pt} +\setlength{\parskip}{6pt plus 2pt minus 1pt} diff --git a/src/headers/RTFHeader b/src/headers/RTFHeader new file mode 100644 index 000000000..b4368694b --- /dev/null +++ b/src/headers/RTFHeader @@ -0,0 +1,4 @@ +{\rtf1\ansi\deff0{\fonttbl{\f0 \fswiss Helvetica;}{\f1 Courier;}} +{\colortbl;\red255\green0\blue0;\red0\green0\blue255;} +\widowctrl\hyphauto + diff --git a/src/headers/S5Header b/src/headers/S5Header new file mode 100644 index 000000000..ebb24ebe2 --- /dev/null +++ b/src/headers/S5Header @@ -0,0 +1,3 @@ +<!-- configuration parameters --> +<meta name="defaultView" content="slideshow" /> +<meta name="controlVis" content="hidden" /> diff --git a/src/templates/ASCIIMathML.hs b/src/templates/ASCIIMathML.hs new file mode 100644 index 000000000..1d04c6ff7 --- /dev/null +++ b/src/templates/ASCIIMathML.hs @@ -0,0 +1,7 @@ +-- | Definitions for use of ASCIIMathML in HTML. +-- (See <http://www1.chapman.edu/~jipsen/mathml/asciimath.html>.) +module Text.Pandoc.ASCIIMathML ( asciiMathMLScript ) where + +-- | String containing ASCIIMathML javascript. +asciiMathMLScript :: String +asciiMathMLScript = "<script type=\"text/javascript\">\n@ASCIIMathML.js@</script>\n" diff --git a/src/templates/DefaultHeaders.hs b/src/templates/DefaultHeaders.hs new file mode 100644 index 000000000..1bd9fe1d2 --- /dev/null +++ b/src/templates/DefaultHeaders.hs @@ -0,0 +1,52 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.DefaultHeaders + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Default headers for Pandoc writers. +-} +module Text.Pandoc.Writers.DefaultHeaders ( + defaultLaTeXHeader, + defaultConTeXtHeader, + defaultDocbookHeader, + defaultS5Header, + defaultRTFHeader + ) where +import Text.Pandoc.Writers.S5 + +defaultLaTeXHeader :: String +defaultLaTeXHeader = "@LaTeXHeader@" + +defaultConTeXtHeader :: String +defaultConTeXtHeader = "@ConTeXtHeader@" + +defaultDocbookHeader :: String +defaultDocbookHeader = "@DocbookHeader@" + +defaultS5Header :: String +defaultS5Header = s5Meta ++ s5CSS ++ s5Javascript + +defaultRTFHeader :: String +defaultRTFHeader = "@RTFHeader@" diff --git a/src/templates/Makefile b/src/templates/Makefile new file mode 100644 index 000000000..9522666c5 --- /dev/null +++ b/src/templates/Makefile @@ -0,0 +1,20 @@ +VPATH := .. +PROCESSOR := ./fillTemplates.pl +TARGETS := Text/Pandoc/ASCIIMathML.hs \ + Text/Pandoc/Writers/S5.hs \ + Text/Pandoc/Writers/DefaultHeaders.hs + +all: $(TARGETS) + +Text/Pandoc/ASCIIMathML.hs: ASCIIMathML.hs $(PROCESSOR) $(VPATH)/ASCIIMathML.js + perl $(PROCESSOR) $@ $(VPATH) + +Text/Pandoc/Writers/S5.hs: S5.hs $(PROCESSOR) $(VPATH)/ui/default/* + perl $(PROCESSOR) $@ $(VPATH) + +Text/Pandoc/Writers/DefaultHeaders.hs: DefaultHeaders.hs $(PROCESSOR) $(VPATH)/headers/* + perl $(PROCESSOR) $@ $(VPATH) + +.PHONY: clean +clean: + cd $(VPATH); rm -f $(TARGETS) diff --git a/src/templates/S5.hs b/src/templates/S5.hs new file mode 100644 index 000000000..a0b69b132 --- /dev/null +++ b/src/templates/S5.hs @@ -0,0 +1,133 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.S5 + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Definitions for creation of S5 powerpoint-like HTML. +(See <http://meyerweb.com/eric/tools/s5/>.) +-} +module Text.Pandoc.Writers.S5 ( + -- * Strings + s5Meta, + s5Javascript, + s5CSS, + s5Links, + -- * Functions + writeS5, + writeS5String, + insertS5Structure + ) where +import Text.Pandoc.Shared ( joinWithSep, WriterOptions ) +import Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString ) +import Text.Pandoc.Definition +import Text.XHtml.Strict + +s5Meta :: String +s5Meta = "<!-- configuration parameters -->\n<meta name=\"defaultView\" content=\"slideshow\" />\n<meta name=\"controlVis\" content=\"hidden\" />\n" + +s5Javascript :: String +s5Javascript = "<script type=\"text/javascript\">\n@slides.js@</script>\n" + +s5CoreCSS :: String +s5CoreCSS = "@s5-core.css@" + +s5FramingCSS :: String +s5FramingCSS = "@framing.css@" + +s5PrettyCSS :: String +s5PrettyCSS = "@pretty.css@" + +s5OperaCSS :: String +s5OperaCSS = "@opera.css@" + +s5OutlineCSS :: String +s5OutlineCSS = "@outline.css@" + +s5PrintCSS :: String +s5PrintCSS = "@print.css@" + +s5CSS :: String +s5CSS = "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n" + +s5Links :: String +s5Links = "<!-- style sheet links -->\n<link rel=\"stylesheet\" href=\"ui/default/slides.css\" type=\"text/css\" media=\"projection\" id=\"slideProj\" />\n<link rel=\"stylesheet\" href=\"ui/default/outline.css\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" />\n<link rel=\"stylesheet\" href=\"ui/default/print.css\" type=\"text/css\" media=\"print\" id=\"slidePrint\" />\n<link rel=\"stylesheet\" href=\"ui/default/opera.css\" type=\"text/css\" media=\"projection\" id=\"operaFix\" />\n<!-- S5 JS -->\n<script src=\"ui/default/slides.js\" type=\"text/javascript\"></script>\n" + +-- | Converts Pandoc document to an S5 HTML presentation (Html structure). +writeS5 :: WriterOptions -> Pandoc -> Html +writeS5 options = (writeHtml options) . insertS5Structure + +-- | Converts Pandoc document to an S5 HTML presentation (string). +writeS5String :: WriterOptions -> Pandoc -> String +writeS5String options = (writeHtmlString options) . insertS5Structure + +-- | Inserts HTML needed for an S5 presentation (e.g. around slides). +layoutDiv :: [Inline] -- ^ Title of document (for header or footer) + -> String -- ^ Date of document (for header or footer) + -> [Block] -- ^ List of block elements returned +layoutDiv title date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 [Str date]), (Header 2 title), (RawHtml "</div>\n</div>\n")] + +presentationStart = RawHtml "<div class=\"presentation\">\n\n" + +presentationEnd = RawHtml "</div>\n" + +slideStart = RawHtml "<div class=\"slide\">\n" + +slideEnd = RawHtml "</div>\n" + +-- | Returns 'True' if block is a Header 1. +isH1 :: Block -> Bool +isH1 (Header 1 _) = True +isH1 _ = False + +-- | Insert HTML around sections to make individual slides. +insertSlides :: Bool -> [Block] -> [Block] +insertSlides beginning blocks = + let (beforeHead, rest) = break isH1 blocks in + if (null rest) then + if beginning then + beforeHead + else + beforeHead ++ [slideEnd] + else + if beginning then + beforeHead ++ + slideStart:(head rest):(insertSlides False (tail rest)) + else + beforeHead ++ + slideEnd:slideStart:(head rest):(insertSlides False (tail rest)) + +-- | Insert blocks into 'Pandoc' for slide structure. +insertS5Structure :: Pandoc -> Pandoc +insertS5Structure (Pandoc meta []) = Pandoc meta [] +insertS5Structure (Pandoc (Meta title authors date) blocks) = + let slides = insertSlides True blocks + firstSlide = if not (null title) + then [slideStart, (Header 1 title), + (Header 3 [Str (joinWithSep ", " authors)]), + (Header 4 [Str date]), slideEnd] + else [] + newBlocks = (layoutDiv title date) ++ presentationStart:firstSlide ++ + slides ++ [presentationEnd] + in Pandoc (Meta title authors date) newBlocks diff --git a/src/templates/fillTemplates.pl b/src/templates/fillTemplates.pl new file mode 100755 index 000000000..85e6eaa18 --- /dev/null +++ b/src/templates/fillTemplates.pl @@ -0,0 +1,131 @@ +#!/usr/bin/env perl +# Fills templates with haskell-escaped strings slurped from input files. +# Takes two arguments, the first specifying the pathname of the target +# relative to the root directory, the second specifying the root directory +# (defaulting to ..). The template is assumed to have the same base name +# as the target and to reside in the templates subdirectory of the root +# directory. + +use strict; +use warnings; + +# Utility routines: + +sub slurp { + open FILE, $_[0] or die "couldn't open file '$_[0]': $!"; + my $contents = do { local $/; <FILE>;}; + close FILE; + return $contents; +} + +sub escape_for_haskell { + my ($contents) = @_; + + $contents =~ s/\\/\\\\/g; + $contents =~ s/\t/\\t/g; + $contents =~ s/"/\\"/g; + $contents =~ s/\n/\\n/g; + return $contents; +} + +# Template processors. + +my %processor = ( + # -------------------------------------------------------------------------- + 'Text/Pandoc/Writers/S5.hs' => { + # -------------------------------------------------------------------------- + proc => sub { + my ($template) = @_; + + my (@files) = qw(slides.js s5-core.css framing.css pretty.css + opera.css outline.css print.css); + + foreach my $file (@files) { + my $replacement = escape_for_haskell(slurp "ui/default/$file"); + my $escapedfile = $file; + $escapedfile =~ s/\./\\./g; + $template =~ s/\@$escapedfile\@/$replacement/; + } + return $template; + }, + }, + # -------------------------------------------------------------------------- + 'Text/Pandoc/ASCIIMathML.hs' => { + # -------------------------------------------------------------------------- + proc => sub { + my ($template) = @_; + + my $script = escape_for_haskell(slurp "ASCIIMathML.js"); + my $acknowledgements = + " ASCIIMathML.js - copyright Peter Jipsen,". + " released under the GPL\\nSee ". + "http://www1.chapman.edu/~jipsen/mathml/asciimath.html/ "; + $script =~ s/\/\*.*?\*\//\/\*$acknowledgements\*\//g; # strip comments + $template =~ s/\@ASCIIMathML\.js@/$script/; + + return $template; + }, + }, + # -------------------------------------------------------------------------- + 'Text/Pandoc/Writers/DefaultHeaders.hs' => { + # -------------------------------------------------------------------------- + proc => sub { + my ($template) = @_; + + my (@headers) = split(/\s/,`ls headers`); + foreach my $header (@headers) { + my ($replacement) = escape_for_haskell(slurp "headers/$header"); + $template =~ s/\@$header\@/$replacement/; + } + + return $template; + }, + }, + # -------------------------------------------------------------------------- + # 'foo/bar/baz' => { + # -------------------------------------------------------------------------- + # template => 'optional-template-filename-defaults-to-baz' + # proc => sub { + # my ($template) = @_; + # # Process. + # return $template; + # }, + #}, +); + +# Main. + +my $target = shift @ARGV; +if (!defined $target || !length $target) { + print STDERR "Available targets:\n\n" . join "\n", keys %processor; + die "\n\nYou must supply a target!\n"; +} + +die "No processor exists for '$target'!\n" if ! exists $processor{$target}; + +my $rootdir = shift @ARGV || '..'; +chdir $rootdir or die "Couldn't chdir to '$rootdir': $!"; + +my $template; +if (exists $processor{$target}->{template}) { + $template = $processor{$target}->{template}; +} +else { + ($template = $target) =~ s!.*/+!!; +} +$template = "templates/$template"; +die "No template exists for '$target'!\n" if ! -f "$template"; + +open OUTFILE, ">$target" or die "couldn't open file '$target': $!"; +print OUTFILE <<END; +---------------------------------------------------- +-- Do not edit this file by hand. Edit +-- '$template' +-- and run $0 $target +---------------------------------------------------- + +END + +print OUTFILE $processor{$target}->{proc}->(slurp($template)); +print OUTFILE "\n"; +close OUTFILE; diff --git a/src/ui/default/blank.gif b/src/ui/default/blank.gif Binary files differnew file mode 100644 index 000000000..75b945d25 --- /dev/null +++ b/src/ui/default/blank.gif diff --git a/src/ui/default/bodybg.gif b/src/ui/default/bodybg.gif Binary files differnew file mode 100644 index 000000000..5f448a16f --- /dev/null +++ b/src/ui/default/bodybg.gif diff --git a/src/ui/default/framing.css b/src/ui/default/framing.css new file mode 100644 index 000000000..14d8509e9 --- /dev/null +++ b/src/ui/default/framing.css @@ -0,0 +1,23 @@ +/* The following styles size, place, and layer the slide components. + Edit these if you want to change the overall slide layout. + The commented lines can be uncommented (and modified, if necessary) + to help you with the rearrangement process. */ + +/* target = 1024x768 */ + +div#header, div#footer, .slide {width: 100%; top: 0; left: 0;} +div#header {top: 0; height: 3em; z-index: 1;} +div#footer {top: auto; bottom: 0; height: 2.5em; z-index: 5;} +.slide {top: 0; width: 92%; padding: 3.5em 4% 4%; z-index: 2; list-style: none;} +div#controls {left: 50%; bottom: 0; width: 50%; z-index: 100;} +div#controls form {position: absolute; bottom: 0; right: 0; width: 100%; + margin: 0;} +#currentSlide {position: absolute; width: 10%; left: 45%; bottom: 1em; z-index: 10;} +html>body #currentSlide {position: fixed;} + +/* +div#header {background: #FCC;} +div#footer {background: #CCF;} +div#controls {background: #BBD;} +div#currentSlide {background: #FFC;} +*/ diff --git a/src/ui/default/iepngfix.htc b/src/ui/default/iepngfix.htc new file mode 100644 index 000000000..bba2db756 --- /dev/null +++ b/src/ui/default/iepngfix.htc @@ -0,0 +1,42 @@ +<public:component>
+<public:attach event="onpropertychange" onevent="doFix()" />
+
+<script>
+
+// IE5.5+ PNG Alpha Fix v1.0 by Angus Turnbull http://www.twinhelix.com
+// Free usage permitted as long as this notice remains intact.
+
+// This must be a path to a blank image. That's all the configuration you need here.
+var blankImg = 'ui/default/blank.gif';
+
+var f = 'DXImageTransform.Microsoft.AlphaImageLoader';
+
+function filt(s, m) {
+ if (filters[f]) {
+ filters[f].enabled = s ? true : false;
+ if (s) with (filters[f]) { src = s; sizingMethod = m }
+ } else if (s) style.filter = 'progid:'+f+'(src="'+s+'",sizingMethod="'+m+'")';
+}
+
+function doFix() {
+ if ((parseFloat(navigator.userAgent.match(/MSIE (\S+)/)[1]) < 5.5) ||
+ (event && !/(background|src)/.test(event.propertyName))) return;
+
+ if (tagName == 'IMG') {
+ if ((/\.png$/i).test(src)) {
+ filt(src, 'image'); // was 'scale'
+ src = blankImg;
+ } else if (src.indexOf(blankImg) < 0) filt();
+ } else if (style.backgroundImage) {
+ if (style.backgroundImage.match(/^url[("']+(.*\.png)[)"']+$/i)) {
+ var s = RegExp.$1;
+ style.backgroundImage = '';
+ filt(s, 'crop');
+ } else filt();
+ }
+}
+
+doFix();
+
+</script>
+</public:component>
\ No newline at end of file diff --git a/src/ui/default/opera.css b/src/ui/default/opera.css new file mode 100644 index 000000000..9e9d2a3c5 --- /dev/null +++ b/src/ui/default/opera.css @@ -0,0 +1,7 @@ +/* DO NOT CHANGE THESE unless you really want to break Opera Show */ +.slide { + visibility: visible !important; + position: static !important; + page-break-before: always; +} +#slide0 {page-break-before: avoid;} diff --git a/src/ui/default/outline.css b/src/ui/default/outline.css new file mode 100644 index 000000000..62db519ed --- /dev/null +++ b/src/ui/default/outline.css @@ -0,0 +1,15 @@ +/* don't change this unless you want the layout stuff to show up in the outline view! */ + +.layout div, #footer *, #controlForm * {display: none;} +#footer, #controls, #controlForm, #navLinks, #toggle { + display: block; visibility: visible; margin: 0; padding: 0;} +#toggle {float: right; padding: 0.5em;} +html>body #toggle {position: fixed; top: 0; right: 0;} + +/* making the outline look pretty-ish */ + +#slide0 h1, #slide0 h2, #slide0 h3, #slide0 h4 {border: none; margin: 0;} +#slide0 h1 {padding-top: 1.5em;} +.slide h1 {margin: 1.5em 0 0; padding-top: 0.25em; + border-top: 1px solid #888; border-bottom: 1px solid #AAA;} +#toggle {border: 1px solid; border-width: 0 0 1px 1px; background: #FFF;} diff --git a/src/ui/default/pretty.css b/src/ui/default/pretty.css new file mode 100644 index 000000000..3d3acefff --- /dev/null +++ b/src/ui/default/pretty.css @@ -0,0 +1,86 @@ +/* Following are the presentation styles -- edit away! */ + +body {background: #FFF url(bodybg.gif) -16px 0 no-repeat; color: #000; font-size: 2em;} +:link, :visited {text-decoration: none; color: #00C;} +#controls :active {color: #88A !important;} +#controls :focus {outline: 1px dotted #227;} +h1, h2, h3, h4 {font-size: 100%; margin: 0; padding: 0; font-weight: inherit;} +ul, pre {margin: 0; line-height: 1em;} +html, body {margin: 0; padding: 0;} + +blockquote, q {font-style: italic;} +blockquote {padding: 0 2em 0.5em; margin: 0 1.5em 0.5em; text-align: center; font-size: 1em;} +blockquote p {margin: 0;} +blockquote i {font-style: normal;} +blockquote b {display: block; margin-top: 0.5em; font-weight: normal; font-size: smaller; font-style: normal;} +blockquote b i {font-style: italic;} + +kbd {font-weight: bold; font-size: 1em;} +sup {font-size: smaller; line-height: 1px;} + +.slide code {padding: 2px 0.25em; font-weight: bold; color: #533;} +.slide code.bad, code del {color: red;} +.slide code.old {color: silver;} +.slide pre {padding: 0; margin: 0.25em 0 0.5em 0.5em; color: #533; font-size: 90%;} +.slide pre code {display: block;} +.slide ul {margin-left: 5%; margin-right: 7%; list-style: disc;} +.slide li {margin-top: 0.75em; margin-right: 0;} +.slide ul ul {line-height: 1;} +.slide ul ul li {margin: .2em; font-size: 85%; list-style: square;} +.slide img.leader {display: block; margin: 0 auto;} + +div#header, div#footer {background: #005; color: #AAB; + font-family: Verdana, Helvetica, sans-serif;} +div#header {background: #005 url(bodybg.gif) -16px 0 no-repeat; + line-height: 1px;} +div#footer {font-size: 0.5em; font-weight: bold; padding: 1em 0;} +#footer h1, #footer h2 {display: block; padding: 0 1em;} +#footer h2 {font-style: italic;} + +div.long {font-size: 0.75em;} +.slide h1 {position: absolute; top: 0.7em; left: 87px; z-index: 1; + margin: 0; padding: 0.3em 0 0 50px; white-space: nowrap; + font: bold 150%/1em Helvetica, sans-serif; text-transform: capitalize; + color: #DDE; background: #005;} +.slide h3 {font-size: 130%;} +h1 abbr {font-variant: small-caps;} + +div#controls {position: absolute; left: 50%; bottom: 0; + width: 50%; + text-align: right; font: bold 0.9em Verdana, Helvetica, sans-serif;} +html>body div#controls {position: fixed; padding: 0 0 1em 0; + top: auto;} +div#controls form {position: absolute; bottom: 0; right: 0; width: 100%; + margin: 0; padding: 0;} +#controls #navLinks a {padding: 0; margin: 0 0.5em; + background: #005; border: none; color: #779; + cursor: pointer;} +#controls #navList {height: 1em;} +#controls #navList #jumplist {position: absolute; bottom: 0; right: 0; background: #DDD; color: #227;} + +#currentSlide {text-align: center; font-size: 0.5em; color: #449;} + +#slide0 {padding-top: 3.5em; font-size: 90%;} +#slide0 h1 {position: static; margin: 1em 0 0; padding: 0; + font: bold 2em Helvetica, sans-serif; white-space: normal; + color: #000; background: transparent;} +#slide0 h2 {font: bold italic 1em Helvetica, sans-serif; margin: 0.25em;} +#slide0 h3 {margin-top: 1.5em; font-size: 1.5em;} +#slide0 h4 {margin-top: 0; font-size: 1em;} + +ul.urls {list-style: none; display: inline; margin: 0;} +.urls li {display: inline; margin: 0;} +.note {display: none;} +.external {border-bottom: 1px dotted gray;} +html>body .external {border-bottom: none;} +.external:after {content: " \274F"; font-size: smaller; color: #77B;} + +.incremental, .incremental *, .incremental *:after {color: #DDE; visibility: visible;} +img.incremental {visibility: hidden;} +.slide .current {color: #B02;} + + +/* diagnostics + +li:after {content: " [" attr(class) "]"; color: #F88;} + */
\ No newline at end of file diff --git a/src/ui/default/print.css b/src/ui/default/print.css new file mode 100644 index 000000000..4a3554ddd --- /dev/null +++ b/src/ui/default/print.css @@ -0,0 +1,24 @@ +/* The following rule is necessary to have all slides appear in print! DO NOT REMOVE IT! */ +.slide, ul {page-break-inside: avoid; visibility: visible !important;} +h1 {page-break-after: avoid;} + +body {font-size: 12pt; background: white;} +* {color: black;} + +#slide0 h1 {font-size: 200%; border: none; margin: 0.5em 0 0.25em;} +#slide0 h3 {margin: 0; padding: 0;} +#slide0 h4 {margin: 0 0 0.5em; padding: 0;} +#slide0 {margin-bottom: 3em;} + +h1 {border-top: 2pt solid gray; border-bottom: 1px dotted silver;} +.extra {background: transparent !important;} +div.extra, pre.extra, .example {font-size: 10pt; color: #333;} +ul.extra a {font-weight: bold;} +p.example {display: none;} + +#header {display: none;} +#footer h1 {margin: 0; border-bottom: 1px solid; color: gray; font-style: italic;} +#footer h2, #controls {display: none;} + +/* The following rule keeps the layout stuff out of print. Remove at your own risk! */ +.layout, .layout * {display: none !important;} diff --git a/src/ui/default/s5-core.css b/src/ui/default/s5-core.css new file mode 100644 index 000000000..86444e041 --- /dev/null +++ b/src/ui/default/s5-core.css @@ -0,0 +1,9 @@ +/* Do not edit or override these styles! The system will likely break if you do. */ + +div#header, div#footer, div#controls, .slide {position: absolute;} +html>body div#header, html>body div#footer, + html>body div#controls, html>body .slide {position: fixed;} +.handout {display: none;} +.layout {display: block;} +.slide, .hideme, .incremental {visibility: hidden;} +#slide0 {visibility: visible;} diff --git a/src/ui/default/slides.css b/src/ui/default/slides.css new file mode 100644 index 000000000..0786d7dbd --- /dev/null +++ b/src/ui/default/slides.css @@ -0,0 +1,3 @@ +@import url(s5-core.css); /* required to make the slide show run at all */ +@import url(framing.css); /* sets basic placement and size of slide components */ +@import url(pretty.css); /* stuff that makes the slides look better than blah */
\ No newline at end of file diff --git a/src/ui/default/slides.js b/src/ui/default/slides.js new file mode 100644 index 000000000..38fe8531c --- /dev/null +++ b/src/ui/default/slides.js @@ -0,0 +1,553 @@ +// S5 v1.1 slides.js -- released into the Public Domain +// +// Please see http://www.meyerweb.com/eric/tools/s5/credits.html for information +// about all the wonderful and talented contributors to this code! + +var undef; +var slideCSS = ''; +var snum = 0; +var smax = 1; +var incpos = 0; +var number = undef; +var s5mode = true; +var defaultView = 'slideshow'; +var controlVis = 'visible'; + +var isIE = navigator.appName == 'Microsoft Internet Explorer' && navigator.userAgent.indexOf('Opera') < 1 ? 1 : 0; +var isOp = navigator.userAgent.indexOf('Opera') > -1 ? 1 : 0; +var isGe = navigator.userAgent.indexOf('Gecko') > -1 && navigator.userAgent.indexOf('Safari') < 1 ? 1 : 0; + +function hasClass(object, className) { + if (!object.className) return false; + return (object.className.search('(^|\\s)' + className + '(\\s|$)') != -1); +} + +function hasValue(object, value) { + if (!object) return false; + return (object.search('(^|\\s)' + value + '(\\s|$)') != -1); +} + +function removeClass(object,className) { + if (!object) return; + object.className = object.className.replace(new RegExp('(^|\\s)'+className+'(\\s|$)'), RegExp.$1+RegExp.$2); +} + +function addClass(object,className) { + if (!object || hasClass(object, className)) return; + if (object.className) { + object.className += ' '+className; + } else { + object.className = className; + } +} + +function GetElementsWithClassName(elementName,className) { + var allElements = document.getElementsByTagName(elementName); + var elemColl = new Array(); + for (var i = 0; i< allElements.length; i++) { + if (hasClass(allElements[i], className)) { + elemColl[elemColl.length] = allElements[i]; + } + } + return elemColl; +} + +function isParentOrSelf(element, id) { + if (element == null || element.nodeName=='BODY') return false; + else if (element.id == id) return true; + else return isParentOrSelf(element.parentNode, id); +} + +function nodeValue(node) { + var result = ""; + if (node.nodeType == 1) { + var children = node.childNodes; + for (var i = 0; i < children.length; ++i) { + result += nodeValue(children[i]); + } + } + else if (node.nodeType == 3) { + result = node.nodeValue; + } + return(result); +} + +function slideLabel() { + var slideColl = GetElementsWithClassName('*','slide'); + var list = document.getElementById('jumplist'); + smax = slideColl.length; + for (var n = 0; n < smax; n++) { + var obj = slideColl[n]; + + var did = 'slide' + n.toString(); + obj.setAttribute('id',did); + if (isOp) continue; + + var otext = ''; + var menu = obj.firstChild; + if (!menu) continue; // to cope with empty slides + while (menu && menu.nodeType == 3) { + menu = menu.nextSibling; + } + if (!menu) continue; // to cope with slides with only text nodes + + var menunodes = menu.childNodes; + for (var o = 0; o < menunodes.length; o++) { + otext += nodeValue(menunodes[o]); + } + list.options[list.length] = new Option(n + ' : ' + otext, n); + } +} + +function currentSlide() { + var cs; + if (document.getElementById) { + cs = document.getElementById('currentSlide'); + } else { + cs = document.currentSlide; + } + cs.innerHTML = '<span id="csHere">' + snum + '<\/span> ' + + '<span id="csSep">\/<\/span> ' + + '<span id="csTotal">' + (smax-1) + '<\/span>'; + if (snum == 0) { + cs.style.visibility = 'hidden'; + } else { + cs.style.visibility = 'visible'; + } +} + +function go(step) { + if (document.getElementById('slideProj').disabled || step == 0) return; + var jl = document.getElementById('jumplist'); + var cid = 'slide' + snum; + var ce = document.getElementById(cid); + if (incrementals[snum].length > 0) { + for (var i = 0; i < incrementals[snum].length; i++) { + removeClass(incrementals[snum][i], 'current'); + removeClass(incrementals[snum][i], 'incremental'); + } + } + if (step != 'j') { + snum += step; + lmax = smax - 1; + if (snum > lmax) snum = lmax; + if (snum < 0) snum = 0; + } else + snum = parseInt(jl.value); + var nid = 'slide' + snum; + var ne = document.getElementById(nid); + if (!ne) { + ne = document.getElementById('slide0'); + snum = 0; + } + if (step < 0) {incpos = incrementals[snum].length} else {incpos = 0;} + if (incrementals[snum].length > 0 && incpos == 0) { + for (var i = 0; i < incrementals[snum].length; i++) { + if (hasClass(incrementals[snum][i], 'current')) + incpos = i + 1; + else + addClass(incrementals[snum][i], 'incremental'); + } + } + if (incrementals[snum].length > 0 && incpos > 0) + addClass(incrementals[snum][incpos - 1], 'current'); + ce.style.visibility = 'hidden'; + ne.style.visibility = 'visible'; + jl.selectedIndex = snum; + currentSlide(); + number = 0; +} + +function goTo(target) { + if (target >= smax || target == snum) return; + go(target - snum); +} + +function subgo(step) { + if (step > 0) { + removeClass(incrementals[snum][incpos - 1],'current'); + removeClass(incrementals[snum][incpos], 'incremental'); + addClass(incrementals[snum][incpos],'current'); + incpos++; + } else { + incpos--; + removeClass(incrementals[snum][incpos],'current'); + addClass(incrementals[snum][incpos], 'incremental'); + addClass(incrementals[snum][incpos - 1],'current'); + } +} + +function toggle() { + var slideColl = GetElementsWithClassName('*','slide'); + var slides = document.getElementById('slideProj'); + var outline = document.getElementById('outlineStyle'); + if (!slides.disabled) { + slides.disabled = true; + outline.disabled = false; + s5mode = false; + fontSize('1em'); + for (var n = 0; n < smax; n++) { + var slide = slideColl[n]; + slide.style.visibility = 'visible'; + } + } else { + slides.disabled = false; + outline.disabled = true; + s5mode = true; + fontScale(); + for (var n = 0; n < smax; n++) { + var slide = slideColl[n]; + slide.style.visibility = 'hidden'; + } + slideColl[snum].style.visibility = 'visible'; + } +} + +function showHide(action) { + var obj = GetElementsWithClassName('*','hideme')[0]; + switch (action) { + case 's': obj.style.visibility = 'visible'; break; + case 'h': obj.style.visibility = 'hidden'; break; + case 'k': + if (obj.style.visibility != 'visible') { + obj.style.visibility = 'visible'; + } else { + obj.style.visibility = 'hidden'; + } + break; + } +} + +// 'keys' code adapted from MozPoint (http://mozpoint.mozdev.org/) +function keys(key) { + if (!key) { + key = event; + key.which = key.keyCode; + } + if (key.which == 84) { + toggle(); + return; + } + if (s5mode) { + switch (key.which) { + case 10: // return + case 13: // enter + if (window.event && isParentOrSelf(window.event.srcElement, 'controls')) return; + if (key.target && isParentOrSelf(key.target, 'controls')) return; + if(number != undef) { + goTo(number); + break; + } + case 32: // spacebar + case 34: // page down + case 39: // rightkey + case 40: // downkey + if(number != undef) { + go(number); + } else if (!incrementals[snum] || incpos >= incrementals[snum].length) { + go(1); + } else { + subgo(1); + } + break; + case 33: // page up + case 37: // leftkey + case 38: // upkey + if(number != undef) { + go(-1 * number); + } else if (!incrementals[snum] || incpos <= 0) { + go(-1); + } else { + subgo(-1); + } + break; + case 36: // home + goTo(0); + break; + case 35: // end + goTo(smax-1); + break; + case 67: // c + showHide('k'); + break; + } + if (key.which < 48 || key.which > 57) { + number = undef; + } else { + if (window.event && isParentOrSelf(window.event.srcElement, 'controls')) return; + if (key.target && isParentOrSelf(key.target, 'controls')) return; + number = (((number != undef) ? number : 0) * 10) + (key.which - 48); + } + } + return false; +} + +function clicker(e) { + number = undef; + var target; + if (window.event) { + target = window.event.srcElement; + e = window.event; + } else target = e.target; + if (target.getAttribute('href') != null || hasValue(target.rel, 'external') || isParentOrSelf(target, 'controls') || isParentOrSelf(target,'embed') || isParentOrSelf(target,'object')) return true; + if (!e.which || e.which == 1) { + if (!incrementals[snum] || incpos >= incrementals[snum].length) { + go(1); + } else { + subgo(1); + } + } +} + +function findSlide(hash) { + var target = null; + var slides = GetElementsWithClassName('*','slide'); + for (var i = 0; i < slides.length; i++) { + var targetSlide = slides[i]; + if ( (targetSlide.name && targetSlide.name == hash) + || (targetSlide.id && targetSlide.id == hash) ) { + target = targetSlide; + break; + } + } + while(target != null && target.nodeName != 'BODY') { + if (hasClass(target, 'slide')) { + return parseInt(target.id.slice(5)); + } + target = target.parentNode; + } + return null; +} + +function slideJump() { + if (window.location.hash == null) return; + var sregex = /^#slide(\d+)$/; + var matches = sregex.exec(window.location.hash); + var dest = null; + if (matches != null) { + dest = parseInt(matches[1]); + } else { + dest = findSlide(window.location.hash.slice(1)); + } + if (dest != null) + go(dest - snum); +} + +function fixLinks() { + var thisUri = window.location.href; + thisUri = thisUri.slice(0, thisUri.length - window.location.hash.length); + var aelements = document.getElementsByTagName('A'); + for (var i = 0; i < aelements.length; i++) { + var a = aelements[i].href; + var slideID = a.match('\#slide[0-9]{1,2}'); + if ((slideID) && (slideID[0].slice(0,1) == '#')) { + var dest = findSlide(slideID[0].slice(1)); + if (dest != null) { + if (aelements[i].addEventListener) { + aelements[i].addEventListener("click", new Function("e", + "if (document.getElementById('slideProj').disabled) return;" + + "go("+dest+" - snum); " + + "if (e.preventDefault) e.preventDefault();"), true); + } else if (aelements[i].attachEvent) { + aelements[i].attachEvent("onclick", new Function("", + "if (document.getElementById('slideProj').disabled) return;" + + "go("+dest+" - snum); " + + "event.returnValue = false;")); + } + } + } + } +} + +function externalLinks() { + if (!document.getElementsByTagName) return; + var anchors = document.getElementsByTagName('a'); + for (var i=0; i<anchors.length; i++) { + var anchor = anchors[i]; + if (anchor.getAttribute('href') && hasValue(anchor.rel, 'external')) { + anchor.target = '_blank'; + addClass(anchor,'external'); + } + } +} + +function createControls() { + var controlsDiv = document.getElementById("controls"); + if (!controlsDiv) return; + var hider = ' onmouseover="showHide(\'s\');" onmouseout="showHide(\'h\');"'; + var hideDiv, hideList = ''; + if (controlVis == 'hidden') { + hideDiv = hider; + } else { + hideList = hider; + } + controlsDiv.innerHTML = '<form action="#" id="controlForm"' + hideDiv + '>' + + '<div id="navLinks">' + + '<a accesskey="t" id="toggle" href="javascript:toggle();">Ø<\/a>' + + '<a accesskey="z" id="prev" href="javascript:go(-1);">«<\/a>' + + '<a accesskey="x" id="next" href="javascript:go(1);">»<\/a>' + + '<div id="navList"' + hideList + '><select id="jumplist" onchange="go(\'j\');"><\/select><\/div>' + + '<\/div><\/form>'; + if (controlVis == 'hidden') { + var hidden = document.getElementById('navLinks'); + } else { + var hidden = document.getElementById('jumplist'); + } + addClass(hidden,'hideme'); +} + +function fontScale() { // causes layout problems in FireFox that get fixed if browser's Reload is used; same may be true of other Gecko-based browsers + if (!s5mode) return false; + var vScale = 22; // both yield 32 (after rounding) at 1024x768 + var hScale = 32; // perhaps should auto-calculate based on theme's declared value? + if (window.innerHeight) { + var vSize = window.innerHeight; + var hSize = window.innerWidth; + } else if (document.documentElement.clientHeight) { + var vSize = document.documentElement.clientHeight; + var hSize = document.documentElement.clientWidth; + } else if (document.body.clientHeight) { + var vSize = document.body.clientHeight; + var hSize = document.body.clientWidth; + } else { + var vSize = 700; // assuming 1024x768, minus chrome and such + var hSize = 1024; // these do not account for kiosk mode or Opera Show + } + var newSize = Math.min(Math.round(vSize/vScale),Math.round(hSize/hScale)); + fontSize(newSize + 'px'); + if (isGe) { // hack to counter incremental reflow bugs + var obj = document.getElementsByTagName('body')[0]; + obj.style.display = 'none'; + obj.style.display = 'block'; + } +} + +function fontSize(value) { + if (!(s5ss = document.getElementById('s5ss'))) { + if (!isIE) { + document.getElementsByTagName('head')[0].appendChild(s5ss = document.createElement('style')); + s5ss.setAttribute('media','screen, projection'); + s5ss.setAttribute('id','s5ss'); + } else { + document.createStyleSheet(); + document.s5ss = document.styleSheets[document.styleSheets.length - 1]; + } + } + if (!isIE) { + while (s5ss.lastChild) s5ss.removeChild(s5ss.lastChild); + s5ss.appendChild(document.createTextNode('body {font-size: ' + value + ' !important;}')); + } else { + document.s5ss.addRule('body','font-size: ' + value + ' !important;'); + } +} + +function notOperaFix() { + slideCSS = document.getElementById('slideProj').href; + var slides = document.getElementById('slideProj'); + var outline = document.getElementById('outlineStyle'); + slides.setAttribute('media','screen'); + outline.disabled = true; + if (isGe) { + slides.setAttribute('href','null'); // Gecko fix + slides.setAttribute('href',slideCSS); // Gecko fix + } + if (isIE && document.styleSheets && document.styleSheets[0]) { + document.styleSheets[0].addRule('img', 'behavior: url(ui/default/iepngfix.htc)'); + document.styleSheets[0].addRule('div', 'behavior: url(ui/default/iepngfix.htc)'); + document.styleSheets[0].addRule('.slide', 'behavior: url(ui/default/iepngfix.htc)'); + } +} + +function getIncrementals(obj) { + var incrementals = new Array(); + if (!obj) + return incrementals; + var children = obj.childNodes; + for (var i = 0; i < children.length; i++) { + var child = children[i]; + if (hasClass(child, 'incremental')) { + if (child.nodeName == 'OL' || child.nodeName == 'UL') { + removeClass(child, 'incremental'); + for (var j = 0; j < child.childNodes.length; j++) { + if (child.childNodes[j].nodeType == 1) { + addClass(child.childNodes[j], 'incremental'); + } + } + } else { + incrementals[incrementals.length] = child; + removeClass(child,'incremental'); + } + } + if (hasClass(child, 'show-first')) { + if (child.nodeName == 'OL' || child.nodeName == 'UL') { + removeClass(child, 'show-first'); + if (child.childNodes[isGe].nodeType == 1) { + removeClass(child.childNodes[isGe], 'incremental'); + } + } else { + incrementals[incrementals.length] = child; + } + } + incrementals = incrementals.concat(getIncrementals(child)); + } + return incrementals; +} + +function createIncrementals() { + var incrementals = new Array(); + for (var i = 0; i < smax; i++) { + incrementals[i] = getIncrementals(document.getElementById('slide'+i)); + } + return incrementals; +} + +function defaultCheck() { + var allMetas = document.getElementsByTagName('meta'); + for (var i = 0; i< allMetas.length; i++) { + if (allMetas[i].name == 'defaultView') { + defaultView = allMetas[i].content; + } + if (allMetas[i].name == 'controlVis') { + controlVis = allMetas[i].content; + } + } +} + +// Key trap fix, new function body for trap() +function trap(e) { + if (!e) { + e = event; + e.which = e.keyCode; + } + try { + modifierKey = e.ctrlKey || e.altKey || e.metaKey; + } + catch(e) { + modifierKey = false; + } + return modifierKey || e.which == 0; +} + +function startup() { + defaultCheck(); + if (!isOp) + createControls(); + slideLabel(); + fixLinks(); + externalLinks(); + fontScale(); + if (!isOp) { + notOperaFix(); + incrementals = createIncrementals(); + slideJump(); + if (defaultView == 'outline') { + toggle(); + } + document.onkeyup = keys; + document.onkeypress = trap; + document.onclick = clicker; + } +} + +window.onload = startup; +window.onresize = function(){setTimeout('fontScale()', 50);}
\ No newline at end of file diff --git a/src/wrappers/common.sh b/src/wrappers/common.sh new file mode 100644 index 000000000..9605f5940 --- /dev/null +++ b/src/wrappers/common.sh @@ -0,0 +1,43 @@ +THIS=${0##*/} + +NEWLINE=' +' + +err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; } +errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; } + +usage () { + err "$1 - $2" # short description + err "See the $1(1) man page for usage." +} + +# Portable which(1). +pathfind () { + oldifs="$IFS"; IFS=':' + for _p in $PATH; do + if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then + IFS="$oldifs" + return 0 + fi + done + IFS="$oldifs" + return 1 +} + +for p in pandoc $REQUIRED; do + pathfind $p || { + err "You need '$p' to use this program!" + exit 1 + } +done + +CONF=$(pandoc --dump-args "$@" 2>&1) || { + errcode=$? + echo "$CONF" | sed -e '/^pandoc \[OPTIONS\] \[FILES\]/,$d' >&2 + [ $errcode -eq 2 ] && usage "$THIS" "$SYNOPSIS" + exit $errcode +} + +OUTPUT=$(echo "$CONF" | sed -ne '1p') +ARGS=$(echo "$CONF" | sed -e '1d') + diff --git a/src/wrappers/hsmarkdown.in b/src/wrappers/hsmarkdown.in new file mode 100644 index 000000000..17f970234 --- /dev/null +++ b/src/wrappers/hsmarkdown.in @@ -0,0 +1,5 @@ +#!/bin/sh +# hsmarkdown - intended as a drop-in replacement for Markdown.pl. +# Uses pandoc to convert from markdown to HTML, using --strict mode +# for maximum compatibility with official markdown syntax. +exec pandoc --from markdown --to html --strict -- "$@" diff --git a/src/wrappers/html2markdown.in b/src/wrappers/html2markdown.in new file mode 100644 index 000000000..0f4297128 --- /dev/null +++ b/src/wrappers/html2markdown.in @@ -0,0 +1,162 @@ +#!/bin/sh -e +# converts HTML from a URL, file, or stdin to markdown +# uses an available program to fetch URL and tidy to normalize it first + +REQUIRED="tidy" +SYNOPSIS="converts HTML from a URL, file, or STDIN to markdown-formatted text." + +### common.sh + +grab_url_with () { + url="${1:?internal error: grab_url_with: url required}" + + shift + cmdline="$@" + + prog= + prog_opts= + if [ -n "$cmdline" ]; then + eval "set -- $cmdline" + prog=$1 + shift + prog_opts="$@" + fi + + if [ -z "$prog" ]; then + # Locate a sensible web grabber (note the order). + for p in wget lynx w3m curl links w3c; do + if pathfind $p; then + prog=$p + break + fi + done + + [ -n "$prog" ] || { + errn "$THIS: Couldn't find a program to fetch the file from URL " + err "(e.g. wget, w3m, lynx, w3c, or curl)." + return 1 + } + else + pathfind "$prog" || { + err "$THIS: No such web grabber '$prog' found; aborting." + return 1 + } + fi + + # Setup proper base options for known grabbers. + base_opts= + case "$prog" in + wget) base_opts="-O-" ;; + lynx) base_opts="-source" ;; + w3m) base_opts="-dump_source" ;; + curl) base_opts="" ;; + links) base_opts="-source" ;; + w3c) base_opts="-n -get" ;; + *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds." + esac + + err "$THIS: invoking '$prog $base_opts $prog_opts $url'..." + eval "set -- $base_opts $prog_opts" + $prog "$@" "$url" +} + +# Parse command-line arguments +parse_arguments () { + while [ $# -gt 0 ]; do + case "$1" in + --encoding=*) + wholeopt="$1" + # extract encoding from after = + encoding="${wholeopt#*=}" ;; + -e|--encoding|-encoding) + shift + encoding="$1" ;; + --grabber=*) + wholeopt="$1" + # extract encoding from after = + grabber="\"${wholeopt#*=}\"" ;; + -g|--grabber|-grabber) + shift + grabber="$1" ;; + *) + if [ -z "$argument" ]; then + argument="$1" + else + err "Warning: extra argument '$1' will be ignored." + fi ;; + esac + shift + done +} + +argument= +encoding= +grabber= + +oldifs="$IFS" +IFS=$NEWLINE +parse_arguments $ARGS +IFS="$oldifs" + +inurl= +if [ -n "$argument" ] && ! [ -f "$argument" ]; then + # Treat given argument as an URL. + inurl="$argument" +fi + +### tempdir.sh + +if [ -n "$inurl" ]; then + err "Attempting to fetch file from '$inurl'..." + + grabber_out=$THIS_TEMPDIR/grabber.out + grabber_log=$THIS_TEMPDIR/grabber.log + if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then + errn "grab_url_with failed" + if [ -f $grabber_log ]; then + err " with the following error log." + err + cat >&2 $grabber_log + else + err . + fi + exit 1 + fi + + argument="$grabber_out" +fi + +if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then + # Try to determine character encoding if not specified + # and input is not STDIN. + encoding=$( + head "$argument" | + LC_ALL=C tr 'A-Z' 'a-z' | + sed -ne '/<meta .*content-type.*charset=/ { + s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p + }' + ) +fi + +if [ -n "$encoding" ] && pathfind iconv; then + alias to_utf8='iconv -f "$encoding" -t utf-8' +else # assume UTF-8 + alias to_utf8='cat' +fi + +htmlinput=$THIS_TEMPDIR/htmlinput + +if [ -z "$argument" ]; then + to_utf8 > $htmlinput # read from STDIN +elif [ -f "$argument" ]; then + to_utf8 "$argument" > $htmlinput # read from file +else + err "File '$argument' not found." + exit 1 +fi + +if ! cat $htmlinput | pandoc --ignore-args -r html -w markdown "$@" ; then + err "Failed to parse HTML. Trying again with tidy..." + tidy -q -asxhtml -utf8 $htmlinput | \ + pandoc --ignore-args -r html -w markdown "$@" +fi diff --git a/src/wrappers/markdown2pdf.in b/src/wrappers/markdown2pdf.in new file mode 100644 index 000000000..37be69469 --- /dev/null +++ b/src/wrappers/markdown2pdf.in @@ -0,0 +1,81 @@ +#!/bin/sh -e + +REQUIRED="pdflatex" +SYNOPSIS="converts markdown-formatted text to PDF, using pdflatex." + +### common.sh + +### tempdir.sh + +texname=output +logfile=$THIS_TEMPDIR/log + +pandoc -s -r markdown -w latex "$@" -o $THIS_TEMPDIR/$texname.tex + +if [ "$OUTPUT" = "-" ]; then + firstinfile="$(echo $ARGS | sed -ne '1p')" + firstinfilebase="${firstinfile%.*}" + destname="${firstinfilebase:-stdin}.pdf" +else + destname="$OUTPUT" +fi + +( + origdir=$(pwd) + cd $THIS_TEMPDIR + TEXINPUTS=$origdir:$TEXINPUTS: + export TEXINPUTS + finished=no + runs=0 + while [ $finished = "no" ]; do + pdflatex -interaction=batchmode $texname.tex >/dev/null || { + errcode=$? + err "${THIS}: pdfLaTeX failed with error code $errcode" + [ -f $texname.log ] && { + err "${THIS}: error context:" + sed -ne '/^!/,/^[[:space:]]*$/p' \ + -ne '/^[Ll]a[Tt]e[Xx] [Ww]arning/,/^[[:space:]]*$/p' \ + -ne '/^[Ee]rror/,/^[[:space:]]*$/p' $texname.log >&2 + if grep -q "File \`ucs.sty' not found" $texname.log; then + err "${THIS}: Please install the 'unicode' package from CTAN:" + err " http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/" + fi + if grep -q "File \`ulem.sty' not found" $texname.log; then + err "${THIS}: Please install the 'ulem' package from CTAN:" + err " http://www.ctan.org/tex-archive/macros/latex/contrib/misc/ulem.sty" + fi + } + exit $errcode + } + if [ $runs -lt 3 ] && + ((grep -q "LaTeX Warning: There were undefined references." $texname.log) || + (echo "$@" | grep -q -- "--toc\|--table-of-contents")); then + runs=$(($runs + 1)) + if grep -q "LaTeX Warning:.*[Cc]itation" $texname.log; then + bibtex $texname 2>&1 >bibtex.err + if [ $runs -gt 2 ]; then + if grep -q "error message" bibtex.err || + grep -q "Warning" bibtex.err; then + cat bibtex.err >&2 + fi + fi + fi + else + finished=yes + fi + done +) || exit $? + +is_target_exists= +if [ -f "$destname" ]; then + is_target_exists=1 + mv "$destname" "$destname~" +fi + +mv -f $THIS_TEMPDIR/$texname.pdf "$destname" + +errn "Created $destname" +[ -z "$is_target_exists" ] || { + errn " (previous file has been backed up as $destname~)" +} +err . diff --git a/src/wrappers/tempdir.sh b/src/wrappers/tempdir.sh new file mode 100644 index 000000000..f25ae7f51 --- /dev/null +++ b/src/wrappers/tempdir.sh @@ -0,0 +1,18 @@ +# As a security measure refuse to proceed if mktemp is not available. +pathfind mktemp || { err "Couldn't find 'mktemp'; aborting."; exit 1; } + +# Avoid issues with /tmp directory on Windows/Cygwin +cygwin= +cygwin=$(uname | sed -ne '/^CYGWIN/p') +if [ -n "$cygwin" ]; then + TMPDIR=. + export TMPDIR +fi + +THIS_TEMPDIR= +THIS_TEMPDIR="$(mktemp -d -t $THIS.XXXXXXXX)" || exit 1 +readonly THIS_TEMPDIR + +trap 'exitcode=$? + [ -z "$THIS_TEMPDIR" ] || rm -rf "$THIS_TEMPDIR" + exit $exitcode' 0 1 2 3 13 15 |