diff options
-rw-r--r-- | web/Makefile | 4 | ||||
-rw-r--r-- | web/demos | 3 | ||||
-rwxr-xr-x | web/html2x.pl | 102 | ||||
-rw-r--r-- | web/html2x.txt | 50 |
4 files changed, 157 insertions, 2 deletions
diff --git a/web/Makefile b/web/Makefile
index 8d38dcc5d..314573614 100644
--- a/web/Makefile
+++ b/web/Makefile
@@ -1,4 +1,4 @@
-ALL := index.html README.html INSTALL.html examples.html pandoc1.html markdown2pdf1.html html2markdown1.html hsmarkdown1.html
+ALL := index.html README.html INSTALL.html examples.html pandoc1.html markdown2pdf1.html html2markdown1.html hsmarkdown1.html html2x.html
 PANDOC_PATH ?= $(dir $(shell which pandoc))
 MAKEPAGE = $(PANDOC_PATH)/pandoc -s -S -c pandoc.css -A footer.html
 all : $(ALL)
@@ -34,4 +34,4 @@ INSTALL.html : INSTALL
 	$(MAKEPAGE) $< > $@
 
 upload :
-	sitecopy --update macfarlane
+	make -C .. upload
diff --git a/web/demos b/web/demos
--- a/web/demos
+++ b/web/demos
@@ -72,3 +72,6 @@ click on the name of the output file:
 15. A simple wiki program using [HAppS](http://happs.org) and pandoc:
     [pandocwiki](http://pandocwiki.googlecode.com/svn/trunk/)
 
+16. A web application that converts web pages to any of eight different
+    formats: [html2x](html2x.html).
+
diff --git a/web/html2x.pl b/web/html2x.pl
new file mode 100755
index 000000000..98c23dccc
--- /dev/null
+++ b/web/html2x.pl
@@ -0,0 +1,102 @@
+#!/usr/bin/env perl
+# html2x.pl -- CGI script that downloads a web page and converts it with pandoc.
+#
+# Query parameters:
+#   url    - address of the page to convert (http, https or ftp; "http://" is
+#            assumed when no scheme is given)
+#   format - pandoc writer name; "markdown" means strict markdown and
+#            "markdown+" means markdown with pandoc's extensions
+use strict;
+use warnings;
+use CGI qw/:standard/;
+use CGI::Carp 'fatalsToBrowser';
+use File::Temp ();
+
+$CGI::POST_MAX        = 1024 * 100;    # max 100K posts
+$CGI::DISABLE_UPLOADS = 1;             # no uploads
+
+# Run a program without a shell and return everything it wrote to stdout.
+# The list form of a pipe open hands each argument to the program verbatim,
+# so request data is never parsed as shell syntax.
+sub run_command {
+    my @command = @_;
+    open my $pipe, '-|', @command or die "cannot run $command[0]: $!";
+    my $captured = do { local $/; <$pipe> };
+    close $pipe;    # exit status ignored, as the old shell pipeline did
+    return defined $captured ? $captured : '';
+}
+
+# Write $content to a temporary file and return the File::Temp object.
+# The file is deleted when the object goes out of scope.
+sub spool {
+    my ($content) = @_;
+    my $tmp = File::Temp->new();
+    print {$tmp} $content or die "cannot write temporary file: $!";
+    close $tmp or die "cannot write temporary file: $!";
+    return $tmp;
+}
+
+# Return a complete HTTP response (header plus HTML page) describing an error.
+sub error_page {
+    my ($status, $heading, @paragraphs) = @_;
+    return join '',
+        header(-status => $status),
+        start_html(-title => 'html2x'),
+        h1($heading),
+        (map { p($_) } @paragraphs),
+        end_html();
+}
+
+my $url    = param('url');
+my $format = param('format');
+
+if (!$url || !$format) {
+    print error_page(
+        '400 Bad Request',
+        'Usage',
+        'You have tried to call html2x.pl without the proper parameters.',
+        'Please use <a href="/pandoc/html2x.html">this form</a>.',
+    );
+    exit;
+}
+
+# A literal "+" in a query string is decoded as a space, so a hand-written
+# link with format=markdown+ arrives as "markdown ".
+$format = 'markdown+' if $format eq 'markdown ';
+
+# wget assumes http when the scheme is missing; make that explicit so the
+# check below sees the address that will really be fetched.
+$url = "http://$url" unless $url =~ m{\A [a-z][a-z0-9+.-]* :// }xi;
+
+my $url_ok    = $url =~ m{\A (?: https? | ftp ) :// [^\s"'<>\\]+ \z}xi;
+my $format_ok = $format eq 'markdown+' || $format =~ m{\A [a-z][a-z0-9_-]* \z}xi;
+if (!$url_ok || !$format_ok) {
+    print error_page(
+        '400 Bad Request',
+        'Invalid request',
+        'The url or format parameter was not recognised.',
+        'Please use <a href="/pandoc/html2x.html">this form</a>.',
+    );
+    exit;
+}
+
+my @options = ('--standalone', '--reference-links');
+push @options, '--strict' if $format eq 'markdown';
+my $writer = $format eq 'markdown+' ? 'markdown' : $format;
+
+# Same stages as "wget | tidy | pandoc", but each program is started directly
+# and intermediate results are passed through temporary files.
+my $page = run_command('wget', '-O-', '--', $url);
+if (!length $page) {
+    print error_page('502 Bad Gateway', 'Download failed',
+        'The requested page could not be retrieved.');
+    exit;
+}
+my $page_file  = spool($page);
+my $xhtml      = run_command('tidy', '-asxhtml', '-utf8', $page_file->filename);
+my $xhtml_file = spool($xhtml);
+my $output     = run_command('pandoc', '-r', 'html', '-w', $writer, @options,
+                             $xhtml_file->filename);
+
+my $type = $format eq 'rtf' ? 'application/rtf' : 'text/plain';
+print header(-type => $type, -charset => 'utf-8'), $output;
diff --git
a/web/html2x.txt b/web/html2x.txt new file mode 100644 index 000000000..d803cc337 --- /dev/null +++ b/web/html2x.txt @@ -0,0 +1,50 @@ +% html2x + +This form uses [pandoc] to convert a web page to markdown, +reStructuredText, DocBook XML, LaTeX, ConTeXt, RTF, or a Unix man page. + +<form action="/pandoc/html2x.pl" method="get"> +<p> +<label for="url">URL:</label> +<input type="text" size="60" name="url" /> +</p> +<p> +<label for="format">Format:</label><br/> +<input type="radio" name="format" value="markdown" checked="checked" />Markdown<br/> +<input type="radio" name="format" value="markdown+" />Markdown with +<a href="/pandoc/README.html#pandocs-markdown-vs-standard-markdown">pandoc extensions</a><br/> +<input type="radio" name="format" value="rst" />reStructuredText<br/> +<input type="radio" name="format" value="docbook" />DocBook XML<br/> +<input type="radio" name="format" value="latex" />LaTeX<br/> +<input type="radio" name="format" value="context" />ConTeXt<br/> +<input type="radio" name="format" value="rtf" />Rich Text Format (RTF)<br/> +<input type="radio" name="format" value="man" />Groff man page +</p> +<input type="submit" value="Convert" /><br/> +</form> + +JavaScript bookmarklets (right-click and add bookmark): + +- [2markdown] +- [2markdown+] +- [2rst] +- [2docbook] +- [2LaTeX] +- [2ConTeXt] +- [2RTF] +- [2man] + +(Inspired by [the Asciinator].) 
+ +[2markdown]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=markdown&url='+document.location.href; +[2markdown+]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=markdown+&url='+document.location.href; +[2rst]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=rst&url='+document.location.href; +[2docbook]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=docbook&url='+document.location.href; +[2LaTeX]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=latex&url='+document.location.href; +[2ConTeXt]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=context&url='+document.location.href; +[2RTF]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=rtf&url='+document.location.href; +[2man]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=man&url='+document.location.href; + +[pandoc]: /pandoc/ +[the Asciinator]: http://www.aaronsw.com/2002/html2text/ + |