aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--web/Makefile4
-rw-r--r--web/demos3
-rwxr-xr-xweb/html2x.pl34
-rw-r--r--web/html2x.txt50
4 files changed, 89 insertions, 2 deletions
diff --git a/web/Makefile b/web/Makefile
index 8d38dcc5d..314573614 100644
--- a/web/Makefile
+++ b/web/Makefile
@@ -1,4 +1,4 @@
-ALL := index.html README.html INSTALL.html examples.html pandoc1.html markdown2pdf1.html html2markdown1.html hsmarkdown1.html
+ALL := index.html README.html INSTALL.html examples.html pandoc1.html markdown2pdf1.html html2markdown1.html hsmarkdown1.html html2x.html
PANDOC_PATH ?= $(dir $(shell which pandoc))
MAKEPAGE = $(PANDOC_PATH)/pandoc -s -S -c pandoc.css -A footer.html
all : $(ALL)
@@ -34,4 +34,4 @@ INSTALL.html : INSTALL
$(MAKEPAGE) $< > $@
upload :
- sitecopy --update macfarlane
+	$(MAKE) -C .. upload
diff --git a/web/demos b/web/demos
index 0b64ce942..840d55c8a 100644
--- a/web/demos
+++ b/web/demos
@@ -72,3 +72,6 @@ click on the name of the output file:
15. A simple wiki program using [HAppS](http://happs.org) and pandoc:
[pandocwiki](http://pandocwiki.googlecode.com/svn/trunk/)
+16. A web application that converts web pages to any of eight different
+ formats: [html2x](html2x.html).
+
diff --git a/web/html2x.pl b/web/html2x.pl
new file mode 100755
index 000000000..98c23dccc
--- /dev/null
+++ b/web/html2x.pl
@@ -0,0 +1,34 @@
+#!/usr/bin/env perl
+# CGI front end: fetch the page at 'url' and convert it to 'format' with pandoc.
+use CGI qw/:standard/;
+use CGI::Carp 'fatalsToBrowser';
+
+$CGI::POST_MAX=1024 * 100;  # max 100K posts
+$CGI::DISABLE_UPLOADS = 1;  # no uploads
+
+# Both parameters end up in a shell pipeline, so only known-safe values pass:
+# the format must be one the form offers; the URL may not contain whitespace
+# or a single quote (it is single-quoted below) and may not start with '-'.
+my %known = map { $_ => 1 } qw/markdown markdown+ rst docbook latex context rtf man/;
+my $url = param('url');
+my $format = param('format');
+$format =~ tr/ /+/ if defined $format;  # an unescaped '+' in a query decodes to ' '
+if (defined $url && defined $format && $known{$format} && $url =~ /\A[^\s'-][^\s']*\z/) {
+  my $options = '--standalone --reference-links';
+  $options .= ' --strict' if $format eq 'markdown';  # plain markdown only
+  $format = 'markdown' if $format eq 'markdown+';     # pandoc's extended markdown
+  my $output = `wget -O- -- '$url' | tidy -asxhtml -utf8 | pandoc -r html -w $format $options`;
+  my $type = $format eq 'rtf' ? 'application/rtf' : 'text/plain';
+  print header(-charset=>"utf-8",-type=>$type),
+        $output;
+} else {
+  # Missing or rejected parameters: send a proper header, then the usage page.
+  print header(-charset=>"utf-8"),
+        start_html(-title=>"html2x"),
+        h1("Usage"),
+        p("You have tried to call html2x.pl without the proper parameters."),
+        p("Please use <a href=\"/pandoc/html2x.html\">this form</a>."),
+        end_html();
+}
+
+
diff --git a/web/html2x.txt b/web/html2x.txt
new file mode 100644
index 000000000..d803cc337
--- /dev/null
+++ b/web/html2x.txt
@@ -0,0 +1,50 @@
+% html2x
+
+This form uses [pandoc] to convert a web page to markdown,
+reStructuredText, DocBook XML, LaTeX, ConTeXt, RTF, or a Unix man page.
+
+<form action="/pandoc/html2x.pl" method="get">
+<p>
+<label for="url">URL:</label>
+<input type="text" size="60" name="url" id="url" />
+</p>
+<p>
+<label for="format">Format:</label><br/>
+<input type="radio" name="format" value="markdown" checked="checked" />Markdown<br/>
+<input type="radio" name="format" value="markdown+" />Markdown with
+<a href="/pandoc/README.html#pandocs-markdown-vs-standard-markdown">pandoc extensions</a><br/>
+<input type="radio" name="format" value="rst" />reStructuredText<br/>
+<input type="radio" name="format" value="docbook" />DocBook XML<br/>
+<input type="radio" name="format" value="latex" />LaTeX<br/>
+<input type="radio" name="format" value="context" />ConTeXt<br/>
+<input type="radio" name="format" value="rtf" />Rich Text Format (RTF)<br/>
+<input type="radio" name="format" value="man" />Groff man page
+</p>
+<input type="submit" value="Convert" /><br/>
+</form>
+
+JavaScript bookmarklets (right-click and add bookmark):
+
+- [2markdown]
+- [2markdown+]
+- [2rst]
+- [2docbook]
+- [2LaTeX]
+- [2ConTeXt]
+- [2RTF]
+- [2man]
+
+(Inspired by [the Asciinator].)
+
+[2markdown]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=markdown&url='+encodeURIComponent(document.location.href);
+[2markdown+]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=markdown%2B&url='+encodeURIComponent(document.location.href);
+[2rst]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=rst&url='+encodeURIComponent(document.location.href);
+[2docbook]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=docbook&url='+encodeURIComponent(document.location.href);
+[2LaTeX]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=latex&url='+encodeURIComponent(document.location.href);
+[2ConTeXt]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=context&url='+encodeURIComponent(document.location.href);
+[2RTF]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=rtf&url='+encodeURIComponent(document.location.href);
+[2man]: javascript:location.href='http://johnmacfarlane.net/pandoc/html2x.pl?format=man&url='+encodeURIComponent(document.location.href);
+
+[pandoc]: /pandoc/
+[the Asciinator]: http://www.aaronsw.com/2002/html2text/
+