diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-09-15 03:15:27 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-09-15 03:15:27 +0000 |
commit | bf100f82769b194336ef4b92f5a9803f262f0d8b (patch) | |
tree | 2faacef34283b131a4a46411e74e1c3b3c0317a2 /web | |
parent | b5819b8ed6b107a089f55e636c78cac7166ea36d (diff) | |
download | pandoc-bf100f82769b194336ef4b92f5a9803f262f0d8b.tar.gz |
Added security measures to html2x.pl.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1018 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'web')
-rwxr-xr-x | web/html2x.pl | 52 |
1 files changed, 28 insertions, 24 deletions
```diff
diff --git a/web/html2x.pl b/web/html2x.pl
index 98c23dccc..a034f0e58 100755
--- a/web/html2x.pl
+++ b/web/html2x.pl
@@ -1,34 +1,38 @@
 #!/usr/bin/env perl
+use strict;
 use CGI qw/:standard/;
 use CGI::Carp 'fatalsToBrowser';
 $CGI::POST_MAX=1024 * 100; # max 100K posts
 $CGI::DISABLE_UPLOADS = 1; # no uploads
-if (param('url') && param('format')) {
-    $options = '--standalone --reference-links';
-    $url = param('url');
-    $format = param('format') || 'markdown';
-    if ($format =~ '^markdown$') {
-        $options .= ' --strict';
-    }
-    if ($format =~ '^markdown\+$') {
-        $format = 'markdown';
-    }
-    $output = `wget -O- $url | tidy -asxhtml -utf8 | pandoc -r html -w $format $options`;
-    if ($format =~ "rtf") {
-        $type = "application/rtf"
-    } else {
-        $type = "text/plain"
-    };
-    print header(-charset=>"utf8",-type=>"$type"),
-          $output;
-} else {
-    print start_html(-title=>"html2x"),
-          h1("Usage"),
-          p("You have tried to call html2x.pl without the proper parameters."),
-          p("Please use <a href=\"/pandoc/html2x.html\">this form</a>."),
-          end_html();
+param('url') && param('format') or die "Missing url and/or format parameters.\n";
+
+my $options = '-r html --standalone --reference-links';
+my $url = param('url');
+my $format = param('format') || 'markdown';
+if ($format =~ /^markdown$/) {
+    $options .= ' --strict';
+}
+if ($format =~ /^markdown\+$/) {
+    $format = 'markdown';
 }
+# Validate URL and format
+unless ($url =~ /^(https?:\/\/)?[\w#?_-]+(\.[\w#?_-]+)+[\w\/#?_.-]*$/) {
+    die "Illegal URL: $url\n" ;
+}
+unless ($format =~ /^markdown\+?|rst|latex|context|rtf|man|docbook$/) {
+    die "Illegal format: $format\n";
+}
+my $output = `wget -O- $url | tidy -asxhtml -utf8 | pandoc -w $format $options`;
+if ($output =~ /^\s*$/) {
+    print start_html,
+          h1("No output"),
+          p("Either $url could not be retrieved, or its HTML was too malformed to parse."),
+          end_html;
+    exit 0;
+}
+print header(-charset=>"utf8",-type=>"text/plain"),
+      $output;
```