diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-09-15 21:30:31 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-09-15 21:30:31 +0000 |
commit | 9ed11f4500a04f42d46be8b654f8782d1ef268ec (patch) | |
tree | 653715796a5b05c284d3be5570453454217ee3c1 /web | |
parent | bf100f82769b194336ef4b92f5a9803f262f0d8b (diff) | |
download | pandoc-9ed11f4500a04f42d46be8b654f8782d1ef268ec.tar.gz |
Fixed URL regex in html2x.pl, and added a command to the pipe
to truncate input pages to 100K.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1019 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'web')
-rwxr-xr-x | web/html2x.pl | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/web/html2x.pl b/web/html2x.pl
index a034f0e58..43218682d 100755
--- a/web/html2x.pl
+++ b/web/html2x.pl
@@ -19,14 +19,16 @@ if ($format =~ /^markdown\+$/) {
 }
 
 # Validate URL and format
-unless ($url =~ /^(https?:\/\/)?[\w#?_-]+(\.[\w#?_-]+)+[\w\/#?_.-]*$/) {
+unless ($url =~ /^(https?:\/\/)?[\w#_-]+(\.[\w#_-]+)+[\w\/#=?_.-]*$/) {
   die "Illegal URL: $url\n" ;
 }
 unless ($format =~ /^markdown\+?|rst|latex|context|rtf|man|docbook$/) {
   die "Illegal format: $format\n";
 }
 
-my $output = `wget -O- $url | tidy -asxhtml -utf8 | pandoc -w $format $options`;
+# Note - pass through head to truncate file to 100K if greater.
+# This should prevent certain kinds of DoS attacks.
+my $output = `wget -O- $url | head -c100000 | tidy -asxhtml -utf8 | pandoc -w $format $options`;
 if ($output =~ /^\s*$/) {
   print start_html,
         h1("No output"),