about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/wrappers/html2markdown.in 25
1 files changed, 15 insertions, 10 deletions
diff --git a/src/wrappers/html2markdown.in b/src/wrappers/html2markdown.in
index ad026c24e..0f4297128 100644
--- a/src/wrappers/html2markdown.in
+++ b/src/wrappers/html2markdown.in
@@ -104,11 +104,11 @@ if [ -n "$argument" ] && ! [ -f "$argument" ]; then
inurl="$argument"
fi
+### tempdir.sh
+
if [ -n "$inurl" ]; then
err "Attempting to fetch file from '$inurl'..."
- ### tempdir.sh
-
grabber_out=$THIS_TEMPDIR/grabber.out
grabber_log=$THIS_TEMPDIR/grabber.log
if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then
@@ -144,14 +144,19 @@ else # assume UTF-8
alias to_utf8='cat'
fi
+htmlinput=$THIS_TEMPDIR/htmlinput
+
if [ -z "$argument" ]; then
- tidy -asxhtml -utf8 2>/dev/null | pandoc --ignore-args -r html -w markdown "$@"
+ to_utf8 > $htmlinput # read from STDIN
+elif [ -f "$argument" ]; then
+ to_utf8 "$argument" > $htmlinput # read from file
else
- if [ -f "$argument" ]; then
- to_utf8 "$argument" |
- tidy -asxhtml -utf8 2>/dev/null | pandoc --ignore-args -r html -w markdown "$@"
- else
- err "File '$argument' not found."
- exit 1
- fi
+ err "File '$argument' not found."
+ exit 1
+fi
+
+if ! cat $htmlinput | pandoc --ignore-args -r html -w markdown "$@" ; then
+ err "Failed to parse HTML. Trying again with tidy..."
+ tidy -q -asxhtml -utf8 $htmlinput | \
+ pandoc --ignore-args -r html -w markdown "$@"
fi