#!/bin/sh
# html2markdown - convert HTML to markdown.
#
# Usage: html2markdown [FILE|URL]
#   (no argument)  HTML is read from standard input.
#   FILE           An existing regular file is read directly.
#   anything else  The argument is treated as a URL and fetched with the
#                  first available program among:
#                  wget, lynx, w3m, curl, links, w3c (in that order).
#
# The HTML is first normalized by 'tidy' and then converted by 'pandoc'.
# Exits 1 if 'pandoc', 'tidy', or (for URLs) every fetch program is missing.
#
# Provenance: script body recovered from the patch "initial import",
# commit df7b68225101966051f8b592a27127bf789eb81e by fiddlosopher,
# Tue, 17 Oct 2006 14:22:29 +0000
# (git-svn-id: https://pandoc.googlecode.com/svn/trunk@2
#  788f1e2b-df1e-0410-8736-df70ead52e1b).

# Set here rather than on the shebang line so that it still applies when the
# script is started as 'sh html2markdown'.
set -e

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Read HTML on stdin, write markdown on stdout.
# tidy's diagnostics are discarded on purpose: it reports warnings for
# almost any real-world page. The pipeline's status is pandoc's status.
html_to_markdown() {
  tidy -utf8 2>/dev/null | pandoc -r html -w markdown -s
}

# 'command -v' is the POSIX way to probe for a program; unlike 'which' it
# prints nothing on failure and has a well-defined exit status.
command -v pandoc >/dev/null 2>&1 ||
  die "You need 'pandoc' to use this program!"
command -v tidy >/dev/null 2>&1 ||
  die "You need 'tidy' to use this program!"

if [ -z "$1" ]; then
  # No argument: act as a filter on standard input.
  html_to_markdown
elif [ -f "$1" ]; then
  # Feed the file through stdin so that names containing spaces or starting
  # with '-' cannot be misread as tidy options.
  html_to_markdown <"$1"
else
  # Treat the argument as a URL. Locate a sensible program able to dump
  # the page source (note the order of preference).
  fetcher=""
  for candidate in wget lynx w3m curl links w3c; do
    if command -v "$candidate" >/dev/null 2>&1; then
      fetcher=$candidate
      break
    fi
  done

  # Options that make each program write the raw page source to stdout.
  case "$fetcher" in
    wget)  fetch_opts="-O-" ;;
    lynx)  fetch_opts="-source" ;;
    w3m)   fetch_opts="-dump_source" ;;
    curl)  fetch_opts="-L" ;;  # follow redirects, as wget does by default
    links) fetch_opts="-source" ;;
    w3c)   fetch_opts="-n -get" ;;
    "")
      die "Needs a program to fetch the URL (e.g. wget, lynx, w3m, curl or links)."
      ;;
  esac

  # Fetch and feed to pandoc. The URL is quoted so that characters such as
  # '?' and '*' are not glob-expanded; $fetch_opts is deliberately left
  # unquoted because it may hold several options (or none).
  # shellcheck disable=SC2086
  "$fetcher" $fetch_opts "$1" 2>/dev/null | html_to_markdown
fi