#!/bin/sh
# Converts HTML to markdown.
#
# Usage: pass at most one argument.
#   (no argument)  read HTML from standard input
#   FILE           read HTML from an existing regular file
#   URL            anything else is treated as a URL and fetched with the
#                  first available of: wget, lynx, w3m, curl, links, w3c
#
# The HTML is normalized with tidy, converted with pandoc and finally piped
# through "iconv -f utf-8".
#
# Environment:
#   PANDOC_OPTS    extra options handed to pandoc (split on whitespace)
#
# Exit status: 1 if pandoc, tidy, or (for URLs) every fetch program is missing.

# Enabled here rather than on the shebang line so that it still applies when
# the script is started as "sh script".
set -e

# Succeeds when the program named by $1 can be found by the shell.
have() {
  command -v "$1" >/dev/null 2>&1
}

# Reads normalized HTML on stdin and writes markdown on stdout.
to_markdown() {
  # PANDOC_OPTS is intentionally left unquoted: it may carry several
  # whitespace-separated options.
  # shellcheck disable=SC2086
  pandoc ${PANDOC_OPTS-} -r html -w markdown -s |
    iconv -f utf-8
}

for p in pandoc tidy; do
  if ! have "$p"; then
    printf "You need '%s' to use this program!\n" "$p" >&2
    exit 1
  fi
done

src=${1-}

if [ -z "$src" ]; then
  # No argument: tidy reads the document from standard input.
  tidy -utf8 2>/dev/null | to_markdown
elif [ -f "$src" ]; then
  tidy -utf8 "$src" 2>/dev/null | to_markdown
else
  # Treat the given argument as a URL. Locate a sensible text based
  # browser (note the order: the first one found wins).
  dumper=
  for p in wget lynx w3m curl links w3c; do
    if have "$p"; then
      dumper=$p
      break
    fi
  done

  # Build the fetch command as the positional parameters so that the URL
  # stays a single word and multi-word options need no string splitting.
  case "$dumper" in
    wget)  set -- wget -O- "$src" ;;
    lynx)  set -- lynx -source "$src" ;;
    w3m)   set -- w3m -dump_source "$src" ;;
    curl)  set -- curl "$src" ;;
    links) set -- links -source "$src" ;;
    w3c)   set -- w3c -n -get "$src" ;;
    *)
      printf 'Needs a program to fetch the URL ' >&2
      printf '(e.g. wget, w3m, lynx, w3c, or curl).\n' >&2
      exit 1
      ;;
  esac

  # Fetch and feed to pandoc. Diagnostics from the fetcher and from tidy
  # are discarded, as tidy reports warnings for most real-world HTML.
  "$@" 2>/dev/null |
    tidy -utf8 2>/dev/null |
    to_markdown
fi