diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2006-12-29 18:50:13 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2006-12-29 18:50:13 +0000 |
commit | 3491420b53b03dbc24b6001e4f379fd2fbdbea8d (patch) | |
tree | be9d84d8c60ece73f30189472d3b8a3925565c28 /src/wrappers/web2markdown.in | |
parent | eea359203ac7f861ac9536b39e639f6c65579501 (diff) | |
download | pandoc-3491420b53b03dbc24b6001e4f379fd2fbdbea8d.tar.gz |
+ Changed 'web2markdown' to 'html2markdown'.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@309 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/wrappers/web2markdown.in')
-rw-r--r-- | src/wrappers/web2markdown.in | 176 |
1 files changed, 0 insertions, 176 deletions
diff --git a/src/wrappers/web2markdown.in b/src/wrappers/web2markdown.in
deleted file mode 100644
index 89e884c3d..000000000
--- a/src/wrappers/web2markdown.in
+++ /dev/null
@@ -1,176 +0,0 @@
-#!/bin/sh -e
-# converts HTML from a URL, file, or stdin to markdown
-# uses an available program to fetch URL and tidy to normalize it first
-
-REQUIRED="tidy"
-
-### common.sh
-
-grab_url_with () {
-    url="${1:?internal error: grab_url_with: url required}"
-
-    shift
-    cmdline="$@"
-
-    prog=
-    prog_opts=
-    if [ -n "$cmdline" ]; then
-        eval "set -- $cmdline"
-        prog=$1
-        shift
-        prog_opts="$@"
-    fi
-
-    if [ -z "$prog" ]; then
-        # Locate a sensible web grabber (note the order).
-        for p in wget lynx w3m curl links w3c; do
-            if pathfind $p; then
-                prog=$p
-                break
-            fi
-        done
-
-        [ -n "$prog" ] || {
-            errn "$THIS: Couldn't find a program to fetch the file from URL "
-            err "(e.g. wget, w3m, lynx, w3c, or curl)."
-            return 1
-        }
-    else
-        pathfind "$prog" || {
-            err "$THIS: No such web grabber '$prog' found; aborting."
-            return 1
-        }
-    fi
-
-    # Setup proper base options for known grabbers.
-    base_opts=
-    case "$prog" in
-        wget) base_opts="-O-" ;;
-        lynx) base_opts="-source" ;;
-        w3m) base_opts="-dump_source" ;;
-        curl) base_opts="" ;;
-        links) base_opts="-source" ;;
-        w3c) base_opts="-n -get" ;;
-        *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds."
-    esac
-
-    err "$THIS: invoking '$prog $base_opts $prog_opts $url'..."
-    eval "set -- $base_opts $prog_opts"
-    $prog "$@" "$url"
-}
-
-add_option () {
-    options="$options$NEWLINE$1"
-}
-
-options=
-argument=
-encoding=
-grabber=
-
-# Parse command-line arguments
-while [ $# -gt 0 ]; do
-    case "$1" in
-        -h|--help)
-            pandoc -h 2>&1 | sed -e 's/pandoc/web2markdown/' \
-                -e '/^[[:space:]]*\(-f\|-t\|-S\|-N\|-m\|-i\|-c\|-T\|-D\|-d\)/,/./d'\
-                1>&2
-            err " -e ENCODING, --encoding=ENCODING"
-            err " Specify character encoding of input"
-            err " -g COMMAND, --grabber=COMMAND"
-            err " Specify command to be used to grab contents of URL"
-            exit 0 ;;
-        -v|--version)
-            pandoc -v 2>&1 | sed -e 's/pandoc/web2markdown/' 1>&2
-            exit 0 ;;
-        -e)
-            shift
-            encoding=$1 ;;
-        --encoding=*)
-            wholeopt=$1
-            # extract encoding from after =
-            encoding=${wholeopt#*=} ;;
-        -g)
-            shift
-            grabber=$1 ;;
-        --grabber=*)
-            wholeopt=$1
-            # extract encoding from after =
-            grabber=${wholeopt#*=} ;;
-        -o|--output|-b|--tab-stop|-H|--include-in-header| \
-        -A|--include-after-body|-C|-B|--include-before-body| \
-        -C|--custom-header|-T|--title-prefix)
-            add_option $1
-            shift
-            add_option $1 ;;
-        -*) add_option $1 ;;
-        *)
-            if [ -z "$argument" ]; then
-                argument=$1
-            else
-                err "Warning: extra argument '$1' will be ignored."
-            fi ;;
-    esac
-    shift
-done
-
-# Unpack options. Now "$@" will hold the pandoc options.
-oldifs="$IFS"; IFS="$NEWLINE"; set -- $options; IFS="$oldifs"
-
-inurl=
-if [ -n "$argument" ] && ! [ -f "$argument" ]; then
-    # Treat given argument as an URL.
-    inurl="$argument"
-fi
-
-if [ -n "$inurl" ]; then
-    err "Attempting to fetch file from '$inurl'..."
-
-    ### tempdir.sh
-
-    grabber_out=$THIS_TEMPDIR/grabber.out
-    grabber_log=$THIS_TEMPDIR/grabber.log
-    if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then
-        errn "grab_url_with failed"
-        if [ -f $grabber_log ]; then
-            err " with the following error log."
-            err
-            cat >&2 $grabber_log
-        else
-            err .
-        fi
-        exit 1
-    fi
-
-    argument="$grabber_out"
-fi
-
-if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then
-    # Try to determine character encoding if not specified
-    # and input is not STDIN.
-    encoding=$(
-        head "$argument" |
-        LC_ALL=C tr 'A-Z' 'a-z' |
-        sed -ne '/<meta .*content-type.*charset=/ {
-            s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p
-        }'
-    )
-fi
-
-if [ -n "$encoding" ] && pathfind iconv; then
-    alias to_utf8='iconv -f "$encoding" -t utf-8'
-else # assume UTF-8
-    alias to_utf8='cat'
-fi
-
-if [ -z "$argument" ]; then
-    tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@"
-else
-    if [ -f "$argument" ]; then
-        to_utf8 "$argument" |
-        tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@"
-    else
-        err "File '$argument' not found."
-        exit 1
-    fi
-fi