aboutsummaryrefslogtreecommitdiff
path: root/src/wrappers
diff options
context:
space:
mode:
Diffstat (limited to 'src/wrappers')
-rw-r--r--src/wrappers/checkin.sh7
-rw-r--r--src/wrappers/common.sh27
-rw-r--r--src/wrappers/getopts.sh12
-rw-r--r--src/wrappers/html2markdown.in134
-rw-r--r--src/wrappers/latex2markdown.in14
-rw-r--r--src/wrappers/markdown2html.in12
-rw-r--r--src/wrappers/markdown2latex.in12
-rw-r--r--src/wrappers/markdown2pdf.in68
-rw-r--r--src/wrappers/postopts.sh17
-rw-r--r--src/wrappers/singlearg.sh7
-rw-r--r--src/wrappers/testwrapper.in141
-rw-r--r--src/wrappers/web2markdown.in173
12 files changed, 202 insertions, 422 deletions
diff --git a/src/wrappers/checkin.sh b/src/wrappers/checkin.sh
deleted file mode 100644
index c9c564a23..000000000
--- a/src/wrappers/checkin.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-# Check if input files exist.
-for f; do
- if [ -n "$f" ] && ! [ -f "$f" ]; then
- err "File '$f' not found."
- exit 1
- fi
-done
diff --git a/src/wrappers/common.sh b/src/wrappers/common.sh
index 99a83be50..3481affff 100644
--- a/src/wrappers/common.sh
+++ b/src/wrappers/common.sh
@@ -8,22 +8,6 @@ WRAPPEE_ARGS=
err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; }
errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; }
-usage () {
- synopsis="$@"
- err "Usage: $THIS $synopsis"
- err "See $THIS(1) man file for details."
-}
-
-runpandoc () {
- if [ -n "$WRAPPEE_ARGS" ]; then
- # Unpack arguments that will be passed to pandoc.
- oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPEE_ARGS "$@"; IFS="$oldifs"
- case "$1" in --) shift;; esac # tolerate the existence of a leading '--'
- fi
-
- pandoc "$@"
-}
-
# Portable which(1).
pathfind () {
oldifs="$IFS"; IFS=':'
@@ -37,17 +21,6 @@ pathfind () {
return 1
}
-HAVE_ICONV=
-if pathfind iconv; then
- HAVE_ICONV=1
- alias to_utf8='iconv -t utf-8'
- alias from_utf8='iconv -f utf-8'
-else
- err "Warning: iconv not present. Assuming UTF-8 character encoding."
- alias to_utf8='cat'
- alias from_utf8='cat'
-fi
-
for p in pandoc $REQUIRED; do
pathfind $p || {
err "You need '$p' to use this program!"
diff --git a/src/wrappers/getopts.sh b/src/wrappers/getopts.sh
deleted file mode 100644
index 263263c07..000000000
--- a/src/wrappers/getopts.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-if [ -z "$SYNOPSIS" ]; then
- SYNOPSIS="[-h] [input_file]"
- [ -n "$THIS_NARG" ] || SYNOPSIS="${SYNOPSIS}..."
-fi
-
-while getopts h opt; do
- case $opt in
- h|?) usage "$SYNOPSIS"; exit 2 ;;
- esac
-done
-
-shift $(($OPTIND - 1))
diff --git a/src/wrappers/html2markdown.in b/src/wrappers/html2markdown.in
deleted file mode 100644
index 0fece3ccd..000000000
--- a/src/wrappers/html2markdown.in
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/bin/sh -e
-# converts html to markdown
-# uses an available program to fetch URL and tidy to normalize it first
-
-REQUIRED=tidy
-
-### common.sh
-
-grab_url_with () {
- url="${1:?internal error: grab_url_with: url required}"
-
- shift
- cmdline="$@"
-
- prog=
- prog_opts=
- if [ -n "$cmdline" ]; then
- eval "set -- $cmdline"
- prog=$1
- shift
- prog_opts="$@"
- fi
-
- if [ -z "$prog" ]; then
- # Locate a sensible web grabber (note the order).
- for p in wget lynx w3m curl links w3c; do
- if pathfind $p; then
- prog=$p
- break
- fi
- done
-
- [ -n "$prog" ] || {
- errn "$THIS: Couldn't find a program to fetch the file from URL "
- err "(e.g. wget, w3m, lynx, w3c, or curl)."
- return 1
- }
- else
- pathfind "$prog" || {
- err "$THIS: No such web grabber '$prog' found; aborting."
- return 1
- }
- fi
-
- # Setup proper base options for known grabbers.
- base_opts=
- case "$prog" in
- wget) base_opts="-O-" ;;
- lynx) base_opts="-source" ;;
- w3m) base_opts="-dump_source" ;;
- curl) base_opts="" ;;
- links) base_opts="-source" ;;
- w3c) base_opts="-n -get" ;;
- *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds."
- esac
-
- err "$THIS: invoking '$prog $base_opts $prog_opts $url'..."
- eval "set -- $base_opts $prog_opts"
- $prog "$@" "$url"
-}
-
-encoding=
-grabber=
-nograb=
-while getopts e:g:nh opt; do
- case $opt in
- e) encoding="$OPTARG" ;;
- g) grabber="$OPTARG" ;;
- n) nograb=1 ;;
- h|?)
- usage "[-e encoding] [-g grabber_command] [-n] [-h] [input_file|url]"
- exit 2 ;;
- esac
-done
-
-shift $(($OPTIND - 1))
-
-### postopts.sh
-
-### singlearg.sh
-
-inurl=
-if [ -n "$1" ] && ! [ -f "$1" ]; then
- if [ -n "$nograb" ]; then
- err "'$1' not found; refusing to treat input as URL."
- exit 1
- fi
- # Treat given argument as an URL.
- inurl="$1"
-fi
-
-if [ -n "$inurl" ]; then
- err "Attempting to fetch file from '$inurl'..."
-
- ### tempdir.sh
-
- grabber_out=$THIS_TEMPDIR/grabber.out
- grabber_log=$THIS_TEMPDIR/grabber.log
- if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out \
- 2>$grabber_log; then
- errn "grab_url_with failed"
- if [ -f $grabber_log ]; then
- err " with the following error log."
- err
- cat >&2 $grabber_log
- else
- err .
- fi
- exit 1
- fi
-
- set -- $grabber_out
-fi
-
-if [ -z "$encoding" ] && [ "x$@" != "x" ]; then
- # Try to determine character encoding unless not specified
- # and input is STDIN.
- encoding=$(
- head "$@" |
- LC_ALL=C tr 'A-Z' 'a-z' |
- sed -ne '/<meta .*content-type.*charset=/ {
- s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p
- }'
- )
-fi
-
-if [ -n "$encoding" ] && [ -n "$HAVE_ICONV" ]; then
- alias to_utf8='iconv -f "$encoding" -t utf-8'
-elif [ -n "$inurl" ]; then # assume web pages are UTF-8
- alias to_utf8='cat'
-fi # else just use local encoding
-
-to_utf8 "$@" | tidy -utf8 2>/dev/null |
-runpandoc -r html -w markdown -s | from_utf8
diff --git a/src/wrappers/latex2markdown.in b/src/wrappers/latex2markdown.in
deleted file mode 100644
index e8cde8a97..000000000
--- a/src/wrappers/latex2markdown.in
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/sh -e
-# runs pandoc to convert latex to markdown
-
-### common.sh
-
-### getopts.sh
-
-### postopts.sh
-
-### singlearg.sh
-
-### checkin.sh
-
-to_utf8 "$@" | runpandoc -r latex -w markdown -s | from_utf8
diff --git a/src/wrappers/markdown2html.in b/src/wrappers/markdown2html.in
deleted file mode 100644
index e255398d2..000000000
--- a/src/wrappers/markdown2html.in
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh -e
-# converts markdown to HTML
-
-### common.sh
-
-### getopts.sh
-
-### postopts.sh
-
-### checkin.sh
-
-to_utf8 "$@" | runpandoc | from_utf8
diff --git a/src/wrappers/markdown2latex.in b/src/wrappers/markdown2latex.in
deleted file mode 100644
index c532b2f99..000000000
--- a/src/wrappers/markdown2latex.in
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh -e
-# converts markdown to latex
-
-### common.sh
-
-### getopts.sh
-
-### postopts.sh
-
-### checkin.sh
-
-to_utf8 "$@" | runpandoc -w latex -s | from_utf8
diff --git a/src/wrappers/markdown2pdf.in b/src/wrappers/markdown2pdf.in
index 838767224..c222c1cbd 100644
--- a/src/wrappers/markdown2pdf.in
+++ b/src/wrappers/markdown2pdf.in
@@ -1,64 +1,54 @@
#!/bin/sh -e
-# converts markdown to latex, then uses latex to make a PDF
-REQUIRED=pdflatex
+REQUIRED="markdown2latex pdflatex"
### common.sh
-outfile=
-while getopts o:h opt; do
- case $opt in
- o) outfile="$OPTARG" ;;
- h|?) usage "[-o output_file] [-h] [input_file]..."; exit 2 ;;
- esac
-done
-
-shift $(($OPTIND - 1))
-
-### postopts.sh
+### tempdir.sh
-### checkin.sh
+texname=output
+logfile=$THIS_TEMPDIR/log
-if [ -z "$outfile" ]; then
- if [ -n "$1" ]; then
- outfile="${1%.*}"
- else
- outfile="stdin" # input is STDIN, since no argument given
- fi
+if ! markdown2latex -s -d "$@" >$THIS_TEMPDIR/$texname.tex 2>$logfile; then
+ [ -f $logfile ] && sed -e 's/markdown2latex/markdown2pdf/g' \
+ -e '/^INPUT=/d' -e '/^OUTPUT=/d' $logfile >&2
+ exit 1
fi
-case "$outfile" in
-*.*) ;; # skip appending extension if one is already present
-*) outfile="${outfile%.*}.pdf";;
-esac
-### tempdir.sh
-
-# We should use a filename without white spaces for pdflatex.
-TEXNAME=$THIS
+outfile="$(sed -ne 's/^OUTPUT=//p' $logfile)"
+IFS="$NEWLINE"
+set -- $(sed -ne 's/^INPUT=//p' $logfile)
+firstinfilebase="${1%.*}"
+defaultdest="${firstinfilebase:-stdin}.pdf"
+destname="${outfile:-$defaultdest}"
-to_utf8 "$@" | runpandoc -w latex -s >$THIS_TEMPDIR/$TEXNAME.tex
(
cd $THIS_TEMPDIR
- if ! pdflatex -interaction=batchmode $TEXNAME.tex >/dev/null 2>&1; then
+ if ! pdflatex -interaction=batchmode $texname.tex >/dev/null 2>&1; then
err "LaTeX errors:"
- from_utf8 $TEXNAME.log | sed -ne '/^!/,/^ *$/p' >&2
- if grep -q "File \`ucs.sty' not found" $TEXNAME.log; then
- err "Please install the 'unicode' package from ctan.org."
+ sed -ne '/^!/,/^ *$/p' $texname.log >&2
+ if grep -q "File \`ucs.sty' not found" $texname.log; then
+ err "Please install the 'unicode' package from CTAN:"
+ err "http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/"
+ fi
+ if grep -q "File \`fancyvrb.sty' not found" $texname.log; then
+ err "Please install the 'fancyvrb' package from CTAN:"
+ err "http://www.ctan.org/tex-archive/macros/latex/contrib/fancyvrb/"
fi
exit 1
fi
-)
+) || exit $?
is_target_exists=
-if [ -f "$outfile" ]; then
+if [ -f "$destname" ]; then
is_target_exists=1
- mv -f "$outfile" "$outfile~"
+ mv "$destname" "$destname~"
fi
-mv -f $THIS_TEMPDIR/$TEXNAME.pdf "$outfile"
+mv -f $THIS_TEMPDIR/$texname.pdf "$destname"
-errn "Created '$outfile'"
+errn "Created $destname"
[ -z "$is_target_exists" ] || {
- errn " (previous file has been backed up as '$outfile~')"
+ errn " (previous file has been backed up as $destname~)"
}
err .
diff --git a/src/wrappers/postopts.sh b/src/wrappers/postopts.sh
deleted file mode 100644
index e0d015f41..000000000
--- a/src/wrappers/postopts.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-# Parse wrapper and wrappee (pandoc) arguments by taking
-# into account that they may have space or tab characters.
-pick="WRAPPER_ARGS"
-while [ $# -gt 0 ]; do
- if [ "$pick" = "WRAPPER_ARGS" ]; then
- case "$1" in
- -*) pick="WRAPPEE_ARGS" ;;
- esac
- fi
- # Pack args with NEWLINE to preserve spaces,
- # and put them into the picked variable.
- eval "$pick=\"\$${pick}${NEWLINE}${1}\""
- shift
-done
-
-# Unpack filename arguments. Now "$@" will hold the filenames.
-oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPER_ARGS; IFS="$oldifs"
diff --git a/src/wrappers/singlearg.sh b/src/wrappers/singlearg.sh
deleted file mode 100644
index f742d1383..000000000
--- a/src/wrappers/singlearg.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-# Ensure to work with a single argument.
-if [ $# -gt 1 ]; then
- first_arg="$1"
- shift
- err "Warning: extra arguments '$@' will be ignored."
- set -- $first_arg
-fi
diff --git a/src/wrappers/testwrapper.in b/src/wrappers/testwrapper.in
deleted file mode 100644
index e025c87e7..000000000
--- a/src/wrappers/testwrapper.in
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/bin/sh
-
-THIS=$1
-
-ASH="ash -s"
-BASH="bash --posix -s"
-DASH="dash -s"
-KSH="ksh -s"
-POSH="posh -s"
-ZSH="zsh -s"
-
-ERROR=""
-
-wrapper () {
- $SH -- "$@" <<-'EOF'
-### common.sh
-
-outfile=
-while getopts o: opt; do
- case $opt in
- o) outfile="$OPTARG" ;;
- esac
-done
-
-shift $(($OPTIND - 1))
-
-### postopts.sh
-
-echo "Options passed to wrapper:"
-[ -z "$outfile" ] || echo "|$outfile|"
-
-echo "Arguments passed to wrapper:"
-for arg; do
- echo "|$arg|"
-done
-
-pandoc () {
- echo "Arguments passed to wrappee:"
- for arg; do
- echo "|$arg|"
- done
-}
-runpandoc
-EOF
-}
-
-# Portable which(1).
-pathfind () {
- oldifs="$IFS"; IFS=':'
- for _p in $PATH; do
- if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then
- IFS="$oldifs"
- return 0
- fi
- done
- IFS="$oldifs"
- return 1
-}
-
-check_results () {
- if [ "$1" = "$2" ]; then
- echo >&2 ok
- return 0
- else
- echo >&2 failed
- sed "s/^/\t/" >&2 <<EOF
-Command line: '$3'
-===> Expected:
-$2
-<=== Got:
-$1
-EOF
- return 1
- fi
-}
-
-for SH in "$BASH" "$DASH" "$KSH" "$ZSH"; do
- CMD=${SH%% *}
- echo >&2 " Testing with $CMD..."
- if pathfind "$CMD"; then
- if [ "$CMD" = "zsh" ]; then
- # Zsh needs to be called as 'sh' to enable POSIX mode.
- ln -s $(which zsh) ./sh
- SH="./sh ${SH#* }"
- trap 'err=$?; rm -f ./sh; exit $err' 0 1 2 3 13 15
- fi
-
- set -e
-
- # Test 1
- printf >&2 " test case 1... "
- actual=$(wrapper -o "output file" "foo bar" -A "quux baz" -B)
- expected=$(cat <<'EOF'
-Options passed to wrapper:
-|output file|
-Arguments passed to wrapper:
-|foo bar|
-Arguments passed to wrappee:
-|-A|
-|quux baz|
-|-B|
-EOF
-)
- check_results "$actual" "$expected" \
- 'wrapper -o "output file" "foo bar" -A "quux baz" -B'
-
- # Test 2
- printf >&2 " test case 2... "
- actual=$(wrapper -- -A "foo bar")
- expected=$(cat <<'EOF'
-Options passed to wrapper:
-Arguments passed to wrapper:
-Arguments passed to wrappee:
-|-A|
-|foo bar|
-EOF
-)
- check_results "$actual" "$expected" 'wrapper -- -A "foo bar"'
-
- # Test 3 (Test 1 with a redundant '--')
- printf >&2 " test case 3... "
- actual=$(wrapper -o "output file" "foo bar" -- -A "quux baz" -B)
- expected=$(cat <<'EOF'
-Options passed to wrapper:
-|output file|
-Arguments passed to wrapper:
-|foo bar|
-Arguments passed to wrappee:
-|-A|
-|quux baz|
-|-B|
-EOF
-)
- check_results "$actual" "$expected" \
- 'wrapper -o "output file" "foo bar" -- -A "quux baz" -B'
- else
- echo >&2 "Warning: cannot verify correctness with $CMD; shell not available"
- fi
-done
-
-exit 0
diff --git a/src/wrappers/web2markdown.in b/src/wrappers/web2markdown.in
new file mode 100644
index 000000000..64ff3db9b
--- /dev/null
+++ b/src/wrappers/web2markdown.in
@@ -0,0 +1,173 @@
+#!/bin/sh -e
+# converts HTML from a URL, file, or stdin to markdown
+# uses an available program to fetch URL and tidy to normalize it first
+
+REQUIRED="tidy html2markdown"
+
+### common.sh
+
+grab_url_with () {
+ url="${1:?internal error: grab_url_with: url required}"
+
+ shift
+ cmdline="$@"
+
+ prog=
+ prog_opts=
+ if [ -n "$cmdline" ]; then
+ eval "set -- $cmdline"
+ prog=$1
+ shift
+ prog_opts="$@"
+ fi
+
+ if [ -z "$prog" ]; then
+ # Locate a sensible web grabber (note the order).
+ for p in wget lynx w3m curl links w3c; do
+ if pathfind $p; then
+ prog=$p
+ break
+ fi
+ done
+
+ [ -n "$prog" ] || {
+ errn "$THIS: Couldn't find a program to fetch the file from URL "
+ err "(e.g. wget, w3m, lynx, w3c, or curl)."
+ return 1
+ }
+ else
+ pathfind "$prog" || {
+ err "$THIS: No such web grabber '$prog' found; aborting."
+ return 1
+ }
+ fi
+
+ # Setup proper base options for known grabbers.
+ base_opts=
+ case "$prog" in
+ wget) base_opts="-O-" ;;
+ lynx) base_opts="-source" ;;
+ w3m) base_opts="-dump_source" ;;
+ curl) base_opts="" ;;
+ links) base_opts="-source" ;;
+ w3c) base_opts="-n -get" ;;
+ *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds."
+ esac
+
+ err "$THIS: invoking '$prog $base_opts $prog_opts $url'..."
+ eval "set -- $base_opts $prog_opts"
+ $prog "$@" "$url"
+}
+
+add_option () {
+ options="$options$NEWLINE$1"
+}
+
+options=
+argument=
+encoding=
+grabber=
+
+# Parse command-line arguments
+while [ $# -gt 0 ]; do
+ case "$1" in
+ -h|--help)
+ html2markdown -h 2>&1 | sed -e 's/html2markdown/web2markdown/' 1>&2
+ err " -e ENCODING, --encoding=ENCODING"
+ err " Specify character encoding of input"
+ err " -g COMMAND, --grabber=COMMAND"
+ err " Specify command to be used to grab contents of URL"
+ exit 0 ;;
+ -v|--version)
+ html2markdown -v
+ exit 0 ;;
+ -e)
+ shift
+ encoding=$1 ;;
+ --encoding=*)
+ wholeopt=$1
+ # extract encoding from after =
+ encoding=${wholeopt#*=} ;;
+ -g)
+ shift
+ grabber=$1 ;;
+ --grabber=*)
+ wholeopt=$1
+ # extract encoding from after =
+ grabber=${wholeopt#*=} ;;
+ -o|--output|-b|--tab-stop|-H|--include-in-header| \
+ -A|--include-after-body|-C|-B|--include-before-body| \
+ -C|--custom-header|-T|--title-prefix)
+ add_option $1
+ shift
+ add_option $1 ;;
+ -*) add_option $1 ;;
+ *)
+ if [ -z "$argument" ]; then
+ argument=$1
+ else
+ err "Warning: extra argument '$1' will be ignored."
+ fi ;;
+ esac
+ shift
+done
+
+# Unpack options. Now "$@" will hold the html2markdown options.
+oldifs="$IFS"; IFS="$NEWLINE"; set -- $options; IFS="$oldifs"
+
+inurl=
+if [ -n "$argument" ] && ! [ -f "$argument" ]; then
+ # Treat given argument as an URL.
+ inurl="$argument"
+fi
+
+if [ -n "$inurl" ]; then
+ err "Attempting to fetch file from '$inurl'..."
+
+ ### tempdir.sh
+
+ grabber_out=$THIS_TEMPDIR/grabber.out
+ grabber_log=$THIS_TEMPDIR/grabber.log
+ if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then
+ errn "grab_url_with failed"
+ if [ -f $grabber_log ]; then
+ err " with the following error log."
+ err
+ cat >&2 $grabber_log
+ else
+ err .
+ fi
+ exit 1
+ fi
+
+ argument="$grabber_out"
+fi
+
+if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then
+ # Try to determine character encoding if not specified
+ # and input is not STDIN.
+ encoding=$(
+ head "$argument" |
+ LC_ALL=C tr 'A-Z' 'a-z' |
+ sed -ne '/<meta .*content-type.*charset=/ {
+ s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p
+ }'
+ )
+fi
+
+if [ -n "$encoding" ] && pathfind iconv; then
+ alias to_utf8='iconv -f "$encoding" -t utf-8'
+else # assume UTF-8
+ alias to_utf8='cat'
+fi
+
+if [ -z "$argument" ]; then
+ tidy -utf8 2>/dev/null | html2markdown "$@"
+else
+ if [ -f "$argument" ]; then
+ to_utf8 "$argument" | tidy -utf8 2>/dev/null | html2markdown "$@"
+ else
+ err "File '$argument' not found."
+ exit 1
+ fi
+fi