diff options
author | roktas <roktas@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2006-12-12 07:04:09 +0000 |
---|---|---|
committer | roktas <roktas@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2006-12-12 07:04:09 +0000 |
commit | 426cbadfef6c26323faedcab2cd5ea7efa64d1bb (patch) | |
tree | e16afb28eec790226a7b0524b8fb325594232e5c /src | |
parent | 6411ea7466f67f94816c541a22abb7249d36c377 (diff) | |
download | pandoc-426cbadfef6c26323faedcab2cd5ea7efa64d1bb.tar.gz |
Merge changes in branches/wrappers into trunk.
[in trunk] svn merge -r105:HEAD \
https://pandoc.googlecode.com/svn/branches/wrappers
git-svn-id: https://pandoc.googlecode.com/svn/trunk@177 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src')
-rw-r--r-- | src/wrappers/checkin.sh | 7 | ||||
-rw-r--r-- | src/wrappers/common.sh | 56 | ||||
-rw-r--r-- | src/wrappers/getopts.sh | 12 | ||||
-rw-r--r-- | src/wrappers/html2markdown.in | 134 | ||||
-rw-r--r-- | src/wrappers/latex2markdown.in | 14 | ||||
-rw-r--r-- | src/wrappers/markdown2html.in | 12 | ||||
-rw-r--r-- | src/wrappers/markdown2latex.in | 12 | ||||
-rw-r--r-- | src/wrappers/markdown2pdf.in | 64 | ||||
-rw-r--r-- | src/wrappers/postopts.sh | 17 | ||||
-rw-r--r-- | src/wrappers/singlearg.sh | 7 | ||||
-rw-r--r-- | src/wrappers/tempdir.sh | 10 | ||||
-rw-r--r-- | src/wrappers/testwrapper.in | 141 |
12 files changed, 486 insertions, 0 deletions
diff --git a/src/wrappers/checkin.sh b/src/wrappers/checkin.sh new file mode 100644 index 000000000..c9c564a23 --- /dev/null +++ b/src/wrappers/checkin.sh @@ -0,0 +1,7 @@ +# Check if input files exist. +for f; do + if [ -n "$f" ] && ! [ -f "$f" ]; then + err "File '$f' not found." + exit 1 + fi +done diff --git a/src/wrappers/common.sh b/src/wrappers/common.sh new file mode 100644 index 000000000..99a83be50 --- /dev/null +++ b/src/wrappers/common.sh @@ -0,0 +1,56 @@ +THIS=${0##*/} + +NEWLINE=' +' +WRAPPER_ARGS= +WRAPPEE_ARGS= + +err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; } +errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; } + +usage () { + synopsis="$@" + err "Usage: $THIS $synopsis" + err "See $THIS(1) man file for details." +} + +runpandoc () { + if [ -n "$WRAPPEE_ARGS" ]; then + # Unpack arguments that will be passed to pandoc. + oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPEE_ARGS "$@"; IFS="$oldifs" + case "$1" in --) shift;; esac # tolerate the existence of a leading '--' + fi + + pandoc "$@" +} + +# Portable which(1). +pathfind () { + oldifs="$IFS"; IFS=':' + for _p in $PATH; do + if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then + IFS="$oldifs" + return 0 + fi + done + IFS="$oldifs" + return 1 +} + +HAVE_ICONV= +if pathfind iconv; then + HAVE_ICONV=1 + alias to_utf8='iconv -t utf-8' + alias from_utf8='iconv -f utf-8' +else + err "Warning: iconv not present. Assuming UTF-8 character encoding." + alias to_utf8='cat' + alias from_utf8='cat' +fi + +for p in pandoc $REQUIRED; do + pathfind $p || { + err "You need '$p' to use this program!" + exit 1 + } +done diff --git a/src/wrappers/getopts.sh b/src/wrappers/getopts.sh new file mode 100644 index 000000000..263263c07 --- /dev/null +++ b/src/wrappers/getopts.sh @@ -0,0 +1,12 @@ +if [ -z "$SYNOPSIS" ]; then + SYNOPSIS="[-h] [input_file]" + [ -n "$THIS_NARG" ] || SYNOPSIS="${SYNOPSIS}..." +fi + +while getopts h opt; do + case $opt in + h|?) usage "$SYNOPSIS"; exit 2 ;; + esac +done + +shift $(($OPTIND - 1)) diff --git a/src/wrappers/html2markdown.in b/src/wrappers/html2markdown.in new file mode 100644 index 000000000..0fece3ccd --- /dev/null +++ b/src/wrappers/html2markdown.in @@ -0,0 +1,134 @@ +#!/bin/sh -e +# converts html to markdown +# uses an available program to fetch URL and tidy to normalize it first + +REQUIRED=tidy + +### common.sh + +grab_url_with () { + url="${1:?internal error: grab_url_with: url required}" + + shift + cmdline="$@" + + prog= + prog_opts= + if [ -n "$cmdline" ]; then + eval "set -- $cmdline" + prog=$1 + shift + prog_opts="$@" + fi + + if [ -z "$prog" ]; then + # Locate a sensible web grabber (note the order). + for p in wget lynx w3m curl links w3c; do + if pathfind $p; then + prog=$p + break + fi + done + + [ -n "$prog" ] || { + errn "$THIS: Couldn't find a program to fetch the file from URL " + err "(e.g. wget, w3m, lynx, w3c, or curl)." + return 1 + } + else + pathfind "$prog" || { + err "$THIS: No such web grabber '$prog' found; aborting." + return 1 + } + fi + + # Setup proper base options for known grabbers. + base_opts= + case "$prog" in + wget) base_opts="-O-" ;; + lynx) base_opts="-source" ;; + w3m) base_opts="-dump_source" ;; + curl) base_opts="" ;; + links) base_opts="-source" ;; + w3c) base_opts="-n -get" ;; + *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds." + esac + + err "$THIS: invoking '$prog $base_opts $prog_opts $url'..." + eval "set -- $base_opts $prog_opts" + $prog "$@" "$url" +} + +encoding= +grabber= +nograb= +while getopts e:g:nh opt; do + case $opt in + e) encoding="$OPTARG" ;; + g) grabber="$OPTARG" ;; + n) nograb=1 ;; + h|?) + usage "[-e encoding] [-g grabber_command] [-n] [-h] [input_file|url]" + exit 2 ;; + esac +done + +shift $(($OPTIND - 1)) + +### postopts.sh + +### singlearg.sh + +inurl= +if [ -n "$1" ] && ! [ -f "$1" ]; then + if [ -n "$nograb" ]; then + err "'$1' not found; refusing to treat input as URL." + exit 1 + fi + # Treat given argument as an URL. + inurl="$1" +fi + +if [ -n "$inurl" ]; then + err "Attempting to fetch file from '$inurl'..." + + ### tempdir.sh + + grabber_out=$THIS_TEMPDIR/grabber.out + grabber_log=$THIS_TEMPDIR/grabber.log + if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out \ + 2>$grabber_log; then + errn "grab_url_with failed" + if [ -f $grabber_log ]; then + err " with the following error log." + err + cat >&2 $grabber_log + else + err . + fi + exit 1 + fi + + set -- $grabber_out +fi + +if [ -z "$encoding" ] && [ "x$@" != "x" ]; then + # Try to determine character encoding unless not specified + # and input is STDIN. + encoding=$( + head "$@" | + LC_ALL=C tr 'A-Z' 'a-z' | + sed -ne '/<meta .*content-type.*charset=/ { + s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p + }' + ) +fi + +if [ -n "$encoding" ] && [ -n "$HAVE_ICONV" ]; then + alias to_utf8='iconv -f "$encoding" -t utf-8' +elif [ -n "$inurl" ]; then # assume web pages are UTF-8 + alias to_utf8='cat' +fi # else just use local encoding + +to_utf8 "$@" | tidy -utf8 2>/dev/null | +runpandoc -r html -w markdown -s | from_utf8 diff --git a/src/wrappers/latex2markdown.in b/src/wrappers/latex2markdown.in new file mode 100644 index 000000000..e8cde8a97 --- /dev/null +++ b/src/wrappers/latex2markdown.in @@ -0,0 +1,14 @@ +#!/bin/sh -e +# runs pandoc to convert latex to markdown + +### common.sh + +### getopts.sh + +### postopts.sh + +### singlearg.sh + +### checkin.sh + +to_utf8 "$@" | runpandoc -r latex -w markdown -s | from_utf8 diff --git a/src/wrappers/markdown2html.in b/src/wrappers/markdown2html.in new file mode 100644 index 000000000..e255398d2 --- /dev/null +++ b/src/wrappers/markdown2html.in @@ -0,0 +1,12 @@ +#!/bin/sh -e +# converts markdown to HTML + +### common.sh + +### getopts.sh + +### postopts.sh + +### checkin.sh + +to_utf8 "$@" | runpandoc | from_utf8 diff --git a/src/wrappers/markdown2latex.in b/src/wrappers/markdown2latex.in new file mode 100644 index 000000000..c532b2f99 --- /dev/null +++ b/src/wrappers/markdown2latex.in @@ -0,0 +1,12 @@ +#!/bin/sh -e +# converts markdown to latex + +### common.sh + +### getopts.sh + +### postopts.sh + +### checkin.sh + +to_utf8 "$@" | runpandoc -w latex -s | from_utf8 diff --git a/src/wrappers/markdown2pdf.in b/src/wrappers/markdown2pdf.in new file mode 100644 index 000000000..838767224 --- /dev/null +++ b/src/wrappers/markdown2pdf.in @@ -0,0 +1,64 @@ +#!/bin/sh -e +# converts markdown to latex, then uses latex to make a PDF + +REQUIRED=pdflatex + +### common.sh + +outfile= +while getopts o:h opt; do + case $opt in + o) outfile="$OPTARG" ;; + h|?) usage "[-o output_file] [-h] [input_file]..."; exit 2 ;; + esac +done + +shift $(($OPTIND - 1)) + +### postopts.sh + +### checkin.sh + +if [ -z "$outfile" ]; then + if [ -n "$1" ]; then + outfile="${1%.*}" + else + outfile="stdin" # input is STDIN, since no argument given + fi +fi +case "$outfile" in +*.*) ;; # skip appending extension if one is already present +*) outfile="${outfile%.*}.pdf";; +esac + +### tempdir.sh + +# We should use a filename without white spaces for pdflatex. +TEXNAME=$THIS + +to_utf8 "$@" | runpandoc -w latex -s >$THIS_TEMPDIR/$TEXNAME.tex +( + cd $THIS_TEMPDIR + if ! pdflatex -interaction=batchmode $TEXNAME.tex >/dev/null 2>&1; then + err "LaTeX errors:" + from_utf8 $TEXNAME.log | sed -ne '/^!/,/^ *$/p' >&2 + if grep -q "File \`ucs.sty' not found" $TEXNAME.log; then + err "Please install the 'unicode' package from ctan.org." + fi + exit 1 + fi +) + +is_target_exists= +if [ -f "$outfile" ]; then + is_target_exists=1 + mv -f "$outfile" "$outfile~" +fi + +mv -f $THIS_TEMPDIR/$TEXNAME.pdf "$outfile" + +errn "Created '$outfile'" +[ -z "$is_target_exists" ] || { + errn " (previous file has been backed up as '$outfile~')" +} +err . diff --git a/src/wrappers/postopts.sh b/src/wrappers/postopts.sh new file mode 100644 index 000000000..e0d015f41 --- /dev/null +++ b/src/wrappers/postopts.sh @@ -0,0 +1,17 @@ +# Parse wrapper and wrappee (pandoc) arguments by taking +# into account that they may have space or tab characters. +pick="WRAPPER_ARGS" +while [ $# -gt 0 ]; do + if [ "$pick" = "WRAPPER_ARGS" ]; then + case "$1" in + -*) pick="WRAPPEE_ARGS" ;; + esac + fi + # Pack args with NEWLINE to preserve spaces, + # and put them into the picked variable. + eval "$pick=\"\$${pick}${NEWLINE}${1}\"" + shift +done + +# Unpack filename arguments. Now "$@" will hold the filenames. +oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPER_ARGS; IFS="$oldifs" diff --git a/src/wrappers/singlearg.sh b/src/wrappers/singlearg.sh new file mode 100644 index 000000000..f742d1383 --- /dev/null +++ b/src/wrappers/singlearg.sh @@ -0,0 +1,7 @@ +# Ensure to work with a single argument. +if [ $# -gt 1 ]; then + first_arg="$1" + shift + err "Warning: extra arguments '$@' will be ignored." + set -- $first_arg +fi diff --git a/src/wrappers/tempdir.sh b/src/wrappers/tempdir.sh new file mode 100644 index 000000000..8d2754b1c --- /dev/null +++ b/src/wrappers/tempdir.sh @@ -0,0 +1,10 @@ +# As a security measure refuse to proceed if mktemp is not available. +pathfind mktemp || { err "Couldn't find 'mktemp'; aborting."; exit 1; } + +THIS_TEMPDIR= +THIS_TEMPDIR="$(mktemp -d -t $THIS.XXXXXXXX)" || exit 1 +readonly THIS_TEMPDIR + +trap 'exitcode=$? + [ -z "$THIS_TEMPDIR" ] || rm -rf "$THIS_TEMPDIR" + exit $exitcode' 0 1 2 3 13 15 diff --git a/src/wrappers/testwrapper.in b/src/wrappers/testwrapper.in new file mode 100644 index 000000000..b9e10b450 --- /dev/null +++ b/src/wrappers/testwrapper.in @@ -0,0 +1,141 @@ +#!/bin/sh + +THIS=$1 + +ASH="ash -s" +BASH="bash --posix -s" +DASH="dash -s" +KSH="ksh -s" +POSH="posh -s" +ZSH="zsh -s" + +ERROR="" + +wrapper () { + $SH -- "$@" <<-'EOF' +### common.sh + +outfile= +while getopts o: opt; do + case $opt in + o) outfile="$OPTARG" ;; + esac +done + +shift $(($OPTIND - 1)) + +### postopts.sh + +echo "Options passed to wrapper:" +[ -z "$outfile" ] || echo "|$outfile|" + +echo "Arguments passed to wrapper:" +for arg; do + echo "|$arg|" +done + +pandoc () { + echo "Arguments passed to wrappee:" + for arg; do + echo "|$arg|" + done +} +runpandoc +EOF +} + +# Portable which(1). +pathfind () { + oldifs="$IFS"; IFS=':' + for _p in $PATH; do + if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then + IFS="$oldifs" + return 0 + fi + done + IFS="$oldifs" + return 1 +} + +check_results () { + if [ "$1" = "$2" ]; then + echo >&2 ok + return 0 + else + echo >&2 failed + sed "s/^/\t/" >&2 <<EOF +Command line: '$3' +===> Expected: +$2 +<=== Got: +$1 +EOF + return 1 + fi +} + +for SH in "$BASH" "$DASH" "$KSH" "$ZSH"; do + CMD=${SH%% *} + echo >&2 " Testing with $CMD..." + if pathfind "$CMD"; then + if [ "$CMD" = "zsh" ]; then + # Zsh needs to be called as 'sh' to enable POSIX mode. + ln -s $(which zsh) ./sh + SH="./sh ${SH#* }" + trap 'err=$?; rm -f ./sh; exit $err' 0 1 2 3 13 15 + fi + + set -e + + # Test 1 + printf >&2 " test case 1... " + actual=$(wrapper -o "output file" "foo bar" -A "quux baz" -B) + expected=$(cat <<'EOF' +Options passed to wrapper: +|output file| +Arguments passed to wrapper: +|foo bar| +Arguments passed to wrappee: +|-A| +|quux baz| +|-B| +EOF +) + check_results "$actual" "$expected" \ + 'wrapper -o "output file" "foo bar" -A "quux baz" -B' + + # Test 2 + printf >&2 " test case 2... " + actual=$(wrapper -- -A "foo bar") + expected=$(cat <<'EOF' +Options passed to wrapper: +Arguments passed to wrapper: +Arguments passed to wrappee: +|-A| +|foo bar| +EOF +) + check_results "$actual" "$expected" 'wrapper -- -A "foo bar"' + + # Test 3 (Test 1 with a redundant '--') + printf >&2 " test case 4... " + actual=$(wrapper -o "output file" "foo bar" -- -A "quux baz" -B) + expected=$(cat <<'EOF' +Options passed to wrapper: +|output file| +Arguments passed to wrapper: +|foo bar| +Arguments passed to wrappee: +|-A| +|quux baz| +|-B| +EOF +) + check_results "$actual" "$expected" \ + 'wrapper -o "output file" "foo bar" -- -A "quux baz" -B' + else + echo >&2 "Warning: cannot verify correctness with $CMD; shell not available" + fi +done + +exit 0 |