diff options
| author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2006-12-22 20:16:03 +0000 |
|---|---|---|
| committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2006-12-22 20:16:03 +0000 |
| commit | d829c4820adbe7a7634f1c1d825d0d206512e6e7 (patch) | |
| tree | 2de3d3459e6f2788b3a9aede93add68503f5a588 /src/wrappers | |
| parent | cfaf0c178c422e00706eb04daea88d21a7fe9429 (diff) | |
| download | pandoc-d829c4820adbe7a7634f1c1d825d0d206512e6e7.tar.gz | |
Merged changes from branches/wrappers since r177.
Summary of main changes:
+ Added -o/--output and -d/--debug options to pandoc.
+ Modified pandoc to behave differently depending on the name
of the program. For example, if the program name is 'html2latex',
the default reader will be html and the default writer latex.
+ Removed most of the old wrappers, replacing them with symlinks
to pandoc.
+ Rewrote markdown2pdf and created a new wrapper web2markdown,
with the functionality of the old html2markdown script. These
new scripts exploit pandoc's -d option to avoid having to do
complex command-line parsing.
+ Revised man pages and documentation appropriately.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@279 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/wrappers')
| -rw-r--r-- | src/wrappers/checkin.sh | 7 | ||||
| -rw-r--r-- | src/wrappers/common.sh | 27 | ||||
| -rw-r--r-- | src/wrappers/getopts.sh | 12 | ||||
| -rw-r--r-- | src/wrappers/html2markdown.in | 134 | ||||
| -rw-r--r-- | src/wrappers/latex2markdown.in | 14 | ||||
| -rw-r--r-- | src/wrappers/markdown2html.in | 12 | ||||
| -rw-r--r-- | src/wrappers/markdown2latex.in | 12 | ||||
| -rw-r--r-- | src/wrappers/markdown2pdf.in | 68 | ||||
| -rw-r--r-- | src/wrappers/postopts.sh | 17 | ||||
| -rw-r--r-- | src/wrappers/singlearg.sh | 7 | ||||
| -rw-r--r-- | src/wrappers/testwrapper.in | 141 | ||||
| -rw-r--r-- | src/wrappers/web2markdown.in | 173 |
12 files changed, 202 insertions, 422 deletions
diff --git a/src/wrappers/checkin.sh b/src/wrappers/checkin.sh deleted file mode 100644 index c9c564a23..000000000 --- a/src/wrappers/checkin.sh +++ /dev/null @@ -1,7 +0,0 @@ -# Check if input files exist. -for f; do - if [ -n "$f" ] && ! [ -f "$f" ]; then - err "File '$f' not found." - exit 1 - fi -done diff --git a/src/wrappers/common.sh b/src/wrappers/common.sh index 99a83be50..3481affff 100644 --- a/src/wrappers/common.sh +++ b/src/wrappers/common.sh @@ -8,22 +8,6 @@ WRAPPEE_ARGS= err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; } errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; } -usage () { - synopsis="$@" - err "Usage: $THIS $synopsis" - err "See $THIS(1) man file for details." -} - -runpandoc () { - if [ -n "$WRAPPEE_ARGS" ]; then - # Unpack arguments that will be passed to pandoc. - oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPEE_ARGS "$@"; IFS="$oldifs" - case "$1" in --) shift;; esac # tolerate the existence of a leading '--' - fi - - pandoc "$@" -} - # Portable which(1). pathfind () { oldifs="$IFS"; IFS=':' @@ -37,17 +21,6 @@ pathfind () { return 1 } -HAVE_ICONV= -if pathfind iconv; then - HAVE_ICONV=1 - alias to_utf8='iconv -t utf-8' - alias from_utf8='iconv -f utf-8' -else - err "Warning: iconv not present. Assuming UTF-8 character encoding." - alias to_utf8='cat' - alias from_utf8='cat' -fi - for p in pandoc $REQUIRED; do pathfind $p || { err "You need '$p' to use this program!" diff --git a/src/wrappers/getopts.sh b/src/wrappers/getopts.sh deleted file mode 100644 index 263263c07..000000000 --- a/src/wrappers/getopts.sh +++ /dev/null @@ -1,12 +0,0 @@ -if [ -z "$SYNOPSIS" ]; then - SYNOPSIS="[-h] [input_file]" - [ -n "$THIS_NARG" ] || SYNOPSIS="${SYNOPSIS}..." -fi - -while getopts h opt; do - case $opt in - h|?) usage "$SYNOPSIS"; exit 2 ;; - esac -done - -shift $(($OPTIND - 1)) diff --git a/src/wrappers/html2markdown.in b/src/wrappers/html2markdown.in deleted file mode 100644 index 0fece3ccd..000000000 --- a/src/wrappers/html2markdown.in +++ /dev/null @@ -1,134 +0,0 @@ -#!/bin/sh -e -# converts html to markdown -# uses an available program to fetch URL and tidy to normalize it first - -REQUIRED=tidy - -### common.sh - -grab_url_with () { - url="${1:?internal error: grab_url_with: url required}" - - shift - cmdline="$@" - - prog= - prog_opts= - if [ -n "$cmdline" ]; then - eval "set -- $cmdline" - prog=$1 - shift - prog_opts="$@" - fi - - if [ -z "$prog" ]; then - # Locate a sensible web grabber (note the order). - for p in wget lynx w3m curl links w3c; do - if pathfind $p; then - prog=$p - break - fi - done - - [ -n "$prog" ] || { - errn "$THIS: Couldn't find a program to fetch the file from URL " - err "(e.g. wget, w3m, lynx, w3c, or curl)." - return 1 - } - else - pathfind "$prog" || { - err "$THIS: No such web grabber '$prog' found; aborting." - return 1 - } - fi - - # Setup proper base options for known grabbers. - base_opts= - case "$prog" in - wget) base_opts="-O-" ;; - lynx) base_opts="-source" ;; - w3m) base_opts="-dump_source" ;; - curl) base_opts="" ;; - links) base_opts="-source" ;; - w3c) base_opts="-n -get" ;; - *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds." - esac - - err "$THIS: invoking '$prog $base_opts $prog_opts $url'..." - eval "set -- $base_opts $prog_opts" - $prog "$@" "$url" -} - -encoding= -grabber= -nograb= -while getopts e:g:nh opt; do - case $opt in - e) encoding="$OPTARG" ;; - g) grabber="$OPTARG" ;; - n) nograb=1 ;; - h|?) - usage "[-e encoding] [-g grabber_command] [-n] [-h] [input_file|url]" - exit 2 ;; - esac -done - -shift $(($OPTIND - 1)) - -### postopts.sh - -### singlearg.sh - -inurl= -if [ -n "$1" ] && ! [ -f "$1" ]; then - if [ -n "$nograb" ]; then - err "'$1' not found; refusing to treat input as URL." - exit 1 - fi - # Treat given argument as an URL. - inurl="$1" -fi - -if [ -n "$inurl" ]; then - err "Attempting to fetch file from '$inurl'..." - - ### tempdir.sh - - grabber_out=$THIS_TEMPDIR/grabber.out - grabber_log=$THIS_TEMPDIR/grabber.log - if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out \ - 2>$grabber_log; then - errn "grab_url_with failed" - if [ -f $grabber_log ]; then - err " with the following error log." - err - cat >&2 $grabber_log - else - err . - fi - exit 1 - fi - - set -- $grabber_out -fi - -if [ -z "$encoding" ] && [ "x$@" != "x" ]; then - # Try to determine character encoding unless not specified - # and input is STDIN. - encoding=$( - head "$@" | - LC_ALL=C tr 'A-Z' 'a-z' | - sed -ne '/<meta .*content-type.*charset=/ { - s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p - }' - ) -fi - -if [ -n "$encoding" ] && [ -n "$HAVE_ICONV" ]; then - alias to_utf8='iconv -f "$encoding" -t utf-8' -elif [ -n "$inurl" ]; then # assume web pages are UTF-8 - alias to_utf8='cat' -fi # else just use local encoding - -to_utf8 "$@" | tidy -utf8 2>/dev/null | -runpandoc -r html -w markdown -s | from_utf8 diff --git a/src/wrappers/latex2markdown.in b/src/wrappers/latex2markdown.in deleted file mode 100644 index e8cde8a97..000000000 --- a/src/wrappers/latex2markdown.in +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh -e -# runs pandoc to convert latex to markdown - -### common.sh - -### getopts.sh - -### postopts.sh - -### singlearg.sh - -### checkin.sh - -to_utf8 "$@" | runpandoc -r latex -w markdown -s | from_utf8 diff --git a/src/wrappers/markdown2html.in b/src/wrappers/markdown2html.in deleted file mode 100644 index e255398d2..000000000 --- a/src/wrappers/markdown2html.in +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh -e -# converts markdown to HTML - -### common.sh - -### getopts.sh - -### postopts.sh - -### checkin.sh - -to_utf8 "$@" | runpandoc | from_utf8 diff --git a/src/wrappers/markdown2latex.in b/src/wrappers/markdown2latex.in deleted file mode 100644 index c532b2f99..000000000 --- a/src/wrappers/markdown2latex.in +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh -e -# converts markdown to latex - -### common.sh - -### getopts.sh - -### postopts.sh - -### checkin.sh - -to_utf8 "$@" | runpandoc -w latex -s | from_utf8 diff --git a/src/wrappers/markdown2pdf.in b/src/wrappers/markdown2pdf.in index 838767224..c222c1cbd 100644 --- a/src/wrappers/markdown2pdf.in +++ b/src/wrappers/markdown2pdf.in @@ -1,64 +1,54 @@ #!/bin/sh -e -# converts markdown to latex, then uses latex to make a PDF -REQUIRED=pdflatex +REQUIRED="markdown2latex pdflatex" ### common.sh -outfile= -while getopts o:h opt; do - case $opt in - o) outfile="$OPTARG" ;; - h|?) usage "[-o output_file] [-h] [input_file]..."; exit 2 ;; - esac -done - -shift $(($OPTIND - 1)) - -### postopts.sh +### tempdir.sh -### checkin.sh +texname=output +logfile=$THIS_TEMPDIR/log -if [ -z "$outfile" ]; then - if [ -n "$1" ]; then - outfile="${1%.*}" - else - outfile="stdin" # input is STDIN, since no argument given - fi +if ! markdown2latex -s -d "$@" >$THIS_TEMPDIR/$texname.tex 2>$logfile; then + [ -f $logfile ] && sed -e 's/markdown2latex/markdown2pdf/g' \ + -e '/^INPUT=/d' -e '/^OUTPUT=/d' $logfile >&2 + exit 1 fi -case "$outfile" in -*.*) ;; # skip appending extension if one is already present -*) outfile="${outfile%.*}.pdf";; -esac -### tempdir.sh - -# We should use a filename without white spaces for pdflatex. -TEXNAME=$THIS +outfile="$(sed -ne 's/^OUTPUT=//p' $logfile)" +IFS="$NEWLINE" +set -- $(sed -ne 's/^INPUT=//p' $logfile) +firstinfilebase="${1%.*}" +defaultdest="${firstinfilebase:-stdin}.pdf" +destname="${outfile:-$defaultdest}" -to_utf8 "$@" | runpandoc -w latex -s >$THIS_TEMPDIR/$TEXNAME.tex ( cd $THIS_TEMPDIR - if ! pdflatex -interaction=batchmode $TEXNAME.tex >/dev/null 2>&1; then + if ! pdflatex -interaction=batchmode $texname.tex >/dev/null 2>&1; then err "LaTeX errors:" - from_utf8 $TEXNAME.log | sed -ne '/^!/,/^ *$/p' >&2 - if grep -q "File \`ucs.sty' not found" $TEXNAME.log; then - err "Please install the 'unicode' package from ctan.org." + sed -ne '/^!/,/^ *$/p' $texname.log >&2 + if grep -q "File \`ucs.sty' not found" $texname.log; then + err "Please install the 'unicode' package from CTAN:" + err "http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/" + fi + if grep -q "File \`fancyvrb.sty' not found" $texname.log; then + err "Please install the 'fancyvrb' package from CTAN:" + err "http://www.ctan.org/tex-archive/macros/latex/contrib/fancyvrb/" fi exit 1 fi -) +) || exit $? is_target_exists= -if [ -f "$outfile" ]; then +if [ -f "$destname" ]; then is_target_exists=1 - mv -f "$outfile" "$outfile~" + mv "$destname" "$destname~" fi -mv -f $THIS_TEMPDIR/$TEXNAME.pdf "$outfile" +mv -f $THIS_TEMPDIR/$texname.pdf "$destname" -errn "Created '$outfile'" +errn "Created $destname" [ -z "$is_target_exists" ] || { - errn " (previous file has been backed up as '$outfile~')" + errn " (previous file has been backed up as $destname~)" } err . diff --git a/src/wrappers/postopts.sh b/src/wrappers/postopts.sh deleted file mode 100644 index e0d015f41..000000000 --- a/src/wrappers/postopts.sh +++ /dev/null @@ -1,17 +0,0 @@ -# Parse wrapper and wrappee (pandoc) arguments by taking -# into account that they may have space or tab characters. -pick="WRAPPER_ARGS" -while [ $# -gt 0 ]; do - if [ "$pick" = "WRAPPER_ARGS" ]; then - case "$1" in - -*) pick="WRAPPEE_ARGS" ;; - esac - fi - # Pack args with NEWLINE to preserve spaces, - # and put them into the picked variable. - eval "$pick=\"\$${pick}${NEWLINE}${1}\"" - shift -done - -# Unpack filename arguments. Now "$@" will hold the filenames. -oldifs="$IFS"; IFS="$NEWLINE"; set -- $WRAPPER_ARGS; IFS="$oldifs" diff --git a/src/wrappers/singlearg.sh b/src/wrappers/singlearg.sh deleted file mode 100644 index f742d1383..000000000 --- a/src/wrappers/singlearg.sh +++ /dev/null @@ -1,7 +0,0 @@ -# Ensure to work with a single argument. -if [ $# -gt 1 ]; then - first_arg="$1" - shift - err "Warning: extra arguments '$@' will be ignored." - set -- $first_arg -fi diff --git a/src/wrappers/testwrapper.in b/src/wrappers/testwrapper.in deleted file mode 100644 index e025c87e7..000000000 --- a/src/wrappers/testwrapper.in +++ /dev/null @@ -1,141 +0,0 @@ -#!/bin/sh - -THIS=$1 - -ASH="ash -s" -BASH="bash --posix -s" -DASH="dash -s" -KSH="ksh -s" -POSH="posh -s" -ZSH="zsh -s" - -ERROR="" - -wrapper () { - $SH -- "$@" <<-'EOF' -### common.sh - -outfile= -while getopts o: opt; do - case $opt in - o) outfile="$OPTARG" ;; - esac -done - -shift $(($OPTIND - 1)) - -### postopts.sh - -echo "Options passed to wrapper:" -[ -z "$outfile" ] || echo "|$outfile|" - -echo "Arguments passed to wrapper:" -for arg; do - echo "|$arg|" -done - -pandoc () { - echo "Arguments passed to wrappee:" - for arg; do - echo "|$arg|" - done -} -runpandoc -EOF -} - -# Portable which(1). -pathfind () { - oldifs="$IFS"; IFS=':' - for _p in $PATH; do - if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then - IFS="$oldifs" - return 0 - fi - done - IFS="$oldifs" - return 1 -} - -check_results () { - if [ "$1" = "$2" ]; then - echo >&2 ok - return 0 - else - echo >&2 failed - sed "s/^/\t/" >&2 <<EOF -Command line: '$3' -===> Expected: -$2 -<=== Got: -$1 -EOF - return 1 - fi -} - -for SH in "$BASH" "$DASH" "$KSH" "$ZSH"; do - CMD=${SH%% *} - echo >&2 " Testing with $CMD..." - if pathfind "$CMD"; then - if [ "$CMD" = "zsh" ]; then - # Zsh needs to be called as 'sh' to enable POSIX mode. - ln -s $(which zsh) ./sh - SH="./sh ${SH#* }" - trap 'err=$?; rm -f ./sh; exit $err' 0 1 2 3 13 15 - fi - - set -e - - # Test 1 - printf >&2 " test case 1... " - actual=$(wrapper -o "output file" "foo bar" -A "quux baz" -B) - expected=$(cat <<'EOF' -Options passed to wrapper: -|output file| -Arguments passed to wrapper: -|foo bar| -Arguments passed to wrappee: -|-A| -|quux baz| -|-B| -EOF -) - check_results "$actual" "$expected" \ - 'wrapper -o "output file" "foo bar" -A "quux baz" -B' - - # Test 2 - printf >&2 " test case 2... " - actual=$(wrapper -- -A "foo bar") - expected=$(cat <<'EOF' -Options passed to wrapper: -Arguments passed to wrapper: -Arguments passed to wrappee: -|-A| -|foo bar| -EOF -) - check_results "$actual" "$expected" 'wrapper -- -A "foo bar"' - - # Test 3 (Test 1 with a redundant '--') - printf >&2 " test case 3... " - actual=$(wrapper -o "output file" "foo bar" -- -A "quux baz" -B) - expected=$(cat <<'EOF' -Options passed to wrapper: -|output file| -Arguments passed to wrapper: -|foo bar| -Arguments passed to wrappee: -|-A| -|quux baz| -|-B| -EOF -) - check_results "$actual" "$expected" \ - 'wrapper -o "output file" "foo bar" -- -A "quux baz" -B' - else - echo >&2 "Warning: cannot verify correctness with $CMD; shell not available" - fi -done - -exit 0 diff --git a/src/wrappers/web2markdown.in b/src/wrappers/web2markdown.in new file mode 100644 index 000000000..64ff3db9b --- /dev/null +++ b/src/wrappers/web2markdown.in @@ -0,0 +1,173 @@ +#!/bin/sh -e +# converts HTML from a URL, file, or stdin to markdown +# uses an available program to fetch URL and tidy to normalize it first + +REQUIRED="tidy html2markdown" + +### common.sh + +grab_url_with () { + url="${1:?internal error: grab_url_with: url required}" + + shift + cmdline="$@" + + prog= + prog_opts= + if [ -n "$cmdline" ]; then + eval "set -- $cmdline" + prog=$1 + shift + prog_opts="$@" + fi + + if [ -z "$prog" ]; then + # Locate a sensible web grabber (note the order). + for p in wget lynx w3m curl links w3c; do + if pathfind $p; then + prog=$p + break + fi + done + + [ -n "$prog" ] || { + errn "$THIS: Couldn't find a program to fetch the file from URL " + err "(e.g. wget, w3m, lynx, w3c, or curl)." + return 1 + } + else + pathfind "$prog" || { + err "$THIS: No such web grabber '$prog' found; aborting." + return 1 + } + fi + + # Setup proper base options for known grabbers. + base_opts= + case "$prog" in + wget) base_opts="-O-" ;; + lynx) base_opts="-source" ;; + w3m) base_opts="-dump_source" ;; + curl) base_opts="" ;; + links) base_opts="-source" ;; + w3c) base_opts="-n -get" ;; + *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds." + esac + + err "$THIS: invoking '$prog $base_opts $prog_opts $url'..." + eval "set -- $base_opts $prog_opts" + $prog "$@" "$url" +} + +add_option () { + options="$options$NEWLINE$1" +} + +options= +argument= +encoding= +grabber= + +# Parse command-line arguments +while [ $# -gt 0 ]; do + case "$1" in + -h|--help) + html2markdown -h 2>&1 | sed -e 's/html2markdown/web2markdown/' 1>&2 + err " -e ENCODING, --encoding=ENCODING" + err " Specify character encoding of input" + err " -g COMMAND, --grabber=COMMAND" + err " Specify command to be used to grab contents of URL" + exit 0 ;; + -v|--version) + html2markdown -v + exit 0 ;; + -e) + shift + encoding=$1 ;; + --encoding=*) + wholeopt=$1 + # extract encoding from after = + encoding=${wholeopt#*=} ;; + -g) + shift + grabber=$1 ;; + --grabber=*) + wholeopt=$1 + # extract encoding from after = + grabber=${wholeopt#*=} ;; + -o|--output|-b|--tab-stop|-H|--include-in-header| \ + -A|--include-after-body|-C|-B|--include-before-body| \ + -C|--custom-header|-T|--title-prefix) + add_option $1 + shift + add_option $1 ;; + -*) add_option $1 ;; + *) + if [ -z "$argument" ]; then + argument=$1 + else + err "Warning: extra argument '$1' will be ignored." + fi ;; + esac + shift +done + +# Unpack options. Now "$@" will hold the html2markdown options. +oldifs="$IFS"; IFS="$NEWLINE"; set -- $options; IFS="$oldifs" + +inurl= +if [ -n "$argument" ] && ! [ -f "$argument" ]; then + # Treat given argument as an URL. + inurl="$argument" +fi + +if [ -n "$inurl" ]; then + err "Attempting to fetch file from '$inurl'..." + + ### tempdir.sh + + grabber_out=$THIS_TEMPDIR/grabber.out + grabber_log=$THIS_TEMPDIR/grabber.log + if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then + errn "grab_url_with failed" + if [ -f $grabber_log ]; then + err " with the following error log." + err + cat >&2 $grabber_log + else + err . + fi + exit 1 + fi + + argument="$grabber_out" +fi + +if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then + # Try to determine character encoding if not specified + # and input is not STDIN. + encoding=$( + head "$argument" | + LC_ALL=C tr 'A-Z' 'a-z' | + sed -ne '/<meta .*content-type.*charset=/ { + s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p + }' + ) +fi + +if [ -n "$encoding" ] && pathfind iconv; then + alias to_utf8='iconv -f "$encoding" -t utf-8' +else # assume UTF-8 + alias to_utf8='cat' +fi + +if [ -z "$argument" ]; then + tidy -utf8 2>/dev/null | html2markdown "$@" +else + if [ -f "$argument" ]; then + to_utf8 "$argument" | tidy -utf8 2>/dev/null | html2markdown "$@" + else + err "File '$argument' not found." + exit 1 + fi +fi |
