From 2ba5ff94833c9fee0fb2799e5440d8b235f9410e Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sat, 9 Aug 2008 23:45:14 +0000 Subject: Include shell scripts themselves in repo, rather than generating from wrappers. + Removed wrappers directory + Removed wrappers Makefile target + Added hsmarkdown, html2markdown, and markdown2pdf git-svn-id: https://pandoc.googlecode.com/svn/trunk@1387 788f1e2b-df1e-0410-8736-df70ead52e1b --- .gitignore | 3 - Makefile | 21 ----- hsmarkdown | 5 ++ html2markdown | 221 ++++++++++++++++++++++++++++++++++++++++++++++ markdown2pdf | 140 +++++++++++++++++++++++++++++ wrappers/common.sh | 43 --------- wrappers/hsmarkdown.in | 5 -- wrappers/html2markdown.in | 162 --------------------------------- wrappers/markdown2pdf.in | 81 ----------------- wrappers/tempdir.sh | 18 ---- 10 files changed, 366 insertions(+), 333 deletions(-) create mode 100755 hsmarkdown create mode 100755 html2markdown create mode 100755 markdown2pdf delete mode 100644 wrappers/common.sh delete mode 100644 wrappers/hsmarkdown.in delete mode 100644 wrappers/html2markdown.in delete mode 100644 wrappers/markdown2pdf.in delete mode 100644 wrappers/tempdir.sh diff --git a/.gitignore b/.gitignore index b06f96c78..86e2c499b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,9 +5,6 @@ INSTALL.* .configure-stamp man/man?/*.1 man/man?/*.html -markdown2pdf -html2markdown -hsmarkdown *.diff pandoc.cabal.orig diff --git a/Makefile b/Makefile index 5442e3364..305c8debc 100644 --- a/Makefile +++ b/Makefile @@ -93,33 +93,12 @@ all: build-program %.1: %.1.md $(MAIN) ./$(MAIN) -s -S -w man $< >$@ || rm -f $@ -define generate-shell-script -echo >&2 "Generating $@..."; \ -awk ' \ - /^[ \t]*###+ / { \ - lead = $$0; sub(/[^ \t].*$$/, "", lead); \ - t = "$(dir $<)/"$$2; \ - while (getline line < t > 0) \ - print lead line; \ - next; \ - } \ - { print } \ -' <$< >$@ -chmod +x $@ -endef - cleanup_files+=$(ODTREF) $(ODTREF): $(addprefix $(ODTSTYLES)/, layout-cache meta.xml styles.xml content.xml mimetype \ settings.xml Configurations2 Thumbnails META-INF) cd $(ODTSTYLES) ; \ zip -9 -q -r $(notdir $@) * -x $(notdir $@) -.PHONY: wrappers -wrappers: $(WRAPPERS) -cleanup_files+=$(WRAPPERS) -$(WRAPPERS): %: $(SRCDIR)/wrappers/%.in $(SRCDIR)/wrappers/*.sh - @$(generate-shell-script) - .PHONY: configure cleanup_files+=Setup.hi Setup.o $(BUILDCMD) $(BUILDVARS) ifdef GHC_PKG diff --git a/hsmarkdown b/hsmarkdown new file mode 100755 index 000000000..17f970234 --- /dev/null +++ b/hsmarkdown @@ -0,0 +1,5 @@ +#!/bin/sh +# hsmarkdown - intended as a drop-in replacement for Markdown.pl. +# Uses pandoc to convert from markdown to HTML, using --strict mode +# for maximum compatibility with official markdown syntax. +exec pandoc --from markdown --to html --strict -- "$@" diff --git a/html2markdown b/html2markdown new file mode 100755 index 000000000..0649e0478 --- /dev/null +++ b/html2markdown @@ -0,0 +1,221 @@ +#!/bin/sh -e +# converts HTML from a URL, file, or stdin to markdown +# uses an available program to fetch URL and tidy to normalize it first + +REQUIRED="tidy" +SYNOPSIS="converts HTML from a URL, file, or STDIN to markdown-formatted text." + +THIS=${0##*/} + +NEWLINE=' +' + +err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; } +errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; } + +usage () { + err "$1 - $2" # short description + err "See the $1(1) man page for usage." +} + +# Portable which(1). +pathfind () { + oldifs="$IFS"; IFS=':' + for _p in $PATH; do + if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then + IFS="$oldifs" + return 0 + fi + done + IFS="$oldifs" + return 1 +} + +for p in pandoc $REQUIRED; do + pathfind $p || { + err "You need '$p' to use this program!" + exit 1 + } +done + +CONF=$(pandoc --dump-args "$@" 2>&1) || { + errcode=$? + echo "$CONF" | sed -e '/^pandoc \[OPTIONS\] \[FILES\]/,$d' >&2 + [ $errcode -eq 2 ] && usage "$THIS" "$SYNOPSIS" + exit $errcode +} + +OUTPUT=$(echo "$CONF" | sed -ne '1p') +ARGS=$(echo "$CONF" | sed -e '1d') + + +grab_url_with () { + url="${1:?internal error: grab_url_with: url required}" + + shift + cmdline="$@" + + prog= + prog_opts= + if [ -n "$cmdline" ]; then + eval "set -- $cmdline" + prog=$1 + shift + prog_opts="$@" + fi + + if [ -z "$prog" ]; then + # Locate a sensible web grabber (note the order). + for p in wget lynx w3m curl links w3c; do + if pathfind $p; then + prog=$p + break + fi + done + + [ -n "$prog" ] || { + errn "$THIS: Couldn't find a program to fetch the file from URL " + err "(e.g. wget, w3m, lynx, w3c, or curl)." + return 1 + } + else + pathfind "$prog" || { + err "$THIS: No such web grabber '$prog' found; aborting." + return 1 + } + fi + + # Setup proper base options for known grabbers. + base_opts= + case "$prog" in + wget) base_opts="-O-" ;; + lynx) base_opts="-source" ;; + w3m) base_opts="-dump_source" ;; + curl) base_opts="" ;; + links) base_opts="-source" ;; + w3c) base_opts="-n -get" ;; + *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds." + esac + + err "$THIS: invoking '$prog $base_opts $prog_opts $url'..." + eval "set -- $base_opts $prog_opts" + $prog "$@" "$url" +} + +# Parse command-line arguments +parse_arguments () { + while [ $# -gt 0 ]; do + case "$1" in + --encoding=*) + wholeopt="$1" + # extract encoding from after = + encoding="${wholeopt#*=}" ;; + -e|--encoding|-encoding) + shift + encoding="$1" ;; + --grabber=*) + wholeopt="$1" + # extract encoding from after = + grabber="\"${wholeopt#*=}\"" ;; + -g|--grabber|-grabber) + shift + grabber="$1" ;; + *) + if [ -z "$argument" ]; then + argument="$1" + else + err "Warning: extra argument '$1' will be ignored." + fi ;; + esac + shift + done +} + +argument= +encoding= +grabber= + +oldifs="$IFS" +IFS=$NEWLINE +parse_arguments $ARGS +IFS="$oldifs" + +inurl= +if [ -n "$argument" ] && ! [ -f "$argument" ]; then + # Treat given argument as an URL. + inurl="$argument" +fi + +# As a security measure refuse to proceed if mktemp is not available. +pathfind mktemp || { err "Couldn't find 'mktemp'; aborting."; exit 1; } + +# Avoid issues with /tmp directory on Windows/Cygwin +cygwin= +cygwin=$(uname | sed -ne '/^CYGWIN/p') +if [ -n "$cygwin" ]; then + TMPDIR=. + export TMPDIR +fi + +THIS_TEMPDIR= +THIS_TEMPDIR="$(mktemp -d -t $THIS.XXXXXXXX)" || exit 1 +readonly THIS_TEMPDIR + +trap 'exitcode=$? + [ -z "$THIS_TEMPDIR" ] || rm -rf "$THIS_TEMPDIR" + exit $exitcode' 0 1 2 3 13 15 + +if [ -n "$inurl" ]; then + err "Attempting to fetch file from '$inurl'..." + + grabber_out=$THIS_TEMPDIR/grabber.out + grabber_log=$THIS_TEMPDIR/grabber.log + if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then + errn "grab_url_with failed" + if [ -f $grabber_log ]; then + err " with the following error log." + err + cat >&2 $grabber_log + else + err . + fi + exit 1 + fi + + argument="$grabber_out" +fi + +if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then + # Try to determine character encoding if not specified + # and input is not STDIN. + encoding=$( + head "$argument" | + LC_ALL=C tr 'A-Z' 'a-z' | + sed -ne '/ $htmlinput # read from STDIN +elif [ -f "$argument" ]; then + to_utf8 "$argument" > $htmlinput # read from file +else + err "File '$argument' not found." + exit 1 +fi + +if ! cat $htmlinput | pandoc --ignore-args -r html -w markdown "$@" ; then + err "Failed to parse HTML. Trying again with tidy..." + tidy -q -asxhtml -utf8 $htmlinput | \ + pandoc --ignore-args -r html -w markdown "$@" +fi diff --git a/markdown2pdf b/markdown2pdf new file mode 100755 index 000000000..ab0f3ae78 --- /dev/null +++ b/markdown2pdf @@ -0,0 +1,140 @@ +#!/bin/sh -e + +REQUIRED="pdflatex" +SYNOPSIS="converts markdown-formatted text to PDF, using pdflatex." + +THIS=${0##*/} + +NEWLINE=' +' + +err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; } +errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; } + +usage () { + err "$1 - $2" # short description + err "See the $1(1) man page for usage." +} + +# Portable which(1). +pathfind () { + oldifs="$IFS"; IFS=':' + for _p in $PATH; do + if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then + IFS="$oldifs" + return 0 + fi + done + IFS="$oldifs" + return 1 +} + +for p in pandoc $REQUIRED; do + pathfind $p || { + err "You need '$p' to use this program!" + exit 1 + } +done + +CONF=$(pandoc --dump-args "$@" 2>&1) || { + errcode=$? + echo "$CONF" | sed -e '/^pandoc \[OPTIONS\] \[FILES\]/,$d' >&2 + [ $errcode -eq 2 ] && usage "$THIS" "$SYNOPSIS" + exit $errcode +} + +OUTPUT=$(echo "$CONF" | sed -ne '1p') +ARGS=$(echo "$CONF" | sed -e '1d') + + +# As a security measure refuse to proceed if mktemp is not available. +pathfind mktemp || { err "Couldn't find 'mktemp'; aborting."; exit 1; } + +# Avoid issues with /tmp directory on Windows/Cygwin +cygwin= +cygwin=$(uname | sed -ne '/^CYGWIN/p') +if [ -n "$cygwin" ]; then + TMPDIR=. + export TMPDIR +fi + +THIS_TEMPDIR= +THIS_TEMPDIR="$(mktemp -d -t $THIS.XXXXXXXX)" || exit 1 +readonly THIS_TEMPDIR + +trap 'exitcode=$? + [ -z "$THIS_TEMPDIR" ] || rm -rf "$THIS_TEMPDIR" + exit $exitcode' 0 1 2 3 13 15 + +texname=output +logfile=$THIS_TEMPDIR/log + +pandoc -s -r markdown -w latex "$@" -o $THIS_TEMPDIR/$texname.tex + +if [ "$OUTPUT" = "-" ]; then + firstinfile="$(echo $ARGS | sed -ne '1p')" + firstinfilebase="${firstinfile%.*}" + destname="${firstinfilebase:-stdin}.pdf" +else + destname="$OUTPUT" +fi + +( + origdir=$(pwd) + cd $THIS_TEMPDIR + TEXINPUTS=$origdir:$TEXINPUTS: + export TEXINPUTS + finished=no + runs=0 + while [ $finished = "no" ]; do + pdflatex -interaction=batchmode $texname.tex >/dev/null || { + errcode=$? + err "${THIS}: pdfLaTeX failed with error code $errcode" + [ -f $texname.log ] && { + err "${THIS}: error context:" + sed -ne '/^!/,/^[[:space:]]*$/p' \ + -ne '/^[Ll]a[Tt]e[Xx] [Ww]arning/,/^[[:space:]]*$/p' \ + -ne '/^[Ee]rror/,/^[[:space:]]*$/p' $texname.log >&2 + if grep -q "File \`ucs.sty' not found" $texname.log; then + err "${THIS}: Please install the 'unicode' package from CTAN:" + err " http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/" + fi + if grep -q "File \`ulem.sty' not found" $texname.log; then + err "${THIS}: Please install the 'ulem' package from CTAN:" + err " http://www.ctan.org/tex-archive/macros/latex/contrib/misc/ulem.sty" + fi + } + exit $errcode + } + if [ $runs -lt 3 ] && + ((grep -q "LaTeX Warning: There were undefined references." $texname.log) || + (echo "$@" | grep -q -- "--toc\|--table-of-contents")); then + runs=$(($runs + 1)) + if grep -q "LaTeX Warning:.*[Cc]itation" $texname.log; then + bibtex $texname 2>&1 >bibtex.err + if [ $runs -gt 2 ]; then + if grep -q "error message" bibtex.err || + grep -q "Warning" bibtex.err; then + cat bibtex.err >&2 + fi + fi + fi + else + finished=yes + fi + done +) || exit $? + +is_target_exists= +if [ -f "$destname" ]; then + is_target_exists=1 + mv "$destname" "$destname~" +fi + +mv -f $THIS_TEMPDIR/$texname.pdf "$destname" + +errn "Created $destname" +[ -z "$is_target_exists" ] || { + errn " (previous file has been backed up as $destname~)" +} +err . diff --git a/wrappers/common.sh b/wrappers/common.sh deleted file mode 100644 index 9605f5940..000000000 --- a/wrappers/common.sh +++ /dev/null @@ -1,43 +0,0 @@ -THIS=${0##*/} - -NEWLINE=' -' - -err () { echo "$*" | fold -s -w ${COLUMNS:-110} >&2; } -errn () { printf "$*" | fold -s -w ${COLUMNS:-110} >&2; } - -usage () { - err "$1 - $2" # short description - err "See the $1(1) man page for usage." -} - -# Portable which(1). -pathfind () { - oldifs="$IFS"; IFS=':' - for _p in $PATH; do - if [ -x "$_p/$*" ] && [ -f "$_p/$*" ]; then - IFS="$oldifs" - return 0 - fi - done - IFS="$oldifs" - return 1 -} - -for p in pandoc $REQUIRED; do - pathfind $p || { - err "You need '$p' to use this program!" - exit 1 - } -done - -CONF=$(pandoc --dump-args "$@" 2>&1) || { - errcode=$? - echo "$CONF" | sed -e '/^pandoc \[OPTIONS\] \[FILES\]/,$d' >&2 - [ $errcode -eq 2 ] && usage "$THIS" "$SYNOPSIS" - exit $errcode -} - -OUTPUT=$(echo "$CONF" | sed -ne '1p') -ARGS=$(echo "$CONF" | sed -e '1d') - diff --git a/wrappers/hsmarkdown.in b/wrappers/hsmarkdown.in deleted file mode 100644 index 17f970234..000000000 --- a/wrappers/hsmarkdown.in +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -# hsmarkdown - intended as a drop-in replacement for Markdown.pl. -# Uses pandoc to convert from markdown to HTML, using --strict mode -# for maximum compatibility with official markdown syntax. -exec pandoc --from markdown --to html --strict -- "$@" diff --git a/wrappers/html2markdown.in b/wrappers/html2markdown.in deleted file mode 100644 index 0f4297128..000000000 --- a/wrappers/html2markdown.in +++ /dev/null @@ -1,162 +0,0 @@ -#!/bin/sh -e -# converts HTML from a URL, file, or stdin to markdown -# uses an available program to fetch URL and tidy to normalize it first - -REQUIRED="tidy" -SYNOPSIS="converts HTML from a URL, file, or STDIN to markdown-formatted text." - -### common.sh - -grab_url_with () { - url="${1:?internal error: grab_url_with: url required}" - - shift - cmdline="$@" - - prog= - prog_opts= - if [ -n "$cmdline" ]; then - eval "set -- $cmdline" - prog=$1 - shift - prog_opts="$@" - fi - - if [ -z "$prog" ]; then - # Locate a sensible web grabber (note the order). - for p in wget lynx w3m curl links w3c; do - if pathfind $p; then - prog=$p - break - fi - done - - [ -n "$prog" ] || { - errn "$THIS: Couldn't find a program to fetch the file from URL " - err "(e.g. wget, w3m, lynx, w3c, or curl)." - return 1 - } - else - pathfind "$prog" || { - err "$THIS: No such web grabber '$prog' found; aborting." - return 1 - } - fi - - # Setup proper base options for known grabbers. - base_opts= - case "$prog" in - wget) base_opts="-O-" ;; - lynx) base_opts="-source" ;; - w3m) base_opts="-dump_source" ;; - curl) base_opts="" ;; - links) base_opts="-source" ;; - w3c) base_opts="-n -get" ;; - *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds." - esac - - err "$THIS: invoking '$prog $base_opts $prog_opts $url'..." - eval "set -- $base_opts $prog_opts" - $prog "$@" "$url" -} - -# Parse command-line arguments -parse_arguments () { - while [ $# -gt 0 ]; do - case "$1" in - --encoding=*) - wholeopt="$1" - # extract encoding from after = - encoding="${wholeopt#*=}" ;; - -e|--encoding|-encoding) - shift - encoding="$1" ;; - --grabber=*) - wholeopt="$1" - # extract encoding from after = - grabber="\"${wholeopt#*=}\"" ;; - -g|--grabber|-grabber) - shift - grabber="$1" ;; - *) - if [ -z "$argument" ]; then - argument="$1" - else - err "Warning: extra argument '$1' will be ignored." - fi ;; - esac - shift - done -} - -argument= -encoding= -grabber= - -oldifs="$IFS" -IFS=$NEWLINE -parse_arguments $ARGS -IFS="$oldifs" - -inurl= -if [ -n "$argument" ] && ! [ -f "$argument" ]; then - # Treat given argument as an URL. - inurl="$argument" -fi - -### tempdir.sh - -if [ -n "$inurl" ]; then - err "Attempting to fetch file from '$inurl'..." - - grabber_out=$THIS_TEMPDIR/grabber.out - grabber_log=$THIS_TEMPDIR/grabber.log - if ! grab_url_with "$inurl" "$grabber" 1>$grabber_out 2>$grabber_log; then - errn "grab_url_with failed" - if [ -f $grabber_log ]; then - err " with the following error log." - err - cat >&2 $grabber_log - else - err . - fi - exit 1 - fi - - argument="$grabber_out" -fi - -if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then - # Try to determine character encoding if not specified - # and input is not STDIN. - encoding=$( - head "$argument" | - LC_ALL=C tr 'A-Z' 'a-z' | - sed -ne '/ $htmlinput # read from STDIN -elif [ -f "$argument" ]; then - to_utf8 "$argument" > $htmlinput # read from file -else - err "File '$argument' not found." - exit 1 -fi - -if ! cat $htmlinput | pandoc --ignore-args -r html -w markdown "$@" ; then - err "Failed to parse HTML. Trying again with tidy..." - tidy -q -asxhtml -utf8 $htmlinput | \ - pandoc --ignore-args -r html -w markdown "$@" -fi diff --git a/wrappers/markdown2pdf.in b/wrappers/markdown2pdf.in deleted file mode 100644 index 37be69469..000000000 --- a/wrappers/markdown2pdf.in +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/sh -e - -REQUIRED="pdflatex" -SYNOPSIS="converts markdown-formatted text to PDF, using pdflatex." - -### common.sh - -### tempdir.sh - -texname=output -logfile=$THIS_TEMPDIR/log - -pandoc -s -r markdown -w latex "$@" -o $THIS_TEMPDIR/$texname.tex - -if [ "$OUTPUT" = "-" ]; then - firstinfile="$(echo $ARGS | sed -ne '1p')" - firstinfilebase="${firstinfile%.*}" - destname="${firstinfilebase:-stdin}.pdf" -else - destname="$OUTPUT" -fi - -( - origdir=$(pwd) - cd $THIS_TEMPDIR - TEXINPUTS=$origdir:$TEXINPUTS: - export TEXINPUTS - finished=no - runs=0 - while [ $finished = "no" ]; do - pdflatex -interaction=batchmode $texname.tex >/dev/null || { - errcode=$? - err "${THIS}: pdfLaTeX failed with error code $errcode" - [ -f $texname.log ] && { - err "${THIS}: error context:" - sed -ne '/^!/,/^[[:space:]]*$/p' \ - -ne '/^[Ll]a[Tt]e[Xx] [Ww]arning/,/^[[:space:]]*$/p' \ - -ne '/^[Ee]rror/,/^[[:space:]]*$/p' $texname.log >&2 - if grep -q "File \`ucs.sty' not found" $texname.log; then - err "${THIS}: Please install the 'unicode' package from CTAN:" - err " http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/" - fi - if grep -q "File \`ulem.sty' not found" $texname.log; then - err "${THIS}: Please install the 'ulem' package from CTAN:" - err " http://www.ctan.org/tex-archive/macros/latex/contrib/misc/ulem.sty" - fi - } - exit $errcode - } - if [ $runs -lt 3 ] && - ((grep -q "LaTeX Warning: There were undefined references." $texname.log) || - (echo "$@" | grep -q -- "--toc\|--table-of-contents")); then - runs=$(($runs + 1)) - if grep -q "LaTeX Warning:.*[Cc]itation" $texname.log; then - bibtex $texname 2>&1 >bibtex.err - if [ $runs -gt 2 ]; then - if grep -q "error message" bibtex.err || - grep -q "Warning" bibtex.err; then - cat bibtex.err >&2 - fi - fi - fi - else - finished=yes - fi - done -) || exit $? - -is_target_exists= -if [ -f "$destname" ]; then - is_target_exists=1 - mv "$destname" "$destname~" -fi - -mv -f $THIS_TEMPDIR/$texname.pdf "$destname" - -errn "Created $destname" -[ -z "$is_target_exists" ] || { - errn " (previous file has been backed up as $destname~)" -} -err . diff --git a/wrappers/tempdir.sh b/wrappers/tempdir.sh deleted file mode 100644 index f25ae7f51..000000000 --- a/wrappers/tempdir.sh +++ /dev/null @@ -1,18 +0,0 @@ -# As a security measure refuse to proceed if mktemp is not available. -pathfind mktemp || { err "Couldn't find 'mktemp'; aborting."; exit 1; } - -# Avoid issues with /tmp directory on Windows/Cygwin -cygwin= -cygwin=$(uname | sed -ne '/^CYGWIN/p') -if [ -n "$cygwin" ]; then - TMPDIR=. - export TMPDIR -fi - -THIS_TEMPDIR= -THIS_TEMPDIR="$(mktemp -d -t $THIS.XXXXXXXX)" || exit 1 -readonly THIS_TEMPDIR - -trap 'exitcode=$? - [ -z "$THIS_TEMPDIR" ] || rm -rf "$THIS_TEMPDIR" - exit $exitcode' 0 1 2 3 13 15 -- cgit v1.2.3