aboutsummaryrefslogtreecommitdiff
path: root/wrappers
diff options
context:
space:
mode:
author fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-11-03 22:14:03 +0000
committer fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-11-03 22:14:03 +0000
commit 4a841bfc5464907adea4cdd655485565565b40ae (patch)
tree 36c0a21e3639614c8d25b5fb1909c32d0ab11dcd /wrappers
parent 3116d30133196e1bb258f7e74e03d4a85f3b21ae (diff)
download pandoc-4a841bfc5464907adea4cdd655485565565b40ae.tar.gz
Use template haskell to avoid the need for templates:
+ Added library Text.Pandoc.Include, with a template haskell function $(includeStrFrom fname) to include a file as a string constant at compile time.
+ This removes the need for the 'templates' directory or Makefile target. These have been removed.
+ The base source directory has been changed from src to .
+ A new 'data' directory has been added, containing the ASCIIMathML.js script, writer headers, and S5 files.
+ The src/wrappers directory has been moved to 'wrappers'.
+ The Text.Pandoc.ASCIIMathML library is no longer needed, since Text.Pandoc.Writers.HTML can use includeStrFrom to include the ASCIIMathML.js code directly. It has been removed.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1063 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'wrappers')
-rw-r--r-- wrappers/common.sh 43
-rw-r--r-- wrappers/hsmarkdown.in 5
-rw-r--r-- wrappers/html2markdown.in 162
-rw-r--r-- wrappers/markdown2pdf.in 81
-rw-r--r-- wrappers/tempdir.sh 18
5 files changed, 309 insertions, 0 deletions
diff --git a/wrappers/common.sh b/wrappers/common.sh
new file mode 100644
index 000000000..9605f5940
--- /dev/null
+++ b/wrappers/common.sh
@@ -0,0 +1,43 @@
+THIS=${0##*/}  # name this script was invoked as, with any leading directories stripped
+# NEWLINE holds exactly one line feed; the quoted string deliberately spans two lines.
+NEWLINE='
+'
+
+err () { printf '%s\n' "$*" | fold -s -w "${COLUMNS:-110}" >&2; }  # print the message plus a newline on stderr, wrapped to the terminal width; printf (not echo) so text such as "-n" or backslashes is printed verbatim
+errn () { printf '%s' "$*" | fold -s -w "${COLUMNS:-110}" >&2; }  # like err but without the trailing newline; the message is passed as data, never as the printf format
+
+usage () {  # usage NAME SYNOPSIS: print a two-line usage hint through err
+    err "${1} - ${2}"
+    err "See the ${1}(1) man page for usage."
+}
+
+# Portable which(1): return 0 iff "$*" names an executable regular file in $PATH.
+pathfind () {
+    oldifs="$IFS"; IFS=':'
+    for _p in $PATH; do
+        # An empty PATH entry denotes the current directory, not "/".
+        if [ -x "${_p:-.}/$*" ] && [ -f "${_p:-.}/$*" ]; then
+            IFS="$oldifs"; return 0
+        fi
+    done
+    IFS="$oldifs"
+    return 1
+}
+
+for p in pandoc $REQUIRED; do  # pandoc plus every program named in $REQUIRED must be found on PATH
+ pathfind $p || {
+ err "You need '$p' to use this program!"
+ exit 1
+ }
+done
+# Run pandoc --dump-args on the script's arguments, capturing stdout and stderr together in $CONF.
+CONF=$(pandoc --dump-args "$@" 2>&1) || {
+ errcode=$?
+ echo "$CONF" | sed -e '/^pandoc \[OPTIONS\] \[FILES\]/,$d' >&2  # show the output only up to the line starting "pandoc [OPTIONS] [FILES]"
+ [ $errcode -eq 2 ] && usage "$THIS" "$SYNOPSIS"
+ exit $errcode
+}
+# The first line of $CONF becomes $OUTPUT; all remaining lines become $ARGS.
+OUTPUT=$(echo "$CONF" | sed -ne '1p')
+ARGS=$(echo "$CONF" | sed -e '1d')
+
diff --git a/wrappers/hsmarkdown.in b/wrappers/hsmarkdown.in
new file mode 100644
index 000000000..17f970234
--- /dev/null
+++ b/wrappers/hsmarkdown.in
@@ -0,0 +1,5 @@
+#!/bin/sh
+# hsmarkdown - intended as a drop-in replacement for Markdown.pl.
+# Uses pandoc to convert from markdown to HTML, using --strict mode
+# for maximum compatibility with official markdown syntax.
+exec pandoc --from markdown --to html --strict -- "$@"  # exec: pandoc replaces this shell; the caller's arguments follow '--', presumably so they are read as file names rather than options
diff --git a/wrappers/html2markdown.in b/wrappers/html2markdown.in
new file mode 100644
index 000000000..0f4297128
--- /dev/null
+++ b/wrappers/html2markdown.in
@@ -0,0 +1,162 @@
+#!/bin/sh -e
+# converts HTML from a URL, file, or stdin to markdown
+# uses an available program to fetch a URL; tidy is only used to normalize the
+# HTML for a second attempt when pandoc fails to parse it as-is
+REQUIRED="tidy"  # extra programs that must be found on PATH before anything else runs
+SYNOPSIS="converts HTML from a URL, file, or STDIN to markdown-formatted text."
+# NOTE(review): the marker below is presumably replaced by the contents of common.sh at build time - confirm
+### common.sh
+
+grab_url_with () {  # grab_url_with URL [GRABBER]: run GRABBER (or the first known grabber found on PATH) on URL; returns 1 if no usable program is found
+ url="${1:?internal error: grab_url_with: url required}"
+ # Everything after the URL is the optional grabber command line.
+ shift
+ cmdline="$@"
+ # Split that command line into the program ($prog) and its options ($prog_opts).
+ prog=
+ prog_opts=
+ if [ -n "$cmdline" ]; then
+ eval "set -- $cmdline"  # NOTE(review): eval re-parses caller-supplied text as shell code; only safe for trusted input
+ prog=$1
+ shift
+ prog_opts="$@"
+ fi
+ # Without an explicit grabber pick the first available one; otherwise insist on the requested one.
+ if [ -z "$prog" ]; then
+ # Locate a sensible web grabber (note the order).
+ for p in wget lynx w3m curl links w3c; do
+ if pathfind $p; then
+ prog=$p
+ break
+ fi
+ done
+ # Nothing found: report the problem as one line (errn, then err) and fail.
+ [ -n "$prog" ] || {
+ errn "$THIS: Couldn't find a program to fetch the file from URL "
+ err "(e.g. wget, w3m, lynx, w3c, or curl)."
+ return 1
+ }
+ else
+ pathfind "$prog" || {
+ err "$THIS: No such web grabber '$prog' found; aborting."
+ return 1
+ }
+ fi
+ # Each known grabber gets its own fixed options, placed before the user's options.
+ # Setup proper base options for known grabbers.
+ base_opts=
+ case "$prog" in
+ wget) base_opts="-O-" ;;
+ lynx) base_opts="-source" ;;
+ w3m) base_opts="-dump_source" ;;
+ curl) base_opts="" ;;
+ links) base_opts="-source" ;;
+ w3c) base_opts="-n -get" ;;
+ *) err "$THIS: unhandled web grabber '$prog'; hope it succeeds."  # unknown program: warn and run it with the user's options only
+ esac
+ # Announce the command on stderr, rebuild the option list as separate words, then run the grabber.
+ err "$THIS: invoking '$prog $base_opts $prog_opts $url'..."
+ eval "set -- $base_opts $prog_opts"  # NOTE(review): second eval over the same caller-supplied option text
+ $prog "$@" "$url"
+}
+
+# Parse command-line arguments: sets $encoding, $grabber and $argument
+# (the first non-option word; any further ones are ignored with a warning).
+parse_arguments () {
+    while [ $# -gt 0 ]; do
+        case "$1" in
+            --encoding=*)
+                # extract encoding from after =
+                encoding="${1#*=}" ;;
+            -e|--encoding|-encoding)
+                [ $# -ge 2 ] || { err "$THIS: option '$1' requires an argument."; return 1; }
+                shift; encoding="$1" ;;
+            --grabber=*)
+                # extract grabber command from after =; stored without added quotes
+                # so it is split into program and options just like '-g CMD'
+                grabber="${1#*=}" ;;
+            -g|--grabber|-grabber)
+                [ $# -ge 2 ] || { err "$THIS: option '$1' requires an argument."; return 1; }
+                shift; grabber="$1" ;;
+            *)
+                if [ -z "$argument" ]; then
+                    argument="$1"
+                else
+                    err "Warning: extra argument '$1' will be ignored."
+                fi ;;
+        esac
+        shift
+    done
+}
+
+argument=  # these three are filled in by parse_arguments below
+encoding=
+grabber=
+# Split $ARGS on newlines only, so that each line is passed as one argument.
+oldifs="$IFS"
+IFS=$NEWLINE
+parse_arguments $ARGS
+IFS="$oldifs"
+# A non-empty argument that is not an existing regular file is treated as a URL.
+inurl=
+if [ -n "$argument" ] && ! [ -f "$argument" ]; then
+ # Treat given argument as a URL.
+ inurl="$argument"
+fi
+
+### tempdir.sh
+
+if [ -n "$inurl" ]; then
+    err "Attempting to fetch file from '$inurl'..."
+    # Grabber stdout goes to grabber.out, its stderr to grabber.log; paths are quoted so a temp dir with spaces works.
+    grabber_out="$THIS_TEMPDIR/grabber.out"
+    grabber_log="$THIS_TEMPDIR/grabber.log"
+    if ! grab_url_with "$inurl" "$grabber" 1>"$grabber_out" 2>"$grabber_log"; then
+        errn "grab_url_with failed"
+        if [ -f "$grabber_log" ]; then
+            err " with the following error log."
+            err
+            cat >&2 "$grabber_log"
+        else
+            err .
+        fi
+        exit 1
+    fi
+    # From here on the downloaded copy is handled like a local input file.
+    argument="$grabber_out"
+fi
+
+if [ -z "$encoding" ] && [ "x$argument" != "x" ]; then
+ # Try to determine character encoding if not specified
+ # and input is not STDIN.
+ encoding=$(
+ head "$argument" |  # only the leading lines that head prints by default are examined
+ LC_ALL=C tr 'A-Z' 'a-z' |  # lower-case the text so the sed patterns need only match lower case
+ sed -ne '/<meta .*content-type.*charset=/ {
+ s/.*charset=["'\'']*\([-a-zA-Z0-9]*\).*["'\'']*/\1/p
+ }'
+ )
+fi
+
+if [ -n "$encoding" ] && pathfind iconv; then  # to_utf8 [FILE]: copy FILE (or stdin) to stdout, converted from $encoding when iconv is usable
+    to_utf8 () { iconv -f "$encoding" -t utf-8 "$@"; }
+else # assume UTF-8; functions rather than aliases, because bash outside POSIX mode does not expand aliases in scripts
+    to_utf8 () { cat "$@"; }
+fi
+
+htmlinput="$THIS_TEMPDIR/htmlinput"
+# Stage the input, passed through to_utf8, in the temporary directory.
+if [ -z "$argument" ]; then
+    to_utf8 > "$htmlinput"              # read from STDIN
+elif [ -f "$argument" ]; then
+    to_utf8 "$argument" > "$htmlinput"  # read from file
+else
+    err "File '$argument' not found."
+    exit 1
+fi
+# Convert the staged HTML; if pandoc fails, normalize it with tidy and try once more.
+if ! pandoc --ignore-args -r html -w markdown "$@" < "$htmlinput"; then
+    err "Failed to parse HTML. Trying again with tidy..."
+    tidy -q -asxhtml -utf8 "$htmlinput" | \
+        pandoc --ignore-args -r html -w markdown "$@"
+fi
diff --git a/wrappers/markdown2pdf.in b/wrappers/markdown2pdf.in
new file mode 100644
index 000000000..37be69469
--- /dev/null
+++ b/wrappers/markdown2pdf.in
@@ -0,0 +1,81 @@
+#!/bin/sh -e
+# markdown2pdf - converts markdown-formatted text to PDF by running pandoc and then pdflatex
+REQUIRED="pdflatex"
+SYNOPSIS="converts markdown-formatted text to PDF, using pdflatex."
+
+### common.sh
+
+### tempdir.sh
+
+texname=output
+logfile="$THIS_TEMPDIR/log"
+# Render the input to a standalone LaTeX file inside the temporary directory.
+pandoc -s -r markdown -w latex "$@" -o "$THIS_TEMPDIR/$texname.tex"
+# Without an explicit output file, name the PDF after the first input file (or "stdin").
+if [ "$OUTPUT" = "-" ]; then
+    firstinfile="$(printf '%s\n' "$ARGS" | sed -ne '1p')"  # $ARGS must stay quoted: unquoted, all its lines are joined into one and line 1 is no longer the first file
+    firstinfilebase="${firstinfile%.*}"
+    destname="${firstinfilebase:-stdin}.pdf"
+else
+    destname="$OUTPUT"
+fi
+
+# Run pdflatex in the temp dir, repeating (at most three extra passes) while the
+# log reports undefined references or a table of contents was requested.
+(
+    origdir=$(pwd)
+    cd "$THIS_TEMPDIR" || exit 1
+    TEXINPUTS="$origdir:$TEXINPUTS:"; export TEXINPUTS
+    finished=no
+    runs=0
+    while [ "$finished" = "no" ]; do
+        pdflatex -interaction=batchmode "$texname.tex" >/dev/null || {
+            errcode=$?
+            err "${THIS}: pdfLaTeX failed with error code $errcode"
+            [ -f "$texname.log" ] && {
+                err "${THIS}: error context:"
+                sed -ne '/^!/,/^[[:space:]]*$/p' \
+                    -ne '/^[Ll]a[Tt]e[Xx] [Ww]arning/,/^[[:space:]]*$/p' \
+                    -ne '/^[Ee]rror/,/^[[:space:]]*$/p' "$texname.log" >&2
+                if grep -q "File \`ucs.sty' not found" "$texname.log"; then
+                    err "${THIS}: Please install the 'unicode' package from CTAN:"
+                    err " http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/"
+                fi
+                if grep -q "File \`ulem.sty' not found" "$texname.log"; then
+                    err "${THIS}: Please install the 'ulem' package from CTAN:"
+                    err " http://www.ctan.org/tex-archive/macros/latex/contrib/misc/ulem.sty"
+                fi
+            }
+            exit $errcode
+        }
+        # Brace groups replace "((...) || (...))", which a shell may take for arithmetic.
+        if [ $runs -lt 3 ] &&
+           { grep -q "LaTeX Warning: There were undefined references." "$texname.log" ||
+             printf '%s\n' "$@" | grep -q -e '--toc' -e '--table-of-contents'; }; then
+            runs=$(($runs + 1))
+            if grep -q "LaTeX Warning:.*[Cc]itation" "$texname.log"; then
+                # stdout is redirected first so that stderr ends up in bibtex.err as well
+                bibtex "$texname" >bibtex.err 2>&1
+                if [ $runs -gt 2 ] && grep -q -e "error message" -e "Warning" bibtex.err; then
+                    cat bibtex.err >&2
+                fi
+            fi
+        else
+            finished=yes
+        fi
+    done
+) || exit $?
+
+is_target_exists=  # set to 1 when an existing destination file had to be backed up
+if [ -f "$destname" ]; then
+    is_target_exists=1
+    mv -- "$destname" "$destname~"
+fi
+# Move the finished PDF out of the temp dir; '--' protects file names starting with '-'.
+mv -f -- "$THIS_TEMPDIR/$texname.pdf" "$destname"
+# errn prints no newline, so the pieces below form a single report line on stderr.
+errn "Created $destname"
+[ -z "$is_target_exists" ] || {
+    errn " (previous file has been backed up as $destname~)"
+}
+err .
diff --git a/wrappers/tempdir.sh b/wrappers/tempdir.sh
new file mode 100644
index 000000000..f25ae7f51
--- /dev/null
+++ b/wrappers/tempdir.sh
@@ -0,0 +1,18 @@
+# As a security measure refuse to proceed if mktemp is not available.
+pathfind mktemp || { err "Couldn't find 'mktemp'; aborting."; exit 1; }
+
+# Avoid issues with /tmp directory on Windows/Cygwin
+cygwin=
+cygwin=$(uname | sed -ne '/^CYGWIN/p')
+if [ -n "$cygwin" ]; then
+    TMPDIR=.
+    export TMPDIR
+fi
+# Private scratch directory named after the script; the template is quoted so
+# that a script name containing whitespace cannot split it into several words.
+THIS_TEMPDIR="$(mktemp -d -t "$THIS.XXXXXXXX")" || exit 1
+readonly THIS_TEMPDIR
+# Remove the scratch directory on exit and on HUP/INT/QUIT/PIPE/TERM, keeping the exit status.
+trap 'exitcode=$?
+    [ -z "$THIS_TEMPDIR" ] || rm -rf -- "$THIS_TEMPDIR"
+    exit $exitcode' EXIT HUP INT QUIT PIPE TERM