summaryrefslogtreecommitdiff
path: root/bzip2
diff options
context:
space:
mode:
author Igor Pashev <igor.pashev@nexenta.com> 2012-12-12 17:50:27 +0400
committer Igor Pashev <igor.pashev@nexenta.com> 2012-12-12 17:51:30 +0400
commit cd2f7b4ee7b23bb95bf9a2c30fdb37e7d9421447 (patch)
tree f893955e37819830976d38977a758cda2ad8be43 /bzip2
parent 1cb3843973cf6a82a50866a30a66427ebf718b53 (diff)
download cibs-pkgs-cd2f7b4ee7b23bb95bf9a2c30fdb37e7d9421447.tar.gz
BZip2
Diffstat (limited to 'bzip2')
-rw-r--r--bzip2/Makefile31
-rw-r--r--bzip2/bzip2.p5m29
-rw-r--r--bzip2/libbz2-1.0.p5m12
-rw-r--r--bzip2/libbz2.p5m14
-rw-r--r--bzip2/patches/10-bzip2.1.patch367
-rw-r--r--bzip2/patches/20-legacy.patch6164
6 files changed, 6617 insertions, 0 deletions
diff --git a/bzip2/Makefile b/bzip2/Makefile
new file mode 100644
index 0000000..c7cc23f
--- /dev/null
+++ b/bzip2/Makefile
@@ -0,0 +1,47 @@
+# CIBS package description for bzip2.
+# NOTE(review): the rule files included below are not visible here; judging by
+# their names they provide IPS publishing, patching, archive download and the
+# 32-/64-bit build variants -- confirm against the CIBS documentation.
+include /usr/share/cibs/rules/ips.mk
+include /usr/share/cibs/rules/patch.mk
+include /usr/share/cibs/rules/archive.mk
+include /usr/share/cibs/rules/32.mk
+include /usr/share/cibs/rules/64.mk
+include /usr/share/cibs/rules/copy.mk
+
+# Package metadata; the *.p5m manifests expand these as $(summary), $(license)
+# and $(license-file).
+summary := high-quality block-sorting file compressor
+license := Bzip2
+license-file := LICENSE
+
+# Nothing is appended yet. Written without a trailing backslash so that the
+# next assignment can never be swallowed as a continuation line.
+build-depends +=
+
+# Upstream source: name, home page and the tarball to fetch.
+name := bzip2
+home := http://www.bzip.org/
+version := 1.0.6
+archive := $(name)-$(version).tar.gz
+download := http://www.bzip.org/$(version)/$(archive)
+# Expected digests and byte size of $(archive).
+checksum := \
+	md5:00b516f4704d4a7cb50a1d97e6e8e15b \
+	sha1:3f89f861209ce81a6bab1fd1998c0ef311712002 \
+	sha256:a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd \
+	size:782025
+
+# Build one variant (% is the variant stem): run the upstream Makefile inside
+# $(builddir) with the selected compiler, then record success in the stamp.
+# NOTE(review): $(builddir), $(CC) and $(make-jobs) are presumably set per
+# variant by the included rules -- confirm.
+build-%-stamp:
+	cd "$(builddir)" && $(MAKE) $(make-jobs:%=-j%) CC="$(CC)"
+	touch $@
+
+# No files are copied here: the manifests pick binaries straight out of
+# $(builddir.32)/$(builddir.64), so only $(protodir) has to exist.
+install-%-stamp:
+	mkdir -p "$(protodir)"
+	touch $@
diff --git a/bzip2/bzip2.p5m b/bzip2/bzip2.p5m
new file mode 100644
index 0000000..cf9e854
--- /dev/null
+++ b/bzip2/bzip2.p5m
@@ -0,0 +1,29 @@
+set name=pkg.fmri value=pkg:/compress/bzip2@$(ips-version)
+set name=pkg.summary value="$(summary)"
+set name=info.upstream-url value="$(home)"
+set name=info.source-url value="$(download)"
+
+license $(license-file) license="$(license)"
+
+# Executables are taken from the 64-bit build directory only.
+# XXX 64-bit is default
+file $(builddir.64)/bzip2 path=usr/bin/bzip2
+file $(builddir.64)/bzip2recover path=usr/bin/bzip2recover
+link target=bzip2 path=usr/bin/bzcat
+link target=bzip2 path=usr/bin/bunzip2
+
+# Man pages are from the source directory; the other names link to bzip2.1:
+file bzip2.1 path=usr/share/man/man1/bzip2.1
+link target=bzip2.1 path=usr/share/man/man1/bzcat.1
+link target=bzip2.1 path=usr/share/man/man1/bunzip2.1
+link target=bzip2.1 path=usr/share/man/man1/bzip2recover.1
+
+# Shell scripts and their man page, taken from the source directory;
+# bzfgrep/bzegrep link to bzgrep, bzless (and bzless.1) to bzmore (bzmore.1):
+file bzgrep path=usr/bin/bzgrep
+file bzmore path=usr/bin/bzmore
+file bzmore.1 path=usr/share/man/man1/bzmore.1
+link target=bzgrep path=usr/bin/bzfgrep
+link target=bzgrep path=usr/bin/bzegrep
+link target=bzmore path=usr/bin/bzless
+link target=bzmore.1 path=usr/share/man/man1/bzless.1
diff --git a/bzip2/libbz2-1.0.p5m b/bzip2/libbz2-1.0.p5m
new file mode 100644
index 0000000..d9902e0
--- /dev/null
+++ b/bzip2/libbz2-1.0.p5m
@@ -0,0 +1,12 @@
+set name=pkg.fmri value=pkg:/library/libbz2-1.0@$(ips-version)
+set name=pkg.summary value="$(summary), shared library"
+set name=info.upstream-url value="$(home)"
+set name=info.source-url value="$(download)"
+
+license $(license-file) license="$(license)"
+# One copy of the shared library per build: 32-bit in usr/lib, 64-bit below it.
+file $(builddir.32)/libbz2.so.1.0.4 path=usr/lib/libbz2.so.1.0.4
+file $(builddir.64)/libbz2.so.1.0.4 path=usr/lib/$(mach64)/libbz2.so.1.0.4
+# libbz2.so.1.0 links next to each copy, pointing at the versioned file.
+link target=libbz2.so.1.0.4 path=usr/lib/libbz2.so.1.0
+link target=libbz2.so.1.0.4 path=usr/lib/$(mach64)/libbz2.so.1.0
diff --git a/bzip2/libbz2.p5m b/bzip2/libbz2.p5m
new file mode 100644
index 0000000..5b469b5
--- /dev/null
+++ b/bzip2/libbz2.p5m
@@ -0,0 +1,14 @@
+set name=pkg.fmri value=pkg:/library/libbz2@$(ips-version)
+set name=pkg.summary value="$(summary)"
+set name=info.upstream-url value="$(home)"
+set name=info.source-url value="$(download)"
+
+license $(license-file) license="$(license)"
+# Depend on the shared-library package built from this same version.
+depend fmri=pkg:/library/libbz2-1.0@$(ips-version) type=require
+depend fmri=pkg:/library/libbz2-1.0@$(ips-version) type=incorporate
+# Header plus unversioned libbz2.so links to the libbz2.so.1.0 links.
+file bzlib.h path=usr/include/bzlib.h
+link target=libbz2.so.1.0 path=usr/lib/libbz2.so
+link target=libbz2.so.1.0 path=usr/lib/$(mach64)/libbz2.so
+
diff --git a/bzip2/patches/10-bzip2.1.patch b/bzip2/patches/10-bzip2.1.patch
new file mode 100644
index 0000000..6f121d6
--- /dev/null
+++ b/bzip2/patches/10-bzip2.1.patch
@@ -0,0 +1,367 @@
+diff --git a/bzip2.1 b/bzip2.1
+index ce3a78e..c34056c 100644
+--- a/bzip2.1
++++ b/bzip2.1
+@@ -1,4 +1,3 @@
+-.PU
+ .TH bzip2 1
+ .SH NAME
+ bzip2, bunzip2 \- a block-sorting file compressor, v1.0.6
+@@ -18,13 +17,13 @@ bzip2recover \- recovers data from damaged bzip2 files
+ .br
+ .B bunzip2
+ .RB [ " \-fkvsVL " ]
+-[
++[
+ .I "filenames \&..."
+ ]
+ .br
+ .B bzcat
+ .RB [ " \-s " ]
+-[
++[
+ .I "filenames \&..."
+ ]
+ .br
+@@ -39,15 +38,15 @@ generally considerably better than that achieved by more conventional
+ LZ77/LZ78-based compressors, and approaches the performance of the PPM
+ family of statistical compressors.
+
+-The command-line options are deliberately very similar to
+-those of
+-.I GNU gzip,
++The command-line options are deliberately very similar to
++those of
++.I GNU gzip,
+ but they are not identical.
+
+ .I bzip2
+ expects a list of file names to accompany the
+ command-line flags. Each file is replaced by a compressed version of
+-itself, with the name "original_name.bz2".
++itself, with the name "original_name.bz2".
+ Each compressed file
+ has the same modification date, permissions, and, when possible,
+ ownership as the corresponding original, so that these properties can
+@@ -74,13 +73,13 @@ incomprehensible and therefore pointless.
+
+ .I bunzip2
+ (or
+-.I bzip2 \-d)
++.I bzip2 \-d)
+ decompresses all
+-specified files. Files which were not created by
++specified files. Files which were not created by
+ .I bzip2
+-will be detected and ignored, and a warning issued.
++will be detected and ignored, and a warning issued.
+ .I bzip2
+-attempts to guess the filename for the decompressed file
++attempts to guess the filename for the decompressed file
+ from that of the compressed file as follows:
+
+ filename.bz2 becomes filename
+@@ -89,13 +88,13 @@ from that of the compressed file as follows:
+ filename.tbz becomes filename.tar
+ anyothername becomes anyothername.out
+
+-If the file does not end in one of the recognised endings,
+-.I .bz2,
+-.I .bz,
++If the file does not end in one of the recognised endings,
++.I .bz2,
++.I .bz,
+ .I .tbz2
+ or
+-.I .tbz,
+-.I bzip2
++.I .tbz,
++.I bzip2
+ complains that it cannot
+ guess the name of the original file, and uses the original name
+ with
+@@ -103,25 +102,25 @@ with
+ appended.
+
+ As with compression, supplying no
+-filenames causes decompression from
++filenames causes decompression from
+ standard input to standard output.
+
+-.I bunzip2
++.I bunzip2
+ will correctly decompress a file which is the
+ concatenation of two or more compressed files. The result is the
+ concatenation of the corresponding uncompressed files. Integrity
+-testing (\-t)
+-of concatenated
++testing (\-t)
++of concatenated
+ compressed files is also supported.
+
+ You can also compress or decompress files to the standard output by
+ giving the \-c flag. Multiple files may be compressed and
+ decompressed like this. The resulting outputs are fed sequentially to
+-stdout. Compression of multiple files
++stdout. Compression of multiple files
+ in this manner generates a stream
+ containing multiple compressed file representations. Such a stream
+ can be decompressed correctly only by
+-.I bzip2
++.I bzip2
+ version 0.9.0 or
+ later. Earlier versions of
+ .I bzip2
+@@ -130,7 +129,7 @@ the first file in the stream.
+
+ .I bzcat
+ (or
+-.I bzip2 -dc)
++.I bzip2 -dc)
+ decompresses all specified files to
+ the standard output.
+
+@@ -140,10 +139,10 @@ will read arguments from the environment variables
+ and
+ .I BZIP,
+ in that order, and will process them
+-before any arguments read from the command line. This gives a
++before any arguments read from the command line. This gives a
+ convenient way to supply default arguments.
+
+-Compression is always performed, even if the compressed
++Compression is always performed, even if the compressed
+ file is slightly
+ larger than the original. Files of less than about one hundred bytes
+ tend to get larger, since the compression mechanism has a constant
+@@ -151,9 +150,8 @@ overhead in the region of 50 bytes. Random data (including the output
+ of most file compressors) is coded at about 8.05 bits per byte, giving
+ an expansion of around 0.5%.
+
+-As a self-check for your protection,
+-.I
+-bzip2
++As a self-check for your protection,
++.I bzip2
+ uses 32-bit CRCs to
+ make sure that the decompressed version of a file is identical to the
+ original. This guards against corruption of the compressed data, and
+@@ -163,9 +161,9 @@ against undetected bugs in
+ chances of data corruption going undetected is microscopic, about one
+ chance in four billion for each file processed. Be aware, though, that
+ the check occurs upon decompression, so it can only tell you that
+-something is wrong. It can't help you
++something is wrong. It can't help you
+ recover the original uncompressed
+-data. You can use
++data. You can use
+ .I bzip2recover
+ to try to recover data from
+ damaged files.
+@@ -183,15 +181,15 @@ to panic.
+ Compress or decompress to standard output.
+ .TP
+ .B \-d --decompress
+-Force decompression.
+-.I bzip2,
+-.I bunzip2
++Force decompression.
++.I bzip2,
++.I bunzip2
+ and
+-.I bzcat
++.I bzcat
+ are
+ really the same program, and the decision about what actions to take is
+ done on the basis of which name is used. This flag overrides that
+-mechanism, and forces
++mechanism, and forces
+ .I bzip2
+ to decompress.
+ .TP
+@@ -205,10 +203,10 @@ This really performs a trial decompression and throws away the result.
+ .TP
+ .B \-f --force
+ Force overwrite of output files. Normally,
+-.I bzip2
++.I bzip2
+ will not overwrite
+-existing output files. Also forces
+-.I bzip2
++existing output files. Also forces
++.I bzip2
+ to break hard links
+ to files, which it otherwise wouldn't do.
+
+@@ -224,9 +222,9 @@ or decompression.
+ Reduce memory usage, for compression, decompression and testing. Files
+ are decompressed and tested using a modified algorithm which only
+ requires 2.5 bytes per block byte. This means any file can be
+-decompressed in 2300k of memory, albeit at about half the normal speed.
++decompressed in 2300\ k of memory, albeit at about half the normal speed.
+
+-During compression, \-s selects a block size of 200k, which limits
++During compression, \-s selects a block size of 200\ k, which limits
+ memory use to around the same figure, at the expense of your compression
+ ratio. In short, if your machine is low on memory (8 megabytes or
+ less), use \-s for everything. See MEMORY MANAGEMENT below.
+@@ -244,11 +242,11 @@ information which is primarily of interest for diagnostic purposes.
+ Display the software version, license terms and conditions.
+ .TP
+ .B \-1 (or \-\-fast) to \-9 (or \-\-best)
+-Set the block size to 100 k, 200 k .. 900 k when compressing. Has no
++Set the block size to 100 k, 200 k ... 900 k when compressing. Has no
+ effect when decompressing. See MEMORY MANAGEMENT below.
+-The \-\-fast and \-\-best aliases are primarily for GNU gzip
++The \-\-fast and \-\-best aliases are primarily for GNU gzip
+ compatibility. In particular, \-\-fast doesn't make things
+-significantly faster.
++significantly faster.
+ And \-\-best merely selects the default behaviour.
+ .TP
+ .B \--
+@@ -263,7 +261,7 @@ earlier versions, which was sometimes useful. 0.9.5 and above have an
+ improved algorithm which renders these flags irrelevant.
+
+ .SH MEMORY MANAGEMENT
+-.I bzip2
++.I bzip2
+ compresses large files in blocks. The block size affects
+ both the compression ratio achieved, and the amount of memory needed for
+ compression and decompression. The flags \-1 through \-9
+@@ -276,13 +274,13 @@ the file. Since block sizes are stored in compressed files, it follows
+ that the flags \-1 to \-9 are irrelevant to and so ignored
+ during decompression.
+
+-Compression and decompression requirements,
++Compression and decompression requirements,
+ in bytes, can be estimated as:
+
+- Compression: 400k + ( 8 x block size )
++ Compression: 400\ k + ( 8 x block size )
+
+- Decompression: 100k + ( 4 x block size ), or
+- 100k + ( 2.5 x block size )
++ Decompression: 100\ k + ( 4 x block size ), or
++ 100\ k + ( 2.5 x block size )
+
+ Larger block sizes give rapidly diminishing marginal returns. Most of
+ the compression comes from the first two or three hundred k of block
+@@ -292,10 +290,10 @@ on small machines.
+ It is also important to appreciate that the decompression memory
+ requirement is set at compression time by the choice of block size.
+
+-For files compressed with the default 900k block size,
++For files compressed with the default 900\ k block size,
+ .I bunzip2
+ will require about 3700 kbytes to decompress. To support decompression
+-of any file on a 4 megabyte machine,
++of any file on a 4 megabyte machine,
+ .I bunzip2
+ has an option to
+ decompress using approximately half this amount of memory, about 2300
+@@ -311,9 +309,9 @@ Another significant point applies to files which fit in a single block
+ amount of real memory touched is proportional to the size of the file,
+ since the file is smaller than a block. For example, compressing a file
+ 20,000 bytes long with the flag -9 will cause the compressor to
+-allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560
+-kbytes of it. Similarly, the decompressor will allocate 3700k but only
+-touch 100k + 20000 * 4 = 180 kbytes.
++allocate around 7600\ k of memory, but only touch 400\ k + 20000 * 8 = 560
++kbytes of it. Similarly, the decompressor will allocate 3700\ k but only
++touch 100\ k + 20000 * 4 = 180 kbytes.
+
+ Here is a table which summarises the maximum memory usage for different
+ block sizes. Also recorded is the total compressed size for 14 files of
+@@ -337,7 +335,7 @@ larger files, since the Corpus is dominated by smaller files.
+
+ .SH RECOVERING DATA FROM DAMAGED FILES
+ .I bzip2
+-compresses files in blocks, usually 900kbytes long. Each
++compresses files in blocks, usually 900\ kbytes long. Each
+ block is handled independently. If a media or transmission error causes
+ a multi-block .bz2
+ file to become damaged, it may be possible to
+@@ -350,36 +348,36 @@ damaged blocks can be distinguished from undamaged ones.
+
+ .I bzip2recover
+ is a simple program whose purpose is to search for
+-blocks in .bz2 files, and write each block out into its own .bz2
++blocks in .bz2 files, and write each block out into its own .bz2
+ file. You can then use
+-.I bzip2
++.I bzip2
+ \-t
+ to test the
+ integrity of the resulting files, and decompress those which are
+ undamaged.
+
+ .I bzip2recover
+-takes a single argument, the name of the damaged file,
++takes a single argument, the name of the damaged file,
+ and writes a number of files "rec00001file.bz2",
+-"rec00002file.bz2", etc, containing the extracted blocks.
+-The output filenames are designed so that the use of
+-wildcards in subsequent processing -- for example,
+-"bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in
++"rec00002file.bz2", etc., containing the extracted blocks.
++The output filenames are designed so that the use of
++wildcards in subsequent processing -- for example,
++"bzip2 -dc rec*file.bz2 > recovered_data" -- processes the files in
+ the correct order.
+
+ .I bzip2recover
+ should be of most use dealing with large .bz2
+-files, as these will contain many blocks. It is clearly
+-futile to use it on damaged single-block files, since a
+-damaged block cannot be recovered. If you wish to minimise
+-any potential data loss through media or transmission errors,
++files, as these will contain many blocks. It is clearly
++futile to use it on damaged single-block files, since a
++damaged block cannot be recovered. If you wish to minimise
++any potential data loss through media or transmission errors,
+ you might consider compressing with a smaller
+ block size.
+
+ .SH PERFORMANCE NOTES
+ The sorting phase of compression gathers together similar strings in the
+ file. Because of this, files containing very long runs of repeated
+-symbols, like "aabaabaabaab ..." (repeated several hundred times) may
++symbols, like "aabaabaabaab ...\&" (repeated several hundred times) may
+ compress more slowly than normal. Versions 0.9.5 and above fare much
+ better than previous versions in this respect. The ratio between
+ worst-case and average-case compression time is in the region of 10:1.
+@@ -395,7 +393,7 @@ that performance, both for compressing and decompressing, is largely
+ determined by the speed at which your machine can service cache misses.
+ Because of this, small changes to the code to reduce the miss rate have
+ been observed to give disproportionately large performance improvements.
+-I imagine
++I imagine
+ .I bzip2
+ will perform best on machines with very large caches.
+
+@@ -406,7 +404,7 @@ tries hard to detect I/O errors and exit cleanly, but the details of
+ what the problem is sometimes seem rather misleading.
+
+ This manual page pertains to version 1.0.6 of
+-.I bzip2.
++.I bzip2.
+ Compressed data created by this version is entirely forwards and
+ backwards compatible with the previous public releases, versions
+ 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1, 1.0.2 and above, but with the following
+@@ -440,13 +438,13 @@ Fenwick (for the structured coding model in the original
+ .I bzip,
+ and many refinements), and Alistair Moffat, Radford Neal and Ian Witten
+ (for the arithmetic coder in the original
+-.I bzip).
++.I bzip).
+ I am much
+ indebted for their help, support and advice. See the manual in the
+ source distribution for pointers to sources of documentation. Christian
+ von Roques encouraged me to look for faster sorting algorithms, so as to
+ speed up compression. Bela Lubkin encouraged me to improve the
+-worst-case compression performance.
++worst-case compression performance.
+ Donna Robinson XMLised the documentation.
+ The bz* scripts are derived from those of GNU gzip.
+ Many people sent patches, helped
diff --git a/bzip2/patches/20-legacy.patch b/bzip2/patches/20-legacy.patch
new file mode 100644
index 0000000..e7fa818
--- /dev/null
+++ b/bzip2/patches/20-legacy.patch
@@ -0,0 +1,6164 @@
+--- a/manual.texi 2011-12-04 13:55:53.589856334 +1100
++++ b/manual.texi 2011-12-04 18:16:28.000000000 +1100
+@@ -0,0 +1,2880 @@
++\input texinfo
++@setfilename untitled.info
++@documentencoding us-ascii
++@dircategory Development
++@direntry
++* Bzip2: (bzip2). A program and library for data compression.
++@end direntry
++
++@node Top, Introduction, , (dir)
++@top bzip2 and libbzip2, version 1.0.3
++@documentlanguage en
++
++@menu
++* Introduction::
++* How to use bzip2::
++* Programming with libbzip2::
++* Miscellanea::
++
++@detailmenu
++--- The Detailed Node Listing ---
++
++How to use bzip2
++
++* NAME::
++* SYNOPSIS::
++* DESCRIPTION::
++* OPTIONS::
++* MEMORY MANAGEMENT::
++* RECOVERING DATA FROM DAMAGED FILES::
++* PERFORMANCE NOTES::
++* CAVEATS::
++* AUTHOR::
++
++ Programming with libbzip2
++
++* Top-level structure::
++* Error handling::
++* Low-level interface: >Low-level interface.
++* High-level interface::
++* Utility functions::
++* zlib compatibility functions::
++* Using the library in a stdio-free environment::
++* Making a Windows DLL::
++
++Miscellanea
++
++* Limitations of the compressed file format::
++* Portability issues::
++* Reporting bugs::
++* Did you get the right package?::
++* Further Reading::
++
++@end detailmenu
++@end menu
++
++@node Introduction, How to use bzip2, Top, Top
++@chapter Introduction
++
++@samp{bzip2} compresses files
++using the Burrows-Wheeler block-sorting text compression
++algorithm, and Huffman coding. Compression is generally
++considerably better than that achieved by more conventional
++LZ77/LZ78-based compressors, and approaches the performance of
++the PPM family of statistical compressors.
++
++@samp{bzip2} is built on top of
++@samp{libbzip2}, a flexible library for
++handling compressed data in the
++@samp{bzip2} format. This manual
++describes both how to use the program and how to work with the
++library interface. Most of the manual is devoted to this
++library, not the program, which is good news if your interest is
++only in the program.
++
++@itemize @bullet{}
++
++@item
++@ref{How to use bzip2,,How to use bzip2}. describes how to use
++@samp{bzip2}; this is the only part
++you need to read if you just want to know how to operate the
++program.
++
++@item
++@ref{Programming with libbzip2,,Programming with libbzip2}. describes the
++programming interfaces in detail, and
++
++@item
++@ref{Miscellanea,,Miscellanea}. records some
++miscellaneous notes which I thought ought to be recorded
++somewhere.
++@end itemize
++
++@node How to use bzip2, Programming with libbzip2, Introduction, Top
++@chapter How to use bzip2
++
++This chapter contains a copy of the
++@samp{bzip2} man page, and nothing
++else.
++
++@menu
++* NAME::
++* SYNOPSIS::
++* DESCRIPTION::
++* OPTIONS::
++* MEMORY MANAGEMENT::
++* RECOVERING DATA FROM DAMAGED FILES::
++* PERFORMANCE NOTES::
++* CAVEATS::
++* AUTHOR::
++@end menu
++
++@node NAME, SYNOPSIS, , How to use bzip2
++@section NAME
++
++@itemize @bullet{}
++
++@item
++@samp{bzip2},
++@samp{bunzip2} - a block-sorting file
++compressor, v1.0.3
++
++@item
++@samp{bzcat} -
++decompresses files to stdout
++
++@item
++@samp{bzip2recover} -
++recovers data from damaged bzip2 files
++@end itemize
++
++@node SYNOPSIS, DESCRIPTION, NAME, How to use bzip2
++@section SYNOPSIS
++
++@itemize @bullet{}
++
++@item
++@samp{bzip2} [
++-cdfkqstvzVL123456789 ] [ filenames ... ]
++
++@item
++@samp{bunzip2} [
++-fkvsVL ] [ filenames ... ]
++
++@item
++@samp{bzcat} [ -s ] [
++filenames ... ]
++
++@item
++@samp{bzip2recover}
++filename
++@end itemize
++
++@node DESCRIPTION, OPTIONS, SYNOPSIS, How to use bzip2
++@section DESCRIPTION
++
++@samp{bzip2} compresses files
++using the Burrows-Wheeler block sorting text compression
++algorithm, and Huffman coding. Compression is generally
++considerably better than that achieved by more conventional
++LZ77/LZ78-based compressors, and approaches the performance of
++the PPM family of statistical compressors.
++
++The command-line options are deliberately very similar to
++those of GNU @samp{gzip}, but they are
++not identical.
++
++@samp{bzip2} expects a list of
++file names to accompany the command-line flags. Each file is
++replaced by a compressed version of itself, with the name
++@samp{original_name.bz2}. Each
++compressed file has the same modification date, permissions, and,
++when possible, ownership as the corresponding original, so that
++these properties can be correctly restored at decompression time.
++File name handling is naive in the sense that there is no
++mechanism for preserving original file names, permissions,
++ownerships or dates in filesystems which lack these concepts, or
++have serious file name length restrictions, such as
++MS-DOS.
++
++@samp{bzip2} and
++@samp{bunzip2} will by default not
++overwrite existing files. If you want this to happen, specify
++the @samp{-f} flag.
++
++If no file names are specified,
++@samp{bzip2} compresses from standard
++input to standard output. In this case,
++@samp{bzip2} will decline to write
++compressed output to a terminal, as this would be entirely
++incomprehensible and therefore pointless.
++
++@samp{bunzip2} (or
++@samp{bzip2 -d}) decompresses all
++specified files. Files which were not created by
++@samp{bzip2} will be detected and
++ignored, and a warning issued.
++@samp{bzip2} attempts to guess the
++filename for the decompressed file from that of the compressed
++file as follows:
++
++@itemize @bullet{}
++
++@item
++@samp{filename.bz2 }
++becomes
++@samp{filename}
++
++@item
++@samp{filename.bz }
++becomes
++@samp{filename}
++
++@item
++@samp{filename.tbz2}
++becomes
++@samp{filename.tar}
++
++@item
++@samp{filename.tbz }
++becomes
++@samp{filename.tar}
++
++@item
++@samp{anyothername }
++becomes
++@samp{anyothername.out}
++@end itemize
++
++If the file does not end in one of the recognised endings,
++@samp{.bz2},
++@samp{.bz},
++@samp{.tbz2} or
++@samp{.tbz},
++@samp{bzip2} complains that it cannot
++guess the name of the original file, and uses the original name
++with @samp{.out} appended.
++
++As with compression, supplying no filenames causes
++decompression from standard input to standard output.
++
++@samp{bunzip2} will correctly
++decompress a file which is the concatenation of two or more
++compressed files. The result is the concatenation of the
++corresponding uncompressed files. Integrity testing
++(@samp{-t}) of concatenated compressed
++files is also supported.
++
++You can also compress or decompress files to the standard
++output by giving the @samp{-c} flag.
++Multiple files may be compressed and decompressed like this. The
++resulting outputs are fed sequentially to stdout. Compression of
++multiple files in this manner generates a stream containing
++multiple compressed file representations. Such a stream can be
++decompressed correctly only by
++@samp{bzip2} version 0.9.0 or later.
++Earlier versions of @samp{bzip2} will
++stop after decompressing the first file in the stream.
++
++@samp{bzcat} (or
++@samp{bzip2 -dc}) decompresses all
++specified files to the standard output.
++
++@samp{bzip2} will read arguments
++from the environment variables
++@samp{BZIP2} and
++@samp{BZIP}, in that order, and will
++process them before any arguments read from the command line.
++This gives a convenient way to supply default arguments.
++
++Compression is always performed, even if the compressed
++file is slightly larger than the original. Files of less than
++about one hundred bytes tend to get larger, since the compression
++mechanism has a constant overhead in the region of 50 bytes.
++Random data (including the output of most file compressors) is
++coded at about 8.05 bits per byte, giving an expansion of around
++0.5%.
++
++As a self-check for your protection,
++@samp{bzip2} uses 32-bit CRCs to make
++sure that the decompressed version of a file is identical to the
++original. This guards against corruption of the compressed data,
++and against undetected bugs in
++@samp{bzip2} (hopefully very unlikely).
++The chance of data corruption going undetected is microscopic,
++about one chance in four billion for each file processed. Be
++aware, though, that the check occurs upon decompression, so it
++can only tell you that something is wrong. It can't help you
++recover the original uncompressed data. You can use
++@samp{bzip2recover} to try to recover
++data from damaged files.
++
++Return values: 0 for a normal exit, 1 for environmental
++problems (file not found, invalid flags, I/O errors, etc.), 2
++to indicate a corrupt compressed file, 3 for an internal
++consistency error (e.g., a bug) which caused
++@samp{bzip2} to panic.
++
++@node OPTIONS, MEMORY MANAGEMENT, DESCRIPTION, How to use bzip2
++@section OPTIONS
++
++@table @asis
++
++@item @samp{-c --stdout}
++Compress or decompress to standard
++output.
++
++@item @samp{-d --decompress}
++Force decompression.
++@samp{bzip2},
++@samp{bunzip2} and
++@samp{bzcat} are really the same
++program, and the decision about what actions to take is done on
++the basis of which name is used. This flag overrides that
++mechanism, and forces bzip2 to decompress.
++
++@item @samp{-z --compress}
++The complement to
++@samp{-d}: forces compression,
++regardless of the invocation name.
++
++@item @samp{-t --test}
++Check integrity of the specified file(s), but
++don't decompress them. This really performs a trial
++decompression and throws away the result.
++
++@item @samp{-f --force}
++Force overwrite of output files. Normally,
++@samp{bzip2} will not overwrite
++existing output files. Also forces
++@samp{bzip2} to break hard links to
++files, which it otherwise wouldn't do.
++
++@samp{bzip2} normally declines
++to decompress files which don't have the correct magic header
++bytes. If forced (@samp{-f}),
++however, it will pass such files through unmodified. This is
++how GNU @samp{gzip} behaves.
++
++@item @samp{-k --keep}
++Keep (don't delete) input files during
++compression or decompression.
++
++@item @samp{-s --small}
++Reduce memory usage, for compression,
++decompression and testing. Files are decompressed and tested
++using a modified algorithm which only requires 2.5 bytes per
++block byte. This means any file can be decompressed in 2300k
++of memory, albeit at about half the normal speed.
++
++During compression, @samp{-s}
++selects a block size of 200k, which limits memory use to around
++the same figure, at the expense of your compression ratio. In
++short, if your machine is low on memory (8 megabytes or less),
++use @samp{-s} for everything. See
++@ref{MEMORY MANAGEMENT,,MEMORY MANAGEMENT}, below.
++
++@item @samp{-q --quiet}
++Suppress non-essential warning messages.
++Messages pertaining to I/O errors and other critical events
++will not be suppressed.
++
++@item @samp{-v --verbose}
++Verbose mode -- show the compression ratio for
++each file processed. Further
++@samp{-v}'s increase the verbosity
++level, spewing out lots of information which is primarily of
++interest for diagnostic purposes.
++
++@item @samp{-L --license -V --version}
++Display the software version, license terms and
++conditions.
++
++@item @samp{-1} (or @samp{--fast}) to @samp{-9} (or @samp{--best})
++Set the block size to 100 k, 200 k ... 900 k
++when compressing. Has no effect when decompressing. See @ref{MEMORY MANAGEMENT,,MEMORY MANAGEMENT}, below. The
++@samp{--fast} and
++@samp{--best} aliases are primarily
++for GNU @samp{gzip} compatibility.
++In particular, @samp{--fast} doesn't
++make things significantly faster. And
++@samp{--best} merely selects the
++default behaviour.
++
++@item @samp{--}
++Treats all subsequent arguments as file names,
++even if they start with a dash. This is so you can handle
++files with names beginning with a dash, for example:
++@samp{bzip2 --
++-myfilename}.
++
++@item @samp{--repetitive-fast}
++@itemx @samp{--repetitive-best}
++These flags are redundant in versions 0.9.5 and
++above. They provided some coarse control over the behaviour of
++the sorting algorithm in earlier versions, which was sometimes
++useful. 0.9.5 and above have an improved algorithm which
++renders these flags irrelevant.
++@end table
++
++@node MEMORY MANAGEMENT, RECOVERING DATA FROM DAMAGED FILES, OPTIONS, How to use bzip2
++@section MEMORY MANAGEMENT
++
++@samp{bzip2} compresses large
++files in blocks. The block size affects both the compression
++ratio achieved, and the amount of memory needed for compression
++and decompression. The flags @samp{-1}
++through @samp{-9} specify the block
++size to be 100,000 bytes through 900,000 bytes (the default)
++respectively. At decompression time, the block size used for
++compression is read from the header of the compressed file, and
++@samp{bunzip2} then allocates itself
++just enough memory to decompress the file. Since block sizes are
++stored in compressed files, it follows that the flags
++@samp{-1} to
++@samp{-9} are irrelevant to and so
++ignored during decompression.
++
++Compression and decompression requirements, in bytes, can be
++estimated as:
++
++@example
++
++Compression: 400k + ( 8 x block size )
++
++Decompression: 100k + ( 4 x block size ), or
++ 100k + ( 2.5 x block size )
++@end example
++
++Larger block sizes give rapidly diminishing marginal
++returns. Most of the compression comes from the first two or
++three hundred k of block size, a fact worth bearing in mind when
++using @samp{bzip2} on small machines.
++It is also important to appreciate that the decompression memory
++requirement is set at compression time by the choice of block
++size.
++
++For files compressed with the default 900k block size,
++@samp{bunzip2} will require about 3700
++kbytes to decompress. To support decompression of any file on a
++4 megabyte machine, @samp{bunzip2} has
++an option to decompress using approximately half this amount of
++memory, about 2300 kbytes. Decompression speed is also halved,
++so you should use this option only where necessary. The relevant
++flag is @samp{-s}.
++
++In general, try and use the largest block size memory
++constraints allow, since that maximises the compression achieved.
++Compression and decompression speed are virtually unaffected by
++block size.
++
++Another significant point applies to files which fit in a
++single block -- that means most files you'd encounter using a
++large block size. The amount of real memory touched is
++proportional to the size of the file, since the file is smaller
++than a block. For example, compressing a file 20,000 bytes long
++with the flag @samp{-9} will cause the
++compressor to allocate around 7600k of memory, but only touch
++400k + 20000 * 8 = 560 kbytes of it. Similarly, the decompressor
++will allocate 3700k but only touch 100k + 20000 * 4 = 180
++kbytes.
++
++Here is a table which summarises the maximum memory usage
++for different block sizes. Also recorded is the total compressed
++size for 14 files of the Calgary Text Compression Corpus
++totalling 3,141,622 bytes. This column gives some feel for how
++compression varies with block size. These figures tend to
++understate the advantage of larger block sizes for larger files,
++since the Corpus is dominated by smaller files.
++
++@example
++
++ Compress Decompress Decompress Corpus
++Flag usage usage -s usage Size
++
++ -1 1200k 500k 350k 914704
++ -2 2000k 900k 600k 877703
++ -3 2800k 1300k 850k 860338
++ -4 3600k 1700k 1100k 846899
++ -5 4400k 2100k 1350k 845160
++ -6 5200k 2500k 1600k 838626
++ -7 6000k 2900k 1850k 834096
++ -8 6800k 3300k 2100k 828642
++ -9 7600k 3700k 2350k 828642
++@end example
++
++@node RECOVERING DATA FROM DAMAGED FILES, PERFORMANCE NOTES, MEMORY MANAGEMENT, How to use bzip2
++@section RECOVERING DATA FROM DAMAGED FILES
++
++@samp{bzip2} compresses files in
++blocks, usually 900kbytes long. Each block is handled
++independently. If a media or transmission error causes a
++multi-block @samp{.bz2} file to become
++damaged, it may be possible to recover data from the undamaged
++blocks in the file.
++
++The compressed representation of each block is delimited by
++a 48-bit pattern, which makes it possible to find the block
++boundaries with reasonable certainty. Each block also carries
++its own 32-bit CRC, so damaged blocks can be distinguished from
++undamaged ones.
++
++@samp{bzip2recover} is a simple
++program whose purpose is to search for blocks in
++@samp{.bz2} files, and write each block
++out into its own @samp{.bz2} file. You
++can then use @samp{bzip2 -t} to test
++the integrity of the resulting files, and decompress those which
++are undamaged.
++
++@samp{bzip2recover} takes a
++single argument, the name of the damaged file, and writes a
++number of files @samp{rec0001file.bz2},
++@samp{rec0002file.bz2}, etc, containing
++the extracted blocks. The output filenames are designed so that
++the use of wildcards in subsequent processing -- for example,
++@samp{bzip2 -dc rec*file.bz2 >
++recovered_data} -- lists the files in the correct
++order.
++
++@samp{bzip2recover} should be of
++most use dealing with large @samp{.bz2}
++files, as these will contain many blocks. It is clearly futile
++to use it on damaged single-block files, since a damaged block
++cannot be recovered. If you wish to minimise any potential data
++loss through media or transmission errors, you might consider
++compressing with a smaller block size.
++
++@node PERFORMANCE NOTES, CAVEATS, RECOVERING DATA FROM DAMAGED FILES, How to use bzip2
++@section PERFORMANCE NOTES
++
++The sorting phase of compression gathers together similar
++strings in the file. Because of this, files containing very long
++runs of repeated symbols, like "aabaabaabaab ..." (repeated
++several hundred times) may compress more slowly than normal.
++Versions 0.9.5 and above fare much better than previous versions
++in this respect. The ratio between worst-case and average-case
++compression time is in the region of 10:1. For previous
++versions, this figure was more like 100:1. You can use the
++@samp{-vvvv} option to monitor progress
++in great detail, if you want.
++
++Decompression speed is unaffected by these
++phenomena.
++
++@samp{bzip2} usually allocates
++several megabytes of memory to operate in, and then charges all
++over it in a fairly random fashion. This means that performance,
++both for compressing and decompressing, is largely determined by
++the speed at which your machine can service cache misses.
++Because of this, small changes to the code to reduce the miss
++rate have been observed to give disproportionately large
++performance improvements. I imagine
++@samp{bzip2} will perform best on
++machines with very large caches.
++
++@node CAVEATS, AUTHOR, PERFORMANCE NOTES, How to use bzip2
++@section CAVEATS
++
++I/O error messages are not as helpful as they could be.
++@samp{bzip2} tries hard to detect I/O
++errors and exit cleanly, but the details of what the problem is
++sometimes seem rather misleading.
++
++This manual page pertains to version 1.0.3 of
++@samp{bzip2}. Compressed data created
++by this version is entirely forwards and backwards compatible
++with the previous public releases, versions 0.1pl2, 0.9.0,
++0.9.5, 1.0.0, 1.0.1 and 1.0.2, but with the following exception: 0.9.0
++and above can correctly decompress multiple concatenated
++compressed files. 0.1pl2 cannot do this; it will stop after
++decompressing just the first file in the stream.
++
++@samp{bzip2recover} versions
++prior to 1.0.2 used 32-bit integers to represent bit positions in
++compressed files, so they could not handle compressed files more
++than 512 megabytes long. Versions 1.0.2 and above use 64-bit ints
++on some platforms which support them (GNU supported targets, and
++Windows). To establish whether or not
++@samp{bzip2recover} was built with such
++a limitation, run it without arguments. In any event you can
++build yourself an unlimited version if you can recompile it with
++@samp{MaybeUInt64} set to be an
++unsigned 64-bit integer.
++
++@node AUTHOR, , CAVEATS, How to use bzip2
++@section AUTHOR
++
++Julian Seward,
++@samp{jseward@@bzip.org}
++
++The ideas embodied in
++@samp{bzip2} are due to (at least) the
++following people: Michael Burrows and David Wheeler (for the
++block sorting transformation), David Wheeler (again, for the
++Huffman coder), Peter Fenwick (for the structured coding model in
++the original @samp{bzip}, and many
++refinements), and Alistair Moffat, Radford Neal and Ian Witten
++(for the arithmetic coder in the original
++@samp{bzip}). I am much indebted for
++their help, support and advice. See the manual in the source
++distribution for pointers to sources of documentation. Christian
++von Roques encouraged me to look for faster sorting algorithms,
++so as to speed up compression. Bela Lubkin encouraged me to
++improve the worst-case compression performance.
++Donna Robinson XMLised the documentation.
++Many people sent
++patches, helped with portability problems, lent machines, gave
++advice and were generally helpful.
++
++@node Programming with libbzip2, Miscellanea, How to use bzip2, Top
++@chapter Programming with libbzip2
++
++This chapter describes the programming interface to
++@samp{libbzip2}.
++
++For general background information, particularly about
++memory use and performance aspects, you'd be well advised to read
++@ref{How to use bzip2,,How to use bzip2}, as well.
++
++@menu
++* Top-level structure::
++* Error handling::
++* Low-level interface: >Low-level interface.
++* High-level interface::
++* Utility functions::
++* zlib compatibility functions::
++* Using the library in a stdio-free environment::
++* Making a Windows DLL::
++@end menu
++
++@node Top-level structure, Error handling, , Programming with libbzip2
++@section Top-level structure
++
++@samp{libbzip2} is a flexible
++library for compressing and decompressing data in the
++@samp{bzip2} data format. Although
++packaged as a single entity, it helps to regard the library as
++three separate parts: the low level interface, the high level
++interface, and some utility functions.
++
++The structure of
++@samp{libbzip2}'s interfaces is similar
++to that of Jean-loup Gailly's and Mark Adler's excellent
++@samp{zlib} library.
++
++All externally visible symbols have names beginning
++@samp{BZ2_}. This is new in version
++1.0. The intention is to minimise pollution of the namespaces of
++library clients.
++
++To use any part of the library, you need to
++@samp{#include <bzlib.h>}
++into your sources.
++
++@menu
++* Low-level summary::
++* High-level summary::
++* Utility functions summary::
++@end menu
++
++@node Low-level summary, High-level summary, , Top-level structure
++@subsection Low-level summary
++
++This interface provides services for compressing and
++decompressing data in memory. There's no provision for dealing
++with files, streams or any other I/O mechanisms, just straight
++memory-to-memory work. In fact, this part of the library can be
++compiled without inclusion of
++@samp{stdio.h}, which may be helpful
++for embedded applications.
++
++The low-level part of the library has no global variables
++and is therefore thread-safe.
++
++Six routines make up the low level interface:
++@samp{BZ2_bzCompressInit},
++@samp{BZ2_bzCompress}, and
++@samp{BZ2_bzCompressEnd} for
++compression, and a corresponding trio
++@samp{BZ2_bzDecompressInit},
++@samp{BZ2_bzDecompress} and
++@samp{BZ2_bzDecompressEnd} for
++decompression. The @samp{*Init}
++functions allocate memory for compression/decompression and do
++other initialisations, whilst the
++@samp{*End} functions close down
++operations and release memory.
++
++The real work is done by
++@samp{BZ2_bzCompress} and
++@samp{BZ2_bzDecompress}. These
++compress and decompress data from a user-supplied input buffer to
++a user-supplied output buffer. These buffers can be any size;
++arbitrary quantities of data are handled by making repeated calls
++to these functions. This is a flexible mechanism allowing a
++consumer-pull style of activity, or producer-push, or a mixture
++of both.
++
++@node High-level summary, Utility functions summary, Low-level summary, Top-level structure
++@subsection High-level summary
++
++This interface provides some handy wrappers around the
++low-level interface to facilitate reading and writing
++@samp{bzip2} format files
++(@samp{.bz2} files). The routines
++provide hooks to facilitate reading files in which the
++@samp{bzip2} data stream is embedded
++within some larger-scale file structure, or where there are
++multiple @samp{bzip2} data streams
++concatenated end-to-end.
++
++For reading files,
++@samp{BZ2_bzReadOpen},
++@samp{BZ2_bzRead},
++@samp{BZ2_bzReadClose} and
++@samp{BZ2_bzReadGetUnused} are
++supplied. For writing files,
++@samp{BZ2_bzWriteOpen},
++@samp{BZ2_bzWrite} and
++@samp{BZ2_bzWriteFinish} are
++available.
++
++As with the low-level library, no global variables are used
++so the library is per se thread-safe. However, if I/O errors
++occur whilst reading or writing the underlying compressed files,
++you may have to consult @samp{errno} to
++determine the cause of the error. In that case, you'd need a C
++library which correctly supports
++@samp{errno} in a multithreaded
++environment.
++
++To make the library a little simpler and more portable,
++@samp{BZ2_bzReadOpen} and
++@samp{BZ2_bzWriteOpen} require you to
++pass them file handles (@samp{FILE*}s)
++which have previously been opened for reading or writing
++respectively. That avoids portability problems associated with
++file operations and file attributes, whilst not being much of an
++imposition on the programmer.
++
++@node Utility functions summary, , High-level summary, Top-level structure
++@subsection Utility functions summary
++
++For very simple needs,
++@samp{BZ2_bzBuffToBuffCompress} and
++@samp{BZ2_bzBuffToBuffDecompress} are
++provided. These compress data in memory from one buffer to
++another buffer in a single function call. You should assess
++whether these functions fulfill your memory-to-memory
++compression/decompression requirements before investing effort in
++understanding the more general but more complex low-level
++interface.
++
++Yoshioka Tsuneo
++(@samp{QWF00133@@niftyserve.or.jp} /
++@samp{tsuneo-y@@is.aist-nara.ac.jp}) has
++contributed some functions to give better
++@samp{zlib} compatibility. These
++functions are @samp{BZ2_bzopen},
++@samp{BZ2_bzread},
++@samp{BZ2_bzwrite},
++@samp{BZ2_bzflush},
++@samp{BZ2_bzclose},
++@samp{BZ2_bzerror} and
++@samp{BZ2_bzlibVersion}. You may find
++these functions more convenient for simple file reading and
++writing, than those in the high-level interface. These functions
++are not (yet) officially part of the library, and are minimally
++documented here. If they break, you get to keep all the pieces.
++I hope to document them properly when time permits.
++
++Yoshioka also contributed modifications to allow the
++library to be built as a Windows DLL.
++
++@node Error handling, >Low-level interface, Top-level structure, Programming with libbzip2
++@section Error handling
++
++The library is designed to recover cleanly in all
++situations, including the worst-case situation of decompressing
++random data. I'm not 100% sure that it can always do this, so
++you might want to add a signal handler to catch segmentation
++violations during decompression if you are feeling especially
++paranoid. I would be interested in hearing more about the
++robustness of the library to corrupted compressed data.
++
++Version 1.0.3 is more robust in this respect than any
++previous version. Investigations with Valgrind (a tool for detecting
++problems with memory management) indicate
++that, at least for the few files I tested, all single-bit errors
++in the compressed data are caught properly, with no
++segmentation faults, no uses of uninitialised data, no out of
++range reads or writes, and no infinite looping in the decompressor.
++So it's certainly pretty robust, although
++I wouldn't claim it to be totally bombproof.
++
++The file @samp{bzlib.h} contains
++all definitions needed to use the library. In particular, you
++should definitely not include
++@samp{bzlib_private.h}.
++
++In @samp{bzlib.h}, the various
++return values are defined. The following list is not intended as
++an exhaustive description of the circumstances in which a given
++value may be returned -- those descriptions are given later.
++Rather, it is intended to convey the rough meaning of each return
++value. The first five values are normal and not intended to
++denote an error situation.
++
++@table @asis
++
++@item @samp{BZ_OK}
++The requested action was completed
++successfully.
++
++@item @samp{BZ_RUN_OK, BZ_FLUSH_OK, BZ_FINISH_OK}
++In
++@samp{BZ2_bzCompress}, the requested
++flush/finish/nothing-special action was completed
++successfully.
++
++@item @samp{BZ_STREAM_END}
++Compression of data was completed, or the
++logical stream end was detected during
++decompression.
++@end table
++
++The following return values indicate an error of some
++kind.
++
++@table @asis
++
++@item @samp{BZ_CONFIG_ERROR}
++Indicates that the library has been improperly
++compiled on your platform -- a major configuration error.
++Specifically, it means that
++@samp{sizeof(char)},
++@samp{sizeof(short)} and
++@samp{sizeof(int)} are not 1, 2 and
++4 respectively, as they should be. Note that the library
++should still work properly on 64-bit platforms which follow
++the LP64 programming model -- that is, where
++@samp{sizeof(long)} and
++@samp{sizeof(void*)} are 8. Under
++LP64, @samp{sizeof(int)} is still 4,
++so @samp{libbzip2}, which doesn't
++use the @samp{long} type, is
++OK.
++
++@item @samp{BZ_SEQUENCE_ERROR}
++When using the library, it is important to call
++the functions in the correct sequence and with data structures
++(buffers etc) in the correct states.
++@samp{libbzip2} checks as much as it
++can to ensure this is happening, and returns
++@samp{BZ_SEQUENCE_ERROR} if not.
++Code which complies precisely with the function semantics, as
++detailed below, should never receive this value; such an event
++denotes buggy code which you should
++investigate.
++
++@item @samp{BZ_PARAM_ERROR}
++Returned when a parameter to a function call is
++out of range or otherwise manifestly incorrect. As with
++@samp{BZ_SEQUENCE_ERROR}, this
++denotes a bug in the client code. The distinction between
++@samp{BZ_PARAM_ERROR} and
++@samp{BZ_SEQUENCE_ERROR} is a bit
++hazy, but still worth making.
++
++@item @samp{BZ_MEM_ERROR}
++Returned when a request to allocate memory
++failed. Note that the quantity of memory needed to decompress
++a stream cannot be determined until the stream's header has
++been read. So
++@samp{BZ2_bzDecompress} and
++@samp{BZ2_bzRead} may return
++@samp{BZ_MEM_ERROR} even though some
++of the compressed data has been read. The same is not true
++for compression; once
++@samp{BZ2_bzCompressInit} or
++@samp{BZ2_bzWriteOpen} have
++successfully completed,
++@samp{BZ_MEM_ERROR} cannot
++occur.
++
++@item @samp{BZ_DATA_ERROR}
++Returned when a data integrity error is
++detected during decompression. Most importantly, this means
++when stored and computed CRCs for the data do not match. This
++value is also returned upon detection of any other anomaly in
++the compressed data.
++
++@item @samp{BZ_DATA_ERROR_MAGIC}
++As a special case of
++@samp{BZ_DATA_ERROR}, it is
++sometimes useful to know when the compressed stream does not
++start with the correct magic bytes (@samp{'B' 'Z'
++'h'}).
++
++@item @samp{BZ_IO_ERROR}
++Returned by
++@samp{BZ2_bzRead} and
++@samp{BZ2_bzWrite} when there is an
++error reading or writing in the compressed file, and by
++@samp{BZ2_bzReadOpen} and
++@samp{BZ2_bzWriteOpen} for attempts
++to use a file for which the error indicator (viz,
++@samp{ferror(f)}) is set. On
++receipt of @samp{BZ_IO_ERROR}, the
++caller should consult @samp{errno}
++and/or @samp{perror} to acquire
++operating-system specific information about the
++problem.
++
++@item @samp{BZ_UNEXPECTED_EOF}
++Returned by
++@samp{BZ2_bzRead} when the
++compressed file finishes before the logical end of stream is
++detected.
++
++@item @samp{BZ_OUTBUFF_FULL}
++Returned by
++@samp{BZ2_bzBuffToBuffCompress} and
++@samp{BZ2_bzBuffToBuffDecompress} to
++indicate that the output data will not fit into the output
++buffer provided.
++@end table
++
++@node >Low-level interface, High-level interface, Error handling, Programming with libbzip2
++@section Low-level interface
++
++@menu
++* BZ2_bzCompressInit::
++* BZ2_bzCompress::
++* BZ2_bzCompressEnd::
++* BZ2_bzDecompressInit::
++* BZ2_bzDecompress::
++* BZ2_bzDecompressEnd::
++@end menu
++
++@node BZ2_bzCompressInit, BZ2_bzCompress, , >Low-level interface
++@subsection BZ2_bzCompressInit
++
++@example
++
++typedef struct @{
++ char *next_in;
++ unsigned int avail_in;
++ unsigned int total_in_lo32;
++ unsigned int total_in_hi32;
++
++ char *next_out;
++ unsigned int avail_out;
++ unsigned int total_out_lo32;
++ unsigned int total_out_hi32;
++
++ void *state;
++
++ void *(*bzalloc)(void *,int,int);
++ void (*bzfree)(void *,void *);
++ void *opaque;
++@} bz_stream;
++
++int BZ2_bzCompressInit ( bz_stream *strm,
++ int blockSize100k,
++ int verbosity,
++ int workFactor );
++@end example
++
++Prepares for compression. The
++@samp{bz_stream} structure holds all
++data pertaining to the compression activity. A
++@samp{bz_stream} structure should be
++allocated and initialised prior to the call. The fields of
++@samp{bz_stream} comprise the entirety
++of the user-visible data. @samp{state}
++is a pointer to the private data structures required for
++compression.
++
++Custom memory allocators are supported, via fields
++@samp{bzalloc},
++@samp{bzfree}, and
++@samp{opaque}. The value
++@samp{opaque} is passed as the first
++argument to all calls to @samp{bzalloc}
++and @samp{bzfree}, but is otherwise
++ignored by the library. The call @samp{bzalloc (
++opaque, n, m )} is expected to return a pointer
++@samp{p} to @samp{n *
++m} bytes of memory, and @samp{bzfree (
++opaque, p )} should free that memory.
++
++If you don't want to use a custom memory allocator, set
++@samp{bzalloc},
++@samp{bzfree} and
++@samp{opaque} to
++@samp{NULL}, and the library will then
++use the standard @samp{malloc} /
++@samp{free} routines.
++
++Before calling
++@samp{BZ2_bzCompressInit}, fields
++@samp{bzalloc},
++@samp{bzfree} and
++@samp{opaque} should be filled
++appropriately, as just described. Upon return, the internal
++state will have been allocated and initialised, and
++@samp{total_in_lo32},
++@samp{total_in_hi32},
++@samp{total_out_lo32} and
++@samp{total_out_hi32} will have been
++set to zero. These four fields are used by the library to inform
++the caller of the total amount of data passed into and out of the
++library, respectively. You should not try to change them. As of
++version 1.0, 64-bit counts are maintained, even on 32-bit
++platforms, using the @samp{_hi32}
++fields to store the upper 32 bits of the count. So, for example,
++the total amount of data in is @samp{(total_in_hi32
++<< 32) + total_in_lo32}.
++
++Parameter @samp{blockSize100k}
++specifies the block size to be used for compression. It should
++be a value between 1 and 9 inclusive, and the actual block size
++used is 100000 x this figure. 9 gives the best compression but
++takes most memory.
++
++Parameter @samp{verbosity} should
++be set to a number between 0 and 4 inclusive. 0 is silent, and
++greater numbers give increasingly verbose monitoring/debugging
++output. If the library has been compiled with
++@samp{-DBZ_NO_STDIO}, no such output
++will appear for any verbosity setting.
++
++Parameter @samp{workFactor}
++controls how the compression phase behaves when presented with
++worst case, highly repetitive, input data. If compression runs
++into difficulties caused by repetitive data, the library switches
++from the standard sorting algorithm to a fallback algorithm. The
++fallback is slower than the standard algorithm by perhaps a
++factor of three, but always behaves reasonably, no matter how bad
++the input.
++
++Lower values of @samp{workFactor}
++reduce the amount of effort the standard algorithm will expend
++before resorting to the fallback. You should set this parameter
++carefully; too low, and many inputs will be handled by the
++fallback algorithm and so compress rather slowly; too high, and
++your average-to-worst case compression times can become very
++large. The default value of 30 gives reasonable behaviour over a
++wide range of circumstances.
++
++Allowable values range from 0 to 250 inclusive. 0 is a
++special case, equivalent to using the default value of 30.
++
++Note that the compressed output generated is the same
++regardless of whether or not the fallback algorithm is
++used.
++
++Be aware also that this parameter may disappear entirely in
++future versions of the library. In principle it should be
++possible to devise a good way to automatically choose which
++algorithm to use. Such a mechanism would render the parameter
++obsolete.
++
++Possible return values:
++
++@example
++
++BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++BZ_PARAM_ERROR
++ if strm is NULL
++ or blockSize < 1 or blockSize > 9
++ or verbosity < 0 or verbosity > 4
++ or workFactor < 0 or workFactor > 250
++BZ_MEM_ERROR
++ if not enough memory is available
++BZ_OK
++ otherwise
++@end example
++
++Allowable next actions:
++
++@example
++
++BZ2_bzCompress
++ if BZ_OK is returned
++ no specific action needed in case of error
++@end example
++
++@node BZ2_bzCompress, BZ2_bzCompressEnd, BZ2_bzCompressInit, >Low-level interface
++@subsection BZ2_bzCompress
++
++@example
++
++int BZ2_bzCompress ( bz_stream *strm, int action );
++@end example
++
++Provides more input and/or output buffer space for the
++library. The caller maintains input and output buffers, and
++calls @samp{BZ2_bzCompress} to transfer
++data between them.
++
++Before each call to
++@samp{BZ2_bzCompress},
++@samp{next_in} should point at the data
++to be compressed, and @samp{avail_in}
++should indicate how many bytes the library may read.
++@samp{BZ2_bzCompress} updates
++@samp{next_in},
++@samp{avail_in} and
++@samp{total_in} to reflect the number
++of bytes it has read.
++
++Similarly, @samp{next_out} should
++point to a buffer in which the compressed data is to be placed,
++with @samp{avail_out} indicating how
++much output space is available.
++@samp{BZ2_bzCompress} updates
++@samp{next_out},
++@samp{avail_out} and
++@samp{total_out} to reflect the number
++of bytes output.
++
++You may provide and remove as little or as much data as you
++like on each call of
++@samp{BZ2_bzCompress}. In the limit,
++it is acceptable to supply and remove data one byte at a time,
++although this would be terribly inefficient. You should always
++ensure that at least one byte of output space is available at
++each call.
++
++A second purpose of
++@samp{BZ2_bzCompress} is to request a
++change of mode of the compressed stream.
++
++Conceptually, a compressed stream can be in one of four
++states: IDLE, RUNNING, FLUSHING and FINISHING. Before
++initialisation
++(@samp{BZ2_bzCompressInit}) and after
++termination (@samp{BZ2_bzCompressEnd}),
++a stream is regarded as IDLE.
++
++Upon initialisation
++(@samp{BZ2_bzCompressInit}), the stream
++is placed in the RUNNING state. Subsequent calls to
++@samp{BZ2_bzCompress} should pass
++@samp{BZ_RUN} as the requested action;
++other actions are illegal and will result in
++@samp{BZ_SEQUENCE_ERROR}.
++
++At some point, the calling program will have provided all
++the input data it wants to. It will then want to finish up -- in
++effect, asking the library to process any data it might have
++buffered internally. In this state,
++@samp{BZ2_bzCompress} will no longer
++attempt to read data from
++@samp{next_in}, but it will want to
++write data to @samp{next_out}. Because
++the output buffer supplied by the user can be arbitrarily small,
++the finishing-up operation cannot necessarily be done with a
++single call of
++@samp{BZ2_bzCompress}.
++
++Instead, the calling program passes
++@samp{BZ_FINISH} as an action to
++@samp{BZ2_bzCompress}. This changes
++the stream's state to FINISHING. Any remaining input (ie,
++@samp{next_in[0 .. avail_in-1]}) is
++compressed and transferred to the output buffer. To do this,
++@samp{BZ2_bzCompress} must be called
++repeatedly until all the output has been consumed. At that
++point, @samp{BZ2_bzCompress} returns
++@samp{BZ_STREAM_END}, and the stream's
++state is set back to IDLE.
++@samp{BZ2_bzCompressEnd} should then be
++called.
++
++Just to make sure the calling program does not cheat, the
++library makes a note of @samp{avail_in}
++at the time of the first call to
++@samp{BZ2_bzCompress} which has
++@samp{BZ_FINISH} as an action (ie, at
++the time the program has announced its intention to not supply
++any more input). By comparing this value with that of
++@samp{avail_in} over subsequent calls
++to @samp{BZ2_bzCompress}, the library
++can detect any attempts to slip in more data to compress. Any
++calls for which this is detected will return
++@samp{BZ_SEQUENCE_ERROR}. This
++indicates a programming mistake which should be corrected.
++
++Instead of asking to finish, the calling program may ask
++@samp{BZ2_bzCompress} to take all the
++remaining input, compress it and terminate the current
++(Burrows-Wheeler) compression block. This could be useful for
++error control purposes. The mechanism is analogous to that for
++finishing: call @samp{BZ2_bzCompress}
++with an action of @samp{BZ_FLUSH},
++remove output data, and persist with the
++@samp{BZ_FLUSH} action until the value
++@samp{BZ_RUN_OK} is returned. As with
++finishing, @samp{BZ2_bzCompress}
++detects any attempt to provide more input data once the flush has
++begun.
++
++Once the flush is complete, the stream returns to the
++normal RUNNING state.
++
++This all sounds pretty complex, but isn't really. Here's a
++table which shows which actions are allowable in each state, what
++action will be taken, what the next state is, and what the
++non-error return values are. Note that you can't explicitly ask
++what state the stream is in, but nor do you need to -- it can be
++inferred from the values returned by
++@samp{BZ2_bzCompress}.
++
++@example
++
++IDLE/any
++ Illegal. IDLE state only exists after BZ2_bzCompressEnd or
++ before BZ2_bzCompressInit.
++ Return value = BZ_SEQUENCE_ERROR
++
++RUNNING/BZ_RUN
++ Compress from next_in to next_out as much as possible.
++ Next state = RUNNING
++ Return value = BZ_RUN_OK
++
++RUNNING/BZ_FLUSH
++ Remember current value of avail_in. Compress from next_in
++ to next_out as much as possible, but do not accept any more input.
++ Next state = FLUSHING
++ Return value = BZ_FLUSH_OK
++
++RUNNING/BZ_FINISH
++ Remember current value of avail_in. Compress from next_in
++ to next_out as much as possible, but do not accept any more input.
++ Next state = FINISHING
++ Return value = BZ_FINISH_OK
++
++FLUSHING/BZ_FLUSH
++ Compress from next_in to next_out as much as possible,
++ but do not accept any more input.
++ If all the existing input has been used up and all compressed
++ output has been removed
++ Next state = RUNNING; Return value = BZ_RUN_OK
++ else
++ Next state = FLUSHING; Return value = BZ_FLUSH_OK
++
++FLUSHING/other
++ Illegal.
++ Return value = BZ_SEQUENCE_ERROR
++
++FINISHING/BZ_FINISH
++ Compress from next_in to next_out as much as possible,
++ but do not accept any more input.
++ If all the existing input has been used up and all compressed
++ output has been removed
++ Next state = IDLE; Return value = BZ_STREAM_END
++ else
++ Next state = FINISHING; Return value = BZ_FINISH_OK
++
++FINISHING/other
++ Illegal.
++ Return value = BZ_SEQUENCE_ERROR
++@end example
++
++That still looks complicated? Well, fair enough. The
++usual sequence of calls for compressing a load of data is:
++
++@enumerate
++
++@item
++Get started with
++@samp{BZ2_bzCompressInit}.
++
++@item
++Shovel data in and shlurp out its compressed form
++using zero or more calls of
++@samp{BZ2_bzCompress} with action =
++@samp{BZ_RUN}.
++
++@item
++Finish up. Repeatedly call
++@samp{BZ2_bzCompress} with action =
++@samp{BZ_FINISH}, copying out the
++compressed output, until
++@samp{BZ_STREAM_END} is
++returned.
++
++@item
++Close up and go home. Call
++@samp{BZ2_bzCompressEnd}.
++@end enumerate
++
++If the data you want to compress fits into your input
++buffer all at once, you can skip the calls of
++@samp{BZ2_bzCompress ( ..., BZ_RUN )}
++and just do the @samp{BZ2_bzCompress ( ..., BZ_FINISH
++)} calls.
++
++All required memory is allocated by
++@samp{BZ2_bzCompressInit}. The
++compression library can accept any data at all (obviously). So
++you shouldn't get any error return values from the
++@samp{BZ2_bzCompress} calls. If you
++do, they will be
++@samp{BZ_SEQUENCE_ERROR}, and indicate
++a bug in your programming.
++
++Trivial other possible return values:
++
++@example
++
++BZ_PARAM_ERROR
++ if strm is NULL, or strm->state is NULL
++@end example
++
++@node BZ2_bzCompressEnd, BZ2_bzDecompressInit, BZ2_bzCompress, >Low-level interface
++@subsection BZ2_bzCompressEnd
++
++@example
++
++int BZ2_bzCompressEnd ( bz_stream *strm );
++@end example
++
++Releases all memory associated with a compression
++stream.
++
++Possible return values:
++
++@example
++
++BZ_PARAM_ERROR if strm is NULL or strm->s is NULL
++BZ_OK otherwise
++@end example
++
++@node BZ2_bzDecompressInit, BZ2_bzDecompress, BZ2_bzCompressEnd, >Low-level interface
++@subsection BZ2_bzDecompressInit
++
++@example
++
++int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small );
++@end example
++
++Prepares for decompression. As with
++@samp{BZ2_bzCompressInit}, a
++@samp{bz_stream} record should be
++allocated and initialised before the call. Fields
++@samp{bzalloc},
++@samp{bzfree} and
++@samp{opaque} should be set if a custom
++memory allocator is required, or made
++@samp{NULL} for the normal
++@samp{malloc} /
++@samp{free} routines. Upon return, the
++internal state will have been initialised, and
++@samp{total_in} and
++@samp{total_out} will be zero.
++
++For the meaning of parameter
++@samp{verbosity}, see
++@samp{BZ2_bzCompressInit}.
++
++If @samp{small} is nonzero, the
++library will use an alternative decompression algorithm which
++uses less memory but at the cost of decompressing more slowly
++(roughly speaking, half the speed, but the maximum memory
++requirement drops to around 2300k). See @ref{How to use bzip2,,How to use bzip2}.
++for more information on memory management.
++
++Note that the amount of memory needed to decompress a
++stream cannot be determined until the stream's header has been
++read, so even if
++@samp{BZ2_bzDecompressInit} succeeds, a
++subsequent @samp{BZ2_bzDecompress}
++could fail with
++@samp{BZ_MEM_ERROR}.
++
++Possible return values:
++
++@example
++
++BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++BZ_PARAM_ERROR
++ if ( small != 0 && small != 1 )
++ or (verbosity < 0 || verbosity > 4)
++BZ_MEM_ERROR
++ if insufficient memory is available
++@end example
++
++Allowable next actions:
++
++@example
++
++BZ2_bzDecompress
++ if BZ_OK was returned
++ no specific action required in case of error
++@end example
++
++@node BZ2_bzDecompress, BZ2_bzDecompressEnd, BZ2_bzDecompressInit, >Low-level interface
++@subsection BZ2_bzDecompress
++
++@example
++
++int BZ2_bzDecompress ( bz_stream *strm );
++@end example
++
++Provides more input and/or output buffer space for the
++library. The caller maintains input and output buffers, and uses
++@samp{BZ2_bzDecompress} to transfer
++data between them.
++
++Before each call to
++@samp{BZ2_bzDecompress},
++@samp{next_in} should point at the
++compressed data, and @samp{avail_in}
++should indicate how many bytes the library may read.
++@samp{BZ2_bzDecompress} updates
++@samp{next_in},
++@samp{avail_in} and
++@samp{total_in} to reflect the number
++of bytes it has read.
++
++Similarly, @samp{next_out} should
++point to a buffer in which the uncompressed output is to be
++placed, with @samp{avail_out}
++indicating how much output space is available.
++@samp{BZ2_bzDecompress} updates
++@samp{next_out},
++@samp{avail_out} and
++@samp{total_out} to reflect the number
++of bytes output.
++
++You may provide and remove as little or as much data as you
++like on each call of
++@samp{BZ2_bzDecompress}. In the limit,
++it is acceptable to supply and remove data one byte at a time,
++although this would be terribly inefficient. You should always
++ensure that at least one byte of output space is available at
++each call.
++
++Use of @samp{BZ2_bzDecompress} is
++simpler than
++@samp{BZ2_bzCompress}.
++
++You should provide input and remove output as described
++above, and repeatedly call
++@samp{BZ2_bzDecompress} until
++@samp{BZ_STREAM_END} is returned.
++Appearance of @samp{BZ_STREAM_END}
++denotes that @samp{BZ2_bzDecompress}
++has detected the logical end of the compressed stream.
++@samp{BZ2_bzDecompress} will not
++produce @samp{BZ_STREAM_END} until all
++output data has been placed into the output buffer, so once
++@samp{BZ_STREAM_END} appears, you are
++guaranteed to have available all the decompressed output, and
++@samp{BZ2_bzDecompressEnd} can safely
++be called.
++
++In case of an error return value, you should call
++@samp{BZ2_bzDecompressEnd} to clean up
++and release memory.
++
++Possible return values:
++
++@example
++
++BZ_PARAM_ERROR
++ if strm is NULL or strm->s is NULL
++ or strm->avail_out < 1
++BZ_DATA_ERROR
++ if a data integrity error is detected in the compressed stream
++BZ_DATA_ERROR_MAGIC
++ if the compressed stream doesn't begin with the right magic bytes
++BZ_MEM_ERROR
++ if there wasn't enough memory available
++BZ_STREAM_END
++ if the logical end of the data stream was detected and all
++ output has been consumed, eg strm->avail_out > 0
++BZ_OK
++ otherwise
++@end example
++
++Allowable next actions:
++
++@example
++
++BZ2_bzDecompress
++ if BZ_OK was returned
++BZ2_bzDecompressEnd
++ otherwise
++@end example
++
++@node BZ2_bzDecompressEnd, , BZ2_bzDecompress, >Low-level interface
++@subsection BZ2_bzDecompressEnd
++
++@example
++
++int BZ2_bzDecompressEnd ( bz_stream *strm );
++@end example
++
++Releases all memory associated with a decompression
++stream.
++
++Possible return values:
++
++@example
++
++BZ_PARAM_ERROR
++ if strm is NULL or strm->s is NULL
++BZ_OK
++ otherwise
++@end example
++
++Allowable next actions:
++
++@example
++
++ None.
++@end example
++
++@node High-level interface, Utility functions, >Low-level interface, Programming with libbzip2
++@section High-level interface
++
++This interface provides functions for reading and writing
++@samp{bzip2} format files. First, some
++general points.
++
++@itemize @bullet{}
++
++@item
++All of the functions take an
++@samp{int*} first argument,
++@samp{bzerror}. After each call,
++@samp{bzerror} should be consulted
++first to determine the outcome of the call. If
++@samp{bzerror} is
++@samp{BZ_OK}, the call completed
++successfully, and only then should the return value of the
++function (if any) be consulted. If
++@samp{bzerror} is
++@samp{BZ_IO_ERROR}, there was an
++error reading/writing the underlying compressed file, and you
++should then consult @samp{errno} /
++@samp{perror} to determine the cause
++of the difficulty. @samp{bzerror}
++may also be set to various other values; precise details are
++given on a per-function basis below.
++
++@item
++If @samp{bzerror} indicates
++an error (ie, anything except
++@samp{BZ_OK} and
++@samp{BZ_STREAM_END}), you should
++immediately call
++@samp{BZ2_bzReadClose} (or
++@samp{BZ2_bzWriteClose}, depending on
++whether you are attempting to read or to write) to free up all
++resources associated with the stream. Once an error has been
++indicated, behaviour of all calls except
++@samp{BZ2_bzReadClose}
++(@samp{BZ2_bzWriteClose}) is
++undefined. The implication is that (1)
++@samp{bzerror} should be checked
++after each call, and (2) if
++@samp{bzerror} indicates an error,
++@samp{BZ2_bzReadClose}
++(@samp{BZ2_bzWriteClose}) should then
++be called to clean up.
++
++@item
++The @samp{FILE*} arguments
++passed to @samp{BZ2_bzReadOpen} /
++@samp{BZ2_bzWriteOpen} should be set
++to binary mode. Most Unix systems will do this by default, but
++other platforms, including Windows and Mac, will not. If you
++omit this, you may encounter problems when moving code to new
++platforms.
++
++@item
++Memory allocation requests are handled by
++@samp{malloc} /
++@samp{free}. At present there is no
++facility for user-defined memory allocators in the file I/O
++functions (could easily be added, though).
++@end itemize
++
++@menu
++* BZ2_bzReadOpen::
++* BZ2_bzRead::
++* BZ2_bzReadGetUnused::
++* BZ2_bzReadClose::
++* BZ2_bzWriteOpen::
++* BZ2_bzWrite::
++* BZ2_bzWriteClose::
++* Handling embedded compressed data streams::
++* Standard file-reading/writing code::
++@end menu
++
++@node BZ2_bzReadOpen, BZ2_bzRead, , High-level interface
++@subsection BZ2_bzReadOpen
++
++@example
++
++typedef void BZFILE;
++
++BZFILE *BZ2_bzReadOpen( int *bzerror, FILE *f,
++ int verbosity, int small,
++ void *unused, int nUnused );
++@end example
++
++Prepare to read compressed data from file handle
++@samp{f}.
++@samp{f} should refer to a file which
++has been opened for reading, and for which the error indicator
++(@samp{ferror(f)}) is not set. If
++@samp{small} is 1, the library will try
++to decompress using less memory, at the expense of speed.
++
++For reasons explained below,
++@samp{BZ2_bzRead} will decompress the
++@samp{nUnused} bytes starting at
++@samp{unused}, before starting to read
++from the file @samp{f}. At most
++@samp{BZ_MAX_UNUSED} bytes may be
++supplied like this. If this facility is not required, you should
++pass @samp{NULL} and
++@samp{0} for
++@samp{unused} and
++@samp{nUnused} respectively.
++
++For the meaning of parameters
++@samp{small} and
++@samp{verbosity}, see
++@samp{BZ2_bzDecompressInit}.
++
++The amount of memory needed to decompress a file cannot be
++determined until the file's header has been read. So it is
++possible that @samp{BZ2_bzReadOpen}
++returns @samp{BZ_OK} but a subsequent
++call of @samp{BZ2_bzRead} will return
++@samp{BZ_MEM_ERROR}.
++
++Possible assignments to
++@samp{bzerror}:
++
++@example
++
++BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++BZ_PARAM_ERROR
++ if f is NULL
++ or small is neither 0 nor 1
++ or ( unused == NULL && nUnused != 0 )
++ or ( unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED) )
++BZ_IO_ERROR
++ if ferror(f) is nonzero
++BZ_MEM_ERROR
++ if insufficient memory is available
++BZ_OK
++ otherwise.
++@end example
++
++Possible return values:
++
++@example
++
++Pointer to an abstract BZFILE
++ if bzerror is BZ_OK
++NULL
++ otherwise
++@end example
++
++Allowable next actions:
++
++@example
++
++BZ2_bzRead
++ if bzerror is BZ_OK
++BZ2_bzReadClose
++ otherwise
++@end example
++
++@node BZ2_bzRead, BZ2_bzReadGetUnused, BZ2_bzReadOpen, High-level interface
++@subsection BZ2_bzRead
++
++@example
++
++int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len );
++@end example
++
++Reads up to @samp{len}
++(uncompressed) bytes from the compressed file
++@samp{b} into the buffer
++@samp{buf}. If the read was
++successful, @samp{bzerror} is set to
++@samp{BZ_OK} and the number of bytes
++read is returned. If the logical end-of-stream was detected,
++@samp{bzerror} will be set to
++@samp{BZ_STREAM_END}, and the number of
++bytes read is returned. All other
++@samp{bzerror} values denote an
++error.
++
++@samp{BZ2_bzRead} will supply
++@samp{len} bytes, unless the logical
++stream end is detected or an error occurs. Because of this, it
++is possible to detect the stream end by observing when the number
++of bytes returned is less than the number requested.
++Nevertheless, this is regarded as inadvisable; you should instead
++check @samp{bzerror} after every call
++and watch out for
++@samp{BZ_STREAM_END}.
++
++Internally, @samp{BZ2_bzRead}
++copies data from the compressed file in chunks of size
++@samp{BZ_MAX_UNUSED} bytes before
++decompressing it. If the file contains more bytes than strictly
++needed to reach the logical end-of-stream,
++@samp{BZ2_bzRead} will almost certainly
++read some of the trailing data before signalling
++@samp{BZ_STREAM_END}. To collect the
++read but unused data once
++@samp{BZ_STREAM_END} has appeared,
++call @samp{BZ2_bzReadGetUnused}
++immediately before
++@samp{BZ2_bzReadClose}.
++
++Possible assignments to
++@samp{bzerror}:
++
++@example
++
++BZ_PARAM_ERROR
++ if b is NULL or buf is NULL or len < 0
++BZ_SEQUENCE_ERROR
++ if b was opened with BZ2_bzWriteOpen
++BZ_IO_ERROR
++ if there is an error reading from the compressed file
++BZ_UNEXPECTED_EOF
++ if the compressed file ended before
++ the logical end-of-stream was detected
++BZ_DATA_ERROR
++ if a data integrity error was detected in the compressed stream
++BZ_DATA_ERROR_MAGIC
++ if the stream does not begin with the requisite header bytes
++ (ie, is not a bzip2 data file). This is really
++ a special case of BZ_DATA_ERROR.
++BZ_MEM_ERROR
++ if insufficient memory was available
++BZ_STREAM_END
++ if the logical end of stream was detected.
++BZ_OK
++ otherwise.
++@end example
++
++Possible return values:
++
++@example
++
++number of bytes read
++ if bzerror is BZ_OK or BZ_STREAM_END
++undefined
++ otherwise
++@end example
++
++Allowable next actions:
++
++@example
++
++collect data from buf, then BZ2_bzRead or BZ2_bzReadClose
++ if bzerror is BZ_OK
++collect data from buf, then BZ2_bzReadClose or BZ2_bzReadGetUnused
++ if bzerror is BZ_STREAM_END
++BZ2_bzReadClose
++ otherwise
++@end example
++
++@node BZ2_bzReadGetUnused, BZ2_bzReadClose, BZ2_bzRead, High-level interface
++@subsection BZ2_bzReadGetUnused
++
++@example
++
++void BZ2_bzReadGetUnused( int* bzerror, BZFILE *b,
++ void** unused, int* nUnused );
++@end example
++
++Returns data which was read from the compressed file but
++was not needed to get to the logical end-of-stream.
++@samp{*unused} is set to the address of
++the data, and @samp{*nUnused} to the
++number of bytes. @samp{*nUnused} will
++be set to a value between @samp{0} and
++@samp{BZ_MAX_UNUSED} inclusive.
++
++This function may only be called once
++@samp{BZ2_bzRead} has signalled
++@samp{BZ_STREAM_END} but before
++@samp{BZ2_bzReadClose}.
++
++Possible assignments to
++@samp{bzerror}:
++
++@example
++
++BZ_PARAM_ERROR
++ if b is NULL
++ or unused is NULL or nUnused is NULL
++BZ_SEQUENCE_ERROR
++ if BZ_STREAM_END has not been signalled
++ or if b was opened with BZ2_bzWriteOpen
++BZ_OK
++ otherwise
++@end example
++
++Allowable next actions:
++
++@example
++
++BZ2_bzReadClose
++@end example
++
++@node BZ2_bzReadClose, BZ2_bzWriteOpen, BZ2_bzReadGetUnused, High-level interface
++@subsection BZ2_bzReadClose
++
++@example
++
++void BZ2_bzReadClose ( int *bzerror, BZFILE *b );
++@end example
++
++Releases all memory pertaining to the compressed file
++@samp{b}.
++@samp{BZ2_bzReadClose} does not call
++@samp{fclose} on the underlying file
++handle, so you should do that yourself if appropriate.
++@samp{BZ2_bzReadClose} should be called
++to clean up after all error situations.
++
++Possible assignments to
++@samp{bzerror}:
++
++@example
++
++BZ_SEQUENCE_ERROR
++ if b was opened with BZ2_bzWriteOpen
++BZ_OK
++ otherwise
++@end example
++
++Allowable next actions:
++
++@example
++
++none
++@end example
++
++@node BZ2_bzWriteOpen, BZ2_bzWrite, BZ2_bzReadClose, High-level interface
++@subsection BZ2_bzWriteOpen
++
++@example
++
++BZFILE *BZ2_bzWriteOpen( int *bzerror, FILE *f,
++ int blockSize100k, int verbosity,
++ int workFactor );
++@end example
++
++Prepare to write compressed data to file handle
++@samp{f}.
++@samp{f} should refer to a file which
++has been opened for writing, and for which the error indicator
++(@samp{ferror(f)}) is not set.
++
++For the meaning of parameters
++@samp{blockSize100k},
++@samp{verbosity} and
++@samp{workFactor}, see
++@samp{BZ2_bzCompressInit}.
++
++All required memory is allocated at this stage, so if the
++call completes successfully,
++@samp{BZ_MEM_ERROR} cannot be signalled
++by a subsequent call to
++@samp{BZ2_bzWrite}.
++
++Possible assignments to
++@samp{bzerror}:
++
++@example
++
++BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++BZ_PARAM_ERROR
++ if f is NULL
++ or blockSize100k < 1 or blockSize100k > 9
++BZ_IO_ERROR
++ if ferror(f) is nonzero
++BZ_MEM_ERROR
++ if insufficient memory is available
++BZ_OK
++ otherwise
++@end example
++
++Possible return values:
++
++@example
++
++Pointer to an abstract BZFILE
++ if bzerror is BZ_OK
++NULL
++ otherwise
++@end example
++
++Allowable next actions:
++
++@example
++
++BZ2_bzWrite
++ if bzerror is BZ_OK
++ (you could go directly to BZ2_bzWriteClose, but this would be pretty pointless)
++BZ2_bzWriteClose
++ otherwise
++@end example
++
++@node BZ2_bzWrite, BZ2_bzWriteClose, BZ2_bzWriteOpen, High-level interface
++@subsection BZ2_bzWrite
++
++@example
++
++void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );
++@end example
++
++Absorbs @samp{len} bytes from the
++buffer @samp{buf}, eventually to be
++compressed and written to the file.
++
++Possible assignments to
++@samp{bzerror}:
++
++@example
++
++BZ_PARAM_ERROR
++ if b is NULL or buf is NULL or len < 0
++BZ_SEQUENCE_ERROR
++ if b was opened with BZ2_bzReadOpen
++BZ_IO_ERROR
++ if there is an error writing the compressed file.
++BZ_OK
++ otherwise
++@end example
++
++@node BZ2_bzWriteClose, Handling embedded compressed data streams, BZ2_bzWrite, High-level interface
++@subsection BZ2_bzWriteClose
++
++@example
++
++void BZ2_bzWriteClose( int *bzerror, BZFILE* b,
++ int abandon,
++ unsigned int* nbytes_in,
++ unsigned int* nbytes_out );
++
++void BZ2_bzWriteClose64( int *bzerror, BZFILE* b,
++ int abandon,
++ unsigned int* nbytes_in_lo32,
++ unsigned int* nbytes_in_hi32,
++ unsigned int* nbytes_out_lo32,
++ unsigned int* nbytes_out_hi32 );
++@end example
++
++Compresses and flushes to the compressed file all data so
++far supplied by @samp{BZ2_bzWrite}.
++The logical end-of-stream markers are also written, so subsequent
++calls to @samp{BZ2_bzWrite} are
++illegal. All memory associated with the compressed file
++@samp{b} is released.
++@samp{fflush} is called on the
++compressed file, but it is not
++@samp{fclose}'d.
++
++If @samp{BZ2_bzWriteClose} is
++called to clean up after an error, the only action is to release
++the memory. The library records the error codes issued by
++previous calls, so this situation will be detected automatically.
++There is no attempt to complete the compression operation, nor to
++@samp{fflush} the compressed file. You
++can force this behaviour to happen even in the case of no error,
++by passing a nonzero value to
++@samp{abandon}.
++
++If @samp{nbytes_in} is non-null,
++@samp{*nbytes_in} will be set to be the
++total volume of uncompressed data handled. Similarly,
++@samp{nbytes_out} will be set to the
++total volume of compressed data written. For compatibility with
++older versions of the library,
++@samp{BZ2_bzWriteClose} only yields the
++lower 32 bits of these counts. Use
++@samp{BZ2_bzWriteClose64} if you want
++the full 64 bit counts. These two functions are otherwise
++absolutely identical.
++
++Possible assignments to
++@samp{bzerror}:
++
++@example
++
++BZ_SEQUENCE_ERROR
++ if b was opened with BZ2_bzReadOpen
++BZ_IO_ERROR
++ if there is an error writing the compressed file
++BZ_OK
++ otherwise
++@end example
++
++@node Handling embedded compressed data streams, Standard file-reading/writing code, BZ2_bzWriteClose, High-level interface
++@subsection Handling embedded compressed data streams
++
++The high-level library facilitates use of
++@samp{bzip2} data streams which form
++some part of a surrounding, larger data stream.
++
++@itemize @bullet{}
++
++@item
++For writing, the library takes an open file handle,
++writes compressed data to it,
++@samp{fflush}es it but does not
++@samp{fclose} it. The calling
++application can write its own data before and after the
++compressed data stream, using that same file handle.
++
++@item
++Reading is more complex, and the facilities are not as
++general as they could be since generality is hard to reconcile
++with efficiency. @samp{BZ2_bzRead}
++reads from the compressed file in blocks of size
++@samp{BZ_MAX_UNUSED} bytes, and in
++doing so probably will overshoot the logical end of compressed
++stream. To recover this data once decompression has ended,
++call @samp{BZ2_bzReadGetUnused} after
++the last call of @samp{BZ2_bzRead}
++(the one returning
++@samp{BZ_STREAM_END}) but before
++calling
++@samp{BZ2_bzReadClose}.
++@end itemize
++
++This mechanism makes it easy to decompress multiple
++@samp{bzip2} streams placed end-to-end.
++At the end of one stream, when
++@samp{BZ2_bzRead} returns
++@samp{BZ_STREAM_END}, call
++@samp{BZ2_bzReadGetUnused} to collect
++the unused data (copy it into your own buffer somewhere). That
++data forms the start of the next compressed stream. To start
++uncompressing that next stream, call
++@samp{BZ2_bzReadOpen} again, feeding in
++the unused data via the @samp{unused} /
++@samp{nUnused} parameters. Keep doing
++this until @samp{BZ_STREAM_END} return
++coincides with the physical end of file
++(@samp{feof(f)}). In this situation
++@samp{BZ2_bzReadGetUnused} will of
++course return no data.
++
++This should give some feel for how the high-level interface
++can be used. If you require extra flexibility, you'll have to
++bite the bullet and get to grips with the low-level
++interface.
++
++@node Standard file-reading/writing code, , Handling embedded compressed data streams, High-level interface
++@subsection Standard file-reading/writing code
++
++Here's how you'd write data to a compressed file:
++
++@example
++
++FILE* f;
++BZFILE* b;
++int nBuf;
++char buf[ /* whatever size you like */ ];
++int bzerror;
++int nWritten;
++
++f = fopen ( "myfile.bz2", "w" );
++if ( !f ) @{
++ /* handle error */
++@}
++b = BZ2_bzWriteOpen( &bzerror, f, 9, 0, 30 );
++if (bzerror != BZ_OK) @{
++ BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
++ /* handle error */
++@}
++
++while ( /* condition */ ) @{
++ /* get data to write into buf, and set nBuf appropriately */
++ BZ2_bzWrite ( &bzerror, b, buf, nBuf );
++ if (bzerror == BZ_IO_ERROR) @{
++ BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
++ /* handle error */
++ @}
++@}
++
++BZ2_bzWriteClose( &bzerror, b, 0, NULL, NULL );
++if (bzerror == BZ_IO_ERROR) @{
++ /* handle error */
++@}
++@end example
++
++And to read from a compressed file:
++
++@example
++
++FILE* f;
++BZFILE* b;
++int nBuf;
++char buf[ /* whatever size you like */ ];
++int bzerror;
++int nWritten;
++
++f = fopen ( "myfile.bz2", "r" );
++if ( !f ) @{
++ /* handle error */
++@}
++b = BZ2_bzReadOpen ( &bzerror, f, 0, 0, NULL, 0 );
++if ( bzerror != BZ_OK ) @{
++ BZ2_bzReadClose ( &bzerror, b );
++ /* handle error */
++@}
++
++bzerror = BZ_OK;
++while ( bzerror == BZ_OK && /* arbitrary other conditions */) @{
++ nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ );
++ if ( bzerror == BZ_OK || bzerror == BZ_STREAM_END ) @{
++ /* do something with buf[0 .. nBuf-1] */
++ @}
++@}
++if ( bzerror != BZ_STREAM_END ) @{
++ BZ2_bzReadClose ( &bzerror, b );
++ /* handle error */
++@} else @{
++ BZ2_bzReadClose ( &bzerror, b );
++@}
++@end example
++
++@node Utility functions, zlib compatibility functions, High-level interface, Programming with libbzip2
++@section Utility functions
++
++@menu
++* BZ2_bzBuffToBuffCompress::
++* BZ2_bzBuffToBuffDecompress::
++@end menu
++
++@node BZ2_bzBuffToBuffCompress, BZ2_bzBuffToBuffDecompress, , Utility functions
++@subsection BZ2_bzBuffToBuffCompress
++
++@example
++
++int BZ2_bzBuffToBuffCompress( char* dest,
++ unsigned int* destLen,
++ char* source,
++ unsigned int sourceLen,
++ int blockSize100k,
++ int verbosity,
++ int workFactor );
++@end example
++
++Attempts to compress the data in @samp{source[0
++.. sourceLen-1]} into the destination buffer,
++@samp{dest[0 .. *destLen-1]}. If the
++destination buffer is big enough,
++@samp{*destLen} is set to the size of
++the compressed data, and @samp{BZ_OK}
++is returned. If the compressed data won't fit,
++@samp{*destLen} is unchanged, and
++@samp{BZ_OUTBUFF_FULL} is
++returned.
++
++Compression in this manner is a one-shot event, done with a
++single call to this function. The resulting compressed data is a
++complete @samp{bzip2} format data
++stream. There is no mechanism for making additional calls to
++provide extra input data. If you want that kind of mechanism,
++use the low-level interface.
++
++For the meaning of parameters
++@samp{blockSize100k},
++@samp{verbosity} and
++@samp{workFactor}, see
++@samp{BZ2_bzCompressInit}.
++
++To guarantee that the compressed data will fit in its
++buffer, allocate an output buffer of size 1% larger than the
++uncompressed data, plus six hundred extra bytes.
++
++@samp{BZ2_bzBuffToBuffCompress}
++will not write data at or beyond
++@samp{dest[*destLen]}, even in case of
++buffer overflow.
++
++Possible return values:
++
++@example
++
++BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++BZ_PARAM_ERROR
++ if dest is NULL or destLen is NULL
++ or blockSize100k < 1 or blockSize100k > 9
++ or verbosity < 0 or verbosity > 4
++ or workFactor < 0 or workFactor > 250
++BZ_MEM_ERROR
++ if insufficient memory is available
++BZ_OUTBUFF_FULL
++ if the size of the compressed data exceeds *destLen
++BZ_OK
++ otherwise
++@end example
++
++@node BZ2_bzBuffToBuffDecompress, , BZ2_bzBuffToBuffCompress, Utility functions
++@subsection BZ2_bzBuffToBuffDecompress
++
++@example
++
++int BZ2_bzBuffToBuffDecompress( char* dest,
++ unsigned int* destLen,
++ char* source,
++ unsigned int sourceLen,
++ int small,
++ int verbosity );
++@end example
++
++Attempts to decompress the data in @samp{source[0
++.. sourceLen-1]} into the destination buffer,
++@samp{dest[0 .. *destLen-1]}. If the
++destination buffer is big enough,
++@samp{*destLen} is set to the size of
++the uncompressed data, and @samp{BZ_OK}
++is returned. If the uncompressed data won't fit,
++@samp{*destLen} is unchanged, and
++@samp{BZ_OUTBUFF_FULL} is
++returned.
++
++@samp{source} is assumed to hold
++a complete @samp{bzip2} format data
++stream.
++@samp{BZ2_bzBuffToBuffDecompress} tries
++to decompress the entirety of the stream into the output
++buffer.
++
++For the meaning of parameters
++@samp{small} and
++@samp{verbosity}, see
++@samp{BZ2_bzDecompressInit}.
++
++Because the compression ratio of the compressed data cannot
++be known in advance, there is no easy way to guarantee that the
++output buffer will be big enough. You may of course make
++arrangements in your code to record the size of the uncompressed
++data, but such a mechanism is beyond the scope of this
++library.
++
++@samp{BZ2_bzBuffToBuffDecompress}
++will not write data at or beyond
++@samp{dest[*destLen]}, even in case of
++buffer overflow.
++
++Possible return values:
++
++@example
++
++BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++BZ_PARAM_ERROR
++ if dest is NULL or destLen is NULL
++ or small != 0 && small != 1
++ or verbosity < 0 or verbosity > 4
++BZ_MEM_ERROR
++ if insufficient memory is available
++BZ_OUTBUFF_FULL
++ if the size of the uncompressed data exceeds *destLen
++BZ_DATA_ERROR
++ if a data integrity error was detected in the compressed data
++BZ_DATA_ERROR_MAGIC
++ if the compressed data doesn't begin with the right magic bytes
++BZ_UNEXPECTED_EOF
++ if the compressed data ends unexpectedly
++BZ_OK
++ otherwise
++@end example
++
++@node zlib compatibility functions, Using the library in a stdio-free environment, Utility functions, Programming with libbzip2
++@section zlib compatibility functions
++
++Yoshioka Tsuneo has contributed some functions to give
++better @samp{zlib} compatibility.
++These functions are @samp{BZ2_bzopen},
++@samp{BZ2_bzread},
++@samp{BZ2_bzwrite},
++@samp{BZ2_bzflush},
++@samp{BZ2_bzclose},
++@samp{BZ2_bzerror} and
++@samp{BZ2_bzlibVersion}. These
++functions are not (yet) officially part of the library. If they
++break, you get to keep all the pieces. Nevertheless, I think
++they work ok.
++
++@example
++
++typedef void BZFILE;
++
++const char * BZ2_bzlibVersion ( void );
++@end example
++
++Returns a string indicating the library version.
++
++@example
++
++BZFILE * BZ2_bzopen ( const char *path, const char *mode );
++BZFILE * BZ2_bzdopen ( int fd, const char *mode );
++@end example
++
++Opens a @samp{.bz2} file for
++reading or writing, using either its name or a pre-existing file
++descriptor. Analogous to @samp{fopen}
++and @samp{fdopen}.
++
++@example
++
++int BZ2_bzread ( BZFILE* b, void* buf, int len );
++int BZ2_bzwrite ( BZFILE* b, void* buf, int len );
++@end example
++
++Reads/writes data from/to a previously opened
++@samp{BZFILE}. Analogous to
++@samp{fread} and
++@samp{fwrite}.
++
++@example
++
++int BZ2_bzflush ( BZFILE* b );
++void BZ2_bzclose ( BZFILE* b );
++@end example
++
++Flushes/closes a @samp{BZFILE}.
++@samp{BZ2_bzflush} doesn't actually do
++anything. Analogous to @samp{fflush}
++and @samp{fclose}.
++
++@example
++
++const char * BZ2_bzerror ( BZFILE *b, int *errnum );
++@end example
++
++Returns a string describing the most recent error status of
++@samp{b}, and also sets
++@samp{*errnum} to its numerical
++value.
++
++@node Using the library in a stdio-free environment, Making a Windows DLL, zlib compatibility functions, Programming with libbzip2
++@section Using the library in a stdio-free environment
++
++@menu
++* Getting rid of stdio::
++* Critical error handling::
++@end menu
++
++@node Getting rid of stdio, Critical error handling, , Using the library in a stdio-free environment
++@subsection Getting rid of stdio
++
++In a deeply embedded application, you might want to use
++just the memory-to-memory functions. You can do this
++conveniently by compiling the library with preprocessor symbol
++@samp{BZ_NO_STDIO} defined. Doing this
++gives you a library containing only the following eight
++functions:
++
++@samp{BZ2_bzCompressInit},
++@samp{BZ2_bzCompress},
++@samp{BZ2_bzCompressEnd},
++@samp{BZ2_bzDecompressInit},
++@samp{BZ2_bzDecompress},
++@samp{BZ2_bzDecompressEnd},
++@samp{BZ2_bzBuffToBuffCompress},
++@samp{BZ2_bzBuffToBuffDecompress}
++
++When compiled like this, all functions will ignore
++@samp{verbosity} settings.
++
++@node Critical error handling, , Getting rid of stdio, Using the library in a stdio-free environment
++@subsection Critical error handling
++
++@samp{libbzip2} contains a number
++of internal assertion checks which should, needless to say, never
++be activated. Nevertheless, if an assertion should fail,
++behaviour depends on whether or not the library was compiled with
++@samp{BZ_NO_STDIO} set.
++
++For a normal compile, an assertion failure yields the
++message:
++
++@quotation
++
++bzip2/libbzip2: internal error number N.
++
++This is a bug in bzip2/libbzip2, 1.0.3 of 15 February 2005.
++Please report it to me at: jseward@@bzip.org. If this happened
++when you were using some program which uses libbzip2 as a
++component, you should also report this bug to the author(s)
++of that program. Please make an effort to report this bug;
++timely and accurate bug reports eventually lead to higher
++quality software. Thanks. Julian Seward, 15 February 2005.
++@end quotation
++
++where @samp{N} is some error code
++number. If @samp{N == 1007}, it also
++prints some extra text advising the reader that unreliable memory
++is often associated with internal error 1007. (This is a
++frequently-observed-phenomenon with versions 1.0.0/1.0.1).
++
++@samp{exit(3)} is then
++called.
++
++For a @samp{stdio}-free library,
++assertion failures result in a call to a function declared
++as:
++
++@example
++
++extern void bz_internal_error ( int errcode );
++@end example
++
++The relevant code is passed as a parameter. You should
++supply such a function.
++
++In either case, once an assertion failure has occurred, any
++@samp{bz_stream} records involved can
++be regarded as invalid. You should not attempt to resume normal
++operation with them.
++
++You may, of course, change critical error handling to suit
++your needs. As I said above, critical errors indicate bugs in
++the library and should not occur. All "normal" error situations
++are indicated via error return codes from functions, and can be
++recovered from.
++
++@node Making a Windows DLL, , Using the library in a stdio-free environment, Programming with libbzip2
++@section Making a Windows DLL
++
++Everything related to Windows has been contributed by
++Yoshioka Tsuneo
++(@samp{QWF00133@@niftyserve.or.jp} /
++@samp{tsuneo-y@@is.aist-nara.ac.jp}), so
++you should send your queries to him (but perhaps Cc: me,
++@samp{jseward@@bzip.org}).
++
++My vague understanding of what to do is: using Visual C++
++5.0, open the project file
++@samp{libbz2.dsp}, and build. That's
++all.
++
++If you can't open the project file for some reason, make a
++new one, naming these files:
++@samp{blocksort.c},
++@samp{bzlib.c},
++@samp{compress.c},
++@samp{crctable.c},
++@samp{decompress.c},
++@samp{huffman.c},
++@samp{randtable.c} and
++@samp{libbz2.def}. You will also need
++to name the header files @samp{bzlib.h}
++and @samp{bzlib_private.h}.
++
++If you don't use VC++, you may need to define the
++preprocessor symbol
++@samp{_WIN32}.
++
++Finally, @samp{dlltest.c} is a
++sample program using the DLL. It has a project file,
++@samp{dlltest.dsp}.
++
++If you just want a makefile for Visual C, have a look at
++@samp{makefile.msc}.
++
++Be aware that if you compile
++@samp{bzip2} itself on Win32, you must
++set @samp{BZ_UNIX} to 0 and
++@samp{BZ_LCCWIN32} to 1, in the file
++@samp{bzip2.c}, before compiling.
++Otherwise the resulting binary won't work correctly.
++
++I haven't tried any of this stuff myself, but it all looks
++plausible.
++
++@node Miscellanea, , Programming with libbzip2, Top
++@chapter Miscellanea
++
++These are just some random thoughts of mine. Your mileage
++may vary.
++
++@menu
++* Limitations of the compressed file format::
++* Portability issues::
++* Reporting bugs::
++* Did you get the right package?::
++* Further Reading::
++@end menu
++
++@node Limitations of the compressed file format, Portability issues, , Miscellanea
++@section Limitations of the compressed file format
++
++@samp{bzip2-1.0.X},
++@samp{0.9.5} and
++@samp{0.9.0} use exactly the same file
++format as the original version,
++@samp{bzip2-0.1}. This decision was
++made in the interests of stability. Creating yet another
++incompatible compressed file format would create further
++confusion and disruption for users.
++
++Nevertheless, this is not a painless decision. Development
++work since the release of
++@samp{bzip2-0.1} in August 1997 has
++shown complexities in the file format which slow down
++decompression and, in retrospect, are unnecessary. These
++are:
++
++@itemize @bullet{}
++
++@item
++The run-length encoder, which is the first of the
++compression transformations, is entirely irrelevant. The
++original purpose was to protect the sorting algorithm from the
++very worst case input: a string of repeated symbols. But
++algorithm steps Q6a and Q6b in the original Burrows-Wheeler
++technical report (SRC-124) show how repeats can be handled
++without difficulty in block sorting.
++
++@item
++The randomisation mechanism doesn't really need to be
++there. Udi Manber and Gene Myers published a suffix array
++construction algorithm a few years back, which can be employed
++to sort any block, no matter how repetitive, in O(N log N)
++time. Subsequent work by Kunihiko Sadakane has produced a
++derivative O(N (log N)^2) algorithm which usually outperforms
++the Manber-Myers algorithm.
++
++I could have changed to Sadakane's algorithm, but I find
++it to be slower than @samp{bzip2}'s
++existing algorithm for most inputs, and the randomisation
++mechanism protects adequately against bad cases. I didn't
++think it was a good tradeoff to make. Partly this is due to
++the fact that I was not flooded with email complaints about
++@samp{bzip2-0.1}'s performance on
++repetitive data, so perhaps it isn't a problem for real
++inputs.
++
++Probably the best long-term solution, and the one I have
++incorporated into 0.9.5 and above, is to use the existing
++sorting algorithm initially, and fall back to a O(N (log N)^2)
++algorithm if the standard algorithm gets into
++difficulties.
++
++@item
++The compressed file format was never designed to be
++handled by a library, and I have had to jump through some hoops
++to produce an efficient implementation of decompression. It's
++a bit hairy. Try passing
++@samp{decompress.c} through the C
++preprocessor and you'll see what I mean. Much of this
++complexity could have been avoided if the compressed size of
++each block of data was recorded in the data stream.
++
++@item
++An Adler-32 checksum, rather than a CRC32 checksum,
++would be faster to compute.
++@end itemize
++
++It would be fair to say that the
++@samp{bzip2} format was frozen before I
++properly and fully understood the performance consequences of
++doing so.
++
++Improvements which I was able to incorporate into 0.9.0,
++despite using the same file format, are:
++
++@itemize @bullet{}
++
++@item
++Single array implementation of the inverse BWT. This
++significantly speeds up decompression, presumably because it
++reduces the number of cache misses.
++
++@item
++Faster inverse MTF transform for large MTF values.
++The new implementation is based on the notion of sliding blocks
++of values.
++
++@item
++@samp{bzip2-0.9.0} now reads
++and writes files with @samp{fread}
++and @samp{fwrite}; version 0.1 used
++@samp{putc} and
++@samp{getc}. Duh! Well, you live
++and learn.
++@end itemize
++
++Further ahead, it would be nice to be able to do random
++access into files. This will require some careful design of
++compressed file formats.
++
++@node Portability issues, Reporting bugs, Limitations of the compressed file format, Miscellanea
++@section Portability issues
++
++After some consideration, I have decided not to use GNU
++@samp{autoconf} to configure 0.9.5 or
++1.0.
++
++@samp{autoconf}, admirable and
++wonderful though it is, mainly assists with portability problems
++between Unix-like platforms. But
++@samp{bzip2} doesn't have much in the
++way of portability problems on Unix; most of the difficulties
++appear when porting to the Mac, or to Microsoft's operating
++systems. @samp{autoconf} doesn't help
++in those cases, and brings in a whole load of new
++complexity.
++
++Most people should be able to compile the library and
++program under Unix straight out-of-the-box, so to speak,
++especially if you have a version of GNU C available.
++
++There are a couple of
++@samp{__inline__} directives in the
++code. GNU C (@samp{gcc}) should be
++able to handle them. If you're not using GNU C, your C compiler
++shouldn't see them at all. If your compiler does, for some
++reason, see them and doesn't like them, just
++@samp{#define}
++@samp{__inline__} to be
++@samp{/* */}. One easy way to do this
++is to compile with the flag
++@samp{-D__inline__=}, which should be
++understood by most Unix compilers.
++
++If you still have difficulties, try compiling with the
++macro @samp{BZ_STRICT_ANSI} defined.
++This should enable you to build the library in a strictly ANSI
++compliant environment. Building the program itself like this is
++dangerous and not supported, since you remove
++@samp{bzip2}'s checks against
++compressing directories, symbolic links, devices, and other
++not-really-a-file entities. This could cause filesystem
++corruption!
++
++One other thing: if you create a
++@samp{bzip2} binary for public distribution,
++please consider linking it statically (@samp{gcc
++-static}). This avoids all sorts of library-version
++issues that others may encounter later on.
++
++If you build @samp{bzip2} on
++Win32, you must set @samp{BZ_UNIX} to 0
++and @samp{BZ_LCCWIN32} to 1, in the
++file @samp{bzip2.c}, before compiling.
++Otherwise the resulting binary won't work correctly.
++
++@node Reporting bugs, Did you get the right package?, Portability issues, Miscellanea
++@section Reporting bugs
++
++I tried pretty hard to make sure
++@samp{bzip2} is bug free, both by
++design and by testing. Hopefully you'll never need to read this
++section for real.
++
++Nevertheless, if @samp{bzip2} dies
++with a segmentation fault, a bus error or an internal assertion
++failure, it will ask you to email me a bug report. Experience from
++years of feedback of bzip2 users indicates that almost all these
++problems can be traced to either compiler bugs or hardware
++problems.
++
++@itemize @bullet{}
++
++@item
++Recompile the program with no optimisation, and
++see if it works. And/or try a different compiler. I heard all
++sorts of stories about various flavours of GNU C (and other
++compilers) generating bad code for
++@samp{bzip2}, and I've run across two
++such examples myself.
++
++2.7.X versions of GNU C are known to generate bad code
++from time to time, at high optimisation levels. If you get
++problems, try using the flags
++@samp{-O2}
++@samp{-fomit-frame-pointer}
++@samp{-fno-strength-reduce}. You
++should specifically @i{not} use
++@samp{-funroll-loops}.
++
++You may notice that the Makefile runs six tests as part
++of the build process. If the program passes all of these, it's
++a pretty good (but not 100%) indication that the compiler has
++done its job correctly.
++
++@item
++If @samp{bzip2}
++crashes randomly, and the crashes are not repeatable, you may
++have a flaky memory subsystem.
++@samp{bzip2} really hammers your
++memory hierarchy, and if it's a bit marginal, you may get these
++problems. Ditto if your disk or I/O subsystem is slowly
++failing. Yup, this really does happen.
++
++Try using a different machine of the same type, and see
++if you can repeat the problem.
++
++@item
++This isn't really a bug, but ... If
++@samp{bzip2} tells you your file is
++corrupted on decompression, and you obtained the file via FTP,
++there is a possibility that you forgot to tell FTP to do a
++binary mode transfer. That absolutely will cause the file to
++be non-decompressible. You'll have to transfer it
++again.
++@end itemize
++
++If you've incorporated
++@samp{libbzip2} into your own program
++and are getting problems, please, please, please, check that the
++parameters you are passing in calls to the library, are correct,
++and in accordance with what the documentation says is allowable.
++I have tried to make the library robust against such problems,
++but I'm sure I haven't succeeded.
++
++Finally, if the above comments don't help, you'll have to
++send me a bug report. Now, it's just amazing how many people
++will send me a bug report saying something like:
++
++@example
++
++bzip2 crashed with segmentation fault on my machine
++@end example
++
++and absolutely nothing else. Needless to say, such a
++report is @i{totally, utterly, completely and
++comprehensively 100% useless; a waste of your time, my time, and
++net bandwidth}. With no details at all, there's no way
++I can possibly begin to figure out what the problem is.
++
++The rules of the game are: facts, facts, facts. Don't omit
++them because "oh, they won't be relevant". At the bare
++minimum:
++
++@example
++
++Machine type. Operating system version.
++Exact version of bzip2 (do bzip2 -V).
++Exact version of the compiler used.
++Flags passed to the compiler.
++@end example
++
++However, the most important single thing that will help me
++is the file that you were trying to compress or decompress at the
++time the problem happened. Without that, my ability to do
++anything more than speculate about the cause, is limited.
++
++@node Did you get the right package?, Further Reading, Reporting bugs, Miscellanea
++@section Did you get the right package?
++
++@samp{bzip2} is a resource hog.
++It soaks up large amounts of CPU cycles and memory. Also, it
++gives very large latencies. In the worst case, you can feed many
++megabytes of uncompressed data into the library before getting
++any compressed output, so this probably rules out applications
++requiring interactive behaviour.
++
++These aren't faults of my implementation, I hope, but more
++an intrinsic property of the Burrows-Wheeler transform
++(unfortunately). Maybe this isn't what you want.
++
++If you want a compressor and/or library which is faster,
++uses less memory but gets pretty good compression, and has
++minimal latency, consider Jean-loup Gailly's and Mark Adler's
++work, @samp{zlib-1.2.1} and
++@samp{gzip-1.2.4}. Look for them at
++@uref{http://www.zlib.org,http://www.zlib.org} and
++@uref{http://www.gzip.org,http://www.gzip.org}
++respectively.
++
++For something faster and lighter still, you might try Markus F
++X J Oberhumer's @samp{LZO} real-time
++compression/decompression library, at
++@uref{http://www.oberhumer.com/opensource,http://www.oberhumer.com/opensource}.
++
++@node Further Reading, , Did you get the right package?, Miscellanea
++@section Further Reading
++
++@samp{bzip2} is not research
++work, in the sense that it doesn't present any new ideas.
++Rather, it's an engineering exercise based on existing
++ideas.
++
++Four documents describe essentially all the ideas behind
++@samp{bzip2}:
++
++@display
++Michael Burrows and D. J. Wheeler:
++ "A block-sorting lossless data compression algorithm"
++ 10th May 1994.
++ Digital SRC Research Report 124.
++ ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz
++ If you have trouble finding it, try searching at the
++ New Zealand Digital Library, http://www.nzdl.org.
++
++Daniel S. Hirschberg and Debra A. LeLewer
++ "Efficient Decoding of Prefix Codes"
++ Communications of the ACM, April 1990, Vol 33, Number 4.
++ You might be able to get an electronic copy of this
++ from the ACM Digital Library.
++
++David J. Wheeler
++ Program bred3.c and accompanying document bred3.ps.
++ This contains the idea behind the multi-table Huffman coding scheme.
++ ftp://ftp.cl.cam.ac.uk/users/djw3/
++
++Jon L. Bentley and Robert Sedgewick
++ "Fast Algorithms for Sorting and Searching Strings"
++ Available from Sedgewick's web page,
++ www.cs.princeton.edu/~rs
++@end display
++
++The following paper gives valuable additional insights into
++the algorithm, but is not immediately the basis of any code used
++in bzip2.
++
++@display
++Peter Fenwick:
++ Block Sorting Text Compression
++ Proceedings of the 19th Australasian Computer Science Conference,
++ Melbourne, Australia. Jan 31 - Feb 2, 1996.
++ ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps
++@end display
++
++Kunihiko Sadakane's sorting algorithm, mentioned above, is
++available from:
++
++@display
++http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz
++@end display
++
++The Manber-Myers suffix array construction algorithm is
++described in a paper available from:
++
++@display
++http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps
++@end display
++
++Finally, the following papers document some
++investigations I made into the performance of sorting
++and decompression algorithms:
++
++@display
++Julian Seward
++ On the Performance of BWT Sorting Algorithms
++ Proceedings of the IEEE Data Compression Conference 2000
++ Snowbird, Utah. 28-30 March 2000.
++
++Julian Seward
++ Space-time Tradeoffs in the Inverse B-W Transform
++ Proceedings of the IEEE Data Compression Conference 2001
++ Snowbird, Utah. 27-29 March 2001.
++@end display
++
++@bye
+--- a/bzexe.1 2011-12-04 13:55:53.589856334 +1100
++++ b/bzexe.1 2011-12-04 18:16:28.000000000 +1100
+@@ -0,0 +1,43 @@
++.TH BZEXE 1
++.SH NAME
++bzexe \- compress executable files in place
++.SH SYNOPSIS
++.B bzexe
++[ name ... ]
++.SH DESCRIPTION
++The
++.I bzexe
++utility allows you to compress executables in place and have them
++automatically uncompress and execute when you run them (at a penalty
++in performance). For example if you execute ``bzexe /bin/cat'' it
++will create the following two files:
++.nf
++.br
++ -r-xr-xr-x 1 root bin 9644 Feb 11 11:16 /bin/cat
++ -r-xr-xr-x 1 bin bin 24576 Nov 23 13:21 /bin/cat~
++.fi
++/bin/cat~ is the original file and /bin/cat is the self-uncompressing
++executable file. You can remove /bin/cat~ once you are sure that
++/bin/cat works properly.
++.PP
++This utility is most useful on systems with very small disks.
++.SH OPTIONS
++.TP
++.B \-d
++Decompress the given executables instead of compressing them.
++.SH "SEE ALSO"
++bzip2(1), znew(1), zmore(1), zcmp(1), zforce(1)
++.SH CAVEATS
++The compressed executable is a shell script. This may create some
++security holes. In particular, the compressed executable relies
++on the PATH environment variable to find
++.I bzip2
++and some other utilities
++.I (tail, chmod, ln, sleep).
++.SH "BUGS"
++.I bzexe
++attempts to retain the original file attributes on the compressed executable,
++but you may have to fix them manually in some cases, using
++.I chmod
++or
++.I chown.
+--- a/manual.info 2011-12-04 13:55:53.589856334 +1100
++++ b/manual.info 2011-12-04 18:16:28.000000000 +1100
+@@ -0,0 +1,2338 @@
++This is manual.info, produced by makeinfo version 4.8 from manual.texi.
++
++START-INFO-DIR-ENTRY
++* Bzip2: (bzip2). A program and library for data compression.
++END-INFO-DIR-ENTRY
++
++
++File: manual.info, Node: Top, Next: Introduction, Up: (dir)
++
++bzip2 and libbzip2, version 1.0.3
++*********************************
++
++* Menu:
++
++* Introduction::
++* How to use bzip2::
++* Programming with libbzip2::
++* Miscellanea::
++
++--- The Detailed Node Listing ---
++
++How to use bzip2
++
++* NAME::
++* SYNOPSIS::
++* DESCRIPTION::
++* OPTIONS::
++* MEMORY MANAGEMENT::
++* RECOVERING DATA FROM DAMAGED FILES::
++* PERFORMANCE NOTES::
++* CAVEATS::
++* AUTHOR::
++
++ Programming with libbzip2
++
++* Top-level structure::
++* Error handling::
++* Low-level interface: >Low-level interface.
++* High-level interface::
++* Utility functions::
++* zlib compatibility functions::
++* Using the library in a stdio-free environment::
++* Making a Windows DLL::
++
++Miscellanea
++
++* Limitations of the compressed file format::
++* Portability issues::
++* Reporting bugs::
++* Did you get the right package?::
++* Further Reading::
++
++
++File: manual.info, Node: Introduction, Next: How to use bzip2, Prev: Top, Up: Top
++
++1 Introduction
++**************
++
++`bzip2' compresses files using the Burrows-Wheeler block-sorting text
++compression algorithm, and Huffman coding. Compression is generally
++considerably better than that achieved by more conventional
++LZ77/LZ78-based compressors, and approaches the performance of the PPM
++family of statistical compressors.
++
++ `bzip2' is built on top of `libbzip2', a flexible library for
++handling compressed data in the `bzip2' format. This manual describes
++both how to use the program and how to work with the library interface.
++Most of the manual is devoted to this library, not the program, which
++is good news if your interest is only in the program.
++
++ * *Note How to use bzip2: How to use bzip2. describes how to use
++ `bzip2'; this is the only part you need to read if you just want
++ to know how to operate the program.
++
++ * *Note Programming with libbzip2: Programming with libbzip2.
++ describes the programming interfaces in detail, and
++
++ * *Note Miscellanea: Miscellanea. records some miscellaneous notes
++ which I thought ought to be recorded somewhere.
++
++
++File: manual.info, Node: How to use bzip2, Next: Programming with libbzip2, Prev: Introduction, Up: Top
++
++2 How to use bzip2
++******************
++
++This chapter contains a copy of the `bzip2' man page, and nothing else.
++
++* Menu:
++
++* NAME::
++* SYNOPSIS::
++* DESCRIPTION::
++* OPTIONS::
++* MEMORY MANAGEMENT::
++* RECOVERING DATA FROM DAMAGED FILES::
++* PERFORMANCE NOTES::
++* CAVEATS::
++* AUTHOR::
++
++
++File: manual.info, Node: NAME, Next: SYNOPSIS, Up: How to use bzip2
++
++2.1 NAME
++========
++
++ * `bzip2', `bunzip2' - a block-sorting file compressor, v1.0.3
++
++ * `bzcat' - decompresses files to stdout
++
++ * `bzip2recover' - recovers data from damaged bzip2 files
++
++
++File: manual.info, Node: SYNOPSIS, Next: DESCRIPTION, Prev: NAME, Up: How to use bzip2
++
++2.2 SYNOPSIS
++============
++
++ * `bzip2' [ -cdfkqstvzVL123456789 ] [ filenames ... ]
++
++ * `bunzip2' [ -fkvsVL ] [ filenames ... ]
++
++ * `bzcat' [ -s ] [ filenames ... ]
++
++ * `bzip2recover' filename
++
++
++File: manual.info, Node: DESCRIPTION, Next: OPTIONS, Prev: SYNOPSIS, Up: How to use bzip2
++
++2.3 DESCRIPTION
++===============
++
++`bzip2' compresses files using the Burrows-Wheeler block sorting text
++compression algorithm, and Huffman coding. Compression is generally
++considerably better than that achieved by more conventional
++LZ77/LZ78-based compressors, and approaches the performance of the PPM
++family of statistical compressors.
++
++ The command-line options are deliberately very similar to those of
++GNU `gzip', but they are not identical.
++
++ `bzip2' expects a list of file names to accompany the command-line
++flags. Each file is replaced by a compressed version of itself, with
++the name `original_name.bz2'. Each compressed file has the same
++modification date, permissions, and, when possible, ownership as the
++corresponding original, so that these properties can be correctly
++restored at decompression time. File name handling is naive in the
++sense that there is no mechanism for preserving original file names,
++permissions, ownerships or dates in filesystems which lack these
++concepts, or have serious file name length restrictions, such as MS-DOS.
++
++ `bzip2' and `bunzip2' will by default not overwrite existing files.
++If you want this to happen, specify the `-f' flag.
++
++ If no file names are specified, `bzip2' compresses from standard
++input to standard output. In this case, `bzip2' will decline to write
++compressed output to a terminal, as this would be entirely
++incomprehensible and therefore pointless.
++
++ `bunzip2' (or `bzip2 -d') decompresses all specified files. Files
++which were not created by `bzip2' will be detected and ignored, and a
++warning issued. `bzip2' attempts to guess the filename for the
++decompressed file from that of the compressed file as follows:
++
++ * `filename.bz2 ' becomes `filename'
++
++ * `filename.bz ' becomes `filename'
++
++ * `filename.tbz2' becomes `filename.tar'
++
++ * `filename.tbz ' becomes `filename.tar'
++
++ * `anyothername ' becomes `anyothername.out'
++
++ If the file does not end in one of the recognised endings, `.bz2',
++`.bz', `.tbz2' or `.tbz', `bzip2' complains that it cannot guess the
++name of the original file, and uses the original name with `.out'
++appended.
++
++ As with compression, supplying no filenames causes decompression
++from standard input to standard output.
++
++ `bunzip2' will correctly decompress a file which is the
++concatenation of two or more compressed files. The result is the
++concatenation of the corresponding uncompressed files. Integrity testing
++(`-t') of concatenated compressed files is also supported.
++
++ You can also compress or decompress files to the standard output by
++giving the `-c' flag. Multiple files may be compressed and
++decompressed like this. The resulting outputs are fed sequentially to
++stdout. Compression of multiple files in this manner generates a stream
++containing multiple compressed file representations. Such a stream can
++be decompressed correctly only by `bzip2' version 0.9.0 or later.
++Earlier versions of `bzip2' will stop after decompressing the first
++file in the stream.
++
++ `bzcat' (or `bzip2 -dc') decompresses all specified files to the
++standard output.
++
++ `bzip2' will read arguments from the environment variables `BZIP2'
++and `BZIP', in that order, and will process them before any arguments
++read from the command line. This gives a convenient way to supply
++default arguments.
++
++ Compression is always performed, even if the compressed file is
++slightly larger than the original. Files of less than about one hundred
++bytes tend to get larger, since the compression mechanism has a
++constant overhead in the region of 50 bytes. Random data (including
++the output of most file compressors) is coded at about 8.05 bits per
++byte, giving an expansion of around 0.5%.
++
++ As a self-check for your protection, `bzip2' uses 32-bit CRCs to make
++sure that the decompressed version of a file is identical to the
++original. This guards against corruption of the compressed data, and
++against undetected bugs in `bzip2' (hopefully very unlikely). The
++chance of data corruption going undetected is microscopic, about one
++chance in four billion for each file processed. Be aware, though, that
++the check occurs upon decompression, so it can only tell you that
++something is wrong. It can't help you recover the original uncompressed
++data. You can use `bzip2recover' to try to recover data from damaged
++files.
++
++ Return values: 0 for a normal exit, 1 for environmental problems
++(file not found, invalid flags, I/O errors, etc.), 2 to indicate a
++corrupt compressed file, 3 for an internal consistency error (eg, bug)
++which caused `bzip2' to panic.
++
++
++File: manual.info, Node: OPTIONS, Next: MEMORY MANAGEMENT, Prev: DESCRIPTION, Up: How to use bzip2
++
++2.4 OPTIONS
++===========
++
++`-c --stdout'
++ Compress or decompress to standard output.
++
++`-d --decompress'
++ Force decompression. `bzip2', `bunzip2' and `bzcat' are really
++ the same program, and the decision about what actions to take is
++ done on the basis of which name is used. This flag overrides that
++ mechanism, and forces bzip2 to decompress.
++
++`-z --compress'
++ The complement to `-d': forces compression, regardless of the
++ invocation name.
++
++`-t --test'
++ Check integrity of the specified file(s), but don't decompress
++ them. This really performs a trial decompression and throws away
++ the result.
++
++`-f --force'
++ Force overwrite of output files. Normally, `bzip2' will not
++ overwrite existing output files. Also forces `bzip2' to break hard
++ links to files, which it otherwise wouldn't do.
++
++ `bzip2' normally declines to decompress files which don't have the
++ correct magic header bytes. If forced (`-f'), however, it will
++ pass such files through unmodified. This is how GNU `gzip' behaves.
++
++`-k --keep'
++ Keep (don't delete) input files during compression or
++ decompression.
++
++`-s --small'
++ Reduce memory usage, for compression, decompression and testing.
++ Files are decompressed and tested using a modified algorithm which
++ only requires 2.5 bytes per block byte. This means any file can be
++ decompressed in 2300k of memory, albeit at about half the normal
++ speed.
++
++ During compression, `-s' selects a block size of 200k, which
++ limits memory use to around the same figure, at the expense of
++ your compression ratio. In short, if your machine is low on memory
++ (8 megabytes or less), use `-s' for everything. See *Note MEMORY
++ MANAGEMENT: MEMORY MANAGEMENT. below.
++
++`-q --quiet'
++ Suppress non-essential warning messages. Messages pertaining to
++ I/O errors and other critical events will not be suppressed.
++
++`-v --verbose'
++ Verbose mode - show the compression ratio for each file processed.
++ Further `-v''s increase the verbosity level, spewing out lots of
++ information which is primarily of interest for diagnostic purposes.
++
++`-L --license -V --version'
++ Display the software version, license terms and conditions.
++
++`-1' (or `--fast') to `-9' (or `--best')
++ Set the block size to 100 k, 200 k ... 900 k when compressing. Has
++ no effect when decompressing. See *Note MEMORY MANAGEMENT: MEMORY
++ MANAGEMENT. below. The `--fast' and `--best' aliases are primarily
++ for GNU `gzip' compatibility. In particular, `--fast' doesn't
++ make things significantly faster. And `--best' merely selects the
++ default behaviour.
++
++`--'
++ Treats all subsequent arguments as file names, even if they start
++ with a dash. This is so you can handle files with names beginning
++ with a dash, for example: `bzip2 -- -myfilename'.
++
++`--repetitive-fast'
++`--repetitive-best'
++ These flags are redundant in versions 0.9.5 and above. They
++ provided some coarse control over the behaviour of the sorting
++ algorithm in earlier versions, which was sometimes useful. 0.9.5
++ and above have an improved algorithm which renders these flags
++ irrelevant.
++
++
++File: manual.info, Node: MEMORY MANAGEMENT, Next: RECOVERING DATA FROM DAMAGED FILES, Prev: OPTIONS, Up: How to use bzip2
++
++2.5 MEMORY MANAGEMENT
++=====================
++
++`bzip2' compresses large files in blocks. The block size affects both
++the compression ratio achieved, and the amount of memory needed for
++compression and decompression. The flags `-1' through `-9' specify the
++block size to be 100,000 bytes through 900,000 bytes (the default)
++respectively. At decompression time, the block size used for
++compression is read from the header of the compressed file, and
++`bunzip2' then allocates itself just enough memory to decompress the
++file. Since block sizes are stored in compressed files, it follows that
++the flags `-1' to `-9' are irrelevant to and so ignored during
++decompression.
++
++ Compression and decompression requirements, in bytes, can be
++estimated as:
++
++
++ Compression: 400k + ( 8 x block size )
++
++ Decompression: 100k + ( 4 x block size ), or
++ 100k + ( 2.5 x block size )
++
++ Larger block sizes give rapidly diminishing marginal returns. Most
++of the compression comes from the first two or three hundred k of block
++size, a fact worth bearing in mind when using `bzip2' on small machines.
++It is also important to appreciate that the decompression memory
++requirement is set at compression time by the choice of block size.
++
++ For files compressed with the default 900k block size, `bunzip2'
++will require about 3700 kbytes to decompress. To support decompression
++of any file on a 4 megabyte machine, `bunzip2' has an option to
++decompress using approximately half this amount of memory, about 2300
++kbytes. Decompression speed is also halved, so you should use this
++option only where necessary. The relevant flag is `-s'.
++
++ In general, try and use the largest block size memory constraints
++allow, since that maximises the compression achieved. Compression and
++decompression speed are virtually unaffected by block size.
++
++ Another significant point applies to files which fit in a single
++block - that means most files you'd encounter using a large block size.
++The amount of real memory touched is proportional to the size of the
++file, since the file is smaller than a block. For example, compressing
++a file 20,000 bytes long with the flag `-9' will cause the compressor
++to allocate around 7600k of memory, but only touch 400k + 20000 * 8 =
++560 kbytes of it. Similarly, the decompressor will allocate 3700k but
++only touch 100k + 20000 * 4 = 180 kbytes.
++
++ Here is a table which summarises the maximum memory usage for
++different block sizes. Also recorded is the total compressed size for
++14 files of the Calgary Text Compression Corpus totalling 3,141,622
++bytes. This column gives some feel for how compression varies with
++block size. These figures tend to understate the advantage of larger
++block sizes for larger files, since the Corpus is dominated by smaller
++files.
++
++
++ Compress Decompress Decompress Corpus
++ Flag usage usage -s usage Size
++
++ -1 1200k 500k 350k 914704
++ -2 2000k 900k 600k 877703
++ -3 2800k 1300k 850k 860338
++ -4 3600k 1700k 1100k 846899
++ -5 4400k 2100k 1350k 845160
++ -6 5200k 2500k 1600k 838626
++ -7 6100k 2900k 1850k 834096
++ -8 6800k 3300k 2100k 828642
++ -9 7600k 3700k 2350k 828642
++
++
++File: manual.info, Node: RECOVERING DATA FROM DAMAGED FILES, Next: PERFORMANCE NOTES, Prev: MEMORY MANAGEMENT, Up: How to use bzip2
++
++2.6 RECOVERING DATA FROM DAMAGED FILES
++======================================
++
++`bzip2' compresses files in blocks, usually 900kbytes long. Each block
++is handled independently. If a media or transmission error causes a
++multi-block `.bz2' file to become damaged, it may be possible to
++recover data from the undamaged blocks in the file.
++
++ The compressed representation of each block is delimited by a 48-bit
++pattern, which makes it possible to find the block boundaries with
++reasonable certainty. Each block also carries its own 32-bit CRC, so
++damaged blocks can be distinguished from undamaged ones.
++
++ `bzip2recover' is a simple program whose purpose is to search for
++blocks in `.bz2' files, and write each block out into its own `.bz2'
++file. You can then use `bzip2 -t' to test the integrity of the
++resulting files, and decompress those which are undamaged.
++
++ `bzip2recover' takes a single argument, the name of the damaged
++file, and writes a number of files `rec0001file.bz2',
++`rec0002file.bz2', etc, containing the extracted blocks. The output
++filenames are designed so that the use of wildcards in subsequent
++processing - for example, `bzip2 -dc rec*file.bz2 > recovered_data' -
++lists the files in the correct order.
++
++ `bzip2recover' should be of most use dealing with large `.bz2'
++files, as these will contain many blocks. It is clearly futile to use
++it on damaged single-block files, since a damaged block cannot be
++recovered. If you wish to minimise any potential data loss through
++media or transmission errors, you might consider compressing with a
++smaller block size.
++
++
++File: manual.info, Node: PERFORMANCE NOTES, Next: CAVEATS, Prev: RECOVERING DATA FROM DAMAGED FILES, Up: How to use bzip2
++
++2.7 PERFORMANCE NOTES
++=====================
++
++The sorting phase of compression gathers together similar strings in
++the file. Because of this, files containing very long runs of repeated
++symbols, like "aabaabaabaab ..." (repeated several hundred times) may
++compress more slowly than normal. Versions 0.9.5 and above fare much
++better than previous versions in this respect. The ratio between
++worst-case and average-case compression time is in the region of 10:1.
++For previous versions, this figure was more like 100:1. You can use the
++`-vvvv' option to monitor progress in great detail, if you want.
++
++ Decompression speed is unaffected by these phenomena.
++
++ `bzip2' usually allocates several megabytes of memory to operate in,
++and then charges all over it in a fairly random fashion. This means
++that performance, both for compressing and decompressing, is largely
++determined by the speed at which your machine can service cache misses.
++Because of this, small changes to the code to reduce the miss rate have
++been observed to give disproportionately large performance
++improvements. I imagine `bzip2' will perform best on machines with very
++large caches.
++
++
++File: manual.info, Node: CAVEATS, Next: AUTHOR, Prev: PERFORMANCE NOTES, Up: How to use bzip2
++
++2.8 CAVEATS
++===========
++
++I/O error messages are not as helpful as they could be. `bzip2' tries
++hard to detect I/O errors and exit cleanly, but the details of what the
++problem is sometimes seem rather misleading.
++
++ This manual page pertains to version 1.0.3 of `bzip2'. Compressed
++data created by this version is entirely forwards and backwards
++compatible with the previous public releases, versions 0.1pl2, 0.9.0 and
++0.9.5, 1.0.0, 1.0.1 and 1.0.2, but with the following exception: 0.9.0
++and above can correctly decompress multiple concatenated compressed
++files. 0.1pl2 cannot do this; it will stop after decompressing just the
++first file in the stream.
++
++ `bzip2recover' versions prior to 1.0.2 used 32-bit integers to
++represent bit positions in compressed files, so it could not handle
++compressed files more than 512 megabytes long. Versions 1.0.2 and above
++use 64-bit ints on some platforms which support them (GNU supported
++targets, and Windows). To establish whether or not `bzip2recover' was
++built with such a limitation, run it without arguments. In any event
++you can build yourself an unlimited version if you can recompile it with
++`MaybeUInt64' set to be an unsigned 64-bit integer.
++
++
++File: manual.info, Node: AUTHOR, Prev: CAVEATS, Up: How to use bzip2
++
++2.9 AUTHOR
++==========
++
++Julian Seward, `jseward@bzip.org'
++
++ The ideas embodied in `bzip2' are due to (at least) the following
++people: Michael Burrows and David Wheeler (for the block sorting
++transformation), David Wheeler (again, for the Huffman coder), Peter
++Fenwick (for the structured coding model in the original `bzip', and
++many refinements), and Alistair Moffat, Radford Neal and Ian Witten
++(for the arithmetic coder in the original `bzip'). I am much indebted
++for their help, support and advice. See the manual in the source
++distribution for pointers to sources of documentation. Christian von
++Roques encouraged me to look for faster sorting algorithms, so as to
++speed up compression. Bela Lubkin encouraged me to improve the
++worst-case compression performance. Donna Robinson XMLised the
++documentation. Many people sent patches, helped with portability
++problems, lent machines, gave advice and were generally helpful.
++
++
++File: manual.info, Node: Programming with libbzip2, Next: Miscellanea, Prev: How to use bzip2, Up: Top
++
++3 Programming with libbzip2
++***************************
++
++This chapter describes the programming interface to `libbzip2'.
++
++ For general background information, particularly about memory use
++and performance aspects, you'd be well advised to read *Note How to use
++bzip2: How to use bzip2. as well.
++
++* Menu:
++
++* Top-level structure::
++* Error handling::
++* Low-level interface: >Low-level interface.
++* High-level interface::
++* Utility functions::
++* zlib compatibility functions::
++* Using the library in a stdio-free environment::
++* Making a Windows DLL::
++
++
++File: manual.info, Node: Top-level structure, Next: Error handling, Up: Programming with libbzip2
++
++3.1 Top-level structure
++=======================
++
++`libbzip2' is a flexible library for compressing and decompressing data
++in the `bzip2' data format. Although packaged as a single entity, it
++helps to regard the library as three separate parts: the low level
++interface, the high level interface, and some utility functions.
++
++ The structure of `libbzip2''s interfaces is similar to that of
++Jean-loup Gailly's and Mark Adler's excellent `zlib' library.
++
++ All externally visible symbols have names beginning `BZ2_'. This is
++new in version 1.0. The intention is to minimise pollution of the
++namespaces of library clients.
++
++ To use any part of the library, you need to `#include <bzlib.h>'
++into your sources.
++
++* Menu:
++
++* Low-level summary::
++* High-level summary::
++* Utility functions summary::
++
++
++File: manual.info, Node: Low-level summary, Next: High-level summary, Up: Top-level structure
++
++3.1.1 Low-level summary
++-----------------------
++
++This interface provides services for compressing and decompressing data
++in memory. There's no provision for dealing with files, streams or any
++other I/O mechanisms, just straight memory-to-memory work. In fact,
++this part of the library can be compiled without inclusion of
++`stdio.h', which may be helpful for embedded applications.
++
++ The low-level part of the library has no global variables and is
++therefore thread-safe.
++
++ Six routines make up the low level interface: `BZ2_bzCompressInit',
++`BZ2_bzCompress', and `BZ2_bzCompressEnd' for compression, and a
++corresponding trio `BZ2_bzDecompressInit', `BZ2_bzDecompress' and
++`BZ2_bzDecompressEnd' for decompression. The `*Init' functions allocate
++memory for compression/decompression and do other initialisations,
++whilst the `*End' functions close down operations and release memory.
++
++ The real work is done by `BZ2_bzCompress' and `BZ2_bzDecompress'.
++These compress and decompress data from a user-supplied input buffer to
++a user-supplied output buffer. These buffers can be any size; arbitrary
++quantities of data are handled by making repeated calls to these
++functions. This is a flexible mechanism allowing a consumer-pull style
++of activity, or producer-push, or a mixture of both.
++
++
++File: manual.info, Node: High-level summary, Next: Utility functions summary, Prev: Low-level summary, Up: Top-level structure
++
++3.1.2 High-level summary
++------------------------
++
++This interface provides some handy wrappers around the low-level
++interface to facilitate reading and writing `bzip2' format files
++(`.bz2' files). The routines provide hooks to facilitate reading files
++in which the `bzip2' data stream is embedded within some larger-scale
++file structure, or where there are multiple `bzip2' data streams
++concatenated end-to-end.
++
++ For reading files, `BZ2_bzReadOpen', `BZ2_bzRead', `BZ2_bzReadClose'
++and `BZ2_bzReadGetUnused' are supplied. For writing files,
++`BZ2_bzWriteOpen', `BZ2_bzWrite' and `BZ2_bzWriteFinish' are available.
++
++ As with the low-level library, no global variables are used so the
++library is per se thread-safe. However, if I/O errors occur whilst
++reading or writing the underlying compressed files, you may have to
++consult `errno' to determine the cause of the error. In that case,
++you'd need a C library which correctly supports `errno' in a
++multithreaded environment.
++
++ To make the library a little simpler and more portable,
++`BZ2_bzReadOpen' and `BZ2_bzWriteOpen' require you to pass them file
++handles (`FILE*'s) which have previously been opened for reading or
++writing respectively. That avoids portability problems associated with
++file operations and file attributes, whilst not being much of an
++imposition on the programmer.
++
++
++File: manual.info, Node: Utility functions summary, Prev: High-level summary, Up: Top-level structure
++
++3.1.3 Utility functions summary
++-------------------------------
++
++For very simple needs, `BZ2_bzBuffToBuffCompress' and
++`BZ2_bzBuffToBuffDecompress' are provided. These compress data in
++memory from one buffer to another buffer in a single function call. You
++should assess whether these functions fulfill your memory-to-memory
++compression/decompression requirements before investing effort in
++understanding the more general but more complex low-level interface.
++
++ Yoshioka Tsuneo (`QWF00133@niftyserve.or.jp' /
++`tsuneo-y@is.aist-nara.ac.jp') has contributed some functions to give
++better `zlib' compatibility. These functions are `BZ2_bzopen',
++`BZ2_bzread', `BZ2_bzwrite', `BZ2_bzflush', `BZ2_bzclose',
++`BZ2_bzerror' and `BZ2_bzlibVersion'. You may find these functions more
++convenient for simple file reading and writing, than those in the
++high-level interface. These functions are not (yet) officially part of
++the library, and are minimally documented here. If they break, you get
++to keep all the pieces. I hope to document them properly when time
++permits.
++
++ Yoshioka also contributed modifications to allow the library to be
++built as a Windows DLL.
++
++
++File: manual.info, Node: Error handling, Next: >Low-level interface, Prev: Top-level structure, Up: Programming with libbzip2
++
++3.2 Error handling
++==================
++
++The library is designed to recover cleanly in all situations, including
++the worst-case situation of decompressing random data. I'm not 100%
++sure that it can always do this, so you might want to add a signal
++handler to catch segmentation violations during decompression if you
++are feeling especially paranoid. I would be interested in hearing more
++about the robustness of the library to corrupted compressed data.
++
++ Version 1.0.3 is more robust in this respect than any previous version.
++Investigations with Valgrind (a tool for detecting problems with memory
++management) indicate that, at least for the few files I tested, all
++single-bit errors in the decompressed data are caught properly, with no
++segmentation faults, no uses of uninitialised data, no out of range
++reads or writes, and no infinite looping in the decompressor. So it's
++certainly pretty robust, although I wouldn't claim it to be totally
++bombproof.
++
++ The file `bzlib.h' contains all definitions needed to use the
++library. In particular, you should definitely not include
++`bzlib_private.h'.
++
++ In `bzlib.h', the various return values are defined. The following
++list is not intended as an exhaustive description of the circumstances
++in which a given value may be returned - those descriptions are given
++later. Rather, it is intended to convey the rough meaning of each
++return value. The first five actions are normal and not intended to
++denote an error situation.
++
++`BZ_OK'
++ The requested action was completed successfully.
++
++`BZ_RUN_OK, BZ_FLUSH_OK, BZ_FINISH_OK'
++ In `BZ2_bzCompress', the requested flush/finish/nothing-special
++ action was completed successfully.
++
++`BZ_STREAM_END'
++ Compression of data was completed, or the logical stream end was
++ detected during decompression.
++
++ The following return values indicate an error of some kind.
++
++`BZ_CONFIG_ERROR'
++ Indicates that the library has been improperly compiled on your
++ platform - a major configuration error. Specifically, it means
++ that `sizeof(char)', `sizeof(short)' and `sizeof(int)' are not 1,
++ 2 and 4 respectively, as they should be. Note that the library
++ should still work properly on 64-bit platforms which follow the
++ LP64 programming model - that is, where `sizeof(long)' and
++ `sizeof(void*)' are 8. Under LP64, `sizeof(int)' is still 4, so
++ `libbzip2', which doesn't use the `long' type, is OK.
++
++`BZ_SEQUENCE_ERROR'
++ When using the library, it is important to call the functions in
++ the correct sequence and with data structures (buffers etc) in the
++ correct states. `libbzip2' checks as much as it can to ensure
++ this is happening, and returns `BZ_SEQUENCE_ERROR' if not. Code
++ which complies precisely with the function semantics, as detailed
++ below, should never receive this value; such an event denotes
++ buggy code which you should investigate.
++
++`BZ_PARAM_ERROR'
++ Returned when a parameter to a function call is out of range or
++ otherwise manifestly incorrect. As with `BZ_SEQUENCE_ERROR', this
++ denotes a bug in the client code. The distinction between
++ `BZ_PARAM_ERROR' and `BZ_SEQUENCE_ERROR' is a bit hazy, but still
++ worth making.
++
++`BZ_MEM_ERROR'
++ Returned when a request to allocate memory failed. Note that the
++ quantity of memory needed to decompress a stream cannot be
++ determined until the stream's header has been read. So
++ `BZ2_bzDecompress' and `BZ2_bzRead' may return `BZ_MEM_ERROR' even
++ though some of the compressed data has been read. The same is not
++ true for compression; once `BZ2_bzCompressInit' or
++ `BZ2_bzWriteOpen' have successfully completed, `BZ_MEM_ERROR'
++ cannot occur.
++
++`BZ_DATA_ERROR'
++ Returned when a data integrity error is detected during
++ decompression. Most importantly, this means when stored and
++ computed CRCs for the data do not match. This value is also
++ returned upon detection of any other anomaly in the compressed
++ data.
++
++`BZ_DATA_ERROR_MAGIC'
++ As a special case of `BZ_DATA_ERROR', it is sometimes useful to
++ know when the compressed stream does not start with the correct
++ magic bytes (`'B' 'Z' 'h'').
++
++`BZ_IO_ERROR'
++ Returned by `BZ2_bzRead' and `BZ2_bzWrite' when there is an error
++ reading or writing in the compressed file, and by `BZ2_bzReadOpen'
++ and `BZ2_bzWriteOpen' for attempts to use a file for which the
++ error indicator (viz, `ferror(f)') is set. On receipt of
++ `BZ_IO_ERROR', the caller should consult `errno' and/or `perror'
++ to acquire operating-system specific information about the problem.
++
++`BZ_UNEXPECTED_EOF'
++ Returned by `BZ2_bzRead' when the compressed file finishes before
++ the logical end of stream is detected.
++
++`BZ_OUTBUFF_FULL'
++ Returned by `BZ2_bzBuffToBuffCompress' and
++ `BZ2_bzBuffToBuffDecompress' to indicate that the output data will
++ not fit into the output buffer provided.
++
++
++File: manual.info, Node: >Low-level interface, Next: High-level interface, Prev: Error handling, Up: Programming with libbzip2
++
++3.3 Low-level interface
++=======================
++
++* Menu:
++
++* BZ2_bzCompressInit::
++* BZ2_bzCompress::
++* BZ2_bzCompressEnd::
++* BZ2_bzDecompressInit::
++* BZ2_bzDecompress::
++* BZ2_bzDecompressEnd::
++
++
++File: manual.info, Node: BZ2_bzCompressInit, Next: BZ2_bzCompress, Up: >Low-level interface
++
++3.3.1 BZ2_bzCompressInit
++------------------------
++
++
++ typedef struct {
++ char *next_in;
++ unsigned int avail_in;
++ unsigned int total_in_lo32;
++ unsigned int total_in_hi32;
++
++ char *next_out;
++ unsigned int avail_out;
++ unsigned int total_out_lo32;
++ unsigned int total_out_hi32;
++
++ void *state;
++
++ void *(*bzalloc)(void *,int,int);
++ void (*bzfree)(void *,void *);
++ void *opaque;
++ } bz_stream;
++
++ int BZ2_bzCompressInit ( bz_stream *strm,
++ int blockSize100k,
++ int verbosity,
++ int workFactor );
++
++ Prepares for compression. The `bz_stream' structure holds all data
++pertaining to the compression activity. A `bz_stream' structure should
++be allocated and initialised prior to the call. The fields of
++`bz_stream' comprise the entirety of the user-visible data. `state' is
++a pointer to the private data structures required for compression.
++
++ Custom memory allocators are supported, via fields `bzalloc',
++`bzfree', and `opaque'. The value `opaque' is passed as the first
++argument to all calls to `bzalloc' and `bzfree', but is otherwise
++ignored by the library. The call `bzalloc ( opaque, n, m )' is expected
++to return a pointer `p' to `n * m' bytes of memory, and `bzfree (
++opaque, p )' should free that memory.
++
++ If you don't want to use a custom memory allocator, set `bzalloc',
++`bzfree' and `opaque' to `NULL', and the library will then use the
++standard `malloc' / `free' routines.
++
++ Before calling `BZ2_bzCompressInit', fields `bzalloc', `bzfree' and
++`opaque' should be filled appropriately, as just described. Upon
++return, the internal state will have been allocated and initialised, and
++`total_in_lo32', `total_in_hi32', `total_out_lo32' and `total_out_hi32'
++will have been set to zero. These four fields are used by the library
++to inform the caller of the total amount of data passed into and out of
++the library, respectively. You should not try to change them. As of
++version 1.0, 64-bit counts are maintained, even on 32-bit platforms,
++using the `_hi32' fields to store the upper 32 bits of the count. So,
++for example, the total amount of data in is `(total_in_hi32 << 32) +
++total_in_lo32'.
++
++ Parameter `blockSize100k' specifies the block size to be used for
++compression. It should be a value between 1 and 9 inclusive, and the
++actual block size used is 100000 x this figure. 9 gives the best
++compression but takes most memory.
++
++ Parameter `verbosity' should be set to a number between 0 and 4
++inclusive. 0 is silent, and greater numbers give increasingly verbose
++monitoring/debugging output. If the library has been compiled with
++`-DBZ_NO_STDIO', no such output will appear for any verbosity setting.
++
++ Parameter `workFactor' controls how the compression phase behaves
++when presented with worst case, highly repetitive, input data. If
++compression runs into difficulties caused by repetitive data, the
++library switches from the standard sorting algorithm to a fallback
++algorithm. The fallback is slower than the standard algorithm by
++perhaps a factor of three, but always behaves reasonably, no matter how
++bad the input.
++
++ Lower values of `workFactor' reduce the amount of effort the
++standard algorithm will expend before resorting to the fallback. You
++should set this parameter carefully; too low, and many inputs will be
++handled by the fallback algorithm and so compress rather slowly, too
++high, and your average-to-worst case compression times can become very
++large. The default value of 30 gives reasonable behaviour over a wide
++range of circumstances.
++
++ Allowable values range from 0 to 250 inclusive. 0 is a special case,
++equivalent to using the default value of 30.
++
++ Note that the compressed output generated is the same regardless of
++whether or not the fallback algorithm is used.
++
++ Be aware also that this parameter may disappear entirely in future
++versions of the library. In principle it should be possible to devise a
++good way to automatically choose which algorithm to use. Such a
++mechanism would render the parameter obsolete.
++
++ Possible return values:
++
++
++ BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++ BZ_PARAM_ERROR
++ if strm is NULL
++ or blockSize < 1 or blockSize > 9
++ or verbosity < 0 or verbosity > 4
++ or workFactor < 0 or workFactor > 250
++ BZ_MEM_ERROR
++ if not enough memory is available
++ BZ_OK
++ otherwise
++
++ Allowable next actions:
++
++
++ BZ2_bzCompress
++ if BZ_OK is returned
++ no specific action needed in case of error
++
++
++File: manual.info, Node: BZ2_bzCompress, Next: BZ2_bzCompressEnd, Prev: BZ2_bzCompressInit, Up: >Low-level interface
++
++3.3.2 BZ2_bzCompress
++--------------------
++
++
++ int BZ2_bzCompress ( bz_stream *strm, int action );
++
++ Provides more input and/or output buffer space for the library. The
++caller maintains input and output buffers, and calls `BZ2_bzCompress'
++to transfer data between them.
++
++ Before each call to `BZ2_bzCompress', `next_in' should point at the
++data to be compressed, and `avail_in' should indicate how many bytes
++the library may read. `BZ2_bzCompress' updates `next_in', `avail_in'
++and `total_in' to reflect the number of bytes it has read.
++
++ Similarly, `next_out' should point to a buffer in which the
++compressed data is to be placed, with `avail_out' indicating how much
++output space is available. `BZ2_bzCompress' updates `next_out',
++`avail_out' and `total_out' to reflect the number of bytes output.
++
++ You may provide and remove as little or as much data as you like on
++each call of `BZ2_bzCompress'. In the limit, it is acceptable to supply
++and remove data one byte at a time, although this would be terribly
++inefficient. You should always ensure that at least one byte of output
++space is available at each call.
++
++ A second purpose of `BZ2_bzCompress' is to request a change of mode
++of the compressed stream.
++
++ Conceptually, a compressed stream can be in one of four states:
++IDLE, RUNNING, FLUSHING and FINISHING. Before initialisation
++(`BZ2_bzCompressInit') and after termination (`BZ2_bzCompressEnd'), a
++stream is regarded as IDLE.
++
++ Upon initialisation (`BZ2_bzCompressInit'), the stream is placed in
++the RUNNING state. Subsequent calls to `BZ2_bzCompress' should pass
++`BZ_RUN' as the requested action; other actions are illegal and will
++result in `BZ_SEQUENCE_ERROR'.
++
++ At some point, the calling program will have provided all the input
++data it wants to. It will then want to finish up - in effect, asking
++the library to process any data it might have buffered internally. In
++this state, `BZ2_bzCompress' will no longer attempt to read data from
++`next_in', but it will want to write data to `next_out'. Because the
++output buffer supplied by the user can be arbitrarily small, the
++finishing-up operation cannot necessarily be done with a single call of
++`BZ2_bzCompress'.
++
++ Instead, the calling program passes `BZ_FINISH' as an action to
++`BZ2_bzCompress'. This changes the stream's state to FINISHING. Any
++remaining input (ie, `next_in[0 .. avail_in-1]') is compressed and
++transferred to the output buffer. To do this, `BZ2_bzCompress' must be
++called repeatedly until all the output has been consumed. At that
++point, `BZ2_bzCompress' returns `BZ_STREAM_END', and the stream's state
++is set back to IDLE. `BZ2_bzCompressEnd' should then be called.
++
++ Just to make sure the calling program does not cheat, the library
++makes a note of `avail_in' at the time of the first call to
++`BZ2_bzCompress' which has `BZ_FINISH' as an action (ie, at the time
++the program has announced its intention to not supply any more input).
++By comparing this value with that of `avail_in' over subsequent calls
++to `BZ2_bzCompress', the library can detect any attempts to slip in
++more data to compress. Any calls for which this is detected will return
++`BZ_SEQUENCE_ERROR'. This indicates a programming mistake which should
++be corrected.
++
++ Instead of asking to finish, the calling program may ask
++`BZ2_bzCompress' to take all the remaining input, compress it and
++terminate the current (Burrows-Wheeler) compression block. This could
++be useful for error control purposes. The mechanism is analogous to
++that for finishing: call `BZ2_bzCompress' with an action of `BZ_FLUSH',
++remove output data, and persist with the `BZ_FLUSH' action until the
++value `BZ_RUN_OK' is returned. As with finishing, `BZ2_bzCompress' detects
++any attempt to provide more input data once the flush has begun.
++
++ Once the flush is complete, the stream returns to the normal RUNNING
++state.
++
++ This all sounds pretty complex, but isn't really. Here's a table
++which shows which actions are allowable in each state, what action will
++be taken, what the next state is, and what the non-error return values
++are. Note that you can't explicitly ask what state the stream is in,
++but nor do you need to - it can be inferred from the values returned by
++`BZ2_bzCompress'.
++
++
++ IDLE/any
++ Illegal. IDLE state only exists after BZ2_bzCompressEnd or
++ before BZ2_bzCompressInit.
++ Return value = BZ_SEQUENCE_ERROR
++
++ RUNNING/BZ_RUN
++ Compress from next_in to next_out as much as possible.
++ Next state = RUNNING
++ Return value = BZ_RUN_OK
++
++ RUNNING/BZ_FLUSH
++ Remember current value of next_in. Compress from next_in
++ to next_out as much as possible, but do not accept any more input.
++ Next state = FLUSHING
++ Return value = BZ_FLUSH_OK
++
++ RUNNING/BZ_FINISH
++ Remember current value of next_in. Compress from next_in
++ to next_out as much as possible, but do not accept any more input.
++ Next state = FINISHING
++ Return value = BZ_FINISH_OK
++
++ FLUSHING/BZ_FLUSH
++ Compress from next_in to next_out as much as possible,
++ but do not accept any more input.
++ If all the existing input has been used up and all compressed
++ output has been removed
++ Next state = RUNNING; Return value = BZ_RUN_OK
++ else
++ Next state = FLUSHING; Return value = BZ_FLUSH_OK
++
++ FLUSHING/other
++ Illegal.
++ Return value = BZ_SEQUENCE_ERROR
++
++ FINISHING/BZ_FINISH
++ Compress from next_in to next_out as much as possible,
++ but do not accept any more input.
++ If all the existing input has been used up and all compressed
++ output has been removed
++ Next state = IDLE; Return value = BZ_STREAM_END
++ else
++ Next state = FINISHING; Return value = BZ_FINISH_OK
++
++ FINISHING/other
++ Illegal.
++ Return value = BZ_SEQUENCE_ERROR
++
++ That still looks complicated? Well, fair enough. The usual sequence
++of calls for compressing a load of data is:
++
++ 1. Get started with `BZ2_bzCompressInit'.
++
++ 2. Shovel data in and shlurp out its compressed form using zero or
++ more calls of `BZ2_bzCompress' with action = `BZ_RUN'.
++
++ 3. Finish up. Repeatedly call `BZ2_bzCompress' with action =
++ `BZ_FINISH', copying out the compressed output, until
++ `BZ_STREAM_END' is returned.
++
++ 4. Close up and go home. Call `BZ2_bzCompressEnd'.
++
++ If the data you want to compress fits into your input buffer all at
++once, you can skip the calls of `BZ2_bzCompress ( ..., BZ_RUN )' and
++just do the `BZ2_bzCompress ( ..., BZ_FINISH )' calls.
++
++ All required memory is allocated by `BZ2_bzCompressInit'. The
++compression library can accept any data at all (obviously). So you
++shouldn't get any error return values from the `BZ2_bzCompress' calls.
++If you do, they will be `BZ_SEQUENCE_ERROR', and indicate a bug in your
++programming.
++
++ Trivial other possible return values:
++
++
++ BZ_PARAM_ERROR
++ if strm is NULL, or strm->state is NULL
++
++
++File: manual.info, Node: BZ2_bzCompressEnd, Next: BZ2_bzDecompressInit, Prev: BZ2_bzCompress, Up: >Low-level interface
++
++3.3.3 BZ2_bzCompressEnd
++-----------------------
++
++
++ int BZ2_bzCompressEnd ( bz_stream *strm );
++
++ Releases all memory associated with a compression stream.
++
++ Possible return values:
++
++
++ BZ_PARAM_ERROR if strm is NULL or strm->state is NULL
++ BZ_OK otherwise
++
++
++File: manual.info, Node: BZ2_bzDecompressInit, Next: BZ2_bzDecompress, Prev: BZ2_bzCompressEnd, Up: >Low-level interface
++
++3.3.4 BZ2_bzDecompressInit
++--------------------------
++
++
++ int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small );
++
++ Prepares for decompression. As with `BZ2_bzCompressInit', a
++`bz_stream' record should be allocated and initialised before the call.
++Fields `bzalloc', `bzfree' and `opaque' should be set if a custom
++memory allocator is required, or made `NULL' for the normal `malloc' /
++`free' routines. Upon return, the internal state will have been
++initialised, and `total_in' and `total_out' will be zero.
++
++ For the meaning of parameter `verbosity', see `BZ2_bzCompressInit'.
++
++ If `small' is nonzero, the library will use an alternative
++decompression algorithm which uses less memory but at the cost of
++decompressing more slowly (roughly speaking, half the speed, but the
++maximum memory requirement drops to around 2300k). See *Note How to use
++bzip2: How to use bzip2. for more information on memory management.
++
++ Note that the amount of memory needed to decompress a stream cannot
++be determined until the stream's header has been read, so even if
++`BZ2_bzDecompressInit' succeeds, a subsequent `BZ2_bzDecompress' could
++fail with `BZ_MEM_ERROR'.
++
++ Possible return values:
++
++
++ BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++ BZ_PARAM_ERROR
++ if ( small != 0 && small != 1 )
++ or (verbosity < 0 || verbosity > 4)
++ BZ_MEM_ERROR
++ if insufficient memory is available
++
++ Allowable next actions:
++
++
++ BZ2_bzDecompress
++ if BZ_OK was returned
++ no specific action required in case of error
++
++
++File: manual.info, Node: BZ2_bzDecompress, Next: BZ2_bzDecompressEnd, Prev: BZ2_bzDecompressInit, Up: >Low-level interface
++
++3.3.5 BZ2_bzDecompress
++----------------------
++
++
++ int BZ2_bzDecompress ( bz_stream *strm );
++
++ Provides more input and/or output buffer space for the library. The
++caller maintains input and output buffers, and uses `BZ2_bzDecompress'
++to transfer data between them.
++
++ Before each call to `BZ2_bzDecompress', `next_in' should point at the
++compressed data, and `avail_in' should indicate how many bytes the
++library may read. `BZ2_bzDecompress' updates `next_in', `avail_in' and
++`total_in' to reflect the number of bytes it has read.
++
++ Similarly, `next_out' should point to a buffer in which the
++uncompressed output is to be placed, with `avail_out' indicating how
++much output space is available. `BZ2_bzDecompress' updates `next_out',
++`avail_out' and `total_out' to reflect the number of bytes output.
++
++ You may provide and remove as little or as much data as you like on
++each call of `BZ2_bzDecompress'. In the limit, it is acceptable to
++supply and remove data one byte at a time, although this would be
++terribly inefficient. You should always ensure that at least one byte
++of output space is available at each call.
++
++ Use of `BZ2_bzDecompress' is simpler than `BZ2_bzCompress'.
++
++ You should provide input and remove output as described above, and
++repeatedly call `BZ2_bzDecompress' until `BZ_STREAM_END' is returned.
++Appearance of `BZ_STREAM_END' denotes that `BZ2_bzDecompress' has
++detected the logical end of the compressed stream. `BZ2_bzDecompress'
++will not produce `BZ_STREAM_END' until all output data has been placed
++into the output buffer, so once `BZ_STREAM_END' appears, you are
++guaranteed to have available all the decompressed output, and
++`BZ2_bzDecompressEnd' can safely be called.
++
++ In case of an error return value, you should call
++`BZ2_bzDecompressEnd' to clean up and release memory.
++
++ Possible return values:
++
++
++ BZ_PARAM_ERROR
++ if strm is NULL or strm->state is NULL
++ or strm->avail_out < 1
++ BZ_DATA_ERROR
++ if a data integrity error is detected in the compressed stream
++ BZ_DATA_ERROR_MAGIC
++ if the compressed stream doesn't begin with the right magic bytes
++ BZ_MEM_ERROR
++ if there wasn't enough memory available
++ BZ_STREAM_END
++ if the logical end of the data stream was detected and all
++ output has been consumed, eg strm->avail_out > 0
++ BZ_OK
++ otherwise
++
++ Allowable next actions:
++
++
++ BZ2_bzDecompress
++ if BZ_OK was returned
++ BZ2_bzDecompressEnd
++ otherwise
++
++
++File: manual.info, Node: BZ2_bzDecompressEnd, Prev: BZ2_bzDecompress, Up: >Low-level interface
++
++3.3.6 BZ2_bzDecompressEnd
++-------------------------
++
++
++ int BZ2_bzDecompressEnd ( bz_stream *strm );
++
++ Releases all memory associated with a decompression stream.
++
++ Possible return values:
++
++
++ BZ_PARAM_ERROR
++ if strm is NULL or strm->s is NULL
++ BZ_OK
++ otherwise
++
++ Allowable next actions:
++
++
++ None.
++
++
++File: manual.info, Node: High-level interface, Next: Utility functions, Prev: Low-level interface, Up: Programming with libbzip2
++
++3.4 High-level interface
++========================
++
++This interface provides functions for reading and writing `bzip2'
++format files. First, some general points.
++
++ * All of the functions take an `int*' first argument, `bzerror'.
++ After each call, `bzerror' should be consulted first to determine
++ the outcome of the call. If `bzerror' is `BZ_OK', the call
++ completed successfully, and only then should the return value of
++ the function (if any) be consulted. If `bzerror' is `BZ_IO_ERROR',
++ there was an error reading/writing the underlying compressed file,
++ and you should then consult `errno' / `perror' to determine the
++ cause of the difficulty. `bzerror' may also be set to various
++ other values; precise details are given on a per-function basis
++ below.
++
++ * If `bzerror' indicates an error (ie, anything except `BZ_OK' and
++ `BZ_STREAM_END'), you should immediately call `BZ2_bzReadClose' (or
++ `BZ2_bzWriteClose', depending on whether you are attempting to
++ read or to write) to free up all resources associated with the
++ stream. Once an error has been indicated, behaviour of all calls
++ except `BZ2_bzReadClose' (`BZ2_bzWriteClose') is undefined. The
++ implication is that (1) `bzerror' should be checked after each
++ call, and (2) if `bzerror' indicates an error, `BZ2_bzReadClose'
++ (`BZ2_bzWriteClose') should then be called to clean up.
++
++ * The `FILE*' arguments passed to `BZ2_bzReadOpen' /
++ `BZ2_bzWriteOpen' should be set to binary mode. Most Unix systems
++ will do this by default, but other platforms, including Windows
++ and Mac, will not. If you omit this, you may encounter problems
++ when moving code to new platforms.
++
++ * Memory allocation requests are handled by `malloc' / `free'. At
++ present there is no facility for user-defined memory allocators in
++ the file I/O functions (could easily be added, though).
++
++* Menu:
++
++* BZ2_bzReadOpen::
++* BZ2_bzRead::
++* BZ2_bzReadGetUnused::
++* BZ2_bzReadClose::
++* BZ2_bzWriteOpen::
++* BZ2_bzWrite::
++* BZ2_bzWriteClose::
++* Handling embedded compressed data streams::
++* Standard file-reading/writing code::
++
++
++File: manual.info, Node: BZ2_bzReadOpen, Next: BZ2_bzRead, Up: High-level interface
++
++3.4.1 BZ2_bzReadOpen
++--------------------
++
++
++ typedef void BZFILE;
++
++ BZFILE *BZ2_bzReadOpen( int *bzerror, FILE *f,
++ int verbosity, int small,
++ void *unused, int nUnused );
++
++ Prepare to read compressed data from file handle `f'. `f' should
++refer to a file which has been opened for reading, and for which the
++error indicator (`ferror(f)') is not set. If `small' is 1, the library
++will try to decompress using less memory, at the expense of speed.
++
++ For reasons explained below, `BZ2_bzRead' will decompress the
++`nUnused' bytes starting at `unused', before starting to read from the
++file `f'. At most `BZ_MAX_UNUSED' bytes may be supplied like this. If
++this facility is not required, you should pass `NULL' and `0' for
++`unused' and `nUnused' respectively.
++
++ For the meaning of parameters `small' and `verbosity', see
++`BZ2_bzDecompressInit'.
++
++ The amount of memory needed to decompress a file cannot be
++determined until the file's header has been read. So it is possible
++that `BZ2_bzReadOpen' returns `BZ_OK' but a subsequent call of
++`BZ2_bzRead' will return `BZ_MEM_ERROR'.
++
++ Possible assignments to `bzerror':
++
++
++ BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++ BZ_PARAM_ERROR
++ if f is NULL
++ or small is neither 0 nor 1
++ or ( unused == NULL && nUnused != 0 )
++ or ( unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED) )
++ BZ_IO_ERROR
++ if ferror(f) is nonzero
++ BZ_MEM_ERROR
++ if insufficient memory is available
++ BZ_OK
++ otherwise.
++
++ Possible return values:
++
++
++ Pointer to an abstract BZFILE
++ if bzerror is BZ_OK
++ NULL
++ otherwise
++
++ Allowable next actions:
++
++
++ BZ2_bzRead
++ if bzerror is BZ_OK
++ BZ2_bzReadClose
++ otherwise
++
++
++File: manual.info, Node: BZ2_bzRead, Next: BZ2_bzReadGetUnused, Prev: BZ2_bzReadOpen, Up: High-level interface
++
++3.4.2 BZ2_bzRead
++----------------
++
++
++ int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len );
++
++ Reads up to `len' (uncompressed) bytes from the compressed file `b'
++into the buffer `buf'. If the read was successful, `bzerror' is set to
++`BZ_OK' and the number of bytes read is returned. If the logical
++end-of-stream was detected, `bzerror' will be set to `BZ_STREAM_END',
++and the number of bytes read is returned. All other `bzerror' values
++denote an error.
++
++ `BZ2_bzRead' will supply `len' bytes, unless the logical stream end
++is detected or an error occurs. Because of this, it is possible to
++detect the stream end by observing when the number of bytes returned is
++less than the number requested. Nevertheless, this is regarded as
++inadvisable; you should instead check `bzerror' after every call and
++watch out for `BZ_STREAM_END'.
++
++ Internally, `BZ2_bzRead' copies data from the compressed file in
++chunks of size `BZ_MAX_UNUSED' bytes before decompressing it. If the
++file contains more bytes than strictly needed to reach the logical
++end-of-stream, `BZ2_bzRead' will almost certainly read some of the
++trailing data before signalling `BZ_STREAM_END'. To collect the read
++but unused data once `BZ_STREAM_END' has appeared, call
++`BZ2_bzReadGetUnused' immediately before `BZ2_bzReadClose'.
++
++ Possible assignments to `bzerror':
++
++
++ BZ_PARAM_ERROR
++ if b is NULL or buf is NULL or len < 0
++ BZ_SEQUENCE_ERROR
++ if b was opened with BZ2_bzWriteOpen
++ BZ_IO_ERROR
++ if there is an error reading from the compressed file
++ BZ_UNEXPECTED_EOF
++ if the compressed file ended before
++ the logical end-of-stream was detected
++ BZ_DATA_ERROR
++ if a data integrity error was detected in the compressed stream
++ BZ_DATA_ERROR_MAGIC
++ if the stream does not begin with the requisite header bytes
++ (ie, is not a bzip2 data file). This is really
++ a special case of BZ_DATA_ERROR.
++ BZ_MEM_ERROR
++ if insufficient memory was available
++ BZ_STREAM_END
++ if the logical end of stream was detected.
++ BZ_OK
++ otherwise.
++
++ Possible return values:
++
++
++ number of bytes read
++ if bzerror is BZ_OK or BZ_STREAM_END
++ undefined
++ otherwise
++
++ Allowable next actions:
++
++
++ collect data from buf, then BZ2_bzRead or BZ2_bzReadClose
++ if bzerror is BZ_OK
++ collect data from buf, then BZ2_bzReadClose or BZ2_bzReadGetUnused
++ if bzerror is BZ_STREAM_END
++ BZ2_bzReadClose
++ otherwise
++
++
++File: manual.info, Node: BZ2_bzReadGetUnused, Next: BZ2_bzReadClose, Prev: BZ2_bzRead, Up: High-level interface
++
++3.4.3 BZ2_bzReadGetUnused
++-------------------------
++
++
++ void BZ2_bzReadGetUnused( int* bzerror, BZFILE *b,
++ void** unused, int* nUnused );
++
++ Returns data which was read from the compressed file but was not
++needed to get to the logical end-of-stream. `*unused' is set to the
++address of the data, and `*nUnused' to the number of bytes. `*nUnused'
++will be set to a value between `0' and `BZ_MAX_UNUSED' inclusive.
++
++ This function may only be called once `BZ2_bzRead' has signalled
++`BZ_STREAM_END' but before `BZ2_bzReadClose'.
++
++ Possible assignments to `bzerror':
++
++
++ BZ_PARAM_ERROR
++ if b is NULL
++ or unused is NULL or nUnused is NULL
++ BZ_SEQUENCE_ERROR
++ if BZ_STREAM_END has not been signalled
++ or if b was opened with BZ2_bzWriteOpen
++ BZ_OK
++ otherwise
++
++ Allowable next actions:
++
++
++ BZ2_bzReadClose
++
++
++File: manual.info, Node: BZ2_bzReadClose, Next: BZ2_bzWriteOpen, Prev: BZ2_bzReadGetUnused, Up: High-level interface
++
++3.4.4 BZ2_bzReadClose
++---------------------
++
++
++ void BZ2_bzReadClose ( int *bzerror, BZFILE *b );
++
++ Releases all memory pertaining to the compressed file `b'.
++`BZ2_bzReadClose' does not call `fclose' on the underlying file handle,
++so you should do that yourself if appropriate. `BZ2_bzReadClose'
++should be called to clean up after all error situations.
++
++ Possible assignments to `bzerror':
++
++
++ BZ_SEQUENCE_ERROR
++ if b was opened with BZ2_bzWriteOpen
++ BZ_OK
++ otherwise
++
++ Allowable next actions:
++
++
++ none
++
++
++File: manual.info, Node: BZ2_bzWriteOpen, Next: BZ2_bzWrite, Prev: BZ2_bzReadClose, Up: High-level interface
++
++3.4.5 BZ2_bzWriteOpen
++---------------------
++
++
++ BZFILE *BZ2_bzWriteOpen( int *bzerror, FILE *f,
++ int blockSize100k, int verbosity,
++ int workFactor );
++
++ Prepare to write compressed data to file handle `f'. `f' should
++refer to a file which has been opened for writing, and for which the
++error indicator (`ferror(f)') is not set.
++
++ For the meaning of parameters `blockSize100k', `verbosity' and
++`workFactor', see `BZ2_bzCompressInit'.
++
++ All required memory is allocated at this stage, so if the call
++completes successfully, `BZ_MEM_ERROR' cannot be signalled by a
++subsequent call to `BZ2_bzWrite'.
++
++ Possible assignments to `bzerror':
++
++
++ BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++ BZ_PARAM_ERROR
++ if f is NULL
++ or blockSize100k < 1 or blockSize100k > 9
++ BZ_IO_ERROR
++ if ferror(f) is nonzero
++ BZ_MEM_ERROR
++ if insufficient memory is available
++ BZ_OK
++ otherwise
++
++ Possible return values:
++
++
++ Pointer to an abstract BZFILE
++ if bzerror is BZ_OK
++ NULL
++ otherwise
++
++ Allowable next actions:
++
++
++ BZ2_bzWrite
++ if bzerror is BZ_OK
++ (you could go directly to BZ2_bzWriteClose, but this would be pretty pointless)
++ BZ2_bzWriteClose
++ otherwise
++
++
++File: manual.info, Node: BZ2_bzWrite, Next: BZ2_bzWriteClose, Prev: BZ2_bzWriteOpen, Up: High-level interface
++
++3.4.6 BZ2_bzWrite
++-----------------
++
++
++ void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );
++
++ Absorbs `len' bytes from the buffer `buf', eventually to be
++compressed and written to the file.
++
++ Possible assignments to `bzerror':
++
++
++ BZ_PARAM_ERROR
++ if b is NULL or buf is NULL or len < 0
++ BZ_SEQUENCE_ERROR
++ if b was opened with BZ2_bzReadOpen
++ BZ_IO_ERROR
++ if there is an error writing the compressed file.
++ BZ_OK
++ otherwise
++
++
++File: manual.info, Node: BZ2_bzWriteClose, Next: Handling embedded compressed data streams, Prev: BZ2_bzWrite, Up: High-level interface
++
++3.4.7 BZ2_bzWriteClose
++----------------------
++
++
++ void BZ2_bzWriteClose( int *bzerror, BZFILE* f,
++ int abandon,
++ unsigned int* nbytes_in,
++ unsigned int* nbytes_out );
++
++ void BZ2_bzWriteClose64( int *bzerror, BZFILE* f,
++ int abandon,
++ unsigned int* nbytes_in_lo32,
++ unsigned int* nbytes_in_hi32,
++ unsigned int* nbytes_out_lo32,
++ unsigned int* nbytes_out_hi32 );
++
++ Compresses and flushes to the compressed file all data so far
++supplied by `BZ2_bzWrite'. The logical end-of-stream markers are also
++written, so subsequent calls to `BZ2_bzWrite' are illegal. All memory
++associated with the compressed file `b' is released. `fflush' is
++called on the compressed file, but it is not `fclose''d.
++
++ If `BZ2_bzWriteClose' is called to clean up after an error, the only
++action is to release the memory. The library records the error codes
++issued by previous calls, so this situation will be detected
++automatically. There is no attempt to complete the compression
++operation, nor to `fflush' the compressed file. You can force this
++behaviour to happen even in the case of no error, by passing a nonzero
++value to `abandon'.
++
++ If `nbytes_in' is non-null, `*nbytes_in' will be set to be the total
++volume of uncompressed data handled. Similarly, `nbytes_out' will be
++set to the total volume of compressed data written. For compatibility
++with older versions of the library, `BZ2_bzWriteClose' only yields the
++lower 32 bits of these counts. Use `BZ2_bzWriteClose64' if you want the
++full 64 bit counts. These two functions are otherwise absolutely
++identical.
++
++ Possible assignments to `bzerror':
++
++
++ BZ_SEQUENCE_ERROR
++ if b was opened with BZ2_bzReadOpen
++ BZ_IO_ERROR
++ if there is an error writing the compressed file
++ BZ_OK
++ otherwise
++
++
++File: manual.info, Node: Handling embedded compressed data streams, Next: Standard file-reading/writing code, Prev: BZ2_bzWriteClose, Up: High-level interface
++
++3.4.8 Handling embedded compressed data streams
++-----------------------------------------------
++
++The high-level library facilitates use of `bzip2' data streams which
++form some part of a surrounding, larger data stream.
++
++ * For writing, the library takes an open file handle, writes
++ compressed data to it, `fflush'es it but does not `fclose' it. The
++ calling application can write its own data before and after the
++ compressed data stream, using that same file handle.
++
++ * Reading is more complex, and the facilities are not as general as
++ they could be since generality is hard to reconcile with
++ efficiency. `BZ2_bzRead' reads from the compressed file in blocks
++ of size `BZ_MAX_UNUSED' bytes, and in doing so probably will
++ overshoot the logical end of compressed stream. To recover this
++ data once decompression has ended, call `BZ2_bzReadGetUnused' after
++ the last call of `BZ2_bzRead' (the one returning `BZ_STREAM_END')
++ but before calling `BZ2_bzReadClose'.
++
++ This mechanism makes it easy to decompress multiple `bzip2' streams
++placed end-to-end. At the end of one stream, when `BZ2_bzRead' returns
++`BZ_STREAM_END', call `BZ2_bzReadGetUnused' to collect the unused data
++(copy it into your own buffer somewhere). That data forms the start of
++the next compressed stream. To start uncompressing that next stream,
++call `BZ2_bzReadOpen' again, feeding in the unused data via the
++`unused' / `nUnused' parameters. Keep doing this until `BZ_STREAM_END'
++return coincides with the physical end of file (`feof(f)'). In this
++situation `BZ2_bzReadGetUnused' will of course return no data.
++
++ This should give some feel for how the high-level interface can be
++used. If you require extra flexibility, you'll have to bite the bullet
++and get to grips with the low-level interface.
++
++
++File: manual.info, Node: Standard file-reading/writing code, Prev: Handling embedded compressed data streams, Up: High-level interface
++
++3.4.9 Standard file-reading/writing code
++----------------------------------------
++
++Here's how you'd write data to a compressed file:
++
++
++ FILE* f;
++ BZFILE* b;
++ int nBuf;
++ char buf[ /* whatever size you like */ ];
++ int bzerror;
++ int nWritten;
++
++ f = fopen ( "myfile.bz2", "w" );
++ if ( !f ) {
++ /* handle error */
++ }
++ b = BZ2_bzWriteOpen( &bzerror, f, 9, 0, 30 );
++ if (bzerror != BZ_OK) {
++ BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
++ /* handle error */
++ }
++
++ while ( /* condition */ ) {
++ /* get data to write into buf, and set nBuf appropriately */
++ BZ2_bzWrite ( &bzerror, b, buf, nBuf );
++ if (bzerror == BZ_IO_ERROR) {
++ BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
++ /* handle error */
++ }
++ }
++
++ BZ2_bzWriteClose( &bzerror, b, 0, NULL, NULL );
++ if (bzerror == BZ_IO_ERROR) {
++ /* handle error */
++ }
++
++ And to read from a compressed file:
++
++
++ FILE* f;
++ BZFILE* b;
++ int nBuf;
++ char buf[ /* whatever size you like */ ];
++ int bzerror;
++ int nWritten;
++
++ f = fopen ( "myfile.bz2", "r" );
++ if ( !f ) {
++ /* handle error */
++ }
++ b = BZ2_bzReadOpen ( &bzerror, f, 0, 0, NULL, 0 );
++ if ( bzerror != BZ_OK ) {
++ BZ2_bzReadClose ( &bzerror, b );
++ /* handle error */
++ }
++
++ bzerror = BZ_OK;
++ while ( bzerror == BZ_OK && /* arbitrary other conditions */) {
++ nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ );
++ if ( bzerror == BZ_OK ) {
++ /* do something with buf[0 .. nBuf-1] */
++ }
++ }
++ if ( bzerror != BZ_STREAM_END ) {
++ BZ2_bzReadClose ( &bzerror, b );
++ /* handle error */
++ } else {
++ BZ2_bzReadClose ( &bzerror, b );
++ }
++
++
++File: manual.info, Node: Utility functions, Next: zlib compatibility functions, Prev: High-level interface, Up: Programming with libbzip2
++
++3.5 Utility functions
++=====================
++
++* Menu:
++
++* BZ2_bzBuffToBuffCompress::
++* BZ2_bzBuffToBuffDecompress::
++
++
++File: manual.info, Node: BZ2_bzBuffToBuffCompress, Next: BZ2_bzBuffToBuffDecompress, Up: Utility functions
++
++3.5.1 BZ2_bzBuffToBuffCompress
++------------------------------
++
++
++ int BZ2_bzBuffToBuffCompress( char* dest,
++ unsigned int* destLen,
++ char* source,
++ unsigned int sourceLen,
++ int blockSize100k,
++ int verbosity,
++ int workFactor );
++
++ Attempts to compress the data in `source[0 .. sourceLen-1]' into the
++destination buffer, `dest[0 .. *destLen-1]'. If the destination buffer
++is big enough, `*destLen' is set to the size of the compressed data,
++and `BZ_OK' is returned. If the compressed data won't fit, `*destLen'
++is unchanged, and `BZ_OUTBUFF_FULL' is returned.
++
++ Compression in this manner is a one-shot event, done with a single
++call to this function. The resulting compressed data is a complete
++`bzip2' format data stream. There is no mechanism for making additional
++calls to provide extra input data. If you want that kind of mechanism,
++use the low-level interface.
++
++ For the meaning of parameters `blockSize100k', `verbosity' and
++`workFactor', see `BZ2_bzCompressInit'.
++
++ To guarantee that the compressed data will fit in its buffer,
++allocate an output buffer of size 1% larger than the uncompressed data,
++plus six hundred extra bytes.
++
++ `BZ2_bzBuffToBuffCompress' will not write data at or beyond
++`dest[*destLen]', even in case of buffer overflow.
++
++ Possible return values:
++
++
++ BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++ BZ_PARAM_ERROR
++ if dest is NULL or destLen is NULL
++ or blockSize100k < 1 or blockSize100k > 9
++ or verbosity < 0 or verbosity > 4
++ or workFactor < 0 or workFactor > 250
++ BZ_MEM_ERROR
++ if insufficient memory is available
++ BZ_OUTBUFF_FULL
++ if the size of the compressed data exceeds *destLen
++ BZ_OK
++ otherwise
++
++
++File: manual.info, Node: BZ2_bzBuffToBuffDecompress, Prev: BZ2_bzBuffToBuffCompress, Up: Utility functions
++
++3.5.2 BZ2_bzBuffToBuffDecompress
++--------------------------------
++
++
++ int BZ2_bzBuffToBuffDecompress( char* dest,
++ unsigned int* destLen,
++ char* source,
++ unsigned int sourceLen,
++ int small,
++ int verbosity );
++
++ Attempts to decompress the data in `source[0 .. sourceLen-1]' into
++the destination buffer, `dest[0 .. *destLen-1]'. If the destination
++buffer is big enough, `*destLen' is set to the size of the uncompressed
++data, and `BZ_OK' is returned. If the uncompressed data won't fit,
++`*destLen' is unchanged, and `BZ_OUTBUFF_FULL' is returned.
++
++ `source' is assumed to hold a complete `bzip2' format data stream.
++`BZ2_bzBuffToBuffDecompress' tries to decompress the entirety of the
++stream into the output buffer.
++
++ For the meaning of parameters `small' and `verbosity', see
++`BZ2_bzDecompressInit'.
++
++ Because the compression ratio of the compressed data cannot be known
++in advance, there is no easy way to guarantee that the output buffer
++will be big enough. You may of course make arrangements in your code to
++record the size of the uncompressed data, but such a mechanism is
++beyond the scope of this library.
++
++ `BZ2_bzBuffToBuffDecompress' will not write data at or beyond
++`dest[*destLen]', even in case of buffer overflow.
++
++ Possible return values:
++
++
++ BZ_CONFIG_ERROR
++ if the library has been mis-compiled
++ BZ_PARAM_ERROR
++ if dest is NULL or destLen is NULL
++ or small != 0 && small != 1
++ or verbosity < 0 or verbosity > 4
++ BZ_MEM_ERROR
++ if insufficient memory is available
++ BZ_OUTBUFF_FULL
++ if the size of the uncompressed data exceeds *destLen
++ BZ_DATA_ERROR
++ if a data integrity error was detected in the compressed data
++ BZ_DATA_ERROR_MAGIC
++ if the compressed data doesn't begin with the right magic bytes
++ BZ_UNEXPECTED_EOF
++ if the compressed data ends unexpectedly
++ BZ_OK
++ otherwise
++
++
++File: manual.info, Node: zlib compatibility functions, Next: Using the library in a stdio-free environment, Prev: Utility functions, Up: Programming with libbzip2
++
++3.6 zlib compatibility functions
++================================
++
++Yoshioka Tsuneo has contributed some functions to give better `zlib'
++compatibility. These functions are `BZ2_bzopen', `BZ2_bzread',
++`BZ2_bzwrite', `BZ2_bzflush', `BZ2_bzclose', `BZ2_bzerror' and
++`BZ2_bzlibVersion'. These functions are not (yet) officially part of
++the library. If they break, you get to keep all the pieces.
++Nevertheless, I think they work ok.
++
++
++ typedef void BZFILE;
++
++ const char * BZ2_bzlibVersion ( void );
++
++ Returns a string indicating the library version.
++
++
++ BZFILE * BZ2_bzopen ( const char *path, const char *mode );
++ BZFILE * BZ2_bzdopen ( int fd, const char *mode );
++
++ Opens a `.bz2' file for reading or writing, using either its name or
++a pre-existing file descriptor. Analogous to `fopen' and `fdopen'.
++
++
++ int BZ2_bzread ( BZFILE* b, void* buf, int len );
++ int BZ2_bzwrite ( BZFILE* b, void* buf, int len );
++
++ Reads/writes data from/to a previously opened `BZFILE'. Analogous to
++`fread' and `fwrite'.
++
++
++ int BZ2_bzflush ( BZFILE* b );
++ void BZ2_bzclose ( BZFILE* b );
++
++ Flushes/closes a `BZFILE'. `BZ2_bzflush' doesn't actually do
++anything. Analogous to `fflush' and `fclose'.
++
++
++ const char * BZ2_bzerror ( BZFILE *b, int *errnum );
++
++ Returns a string describing the most recent error status of `b', and
++also sets `*errnum' to its numerical value.
++
++
++File: manual.info, Node: Using the library in a stdio-free environment, Next: Making a Windows DLL, Prev: zlib compatibility functions, Up: Programming with libbzip2
++
++3.7 Using the library in a stdio-free environment
++=================================================
++
++* Menu:
++
++* Getting rid of stdio::
++* Critical error handling::
++
++
++File: manual.info, Node: Getting rid of stdio, Next: Critical error handling, Up: Using the library in a stdio-free environment
++
++3.7.1 Getting rid of stdio
++--------------------------
++
++In a deeply embedded application, you might want to use just the
++memory-to-memory functions. You can do this conveniently by compiling
++the library with preprocessor symbol `BZ_NO_STDIO' defined. Doing this
++gives you a library containing only the following eight functions:
++
++ `BZ2_bzCompressInit', `BZ2_bzCompress', `BZ2_bzCompressEnd'
++`BZ2_bzDecompressInit', `BZ2_bzDecompress', `BZ2_bzDecompressEnd'
++`BZ2_bzBuffToBuffCompress', `BZ2_bzBuffToBuffDecompress'
++
++ When compiled like this, all functions will ignore `verbosity'
++settings.
++
++
++File: manual.info, Node: Critical error handling, Prev: Getting rid of stdio, Up: Using the library in a stdio-free environment
++
++3.7.2 Critical error handling
++-----------------------------
++
++`libbzip2' contains a number of internal assertion checks which should,
++needless to say, never be activated. Nevertheless, if an assertion
++should fail, behaviour depends on whether or not the library was
++compiled with `BZ_NO_STDIO' set.
++
++ For a normal compile, an assertion failure yields the message:
++
++ bzip2/libbzip2: internal error number N.
++
++ This is a bug in bzip2/libbzip2, 1.0.3 of 15 February 2005.
++ Please report it to me at: jseward@bzip.org. If this happened when
++ you were using some program which uses libbzip2 as a component,
++ you should also report this bug to the author(s) of that program.
++ Please make an effort to report this bug; timely and accurate bug
++ reports eventually lead to higher quality software. Thanks. Julian
++ Seward, 15 February 2005.
++
++ where `N' is some error code number. If `N == 1007', it also prints
++some extra text advising the reader that unreliable memory is often
++associated with internal error 1007. (This is a
++frequently-observed-phenomenon with versions 1.0.0/1.0.1).
++
++ `exit(3)' is then called.
++
++ For a `stdio'-free library, assertion failures result in a call to a
++function declared as:
++
++
++ extern void bz_internal_error ( int errcode );
++
++ The relevant code is passed as a parameter. You should supply such a
++function.
++
++ In either case, once an assertion failure has occurred, any
++`bz_stream' records involved can be regarded as invalid. You should not
++attempt to resume normal operation with them.
++
++ You may, of course, change critical error handling to suit your
++needs. As I said above, critical errors indicate bugs in the library
++and should not occur. All "normal" error situations are indicated via
++error return codes from functions, and can be recovered from.
++
++
++File: manual.info, Node: Making a Windows DLL, Prev: Using the library in a stdio-free environment, Up: Programming with libbzip2
++
++3.8 Making a Windows DLL
++========================
++
++Everything related to Windows has been contributed by Yoshioka Tsuneo
++(`QWF00133@niftyserve.or.jp' / `tsuneo-y@is.aist-nara.ac.jp'), so you
++should send your queries to him (but perhaps Cc: me,
++`jseward@bzip.org').
++
++ My vague understanding of what to do is: using Visual C++ 5.0, open
++the project file `libbz2.dsp', and build. That's all.
++
++ If you can't open the project file for some reason, make a new one,
++naming these files: `blocksort.c', `bzlib.c', `compress.c',
++`crctable.c', `decompress.c', `huffman.c', `randtable.c' and
++`libbz2.def'. You will also need to name the header files `bzlib.h' and
++`bzlib_private.h'.
++
++ If you don't use VC++, you may need to define the preprocessor symbol
++`_WIN32'.
++
++ Finally, `dlltest.c' is a sample program using the DLL. It has a
++project file, `dlltest.dsp'.
++
++ If you just want a makefile for Visual C, have a look at
++`makefile.msc'.
++
++ Be aware that if you compile `bzip2' itself on Win32, you must set
++`BZ_UNIX' to 0 and `BZ_LCCWIN32' to 1, in the file `bzip2.c', before
++compiling. Otherwise the resulting binary won't work correctly.
++
++ I haven't tried any of this stuff myself, but it all looks plausible.
++
++
++File: manual.info, Node: Miscellanea, Prev: Programming with libbzip2, Up: Top
++
++4 Miscellanea
++*************
++
++These are just some random thoughts of mine. Your mileage may vary.
++
++* Menu:
++
++* Limitations of the compressed file format::
++* Portability issues::
++* Reporting bugs::
++* Did you get the right package?::
++* Further Reading::
++
++
++File: manual.info, Node: Limitations of the compressed file format, Next: Portability issues, Up: Miscellanea
++
++4.1 Limitations of the compressed file format
++=============================================
++
++`bzip2-1.0.X', `0.9.5' and `0.9.0' use exactly the same file format as
++the original version, `bzip2-0.1'. This decision was made in the
++interests of stability. Creating yet another incompatible compressed
++file format would create further confusion and disruption for users.
++
++ Nevertheless, this is not a painless decision. Development work
++since the release of `bzip2-0.1' in August 1997 has shown complexities
++in the file format which slow down decompression and, in retrospect,
++are unnecessary. These are:
++
++ * The run-length encoder, which is the first of the compression
++ transformations, is entirely irrelevant. The original purpose was
++ to protect the sorting algorithm from the very worst case input: a
++ string of repeated symbols. But algorithm steps Q6a and Q6b in the
++ original Burrows-Wheeler technical report (SRC-124) show how
++ repeats can be handled without difficulty in block sorting.
++
++ * The randomisation mechanism doesn't really need to be there. Udi
++ Manber and Gene Myers published a suffix array construction
++ algorithm a few years back, which can be employed to sort any
++ block, no matter how repetitive, in O(N log N) time. Subsequent
++ work by Kunihiko Sadakane has produced a derivative O(N (log N)^2)
++ algorithm which usually outperforms the Manber-Myers algorithm.
++
++ I could have changed to Sadakane's algorithm, but I find it to be
++ slower than `bzip2''s existing algorithm for most inputs, and the
++ randomisation mechanism protects adequately against bad cases. I
++ didn't think it was a good tradeoff to make. Partly this is due to
++ the fact that I was not flooded with email complaints about
++ `bzip2-0.1''s performance on repetitive data, so perhaps it isn't
++ a problem for real inputs.
++
++ Probably the best long-term solution, and the one I have
++ incorporated into 0.9.5 and above, is to use the existing sorting
++ algorithm initially, and fall back to a O(N (log N)^2) algorithm
++ if the standard algorithm gets into difficulties.
++
++ * The compressed file format was never designed to be handled by a
++ library, and I have had to jump through some hoops to produce an
++ efficient implementation of decompression. It's a bit hairy. Try
++ passing `decompress.c' through the C preprocessor and you'll see
++ what I mean. Much of this complexity could have been avoided if
++ the compressed size of each block of data was recorded in the data
++ stream.
++
++ * An Adler-32 checksum, rather than a CRC32 checksum, would be
++ faster to compute.
++
++ It would be fair to say that the `bzip2' format was frozen before I
++properly and fully understood the performance consequences of doing so.
++
++ Improvements which I was able to incorporate into 0.9.0, despite
++using the same file format, are:
++
++ * Single array implementation of the inverse BWT. This significantly
++ speeds up decompression, presumably because it reduces the number
++ of cache misses.
++
++ * Faster inverse MTF transform for large MTF values. The new
++ implementation is based on the notion of sliding blocks of values.
++
++ * `bzip2-0.9.0' now reads and writes files with `fread' and
++ `fwrite'; version 0.1 used `putc' and `getc'. Duh! Well, you live
++ and learn.
++
++ Further ahead, it would be nice to be able to do random access into
++files. This will require some careful design of compressed file formats.
++
++
++File: manual.info, Node: Portability issues, Next: Reporting bugs, Prev: Limitations of the compressed file format, Up: Miscellanea
++
++4.2 Portability issues
++======================
++
++After some consideration, I have decided not to use GNU `autoconf' to
++configure 0.9.5 or 1.0.
++
++ `autoconf', admirable and wonderful though it is, mainly assists
++with portability problems between Unix-like platforms. But `bzip2'
++doesn't have much in the way of portability problems on Unix; most of
++the difficulties appear when porting to the Mac, or to Microsoft's
++operating systems. `autoconf' doesn't help in those cases, and brings
++in a whole load of new complexity.
++
++ Most people should be able to compile the library and program under
++Unix straight out-of-the-box, so to speak, especially if you have a
++version of GNU C available.
++
++ There are a couple of `__inline__' directives in the code. GNU C
++(`gcc') should be able to handle them. If you're not using GNU C, your
++C compiler shouldn't see them at all. If your compiler does, for some
++reason, see them and doesn't like them, just `#define' `__inline__' to
++be `/* */'. One easy way to do this is to compile with the flag
++`-D__inline__=', which should be understood by most Unix compilers.
++
++ If you still have difficulties, try compiling with the macro
++`BZ_STRICT_ANSI' defined. This should enable you to build the library
++in a strictly ANSI compliant environment. Building the program itself
++like this is dangerous and not supported, since you remove `bzip2''s
++checks against compressing directories, symbolic links, devices, and
++other not-really-a-file entities. This could cause filesystem
++corruption!
++
++ One other thing: if you create a `bzip2' binary for public
++distribution, please consider linking it statically (`gcc -static').
++This avoids all sorts of library-version issues that others may
++encounter later on.
++
++ If you build `bzip2' on Win32, you must set `BZ_UNIX' to 0 and
++`BZ_LCCWIN32' to 1, in the file `bzip2.c', before compiling. Otherwise
++the resulting binary won't work correctly.
++
++
++File: manual.info, Node: Reporting bugs, Next: Did you get the right package?, Prev: Portability issues, Up: Miscellanea
++
++4.3 Reporting bugs
++==================
++
++I tried pretty hard to make sure `bzip2' is bug free, both by design
++and by testing. Hopefully you'll never need to read this section for
++real.
++
++ Nevertheless, if `bzip2' dies with a segmentation fault, a bus error
++or an internal assertion failure, it will ask you to email me a bug
++report. Experience from years of feedback of bzip2 users indicates that
++almost all these problems can be traced to either compiler bugs or
++hardware problems.
++
++ * Recompile the program with no optimisation, and see if it works.
++ And/or try a different compiler. I heard all sorts of stories
++ about various flavours of GNU C (and other compilers) generating
++ bad code for `bzip2', and I've run across two such examples myself.
++
++ 2.7.X versions of GNU C are known to generate bad code from time
++ to time, at high optimisation levels. If you get problems, try
++ using the flags `-O2' `-fomit-frame-pointer'
++ `-fno-strength-reduce'. You should specifically not use
++ `-funroll-loops'.
++
++ You may notice that the Makefile runs six tests as part of the
++ build process. If the program passes all of these, it's a pretty
++ good (but not 100%) indication that the compiler has done its job
++ correctly.
++
++ * If `bzip2' crashes randomly, and the crashes are not repeatable,
++ you may have a flaky memory subsystem. `bzip2' really hammers your
++ memory hierarchy, and if it's a bit marginal, you may get these
++ problems. Ditto if your disk or I/O subsystem is slowly failing.
++ Yup, this really does happen.
++
++ Try using a different machine of the same type, and see if you can
++ repeat the problem.
++
++ * This isn't really a bug, but ... If `bzip2' tells you your file is
++ corrupted on decompression, and you obtained the file via FTP,
++ there is a possibility that you forgot to tell FTP to do a binary
++ mode transfer. That absolutely will cause the file to be
++ non-decompressible. You'll have to transfer it again.
++
++ If you've incorporated `libbzip2' into your own program and are
++getting problems, please, please, please, check that the parameters you
++are passing in calls to the library, are correct, and in accordance
++with what the documentation says is allowable. I have tried to make
++the library robust against such problems, but I'm sure I haven't
++succeeded.
++
++ Finally, if the above comments don't help, you'll have to send me a
++bug report. Now, it's just amazing how many people will send me a bug
++report saying something like:
++
++
++ bzip2 crashed with segmentation fault on my machine
++
++ and absolutely nothing else. Needless to say, such a report is
++totally, utterly, completely and comprehensively 100% useless; a waste
++of your time, my time, and net bandwidth. With no details at all,
++there's no way I can possibly begin to figure out what the problem is.
++
++ The rules of the game are: facts, facts, facts. Don't omit them
++because "oh, they won't be relevant". At the bare minimum:
++
++
++ Machine type. Operating system version.
++ Exact version of bzip2 (do bzip2 -V).
++ Exact version of the compiler used.
++ Flags passed to the compiler.
++
++ However, the most important single thing that will help me is the
++file that you were trying to compress or decompress at the time the
++problem happened. Without that, my ability to do anything more than
++speculate about the cause, is limited.
++
++
++File: manual.info, Node: Did you get the right package?, Next: Further Reading, Prev: Reporting bugs, Up: Miscellanea
++
++4.4 Did you get the right package?
++==================================
++
++`bzip2' is a resource hog. It soaks up large amounts of CPU cycles and
++memory. Also, it gives very large latencies. In the worst case, you can
++feed many megabytes of uncompressed data into the library before getting
++any compressed output, so this probably rules out applications
++requiring interactive behaviour.
++
++ These aren't faults of my implementation, I hope, but more an
++intrinsic property of the Burrows-Wheeler transform (unfortunately).
++Maybe this isn't what you want.
++
++ If you want a compressor and/or library which is faster, uses less
++memory but gets pretty good compression, and has minimal latency,
++consider Jean-loup Gailly's and Mark Adler's work, `zlib-1.2.1' and
++`gzip-1.2.4'. Look for them at http://www.zlib.org
++(http://www.zlib.org) and http://www.gzip.org (http://www.gzip.org)
++respectively.
++
++ For something faster and lighter still, you might try Markus F X J
++Oberhumer's `LZO' real-time compression/decompression library, at
++http://www.oberhumer.com/opensource
++(http://www.oberhumer.com/opensource).
++
++
++File: manual.info, Node: Further Reading, Prev: Did you get the right package?, Up: Miscellanea
++
++4.5 Further Reading
++===================
++
++`bzip2' is not research work, in the sense that it doesn't present any
++new ideas. Rather, it's an engineering exercise based on existing
++ideas.
++
++ Four documents describe essentially all the ideas behind `bzip2':
++
++ Michael Burrows and D. J. Wheeler:
++ "A block-sorting lossless data compression algorithm"
++ 10th May 1994.
++ Digital SRC Research Report 124.
++ ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz
++ If you have trouble finding it, try searching at the
++ New Zealand Digital Library, http://www.nzdl.org.
++
++ Daniel S. Hirschberg and Debra A. Lelewer
++ "Efficient Decoding of Prefix Codes"
++ Communications of the ACM, April 1990, Vol 33, Number 4.
++ You might be able to get an electronic copy of this
++ from the ACM Digital Library.
++
++ David J. Wheeler
++ Program bred3.c and accompanying document bred3.ps.
++ This contains the idea behind the multi-table Huffman coding scheme.
++ ftp://ftp.cl.cam.ac.uk/users/djw3/
++
++ Jon L. Bentley and Robert Sedgewick
++ "Fast Algorithms for Sorting and Searching Strings"
++ Available from Sedgewick's web page,
++ www.cs.princeton.edu/~rs
++
++ The following paper gives valuable additional insights into the
++algorithm, but is not immediately the basis of any code used in bzip2.
++
++ Peter Fenwick:
++ Block Sorting Text Compression
++ Proceedings of the 19th Australasian Computer Science Conference,
++ Melbourne, Australia. Jan 31 - Feb 2, 1996.
++ ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps
++
++ Kunihiko Sadakane's sorting algorithm, mentioned above, is available
++from:
++
++ http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz
++
++ The Manber-Myers suffix array construction algorithm is described in
++a paper available from:
++
++ http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps
++
++ Finally, the following papers document some investigations I made
++into the performance of sorting and decompression algorithms:
++
++ Julian Seward
++ On the Performance of BWT Sorting Algorithms
++ Proceedings of the IEEE Data Compression Conference 2000
++ Snowbird, Utah. 28-30 March 2000.
++
++ Julian Seward
++ Space-time Tradeoffs in the Inverse B-W Transform
++ Proceedings of the IEEE Data Compression Conference 2001
++ Snowbird, Utah. 27-29 March 2001.
++
++
++
++Tag Table:
++Node: Top190
++Node: Introduction1058
++Node: How to use bzip22242
++Node: NAME2631
++Node: SYNOPSIS2898
++Node: DESCRIPTION3192
++Node: OPTIONS7848
++Node: MEMORY MANAGEMENT11162
++Node: RECOVERING DATA FROM DAMAGED FILES14700
++Node: PERFORMANCE NOTES16424
++Node: CAVEATS17709
++Node: AUTHOR19007
++Node: Programming with libbzip220012
++Node: Top-level structure20673
++Node: Low-level summary21575
++Node: High-level summary22964
++Node: Utility functions summary24439
++Node: Error handling25703
++Node: >Low-level interface30791
++Node: BZ2_bzCompressInit31118
++Node: BZ2_bzCompress35825
++Node: BZ2_bzCompressEnd42895
++Node: BZ2_bzDecompressInit43299
++Node: BZ2_bzDecompress44993
++Node: BZ2_bzDecompressEnd47609
++Node: High-level interface48042
++Node: BZ2_bzReadOpen50331
++Node: BZ2_bzRead52221
++Node: BZ2_bzReadGetUnused54849
++Node: BZ2_bzReadClose55852
++Node: BZ2_bzWriteOpen56512
++Node: BZ2_bzWrite57942
++Node: BZ2_bzWriteClose58544
++Node: Handling embedded compressed data streams60676
++Node: Standard file-reading/writing code62663
++Node: Utility functions64550
++Node: BZ2_bzBuffToBuffCompress64810
++Node: BZ2_bzBuffToBuffDecompress66900
++Node: zlib compatibility functions69124
++Node: Using the library in a stdio-free environment70697
++Node: Getting rid of stdio71034
++Node: Critical error handling71761
++Node: Making a Windows DLL73720
++Node: Miscellanea75070
++Node: Limitations of the compressed file format75406
++Node: Portability issues79036
++Node: Reporting bugs81092
++Node: Did you get the right package?84632
++Node: Further Reading85859
++
++End Tag Table
+--- a/bzip2recover.c 2010-09-11 09:18:40.000000000 +1000
++++ b/bzip2recover.c 2011-12-04 18:16:28.000000000 +1100
+@@ -24,6 +24,8 @@
+ #include <errno.h>
+ #include <stdlib.h>
+ #include <string.h>
++#include <fcntl.h>
++#include <unistd.h>
+
+
+ /* This program records bit locations in the file to be recovered.
+@@ -269,6 +271,19 @@ static Bool endsInBz2 ( Char* name )
+ name[n-1] == '2');
+ }
+
++/*---------------------------------------------*/
++/* Create `name' as a new file (O_EXCL, mode 0600) and return a stdio stream
++   opened with `mode'.  Returns NULL if the file already exists or cannot be
++   created; a file created here is removed again if fdopen() fails.          */
++FILE* fopen_output( Char* name, const char* mode )
++{
++   int   fh = open(name, O_WRONLY|O_CREAT|O_EXCL, 0600);
++   FILE* fp;
++   if (fh == -1) return NULL;
++   fp = fdopen(fh, mode);
++   if (fp == NULL) { close(fh); unlink(name); }
++   return fp;
++}
+
+ /*---------------------------------------------------*/
+ /*--- ---*/
+@@ -486,7 +501,7 @@ Int32 main ( Int32 argc, Char** argv )
+ fprintf ( stderr, " writing block %d to `%s' ...\n",
+ wrBlock+1, outFileName );
+
+- outFile = fopen ( outFileName, "wb" );
++ outFile = fopen_output ( outFileName, "wb" );
+ if (outFile == NULL) {
+ fprintf ( stderr, "%s: can't write `%s'\n",
+ progName, outFileName );
+--- a/bzgrep 2007-01-03 13:00:55.000000000 +1100
++++ b/bzgrep 2011-12-04 18:16:28.000000000 +1100
+@@ -1,27 +1,75 @@
+ #!/bin/sh
+
+-# Bzgrep wrapped for bzip2,
+-# adapted from zgrep by Philippe Troin <phil@fifi.org> for Debian GNU/Linux.
+-## zgrep notice:
+-## zgrep -- a wrapper around a grep program that decompresses files as needed
+-## Adapted from a version sent by Charles Levert <charles@comm.polymtl.ca>
++# bzgrep -- a wrapper around a grep program that decompresses files as needed
++# Adapted from zgrep of the Debian gzip package by Anibal Monsalve Salazar.
++# Adapted from a version sent by Charles Levert <charles@comm.polymtl.ca>
++
++# Copyright (C) 1998, 2001, 2002 Free Software Foundation
++# Copyright (C) 1993 Jean-loup Gailly
++
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2, or (at your option)
++# any later version.
++
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
++# 02111-1307, USA.
+
+ PATH="/usr/bin:$PATH"; export PATH
+
+-prog=`echo $0 | sed 's|.*/||'`
++prog=`echo "$0" | sed 's|.*/||'`
+ case "$prog" in
+ *egrep) grep=${EGREP-egrep} ;;
+ *fgrep) grep=${FGREP-fgrep} ;;
+ *) grep=${GREP-grep} ;;
+ esac
++
+ pat=""
++after_dash_dash=""
++files_with_matches=0
++files_without_matches=0
++no_filename=0
++with_filename=0
++
+ while test $# -ne 0; do
+- case "$1" in
+- -e | -f) opt="$opt $1"; shift; pat="$1"
++ case "$after_dash_dash$1" in
++ --d* | --rec*) echo >&2 "$0: $1: option not supported"; exit 2;;
++ --files-with-*) files_with_matches=1;;
++ --files-witho*) files_without_matches=1;;
++ --no-f*) no_filename=1;;
++ --wi*) with_filename=1;;
++ --*) ;;
++ -*)
++ case "$1" in
++ -*[dr]*) echo >&2 "$0: $1: option not supported"; exit 2;;
++ esac
++ case "$1" in
++ -*H*) with_filename=1;;
++ esac
++ case "$1" in
++ -*h*) no_filename=1;;
++ esac
++ case "$1" in
++ -*L*) files_without_matches=1;;
++ esac
++ case "$1" in
++ -*l*) files_with_matches=1;;
++ esac;;
++ esac
++ case "$after_dash_dash$1" in
++ -[ef]) opt="$opt $1"; shift; pat="$1"
+ if test "$grep" = grep; then # grep is buggy with -e on SVR4
+ grep=egrep
+ fi;;
+- -A | -B) opt="$opt $1 $2"; shift;;
++ -[ABCdm])opt="$opt $1 $2"; shift;;
++ --) opt="$opt $1"; after_dash_dash=1;;
+ -*) opt="$opt $1";;
+ *) if test -z "$pat"; then
+ pat="$1"
+@@ -35,19 +83,9 @@ done
+ if test -z "$pat"; then
+ echo "grep through bzip2 files"
+ echo "usage: $prog [grep_options] pattern [files]"
+- exit 1
++ exit 2
+ fi
+
+-list=0
+-silent=0
+-op=`echo "$opt" | sed -e 's/ //g' -e 's/-//g'`
+-case "$op" in
+- *l*) list=1
+-esac
+-case "$op" in
+- *h*) silent=1
+-esac
+-
+ if test $# -eq 0; then
+ bzip2 -cdfq | $grep $opt "$pat"
+ exit $?
+@@ -55,21 +93,40 @@ fi
+
+ res=0
+ for i do
+- if test -f "$i"; then :; else if test -f "$i.bz2"; then i="$i.bz2"; fi; fi
+- if test $list -eq 1; then
+- bzip2 -cdfq "$i" | $grep $opt "$pat" 2>&1 > /dev/null && echo $i
+- r=$?
+- elif test $# -eq 1 -o $silent -eq 1; then
+- bzip2 -cdfq "$i" | $grep $opt "$pat"
+- r=$?
+- else
+- j=${i//\\/\\\\}
+- j=${j//|/\\|}
+- j=${j//&/\\&}
+- j=`printf "%s" "$j" | tr '\n' ' '`
+- bzip2 -cdfq "$i" | $grep $opt "$pat" | sed "s|^|${j}:|"
+- r=$?
+- fi
+- test "$r" -ne 0 && res="$r"
++ bzip2 -cdfq -- "$i" |
++ if test $files_with_matches -eq 1; then
++ $grep $opt "$pat" > /dev/null && printf "%s\n" "$i"
++ elif test $files_without_matches -eq 1; then
++ $grep $opt "$pat" > /dev/null || printf "%s\n" "$i"
++ elif test $with_filename -eq 0 && { test $# -eq 1 || test $no_filename -eq 1; }; then
++ $grep $opt "$pat"
++ else
++ i=$(echo "$i" | sed -e 's/[\\|&]/\\&/g')
++ if test $with_filename -eq 1; then
++ sed_script="s|^[^:]*:|${i}:|"
++ else
++ sed_script="s|^|${i}:|"
++ fi
++ # Hack adapted from GPLed code at
++ # http://home.comcast.net/~j.p.h/cus-faq-2
++ # Has the same effect as the following two lines of bash:
++ #
++ # $grep $opt "$pat" | sed "$sed_script"
++ # exit ${PIPESTATUS[0]}
++ #
++ # Inside the `...`, fd4 goes to the pipe whose other end is read
++ # and passed to eval; fd1 is the normal standard output
++ # preserved the line before with exec 3>&1
++ exec 3>&1
++ eval `
++ exec 4>&1 >&3 3>&-
++ {
++ $grep $opt "$pat" 4>&-; echo "r=$?;" >&4
++ } | sed "$sed_script"
++ `
++ exit $r
++ fi
++ r=$?
++ test $res -lt $r && res=$r
+ done
+ exit $res
+--- a/bzdiff 2007-01-03 13:00:55.000000000 +1100
++++ b/bzdiff 2011-12-04 18:16:28.000000000 +1100
+@@ -37,10 +37,6 @@ if test -z "$FILES"; then
+ echo "Usage: $prog [${comp}_options] file [file]"
+ exit 1
+ fi
+-tmp=`mktemp ${TMPDIR:-/tmp}/bzdiff.XXXXXXXXXX` || {
+- echo 'cannot create a temporary file' >&2
+- exit 1
+-}
+ set $FILES
+ if test $# -eq 1; then
+ FILE=`echo "$1" | sed 's/.bz2$//'`
+@@ -53,10 +49,14 @@ elif test $# -eq 2; then
+ case "$2" in
+ *.bz2)
+ F=`echo "$2" | sed 's|.*/||;s|.bz2$||'`
+- bzip2 -cdfq "$2" > $tmp
+- bzip2 -cdfq "$1" | $comp $OPTIONS - $tmp
++ tmp=`mktemp "${TMPDIR:-/tmp}"/bzdiff.XXXXXXXXXX` || {
++ echo 'cannot create a temporary file' >&2
++ exit 1
++ }
++ bzip2 -cdfq "$2" > "$tmp"
++ bzip2 -cdfq "$1" | $comp $OPTIONS - "$tmp"
+ STAT="$?"
+- /bin/rm -f $tmp;;
++ /bin/rm -f "$tmp";;
+
+ *) bzip2 -cdfq "$1" | $comp $OPTIONS - "$2"
+ STAT="$?";;
+@@ -69,8 +69,8 @@ elif test $# -eq 2; then
+ STAT="$?";;
+ esac;;
+ esac
+- exit "$STAT"
+ else
+ echo "Usage: $prog [${comp}_options] file [file]"
+ exit 1
+ fi
++exit "$STAT"
+--- a/manual.xml 2010-09-11 19:36:06.000000000 +1000
++++ b/manual.xml 2011-12-04 18:16:28.000000000 +1100
+@@ -159,13 +159,22 @@ else.</para>
+
+ <listitem><para><computeroutput>bzip2</computeroutput> [
+ -cdfkqstvzVL123456789 ] [ filenames ... ]</para></listitem>
++
++ <listitem><para><computeroutput>bzip2</computeroutput> [
++ -h | --help ]</para></listitem>
+
+ <listitem><para><computeroutput>bunzip2</computeroutput> [
+ -fkvsVL ] [ filenames ... ]</para></listitem>
+
++ <listitem><para><computeroutput>bunzip2</computeroutput> [
++ -h | --help ]</para></listitem>
++
+ <listitem><para><computeroutput>bzcat</computeroutput> [ -s ] [
+ filenames ... ]</para></listitem>
+
++ <listitem><para><computeroutput>bzcat</computeroutput> [
++ -h | --help ]</para></listitem>
++
+ <listitem><para><computeroutput>bzip2recover</computeroutput>
+ filename</para></listitem>
+
+@@ -397,6 +406,10 @@ consistency error (eg, bug) which caused
+ will not be suppressed.</para></listitem>
+ </varlistentry>
+
++ <varlistentry><term><computeroutput>-h --help</computeroutput></term>
++ <listitem><para>Print a help message and exit.</para></listitem>
++ </varlistentry>
++
+ <varlistentry>
+ <term><computeroutput>-v --verbose</computeroutput></term>
+ <listitem><para>Verbose mode -- show the compression ratio for
+@@ -1162,9 +1175,9 @@ BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+ BZ_PARAM_ERROR
+ if strm is NULL
+- or blockSize < 1 or blockSize > 9
+- or verbosity < 0 or verbosity > 4
+- or workFactor < 0 or workFactor > 250
++ or blockSize &lt; 1 or blockSize &gt; 9
++ or verbosity &lt; 0 or verbosity &gt; 4
++ or workFactor &lt; 0 or workFactor &gt; 250
+ BZ_MEM_ERROR
+ if not enough memory is available
+ BZ_OK
+@@ -1474,8 +1487,8 @@ could fail with
+ BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+ BZ_PARAM_ERROR
+- if ( small != 0 && small != 1 )
+- or (verbosity <; 0 || verbosity > 4)
++ if ( small != 0 &amp;&amp; small != 1 )
++ or (verbosity &lt; 0 || verbosity &gt; 4)
+ BZ_MEM_ERROR
+ if insufficient memory is available
+ </programlisting>
+@@ -1560,7 +1573,7 @@ and release memory.</para>
+ <programlisting>
+ BZ_PARAM_ERROR
+ if strm is NULL or strm->s is NULL
+- or strm->avail_out < 1
++ or strm->avail_out &lt; 1
+ BZ_DATA_ERROR
+ if a data integrity error is detected in the compressed stream
+ BZ_DATA_ERROR_MAGIC
+@@ -1733,8 +1746,8 @@ BZ_CONFIG_ERROR
+ BZ_PARAM_ERROR
+ if f is NULL
+ or small is neither 0 nor 1
+- or ( unused == NULL && nUnused != 0 )
+- or ( unused != NULL && !(0 <= nUnused <= BZ_MAX_UNUSED) )
++ or ( unused == NULL &amp;&amp; nUnused != 0 )
++ or ( unused != NULL &amp;&amp; !(0 &lt;= nUnused &lt;= BZ_MAX_UNUSED) )
+ BZ_IO_ERROR
+ if ferror(f) is nonzero
+ BZ_MEM_ERROR
+@@ -1813,7 +1826,7 @@ immediately before
+
+ <programlisting>
+ BZ_PARAM_ERROR
+- if b is NULL or buf is NULL or len < 0
++ if b is NULL or buf is NULL or len &lt; 0
+ BZ_SEQUENCE_ERROR
+ if b was opened with BZ2_bzWriteOpen
+ BZ_IO_ERROR
+@@ -1971,7 +1984,7 @@ BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+ BZ_PARAM_ERROR
+ if f is NULL
+- or blockSize100k < 1 or blockSize100k > 9
++ or blockSize100k &lt; 1 or blockSize100k &gt; 9
+ BZ_IO_ERROR
+ if ferror(f) is nonzero
+ BZ_MEM_ERROR
+@@ -2018,7 +2031,7 @@ compressed and written to the file.</par
+
+ <programlisting>
+ BZ_PARAM_ERROR
+- if b is NULL or buf is NULL or len < 0
++ if b is NULL or buf is NULL or len &lt; 0
+ BZ_SEQUENCE_ERROR
+ if b was opened with BZ2_bzReadOpen
+ BZ_IO_ERROR
+@@ -2169,7 +2182,7 @@ f = fopen ( "myfile.bz2", "w" );
+ if ( !f ) {
+ /* handle error */
+ }
+-b = BZ2_bzWriteOpen( &bzerror, f, 9 );
++b = BZ2_bzWriteOpen( &amp;bzerror, f, 9 );
+ if (bzerror != BZ_OK) {
+ BZ2_bzWriteClose ( b );
+ /* handle error */
+@@ -2177,14 +2190,14 @@ if (bzerror != BZ_OK) {
+
+ while ( /* condition */ ) {
+ /* get data to write into buf, and set nBuf appropriately */
+- nWritten = BZ2_bzWrite ( &bzerror, b, buf, nBuf );
++ nWritten = BZ2_bzWrite ( &amp;bzerror, b, buf, nBuf );
+ if (bzerror == BZ_IO_ERROR) {
+- BZ2_bzWriteClose ( &bzerror, b );
++ BZ2_bzWriteClose ( &amp;bzerror, b );
+ /* handle error */
+ }
+ }
+
+-BZ2_bzWriteClose( &bzerror, b );
++BZ2_bzWriteClose( &amp;bzerror, b );
+ if (bzerror == BZ_IO_ERROR) {
+ /* handle error */
+ }
+@@ -2204,24 +2217,24 @@ f = fopen ( "myfile.bz2", "r" );
+ if ( !f ) {
+ /* handle error */
+ }
+-b = BZ2_bzReadOpen ( &bzerror, f, 0, NULL, 0 );
++b = BZ2_bzReadOpen ( &amp;bzerror, f, 0, NULL, 0 );
+ if ( bzerror != BZ_OK ) {
+- BZ2_bzReadClose ( &bzerror, b );
++ BZ2_bzReadClose ( &amp;bzerror, b );
+ /* handle error */
+ }
+
+ bzerror = BZ_OK;
+-while ( bzerror == BZ_OK && /* arbitrary other conditions */) {
+- nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ );
++while ( bzerror == BZ_OK &amp;&amp; /* arbitrary other conditions */) {
++ nBuf = BZ2_bzRead ( &amp;bzerror, b, buf, /* size of buf */ );
+ if ( bzerror == BZ_OK ) {
+ /* do something with buf[0 .. nBuf-1] */
+ }
+ }
+ if ( bzerror != BZ_STREAM_END ) {
+- BZ2_bzReadClose ( &bzerror, b );
++ BZ2_bzReadClose ( &amp;bzerror, b );
+ /* handle error */
+ } else {
+- BZ2_bzReadClose ( &bzerror, b );
++ BZ2_bzReadClose ( &amp;bzerror, b );
+ }
+ </programlisting>
+
+@@ -2287,9 +2300,9 @@ BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+ BZ_PARAM_ERROR
+ if dest is NULL or destLen is NULL
+- or blockSize100k < 1 or blockSize100k > 9
+- or verbosity < 0 or verbosity > 4
+- or workFactor < 0 or workFactor > 250
++ or blockSize100k &lt; 1 or blockSize100k &gt; 9
++ or verbosity &lt; 0 or verbosity &gt; 4
++ or workFactor &lt; 0 or workFactor &gt; 250
+ BZ_MEM_ERROR
+ if insufficient memory is available
+ BZ_OUTBUFF_FULL
+@@ -2355,8 +2368,8 @@ BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+ BZ_PARAM_ERROR
+ if dest is NULL or destLen is NULL
+- or small != 0 && small != 1
+- or verbosity < 0 or verbosity > 4
++ or small != 0 &amp;&amp; small != 1
++ or verbosity &lt; 0 or verbosity &gt; 4
+ BZ_MEM_ERROR
+ if insufficient memory is available
+ BZ_OUTBUFF_FULL
+--- a/bzmore 2007-01-03 13:00:55.000000000 +1100
++++ b/bzmore 2011-12-04 18:16:28.000000000 +1100
+@@ -24,10 +24,10 @@ else
+ # 'stty min 1' resets eof to ^a on both SunOS and SysV!
+ cb='min 1 -icanon'; ncb='icanon eof ^d'
+ fi
+-if test $? -eq 0 -a -n "$oldtty"; then
+- trap 'stty $oldtty 2>/dev/null; exit' 0 2 3 5 10 13 15
++if test $? -eq 0 && test -n "$oldtty"; then
++ trap 'stty $oldtty 2>/dev/null; exit' 0 INT QUIT TRAP USR1 PIPE TERM
+ else
+- trap 'stty $ncb echo 2>/dev/null; exit' 0 2 3 5 10 13 15
++ trap 'stty $ncb echo 2>/dev/null; exit' 0 INT QUIT TRAP USR1 PIPE TERM
+ fi
+
+ if test $# = 0; then
+@@ -46,7 +46,7 @@ else
+ ANS=`dd bs=1 count=1 2>/dev/null`
+ stty $ncb echo 2>/dev/null
+ echo " "
+- if test "$ANS" = 'e' -o "$ANS" = 'q'; then
++ if test "$ANS" = 'e' || test "$ANS" = 'q'; then
+ exit
+ fi
+ fi
+--- a/bzip2.c 2010-09-11 09:04:53.000000000 +1000
++++ b/bzip2.c 2011-12-04 18:16:28.000000000 +1100
+@@ -1890,7 +1890,9 @@ IntNative main ( IntNative argc, Char *a
+ case '8': blockSize100k = 8; break;
+ case '9': blockSize100k = 9; break;
+ case 'V':
+- case 'L': license(); break;
++ case 'L': license();
++ exit ( 0 );
++ break;
+ case 'v': verbosity++; break;
+ case 'h': usage ( progName );
+ exit ( 0 );
+@@ -1916,8 +1918,8 @@ IntNative main ( IntNative argc, Char *a
+ if (ISFLAG("--keep")) keepInputFiles = True; else
+ if (ISFLAG("--small")) smallMode = True; else
+ if (ISFLAG("--quiet")) noisy = False; else
+- if (ISFLAG("--version")) license(); else
+- if (ISFLAG("--license")) license(); else
++ if (ISFLAG("--version")) { license(); exit ( 0 ); } else
++ if (ISFLAG("--license")) { license(); exit ( 0 ); } else
+ if (ISFLAG("--exponential")) workFactor = 1; else
+ if (ISFLAG("--repetitive-best")) redundant(aa->name); else
+ if (ISFLAG("--repetitive-fast")) redundant(aa->name); else
+@@ -2003,12 +2005,14 @@ IntNative main ( IntNative argc, Char *a
+ testf ( aa->name );
+ }
+ }
+- if (testFailsExist && noisy) {
+- fprintf ( stderr,
+- "\n"
+- "You can use the `bzip2recover' program to attempt to recover\n"
+- "data from undamaged sections of corrupted files.\n\n"
+- );
++ if (testFailsExist) {
++ if (noisy) {
++ fprintf ( stderr,
++ "\n"
++ "You can use the `bzip2recover' program to attempt to recover\n"
++ "data from undamaged sections of corrupted files.\n\n"
++ );
++ }
+ setExit(2);
+ exit(exitValue);
+ }
+--- a/bzexe 2011-12-04 13:55:53.589856334 +1100
++++ b/bzexe 2011-12-04 18:16:28.000000000 +1100
+@@ -0,0 +1,182 @@
++#!/bin/sh
++# bzexe: compressor for Unix executables (derived from gzip's gzexe).
++# Use this only for binaries that you do not use frequently.
++#
++# The compressed version is a shell script which decompresses itself after
++# skipping $skip lines of shell commands. We try invoking the compressed
++# executable with the original name (for programs looking at their name).
++# We also try to retain the original file permissions on the compressed file.
++# For safety reasons, bzexe will not create setuid or setgid shell scripts.
++
++# WARNING: the first line of this file must be either : or #!/bin/sh
++# The : is required for some old versions of csh.
++# On Ultrix, /bin/sh is too buggy, change the first line to: #!/bin/sh5
++
++
++# Copyright (C) 1998, 2002 Free Software Foundation
++# Copyright (C) 1993 Jean-loup Gailly
++
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2, or (at your option)
++# any later version.
++
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
++# 02111-1307, USA.
++
++
++PATH="/usr/bin:$PATH"
++x=`basename "$0"`
++if test $# = 0; then
++  echo compress executables. original file foo is renamed to foo~
++  echo "usage: ${x} [-d] files..."
++  echo " -d decompress the executables"
++  exit 1
++fi
++# noclobber: a plain ">" onto an existing file fails, so creating $tmp is exclusive.
++set -C
++tmp=gz$$
++trap "rm -f $tmp; exit 1" HUP INT QUIT TRAP USR1 PIPE TERM
++: > $tmp || exit 1
++
++decomp=0
++res=0
++case "$x" in ungzexe | unbzexe) decomp=1;; esac  # invoked under an "un" name: decompress
++if test "x$1" = "x-d"; then
++  decomp=1
++  shift
++fi
++# Probe for a working cpmod command; $cpmod is only set when the probe prints nothing.
++echo hi > zfoo1$$ || exit 1
++echo hi > zfoo2$$ || exit 1
++if test -z "`(${CPMOD-cpmod} zfoo1$$ zfoo2$$) 2>&1`"; then
++  cpmod=${CPMOD-cpmod}
++fi
++rm -f zfoo[12]$$
++# Find tail on $PATH and switch to the "tail -n +N" spelling when it is accepted.
++tail=""
++IFS="${IFS= }"; saveifs="$IFS"; IFS="${IFS}:"
++for dir in $PATH; do
++  test -z "$dir" && dir=.
++  if test -f "$dir/tail"; then
++    tail="$dir/tail"
++    break
++  fi
++done
++IFS="$saveifs"
++if test -z "$tail"; then
++  echo cannot find tail
++  exit 1
++fi
++case `echo foo | $tail -n +1 2>/dev/null` in
++foo) tail="$tail -n";;
++esac
++
++for i do
++  if test ! -f "$i" ; then
++    echo "${x}: $i not a file"
++    res=1
++    continue
++  fi
++  if test $decomp -eq 0; then
++    if sed -e 1d -e 2q "$i" | grep "^skip=[0-9]*$" >/dev/null; then
++      echo "${x}: $i is already bzexe'd"
++      continue
++    fi
++  fi
++  if ls -l "$i" | grep '^...[sS]' > /dev/null; then
++    echo "${x}: $i has setuid permission, unchanged"
++    continue
++  fi
++  if ls -l "$i" | grep '^......[sS]' > /dev/null; then
++    echo "${x}: $i has setgid permission, unchanged"
++    continue
++  fi
++  case `basename "$i"` in
++  bzip2 | tail | sed | chmod | ln | sleep | rm)
++    echo "${x}: $i would depend on itself"; continue ;;
++  esac
++  if test -z "$cpmod"; then
++    cp -p "$i" $tmp 2>/dev/null || cp "$i" $tmp
++    if test -w $tmp 2>/dev/null; then
++      writable=1
++    else
++      writable=0
++      chmod u+w $tmp 2>/dev/null
++    fi
++    : >| $tmp # truncate the file, ignoring set -C
++  fi
++  if test $decomp -eq 0; then
++    sed 1q "$0" >> $tmp  # copy our own first line (the interpreter line) into the stub
++    sed "s|^if tail|if $tail|" >> $tmp <<'EOF'
++skip=23
++set -C
++umask=`umask`
++umask 77
++tmpfile=`mktemp /tmp/gztmp.XXXXXXXXXX` || exit 1
++if tail +$skip "$0" | /bin/bzip2 -cd >> $tmpfile; then
++  umask $umask
++  /bin/chmod 700 $tmpfile
++  prog="`echo $0 | /bin/sed 's|^.*/||'`"
++  if /bin/ln -T $tmpfile "/tmp/$prog" 2>/dev/null; then
++    trap '/bin/rm -f $tmpfile "/tmp/$prog"; exit $res' 0
++    (/bin/sleep 5; /bin/rm -f $tmpfile "/tmp/$prog") 2>/dev/null &
++    /tmp/"$prog" ${1+"$@"}; res=$?
++  else
++    trap '/bin/rm -f $tmpfile; exit $res' 0
++    (/bin/sleep 5; /bin/rm -f $tmpfile) 2>/dev/null &
++    $tmpfile ${1+"$@"}; res=$?
++  fi
++else
++  echo Cannot decompress $0; exit 1
++fi; exit $res
++EOF
++    bzip2 -cv9 "$i" >> $tmp || {
++      /bin/rm -f $tmp
++      echo "${x}: compression not possible for $i, file unchanged."
++      res=1
++      continue
++    }
++
++  else
++    # decompression; ">|" is required because $tmp may already exist and set -C is on
++    skip=23  # default: the stub is 22 lines long, so the payload starts at line 23
++    if sed -e 1d -e 2q "$i" | grep "^skip=[0-9]*$" >/dev/null; then
++      eval `sed -e 1d -e 2q "$i"`
++    fi
++    if $tail +$skip "$i" | bzip2 -cd >| $tmp; then
++      :
++    else
++      echo "${x}: $i probably not in bzexe format, file unchanged."
++      res=1
++      continue
++    fi
++  fi
++  rm -f "$i~"
++  mv "$i" "$i~" || {
++    echo "${x}: cannot backup $i as $i~"
++    rm -f $tmp
++    res=1
++    continue
++  }
++  mv $tmp "$i" || cp -p $tmp "$i" 2>/dev/null || cp $tmp "$i" || {
++    echo "${x}: cannot create $i"
++    rm -f $tmp
++    res=1
++    continue
++  }
++  rm -f $tmp
++  if test -n "$cpmod"; then
++    $cpmod "$i~" "$i" 2>/dev/null
++  elif test $writable -eq 0; then
++    chmod u-w "$i" 2>/dev/null
++  fi
++done
++exit $res
+--- a/bzip2.1 2010-09-11 19:35:11.000000000 +1000
++++ b/bzip2.1 2011-12-04 18:16:28.000000000 +1100
+@@ -14,6 +14,9 @@ bzip2recover \- recovers data from damag
+ [
+ .I "filenames \&..."
+ ]
++.br
++.B bzip2
++.RB [ " \-h|--help " ]
+ .ll -8
+ .br
+ .B bunzip2
+@@ -22,12 +25,18 @@ bzip2recover \- recovers data from damag
+ .I "filenames \&..."
+ ]
+ .br
++.B bunzip2
++.RB [ " \-h|--help " ]
++.br
+ .B bzcat
+ .RB [ " \-s " ]
+ [
+ .I "filenames \&..."
+ ]
+ .br
++.B bzcat
++.RB [ " \-h|--help " ]
++.br
+ .B bzip2recover
+ .I "filename"
+
+@@ -240,6 +249,9 @@ Verbose mode -- show the compression rat
+ Further \-v's increase the verbosity level, spewing out lots of
+ information which is primarily of interest for diagnostic purposes.
+ .TP
++.B \-h --help
++Print a help message and exit.
++.TP
+ .B \-L --license -V --version
+ Display the software version, license terms and conditions.
+ .TP
+--- a/Makefile 2010-09-11 08:46:02.000000000 +1000
++++ b/Makefile 2011-12-04 18:16:28.000000000 +1100
+@@ -12,6 +12,8 @@
+ # in the file LICENSE.
+ # ------------------------------------------------------------------
+
++somajor=1.0
++sominor=$(somajor).4
+ SHELL=/bin/sh
+
+ # To assist in cross-compiling
+@@ -21,7 +23,7 @@ RANLIB=ranlib
+ LDFLAGS=
+
+ BIGFILES=-D_FILE_OFFSET_BITS=64
+-CFLAGS=-Wall -Winline -O2 -g $(BIGFILES)
++CFLAGS=-Wall -Winline -O2 -g $(BIGFILES) $(DEBCFLAGS)
+
+ # Where you want it installed when you do 'make install'
+ PREFIX=/usr/local
+@@ -35,9 +37,9 @@ OBJS= blocksort.o \
+ decompress.o \
+ bzlib.o
+
+-all: libbz2.a bzip2 bzip2recover test
++all: libbz2.a bzip2 bzip2recover # test
+
+-bzip2: libbz2.a bzip2.o
++bzip2: libbz2.so bzip2.o
+ $(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 bzip2.o -L. -lbz2
+
+ bzip2recover: bzip2recover.o
+@@ -46,20 +48,42 @@ bzip2recover: bzip2recover.o
+ libbz2.a: $(OBJS)
+ rm -f libbz2.a
+ $(AR) cq libbz2.a $(OBJS)
+- @if ( test -f $(RANLIB) -o -f /usr/bin/ranlib -o \
+- -f /bin/ranlib -o -f /usr/ccs/bin/ranlib ) ; then \
++ @if ( test -f $(RANLIB) || test -f /usr/bin/ranlib || \
++ test -f /bin/ranlib || test -f /usr/ccs/bin/ranlib ) ; then \
+ echo $(RANLIB) libbz2.a ; \
+ $(RANLIB) libbz2.a ; \
+ fi
+
++libbz2.so: libbz2.so.$(somajor) # unversioned name resolved by -lbz2 when linking bzip2
++ ln -sf $^ $@
++
++libbz2.so.$(somajor): libbz2.so.$(sominor) # name recorded as the soname below
++ ln -sf $^ $@
++
++libbz2.so.$(sominor): $(OBJS:%.o=%.sho) # the real shared object, linked from the PIC objects
++ $(CC) $(LDFLAGS) -o $@ -shared \
++ -Wl,-soname,libbz2.so.$(somajor) $^ -lc
++
++%.sho: %.c # PIC objects that go into the shared library
++ $(CC) $(CFLAGS) -D_REENTRANT -fPIC -o $@ -c $<
++
++%.o: %.c # non-PIC objects for libbz2.a and the executables
++ $(CC) $(CFLAGS) -D_REENTRANT -o $@ -c $<
++
+ check: test
+ test: bzip2
+ @cat words1
++ LD_LIBRARY_PATH=.:$$LD_LIBRARY_PATH \
+ ./bzip2 -1 < sample1.ref > sample1.rb2
++ LD_LIBRARY_PATH=.:$$LD_LIBRARY_PATH \
+ ./bzip2 -2 < sample2.ref > sample2.rb2
++ LD_LIBRARY_PATH=.:$$LD_LIBRARY_PATH \
+ ./bzip2 -3 < sample3.ref > sample3.rb2
++ LD_LIBRARY_PATH=.:$$LD_LIBRARY_PATH \
+ ./bzip2 -d < sample1.bz2 > sample1.tst
++ LD_LIBRARY_PATH=.:$$LD_LIBRARY_PATH \
+ ./bzip2 -d < sample2.bz2 > sample2.tst
++ LD_LIBRARY_PATH=.:$$LD_LIBRARY_PATH \
+ ./bzip2 -ds < sample3.bz2 > sample3.tst
+ cmp sample1.bz2 sample1.rb2
+ cmp sample2.bz2 sample2.rb2
+@@ -69,15 +93,15 @@ test: bzip2
+ cmp sample3.tst sample3.ref
+ @cat words3
+
+-install: bzip2 bzip2recover
++install: bzip2 bzip2recover libbz2.a
+ if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi
+ if ( test ! -d $(PREFIX)/lib ) ; then mkdir -p $(PREFIX)/lib ; fi
+ if ( test ! -d $(PREFIX)/man ) ; then mkdir -p $(PREFIX)/man ; fi
+ if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir -p $(PREFIX)/man/man1 ; fi
+ if ( test ! -d $(PREFIX)/include ) ; then mkdir -p $(PREFIX)/include ; fi
+ cp -f bzip2 $(PREFIX)/bin/bzip2
+- cp -f bzip2 $(PREFIX)/bin/bunzip2
+- cp -f bzip2 $(PREFIX)/bin/bzcat
++ ln -f $(PREFIX)/bin/bzip2 $(PREFIX)/bin/bunzip2
++ ln -f $(PREFIX)/bin/bzip2 $(PREFIX)/bin/bzcat
+ cp -f bzip2recover $(PREFIX)/bin/bzip2recover
+ chmod a+x $(PREFIX)/bin/bzip2
+ chmod a+x $(PREFIX)/bin/bunzip2
+@@ -87,8 +111,10 @@ install: bzip2 bzip2recover
+ chmod a+r $(PREFIX)/man/man1/bzip2.1
+ cp -f bzlib.h $(PREFIX)/include
+ chmod a+r $(PREFIX)/include/bzlib.h
+- cp -f libbz2.a $(PREFIX)/lib
++ cp -fa libbz2.a libbz2.so* $(PREFIX)/lib
+ chmod a+r $(PREFIX)/lib/libbz2.a
++ cp -f bzexe $(PREFIX)/bin/bzexe
++ chmod a+x $(PREFIX)/bin/bzexe
+ cp -f bzgrep $(PREFIX)/bin/bzgrep
+ ln -s -f $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzegrep
+ ln -s -f $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzfgrep
+@@ -99,7 +125,8 @@ install: bzip2 bzip2recover
+ cp -f bzdiff $(PREFIX)/bin/bzdiff
+ ln -s -f $(PREFIX)/bin/bzdiff $(PREFIX)/bin/bzcmp
+ chmod a+x $(PREFIX)/bin/bzdiff
+- cp -f bzgrep.1 bzmore.1 bzdiff.1 $(PREFIX)/man/man1
++ cp -f bzexe.1 bzgrep.1 bzmore.1 bzdiff.1 $(PREFIX)/man/man1
++ chmod a+r $(PREFIX)/man/man1/bzexe.1
+ chmod a+r $(PREFIX)/man/man1/bzgrep.1
+ chmod a+r $(PREFIX)/man/man1/bzmore.1
+ chmod a+r $(PREFIX)/man/man1/bzdiff.1
+@@ -109,33 +136,13 @@ install: bzip2 bzip2recover
+ echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1
+
+ clean:
+- rm -f *.o libbz2.a bzip2 bzip2recover \
++ rm -f *.o *.sho libbz2.a libbz2.so* bzip2 bzip2recover \
+ sample1.rb2 sample2.rb2 sample3.rb2 \
+ sample1.tst sample2.tst sample3.tst
+
+-blocksort.o: blocksort.c
+- @cat words0
+- $(CC) $(CFLAGS) -c blocksort.c
+-huffman.o: huffman.c
+- $(CC) $(CFLAGS) -c huffman.c
+-crctable.o: crctable.c
+- $(CC) $(CFLAGS) -c crctable.c
+-randtable.o: randtable.c
+- $(CC) $(CFLAGS) -c randtable.c
+-compress.o: compress.c
+- $(CC) $(CFLAGS) -c compress.c
+-decompress.o: decompress.c
+- $(CC) $(CFLAGS) -c decompress.c
+-bzlib.o: bzlib.c
+- $(CC) $(CFLAGS) -c bzlib.c
+-bzip2.o: bzip2.c
+- $(CC) $(CFLAGS) -c bzip2.c
+-bzip2recover.o: bzip2recover.c
+- $(CC) $(CFLAGS) -c bzip2recover.c
+-
+
+ distclean: clean
+- rm -f manual.ps manual.html manual.pdf
++ #rm -f manual.ps manual.html manual.pdf
+
+ DISTNAME=bzip2-1.0.6
+ dist: check manual
+@@ -187,6 +194,8 @@ dist: check manual
+ $(DISTNAME)/bzdiff.1 \
+ $(DISTNAME)/bzmore \
+ $(DISTNAME)/bzmore.1 \
++ $(DISTNAME)/bzexe \
++ $(DISTNAME)/bzexe.1 \
+ $(DISTNAME)/bzgrep \
+ $(DISTNAME)/bzgrep.1 \
+ $(DISTNAME)/Makefile-libbz2_so \