Index: usr.bin/tar/test/Makefile =================================================================== --- usr.bin/tar/test/Makefile (revision 207846) +++ usr.bin/tar/test/Makefile (working copy) @@ -34,10 +34,10 @@ NO_MAN=yes PROG=bsdtar_test -DPADD=${LIBARCHIVE} ${LIBBZ2} ${LIBZ} +DPADD=${LIBARCHIVE} ${LIBBZ2} ${LIBZ} ${LIBLZMA} CFLAGS+= -DPLATFORM_CONFIG_H=\"config_freebsd.h\" CFLAGS+= -I.. -LDADD= -larchive -lz -lbz2 +LDADD= -larchive -lz -lbz2 -llzma CFLAGS+= -static -g -O2 -Wall CFLAGS+= -I${.OBJDIR} CFLAGS+= -I${TAR_SRCDIR} Index: usr.bin/tar/Makefile =================================================================== --- usr.bin/tar/Makefile (revision 207846) +++ usr.bin/tar/Makefile (working copy) @@ -5,8 +5,8 @@ BSDTAR_VERSION_STRING=2.7.0 SRCS= bsdtar.c cmdline.c getdate.c matching.c read.c siginfo.c subst.c tree.c util.c write.c WARNS?= 5 -DPADD= ${LIBARCHIVE} ${LIBBZ2} ${LIBZ} -LDADD= -larchive -lbz2 -lz -lmd +DPADD= ${LIBARCHIVE} ${LIBBZ2} ${LIBZ} ${LIBLZMA} +LDADD= -larchive -lbz2 -lz -lmd -llzma .if ${MK_OPENSSL} != "no" LDADD+= -lcrypto .endif Index: usr.bin/xzdec/Makefile =================================================================== --- usr.bin/xzdec/Makefile (revision 0) +++ usr.bin/xzdec/Makefile (revision 0) @@ -0,0 +1,30 @@ +# $FreeBSD$ + +PROG= xzdec + +LINKS= ${BINDIR}/xzdec ${BINDIR}/lzmadec + +MLINKS= xzdec.1 lzmadec.1 + +XZDIR= ${.CURDIR}/../../contrib/xz/src +LZMALIBDIR= ${.CURDIR}/../../lib/liblzma + +.PATH: ${XZDIR}/xzdec + +SRCS= xzdec.c + +.PATH: ${XZDIR}/common + +SRCS+= tuklib_progname.c \ + tuklib_exit.c + +WARNS?= 3 + +CFLAGS+= -DHAVE_CONFIG_H \ + -I${LZMALIBDIR} \ + -I${XZDIR}/common + +DPADD= ${LIBLZMA} +LDADD= -llzma + +.include <bsd.prog.mk> Index: usr.bin/Makefile =================================================================== --- usr.bin/Makefile (revision 207846) +++ usr.bin/Makefile (working copy) @@ -114,6 +114,7 @@ look \ lorder \ lsvfs \ + lzmainfo \ m4 \ ${_mail} \ ${_make} \ @@ -228,6 +229,8 @@ xinstall \ ${_xlint} \
${_xstr} \ + xz \ + xzdec \ ${_yacc} \ yes \ ${_ypcat} \ Index: usr.bin/lzmainfo/Makefile =================================================================== --- usr.bin/lzmainfo/Makefile (revision 0) +++ usr.bin/lzmainfo/Makefile (revision 0) @@ -0,0 +1,24 @@ +# $FreeBSD$ + +PROG= lzmainfo + +XZDIR= ${.CURDIR}/../../contrib/xz/src +LZMALIBDIR= ${.CURDIR}/../../lib/liblzma + +.PATH: ${XZDIR}/lzmainfo +SRCS+= lzmainfo.c + +.PATH: ${XZDIR}/common +SRCS+= tuklib_progname.c \ + tuklib_exit.c + +WARNS?= 3 + +CFLAGS+= -DHAVE_CONFIG_H \ + -I${LZMALIBDIR} \ + -I${XZDIR}/common + +DPADD= ${LIBLZMA} +LDADD= -llzma + +.include <bsd.prog.mk> Index: usr.bin/less/Makefile =================================================================== --- usr.bin/less/Makefile (revision 207846) +++ usr.bin/less/Makefile (working copy) @@ -11,7 +11,9 @@ DPADD= ${LIBTERMCAP} LDADD= -ltermcap LINKS= ${BINDIR}/less ${BINDIR}/more \ - ${BINDIR}/zless ${BINDIR}/bzless + ${BINDIR}/zless ${BINDIR}/bzless \ + ${BINDIR}/zless ${BINDIR}/xzless \ + ${BINDIR}/zless ${BINDIR}/lzless MLINKS= less.1 more.1 CLEANFILES= less.1 Index: usr.bin/less/lesspipe.sh =================================================================== --- usr.bin/less/lesspipe.sh (revision 207846) +++ usr.bin/less/lesspipe.sh (working copy) @@ -13,4 +13,10 @@ *.bz2) exec bzip2 -d -c "$1" 2>/dev/null ;; + *.xz) + exec xz -d -c "$1" 2>/dev/null + ;; + *.lzma) + exec lzma -d -c "$1" 2>/dev/null + ;; esac Index: usr.bin/cpio/test/Makefile =================================================================== --- usr.bin/cpio/test/Makefile (revision 207846) +++ usr.bin/cpio/test/Makefile (working copy) @@ -42,10 +42,10 @@ NO_MAN=yes PROG=bsdcpio_test -DPADD=${LIBARCHIVE} ${LIBBZ2} ${LIBZ} +DPADD=${LIBARCHIVE} ${LIBBZ2} ${LIBZ} ${LIBLZMA} CFLAGS+= -DPLATFORM_CONFIG_H=\"config_freebsd.h\" CFLAGS+= -I..
-LDADD= -larchive -lz -lbz2 +LDADD= -larchive -lz -lbz2 -llzma CFLAGS+= -static -g -O2 -Wall CFLAGS+= -I${.OBJDIR} CFLAGS+= -I${CPIO_SRCDIR} Index: usr.bin/cpio/Makefile =================================================================== --- usr.bin/cpio/Makefile (revision 207846) +++ usr.bin/cpio/Makefile (working copy) @@ -6,7 +6,7 @@ BSDCPIO_VERSION_STRING=2.7.0 SRCS= cpio.c cmdline.c err.c matching.c pathmatch.c WARNS?= 6 -DPADD= ${LIBARCHIVE} ${LIBZ} ${LIBBZ2} +DPADD= ${LIBARCHIVE} ${LIBZ} ${LIBBZ2} ${LIBLZMA} CFLAGS+= -DBSDCPIO_VERSION_STRING=\"${BSDCPIO_VERSION_STRING}\" CFLAGS+= -DPLATFORM_CONFIG_H=\"config_freebsd.h\" .ifdef RELEASE_CRUNCH @@ -14,7 +14,7 @@ # statically linked, cannot use -lcrypto, and are size sensitive. CFLAGS+= -DSMALLER .endif -LDADD+= -larchive -lz -lbz2 -lmd +LDADD+= -larchive -lz -lbz2 -lmd -llzma .if ${MK_OPENSSL} != "no" LDADD+= -lcrypto .endif Index: usr.bin/xz/Makefile =================================================================== --- usr.bin/xz/Makefile (revision 0) +++ usr.bin/xz/Makefile (revision 0) @@ -0,0 +1,46 @@ +# $FreeBSD$ + +PROG= xz + +LINKS= ${BINDIR}/xz ${BINDIR}/unxz +LINKS+= ${BINDIR}/xz ${BINDIR}/lzma +LINKS+= ${BINDIR}/xz ${BINDIR}/unlzma +LINKS+= ${BINDIR}/xz ${BINDIR}/xzcat +LINKS+= ${BINDIR}/xz ${BINDIR}/lzcat + +MLINKS= xz.1 unxz.1 xz.1 lzma.1 xz.1 unlzma.1 xz.1 xzcat.1 xz.1 lzcat.1 + +XZDIR= ${.CURDIR}/../../contrib/xz/src +LZMALIBDIR= ${.CURDIR}/../../lib/liblzma + +.PATH: ${XZDIR}/xz + +SRCS= args.c \ + coder.c \ + file_io.c \ + hardware.c \ + list.c \ + main.c \ + message.c \ + options.c \ + signals.c \ + suffix.c \ + util.c + +.PATH: ${XZDIR}/common + +SRCS+= tuklib_open_stdxxx.c \ + tuklib_progname.c \ + tuklib_exit.c \ + tuklib_cpucores.c + +WARNS?= 3 + +CFLAGS+= -DHAVE_CONFIG_H \ + -I${LZMALIBDIR} \ + -I${XZDIR}/common + +DPADD= ${LIBLZMA} +LDADD= -llzma + +.include <bsd.prog.mk> Index: usr.bin/ar/Makefile =================================================================== --- usr.bin/ar/Makefile (revision
207846) +++ usr.bin/ar/Makefile (working copy) @@ -5,8 +5,8 @@ WARNS?= 5 -DPADD= ${LIBARCHIVE} ${LIBBZ2} ${LIBZ} ${LIBELF} -LDADD= -larchive -lbz2 -lz -lelf +DPADD= ${LIBARCHIVE} ${LIBBZ2} ${LIBZ} ${LIBLZMA} ${LIBELF} +LDADD= -larchive -lbz2 -lz -llzma -lelf CFLAGS+=-I. -I${.CURDIR} Index: rescue/rescue/Makefile =================================================================== --- rescue/rescue/Makefile (revision 207846) +++ rescue/rescue/Makefile (working copy) @@ -208,6 +208,10 @@ CRUNCH_ALIAS_bzip2= bunzip2 bzcat CRUNCH_LIBS+= -lbz2 +CRUNCH_PROGS_usr.bin+= xz +CRUNCH_ALIAS_xz= unxz lzma unlzma xzcat lzcat +CRUNCH_LIBS+= -llzma + CRUNCH_PROGS_usr.bin+= tar CRUNCH_LIBS+= -larchive -lmd .if ${MK_OPENSSL} != "no" Index: contrib/xz/FREEBSD-upgrade =================================================================== --- contrib/xz/FREEBSD-upgrade (revision 0) +++ contrib/xz/FREEBSD-upgrade (revision 0) @@ -0,0 +1,28 @@ +$FreeBSD$ + +xz + +The source code is pulled with git: + + git clone git://ctrl.tukaani.org/xz.git xz + +ChangeLog is generated with: + + git log > ChangeLog + +For the import files and directories were pruned by: + +sh -c 'for F in `cat FREEBSD-Xlist | grep -v FreeBSD`; do rm -rf ./$F ; done' + +You may check if there are any new files that we don't need. + +The instructions for importing new release and merging to HEAD can be found +at FreeBSD wiki: + + http://wiki.freebsd.org/SubversionPrimer/VendorImports + +To make local changes to xz, simply patch and commit to the trunk +branch (aka HEAD). Never make local changes on the vendor branch. + +mm@FreeBSD.org +10-May-2010 Index: contrib/xz/TODO =================================================================== --- contrib/xz/TODO (revision 0) +++ contrib/xz/TODO (revision 0) @@ -0,0 +1,60 @@ + +XZ Utils To-Do List +=================== + +Known bugs +---------- + + The test suite is too incomplete. 
+ + If the memory usage limit is less than about 13 MiB, xz is unable to + automatically scale down the compression settings enough even though + it would be possible by switching from BT2/BT3/BT4 match finder to + HC3/HC4. + + The code to detect number of CPU cores doesn't count hyperthreading + as multiple cores. In context of xz, it probably should. + Hyperthreading is good at least with p7zip. + + XZ Utils compress some files significantly worse than LZMA Utils. + This is due to faster compression presets used by XZ Utils, and + can be worked around by using "xz --extreme". However, the presets + need some tweaking and maybe this issue can be minimized without + making the typical case too much slower. + + xz doesn't quote unprintable characters when it displays file names + given on the command line. + + tuklib_exit() doesn't block signals => EINTR is possible. + + +Missing features +---------------- + + xz doesn't support copying extended attributes, access control + lists etc. from source to target file. + + Multithreaded compression + + Multithreaded decompression + + Buffer-to-buffer coding could use less RAM (especially when + decompressing LZMA1 or LZMA2). + + I/O library is not implemented. It will possibly be named libzzf. + + lzma_strerror() to convert lzma_ret to human readable form? + This is tricky, because the same error codes are used with + slightly different meanings. + + +Documentation +------------- + + Some tutorial is needed for liblzma. I have planned to write some + extremely well commented example programs, which would work as + a tutorial. I suppose the Doxygen tags are quite OK as a quick + reference once one is familiar with the liblzma API. + + Document the LZMA1 and LZMA2 algorithms. 
+ Index: contrib/xz/AUTHORS =================================================================== --- contrib/xz/AUTHORS (revision 0) +++ contrib/xz/AUTHORS (revision 0) @@ -0,0 +1,27 @@ + +Authors of XZ Utils +=================== + + XZ Utils is developed and maintained by Lasse Collin + <lasse.collin@tukaani.org>. + + Major parts of liblzma are based on code written by Igor Pavlov, + specifically the LZMA SDK <http://7-zip.org/sdk.html>. Without + this code, XZ Utils wouldn't exist. + + The SHA-256 implementation in liblzma is based on the code found from + 7-Zip <http://7-zip.org/>, which has a modified version of the SHA-256 + code found from Crypto++ <http://www.cryptopp.com/>. The SHA-256 code + in Crypto++ was written by Kevin Springle and Wei Dai. + + Some scripts have been adapted from gzip. The original versions + were written by Jean-loup Gailly, Charles Levert, and Paul Eggert. + Andrew Dudman helped adapting the script and their man pages for + XZ Utils. + + The GNU Autotools based build system contains files from many authors, + which I'm not trying to list here. + + Several people have contributed fixes or reported bugs. Most of them + are mentioned in the file THANKS. + Index: contrib/xz/src/lzmainfo/lzmainfo.c =================================================================== --- contrib/xz/src/lzmainfo/lzmainfo.c (revision 0) +++ contrib/xz/src/lzmainfo/lzmainfo.c (revision 0) @@ -0,0 +1,210 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzmainfo.c +/// \brief lzmainfo tool for compatibility with LZMA Utils +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" +#include <stdio.h> +#include <errno.h> + +#include "lzma.h" +#include "getopt.h" +#include "tuklib_gettext.h" +#include "tuklib_progname.h" +#include "tuklib_exit.h" + + +static void lzma_attribute((noreturn)) +help(void) +{ + printf( +_("Usage: %s [--help] [--version] [FILE]...\n" +"Show information stored in the .lzma file header"), progname); + + printf(_( +"\nWith no FILE, or when FILE is -, read standard input.\n")); + printf("\n"); + + printf(_("Report bugs to <%s> (in English or Finnish).\n"), + PACKAGE_BUGREPORT); + printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); + + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, true); +} + + +static void lzma_attribute((noreturn)) +version(void) +{ + puts("lzmainfo (" PACKAGE_NAME ") " PACKAGE_VERSION); + tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, true); +} + + +/// Parse command line options. +static void +parse_args(int argc, char **argv) +{ + enum { + OPT_HELP, + OPT_VERSION, + }; + + static const struct option long_opts[] = { + { "help", no_argument, NULL, OPT_HELP }, + { "version", no_argument, NULL, OPT_VERSION }, + { NULL, 0, NULL, 0 } + }; + + int c; + while ((c = getopt_long(argc, argv, "", long_opts, NULL)) != -1) { + switch (c) { + case OPT_HELP: + help(); + + case OPT_VERSION: + version(); + + default: + exit(EXIT_FAILURE); + } + } + + return; +} + + +/// Primitive base-2 logarithm for integers +static uint32_t +my_log2(uint32_t n) +{ + uint32_t e; + for (e = 0; n > 1; ++e, n /= 2) ; + return e; +} + + +/// Parse the .lzma header and display information about it. +static bool +lzmainfo(const char *name, FILE *f) +{ + uint8_t buf[13]; + const size_t size = fread(buf, 1, sizeof(buf), f); + if (size != 13) { + fprintf(stderr, "%s: %s: %s\n", progname, name, + ferror(f) ?
strerror(errno) + : _("File is too small to be a .lzma file")); + return true; + } + + lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; + + // Parse the first five bytes. + switch (lzma_properties_decode(&filter, NULL, buf, 5)) { + case LZMA_OK: + break; + + case LZMA_OPTIONS_ERROR: + fprintf(stderr, "%s: %s: %s\n", progname, name, + _("Not a .lzma file")); + return true; + + case LZMA_MEM_ERROR: + fprintf(stderr, "%s: %s\n", progname, strerror(ENOMEM)); + exit(EXIT_FAILURE); + + default: + fprintf(stderr, "%s: %s\n", progname, + _("Internal error (bug)")); + exit(EXIT_FAILURE); + } + + // Uncompressed size + uint64_t uncompressed_size = 0; + for (size_t i = 0; i < 8; ++i) + uncompressed_size |= (uint64_t)(buf[5 + i]) << (i * 8); + + // Display the results. We don't want to translate these and also + // will use MB instead of MiB, because someone could be parsing + // this output and we don't want to break that when people move + // from LZMA Utils to XZ Utils. + if (f != stdin) + printf("%s\n", name); + + printf("Uncompressed size: "); + if (uncompressed_size == UINT64_MAX) + printf("Unknown"); + else + printf("%" PRIu64 " MB (%" PRIu64 " bytes)", + (uncompressed_size + 512 * 1024) + / (1024 * 1024), + uncompressed_size); + + lzma_options_lzma *opt = filter.options; + + printf("\nDictionary size: " + "%u MB (2^%u bytes)\n" + "Literal context bits (lc): %" PRIu32 "\n" + "Literal pos bits (lp): %" PRIu32 "\n" + "Number of pos bits (pb): %" PRIu32 "\n", + (opt->dict_size + 512 * 1024) / (1024 * 1024), + my_log2(opt->dict_size), opt->lc, opt->lp, opt->pb); + + free(opt); + + return false; +} + + +extern int +main(int argc, char **argv) +{ + tuklib_progname_init(argv); + tuklib_gettext_init(PACKAGE, LOCALEDIR); + + parse_args(argc, argv); + + int ret = EXIT_SUCCESS; + + // We print empty lines around the output only when reading from + // files specified on the command line. This is due to how + // LZMA Utils did it. 
+ if (optind == argc) { + if (lzmainfo("(stdin)", stdin)) + ret = EXIT_FAILURE; + } else { + printf("\n"); + + do { + if (strcmp(argv[optind], "-") == 0) { + if (lzmainfo("(stdin)", stdin)) + ret = EXIT_FAILURE; + } else { + FILE *f = fopen(argv[optind], "r"); + if (f == NULL) { + ret = EXIT_FAILURE; + fprintf(stderr, "%s: %s: %s\n", + progname, + argv[optind], + strerror(errno)); + continue; + } + + if (lzmainfo(argv[optind], f)) + ret = EXIT_FAILURE; + + printf("\n"); + fclose(f); + } + } while (++optind < argc); + } + + tuklib_exit(ret, EXIT_FAILURE, true); +} Index: contrib/xz/src/lzmainfo/lzmainfo.1 =================================================================== --- contrib/xz/src/lzmainfo/lzmainfo.1 (revision 0) +++ contrib/xz/src/lzmainfo/lzmainfo.1 (revision 0) @@ -0,0 +1,55 @@ +.\" +.\" Author: Lasse Collin +.\" +.\" This file has been put into the public domain. +.\" You can do whatever you want with this file. +.\" +.TH LZMAINFO 1 "2009-08-13" "Tukaani" "XZ Utils" +.SH NAME +lzmainfo \- show information stored in the .lzma file header +.SH SYNOPSIS +.B lzmainfo +.RB [ \-\-help ] +.RB [ \-\-version ] +.RI [ file ]... +.SH DESCRIPTION +.B lzmainfo +shows information stored in the +.B .lzma +file header. It reads the first 13 bytes from the specified +.IR file , +decodes the header, and prints it to standard output in human +readable format. If no +.I files +are given or +.I file +is +.BR \- , +standard input is read. +.PP +Usually the most interesting information is the uncompressed size and +the dictionary size. Uncompressed size can be shown only if the file is +in the non-streamed +.B .lzma +format variant. The amount of memory required to decompress the file is +a few dozen kilobytes plus the dictionary size. +.PP +.B lzmainfo +is included in XZ Utils primarily for backward compatibility with LZMA Utils. +.SH EXIT STATUS +.TP +.B 0 +All is good. +.TP +.B 1 +An error occurred.
+.SH BUGS +.B lzmainfo +uses +.B MB +while the correct suffix would be +.B MiB +(2^20 bytes). +This is to keep the output compatible with LZMA Utils. +.SH SEE ALSO +.BR xz (1) Index: contrib/xz/src/xz/options.h =================================================================== --- contrib/xz/src/xz/options.h (revision 0) +++ contrib/xz/src/xz/options.h (revision 0) @@ -0,0 +1,38 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.h +/// \brief Parser for filter-specific options +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Parser for Subblock options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_subblock *options_subblock(const char *str); + + +/// \brief Parser for Delta options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_delta *options_delta(const char *str); + + +/// \brief Parser for BCJ options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_bcj *options_bcj(const char *str); + + +/// \brief Parser for LZMA options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_lzma *options_lzma(const char *str); Index: contrib/xz/src/xz/signals.c =================================================================== --- contrib/xz/src/xz/signals.c (revision 0) +++ contrib/xz/src/xz/signals.c (revision 0) @@ -0,0 +1,189 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file signals.c +/// \brief Handling signals to abort operation +// +// Author: Lasse Collin +// +// This file has been put into the public domain. 
+// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +volatile sig_atomic_t user_abort = false; + + +#ifndef _WIN32 + +/// If we were interrupted by a signal, we store the signal number so that +/// we can raise that signal to kill the program when all cleanups have +/// been done. +static volatile sig_atomic_t exit_signal = 0; + +/// Mask of signals for which we have established a signal handler to set +/// user_abort to true. +static sigset_t hooked_signals; + +/// True once signals_init() has finished. This is used to skip blocking +/// signals (with uninitialized hooked_signals) if signals_block() and +/// signals_unblock() are called before signals_init() has been called. +static bool signals_are_initialized = false; + +/// signals_block() and signals_unblock() can be called recursively. +static size_t signals_block_count = 0; + + +static void +signal_handler(int sig) +{ + exit_signal = sig; + user_abort = true; + return; +} + + +extern void +signals_init(void) +{ + // List of signals for which we establish the signal handler. + static const int sigs[] = { + SIGINT, + SIGTERM, +#ifdef SIGHUP + SIGHUP, +#endif +#ifdef SIGPIPE + SIGPIPE, +#endif +#ifdef SIGXCPU + SIGXCPU, +#endif +#ifdef SIGXFSZ + SIGXFSZ, +#endif + }; + + // Mask of the signals for which we have established a signal handler. + sigemptyset(&hooked_signals); + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) + sigaddset(&hooked_signals, sigs[i]); + + struct sigaction sa; + + // All the signals that we handle are also blocked while the signal + // handler runs. + sa.sa_mask = hooked_signals; + + // Don't set SA_RESTART, because we want EINTR so that we can check + // for user_abort and cleanup before exiting. We block the signals + // for which we have established a handler when we don't want EINTR.
+ sa.sa_flags = 0; + sa.sa_handler = &signal_handler; + + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) { + // If the parent process has left some signals ignored, + // we don't unignore them. + struct sigaction old; + if (sigaction(sigs[i], NULL, &old) == 0 + && old.sa_handler == SIG_IGN) + continue; + + // Establish the signal handler. + if (sigaction(sigs[i], &sa, NULL)) + message_signal_handler(); + } + + signals_are_initialized = true; + + return; +} + + +#ifndef __VMS +extern void +signals_block(void) +{ + if (signals_are_initialized) { + if (signals_block_count++ == 0) { + const int saved_errno = errno; + mythread_sigmask(SIG_BLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + } + + return; +} + + +extern void +signals_unblock(void) +{ + if (signals_are_initialized) { + assert(signals_block_count > 0); + + if (--signals_block_count == 0) { + const int saved_errno = errno; + mythread_sigmask(SIG_UNBLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + } + + return; +} +#endif + + +extern void +signals_exit(void) +{ + const int sig = exit_signal; + + if (sig != 0) { + struct sigaction sa; + sa.sa_handler = SIG_DFL; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(sig, &sa, NULL); + raise(sig); + } + + return; +} + +#else + +// While Windows has some very basic signal handling functions as required +// by C89, they are not really used, or so I understood. Instead, we use +// SetConsoleCtrlHandler() to catch user pressing C-c. + +#include <windows.h> + + +static BOOL WINAPI +signal_handler(DWORD type lzma_attribute((unused))) +{ + // Since we don't get a signal number which we could raise() at + // signals_exit() like on POSIX, just set the exit status to + // indicate an error, so that we cannot return with zero exit status. + // + // FIXME: Since this function runs in its own thread, + // set_exit_status() should have a mutex.
+ set_exit_status(E_ERROR); + user_abort = true; + return TRUE; +} + + +extern void +signals_init(void) +{ + if (!SetConsoleCtrlHandler(&signal_handler, TRUE)) + message_signal_handler(); + + return; +} + +#endif Index: contrib/xz/src/xz/coder.h =================================================================== --- contrib/xz/src/xz/coder.h (revision 0) +++ contrib/xz/src/xz/coder.h (revision 0) @@ -0,0 +1,57 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file coder.h +/// \brief Compresses or uncompresses a file +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +enum operation_mode { + MODE_COMPRESS, + MODE_DECOMPRESS, + MODE_TEST, + MODE_LIST, +}; + + +// NOTE: The order of these is significant in suffix.c. +enum format_type { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_LZMA, + // HEADER_GZIP, + FORMAT_RAW, +}; + + +/// Operation mode of the command line tool. This is set in args.c and read +/// in several files. +extern enum operation_mode opt_mode; + +/// File format to use when encoding or what format(s) to accept when +/// decoding. This is a global because it's needed also in suffix.c. +/// This is set in args.c. 
+extern enum format_type opt_format; + + +/// Set the integrity check type used when compressing +extern void coder_set_check(lzma_check check); + +/// Set preset number +extern void coder_set_preset(size_t new_preset); + +/// Enable extreme mode +extern void coder_set_extreme(void); + +/// Add a filter to the custom filter chain +extern void coder_add_filter(lzma_vli id, void *options); + +/// +extern void coder_set_compression_settings(void); + +/// Compress or decompress the given file +extern void coder_run(const char *filename); Index: contrib/xz/src/xz/args.h =================================================================== --- contrib/xz/src/xz/args.h (revision 0) +++ contrib/xz/src/xz/args.h (revision 0) @@ -0,0 +1,42 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.h +/// \brief Argument parsing +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +typedef struct { + /// Filenames from command line + char **arg_names; + + /// Number of filenames from command line + size_t arg_count; + + /// Name of the file from which to read filenames. This is NULL + /// if --files or --files0 was not used. + char *files_name; + + /// File opened for reading from which filenames are read. This is + /// non-NULL only if files_name is non-NULL. 
+ FILE *files_file; + + /// Delimiter for filenames read from files_file + char files_delim; + +} args_info; + + +extern bool opt_stdout; +extern bool opt_force; +extern bool opt_keep_original; +// extern bool opt_recursive; +extern bool opt_robot; + +extern const char *stdin_filename; + +extern void args_parse(args_info *args, int argc, char **argv); Index: contrib/xz/src/xz/hardware.h =================================================================== --- contrib/xz/src/xz/hardware.h (revision 0) +++ contrib/xz/src/xz/hardware.h (revision 0) @@ -0,0 +1,35 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.h +/// \brief Detection of available hardware resources +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// Initialize some hardware-specific variables, which are needed by other +/// hardware_* functions. +extern void hardware_init(void); + + +/// Set custom value for maximum number of coder threads. +extern void hardware_threadlimit_set(uint32_t threadlimit); + +/// Get the maximum number of coder threads. (Some additional helper threads +/// are allowed on top of this.) +extern uint32_t hardware_threadlimit_get(void); + + +/// Set custom memory usage limit. This is used for both encoding and +/// decoding. Zero indicates resetting the limit back to defaults. +extern void hardware_memlimit_set(uint64_t memlimit); + +/// Set custom memory usage limit as a percentage of installed RAM. +/// The percentage must be in the range [1, 100]. +extern void hardware_memlimit_set_percentage(uint32_t percentage); + +/// Get the current memory usage limit.
+extern uint64_t hardware_memlimit_get(void); Index: contrib/xz/src/xz/suffix.c =================================================================== --- contrib/xz/src/xz/suffix.c (revision 0) +++ contrib/xz/src/xz/suffix.c (revision 0) @@ -0,0 +1,211 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.c +/// \brief Checks filename suffix and creates the destination filename +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +// For case-insensitive filename suffix on case-insensitive systems +#if defined(TUKLIB_DOSLIKE) || defined(__VMS) +# define strcmp strcasecmp +#endif + + +static char *custom_suffix = NULL; + + +struct suffix_pair { + const char *compressed; + const char *uncompressed; +}; + + +/// \brief Checks if src_name has given compressed_suffix +/// +/// \param suffix Filename suffix to look for +/// \param src_name Input filename +/// \param src_len strlen(src_name) +/// +/// \return If src_name has the suffix, src_len - strlen(suffix) is +/// returned. It's always a positive integer. Otherwise zero +/// is returned. +static size_t +test_suffix(const char *suffix, const char *src_name, size_t src_len) +{ + const size_t suffix_len = strlen(suffix); + + // The filename must have at least one character in addition to + // the suffix. src_name may contain path to the filename, so we + // need to check for directory separator too. + if (src_len <= suffix_len || src_name[src_len - suffix_len - 1] == '/') + return 0; + + if (strcmp(suffix, src_name + src_len - suffix_len) == 0) + return src_len - suffix_len; + + return 0; +} + + +/// \brief Removes the filename suffix of the compressed file +/// +/// \return Name of the uncompressed file, or NULL if file has unknown +/// suffix. 
+static char * +uncompressed_name(const char *src_name, const size_t src_len) +{ + static const struct suffix_pair suffixes[] = { + { ".xz", "" }, + { ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare. + { ".lzma", "" }, + { ".tlz", ".tar" }, + // { ".gz", "" }, + // { ".tgz", ".tar" }, + }; + + const char *new_suffix = ""; + size_t new_len = 0; + + if (opt_format == FORMAT_RAW) { + // Don't check for known suffixes when --format=raw was used. + if (custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " + "--suffix=.SUF is required unless " + "writing to stdout"), src_name); + return NULL; + } + } else { + for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) { + new_len = test_suffix(suffixes[i].compressed, + src_name, src_len); + if (new_len != 0) { + new_suffix = suffixes[i].uncompressed; + break; + } + } + } + + if (new_len == 0 && custom_suffix != NULL) + new_len = test_suffix(custom_suffix, src_name, src_len); + + if (new_len == 0) { + message_warning(_("%s: Filename has an unknown suffix, " + "skipping"), src_name); + return NULL; + } + + const size_t new_suffix_len = strlen(new_suffix); + char *dest_name = xmalloc(new_len + new_suffix_len + 1); + + memcpy(dest_name, src_name, new_len); + memcpy(dest_name + new_len, new_suffix, new_suffix_len); + dest_name[new_len + new_suffix_len] = '\0'; + + return dest_name; +} + + +/// \brief Appends suffix to src_name +/// +/// In contrast to uncompressed_name(), we check only suffixes that are valid +/// for the specified file format. +static char * +compressed_name(const char *src_name, const size_t src_len) +{ + // The order of these must match the order in coder.h. + static const struct suffix_pair all_suffixes[][3] = { + { + { ".xz", "" }, + { ".txz", ".tar" }, + { NULL, NULL } + }, { + { ".lzma", "" }, + { ".tlz", ".tar" }, + { NULL, NULL } +/* + }, { + { ".gz", "" }, + { ".tgz", ".tar" }, + { NULL, NULL } +*/ + }, { + // --format=raw requires specifying the suffix + // manually or using stdout.
+ { NULL, NULL } + } + }; + + // args.c ensures this. + assert(opt_format != FORMAT_AUTO); + + const size_t format = opt_format - 1; + const struct suffix_pair *const suffixes = all_suffixes[format]; + + for (size_t i = 0; suffixes[i].compressed != NULL; ++i) { + if (test_suffix(suffixes[i].compressed, src_name, src_len) + != 0) { + message_warning(_("%s: File already has `%s' " + "suffix, skipping"), src_name, + suffixes[i].compressed); + return NULL; + } + } + + // TODO: Hmm, maybe it would be better to validate this in args.c, + // since the suffix handling when decoding is weird now. + if (opt_format == FORMAT_RAW && custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " + "--suffix=.SUF is required unless " + "writing to stdout"), src_name); + return NULL; + } + + const char *suffix = custom_suffix != NULL + ? custom_suffix : suffixes[0].compressed; + const size_t suffix_len = strlen(suffix); + + char *dest_name = xmalloc(src_len + suffix_len + 1); + + memcpy(dest_name, src_name, src_len); + memcpy(dest_name + src_len, suffix, suffix_len); + dest_name[src_len + suffix_len] = '\0'; + + return dest_name; +} + + +extern char * +suffix_get_dest_name(const char *src_name) +{ + assert(src_name != NULL); + + // Length of the name is needed in all cases to locate the end of + // the string to compare the suffix, so calculate the length here. + const size_t src_len = strlen(src_name); + + return opt_mode == MODE_COMPRESS + ? compressed_name(src_name, src_len) + : uncompressed_name(src_name, src_len); +} + + +extern void +suffix_set(const char *suffix) +{ + // Empty suffix and suffixes having a slash are rejected. Such + // suffixes would break things later. + if (suffix[0] == '\0' || strchr(suffix, '/') != NULL) + message_fatal(_("%s: Invalid filename suffix"), suffix); + + // Replace the old custom_suffix (if any) with the new suffix.
+ free(custom_suffix); + custom_suffix = xstrdup(suffix); + return; +} Index: contrib/xz/src/xz/signals.h =================================================================== --- contrib/xz/src/xz/signals.h (revision 0) +++ contrib/xz/src/xz/signals.h (revision 0) @@ -0,0 +1,43 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file signals.h +/// \brief Handling signals to abort operation +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// If this is true, we will clean up the possibly incomplete output file and +/// return to main() as soon as practical. That is, the code needs to poll +/// this variable in various places. +extern volatile sig_atomic_t user_abort; + + +/// Initialize the signal handler, which will set user_abort to true when +/// user e.g. presses C-c. +extern void signals_init(void); + + +#if defined(_WIN32) || defined(__VMS) +# define signals_block() do { } while (0) +# define signals_unblock() do { } while (0) +#else +/// Block the signals which don't have SA_RESTART and which would just set +/// user_abort to true. This is handy when we don't want to handle EINTR +/// and don't want SA_RESTART either. +extern void signals_block(void); + +/// Unblock the signals blocked by signals_block(). +extern void signals_unblock(void); +#endif + +#ifdef _WIN32 +# define signals_exit() do { } while (0) +#else +/// If user has sent us a signal earlier to terminate the process, +/// re-raise that signal to actually terminate the process.
+extern void signals_exit(void); +#endif Index: contrib/xz/src/xz/file_io.c =================================================================== --- contrib/xz/src/xz/file_io.c (revision 0) +++ contrib/xz/src/xz/file_io.c (revision 0) @@ -0,0 +1,957 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file file_io.c +/// \brief File opening, unlinking, and closing +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include <fcntl.h> + +#ifdef TUKLIB_DOSLIKE +# include <io.h> +#else +static bool warn_fchown; +#endif + +#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) +# include <sys/time.h> +#elif defined(HAVE_UTIME) +# include <utime.h> +#endif + +#include "tuklib_open_stdxxx.h" + +#ifndef O_BINARY +# define O_BINARY 0 +#endif + +#ifndef O_NOCTTY +# define O_NOCTTY 0 +#endif + + +/// If true, try to create sparse files when decompressing. +static bool try_sparse = true; + +#ifndef TUKLIB_DOSLIKE +/// File status flags of standard output. This is used by io_open_dest() +/// and io_close_dest(). +static int stdout_flags = 0; +#endif + + +static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); + + +extern void +io_init(void) +{ + // Make sure that stdin, stdout, and stderr are connected to + // a valid file descriptor. Exit immediately with exit code ERROR + // if we cannot make the file descriptors valid. Maybe we should + // print an error message, but our stderr could be screwed anyway. + tuklib_open_stdxxx(E_ERROR); + +#ifndef TUKLIB_DOSLIKE + // If fchown() fails setting the owner, we warn about it only if + // we are root. + warn_fchown = geteuid() == 0; +#endif + +#ifdef __DJGPP__ + // Avoid doing useless things when statting files. + // This isn't important but doesn't hurt.
+ _djstat_flags = _STAT_INODE | _STAT_EXEC_EXT + | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; +#endif + + return; +} + + +extern void +io_no_sparse(void) +{ + try_sparse = false; + return; +} + + +/// \brief Unlink a file +/// +/// This tries to verify that the file being unlinked really is the file that +/// we want to unlink by verifying device and inode numbers. There's still +/// a small unavoidable race, but this is much better than nothing (the file +/// could have been moved/replaced even hours earlier). +static void +io_unlink(const char *name, const struct stat *known_st) +{ +#if defined(TUKLIB_DOSLIKE) + // On DOS-like systems, st_ino is meaningless, so don't bother + // testing it. Just silence a compiler warning. + (void)known_st; +#else + struct stat new_st; + + // If --force was used, use stat() instead of lstat(). This way + // (de)compressing symlinks works correctly. However, it also means + // that xz cannot detect if a regular file foo is renamed to bar + // and then a symlink foo -> bar is created. Because of stat() + // instead of lstat(), xz will think that foo hasn't been replaced + // with another file. Thus, xz will remove foo even though it no + // longer is the same file that xz used when it started compressing. + // Probably it's not too bad though, so this doesn't need a more + // complex fix. + const int stat_ret = opt_force + ? stat(name, &new_st) : lstat(name, &new_st); + + if (stat_ret +# ifdef __VMS + // st_ino is an array, and we don't want to + // compare st_dev at all. 
+ || memcmp(&new_st.st_ino, &known_st->st_ino, + sizeof(new_st.st_ino)) != 0 +# else + // Typical POSIX-like system + || new_st.st_dev != known_st->st_dev + || new_st.st_ino != known_st->st_ino +# endif + ) + // TRANSLATORS: When compression or decompression finishes, + // and xz is going to remove the source file, xz first checks + // if the source file still exists, and if it does, does its + // device and inode numbers match what xz saw when it opened + // the source file. If these checks fail, this message is + // shown, %s being the filename, and the file is not deleted. + // The check for device and inode numbers is there, because + // it is possible that the user has put a new file in place + // of the original file, and in that case it obviously + // shouldn't be removed. + message_error(_("%s: File seems to have been moved, " + "not removing"), name); + else +#endif + // There's a race condition between lstat() and unlink() + // but at least we have tried to avoid removing wrong file. + if (unlink(name)) + message_error(_("%s: Cannot remove: %s"), + name, strerror(errno)); + + return; +} + + +/// \brief Copies owner/group and permissions +/// +/// \todo ACL and EA support +/// +static void +io_copy_attrs(const file_pair *pair) +{ + // Skip chown and chmod on Windows. +#ifndef TUKLIB_DOSLIKE + // This function is more tricky than you may think at first. + // Blindly copying permissions may permit users to access the + // destination file who didn't have permission to access the + // source file. + + // Try changing the owner of the file. If we aren't root or the owner + // isn't already us, fchown() probably doesn't succeed. We warn + // about failing fchown() only if we are root. 
+ if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown) + message_warning(_("%s: Cannot set the file owner: %s"), + pair->dest_name, strerror(errno)); + + mode_t mode; + + if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { + message_warning(_("%s: Cannot set the file group: %s"), + pair->dest_name, strerror(errno)); + // We can still safely copy some additional permissions: + // `group' must be at least as strict as `other' and + // also vice versa. + // + // NOTE: After this, the owner of the source file may + // get additional permissions. This shouldn't be too bad, + // because the owner would have had permission to chmod + // the original file anyway. + mode = ((pair->src_st.st_mode & 0070) >> 3) + & (pair->src_st.st_mode & 0007); + mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; + } else { + // Drop the setuid, setgid, and sticky bits. + mode = pair->src_st.st_mode & 0777; + } + + if (fchmod(pair->dest_fd, mode)) + message_warning(_("%s: Cannot set the file permissions: %s"), + pair->dest_name, strerror(errno)); +#endif + + // Copy the timestamps. We have several possible ways to do this, of + // which some are better in both security and precision. + // + // First, get the nanosecond part of the timestamps. As of writing, + // it's not standardized by POSIX, and there are several names for + // the same thing in struct stat. 
+ long atime_nsec; + long mtime_nsec; + +# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) + // GNU and Solaris + atime_nsec = pair->src_st.st_atim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) + // BSD + atime_nsec = pair->src_st.st_atimespec.tv_nsec; + mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) + // GNU and BSD without extensions + atime_nsec = pair->src_st.st_atimensec; + mtime_nsec = pair->src_st.st_mtimensec; + +# elif defined(HAVE_STRUCT_STAT_ST_UATIME) + // Tru64 + atime_nsec = pair->src_st.st_uatime * 1000; + mtime_nsec = pair->src_st.st_umtime * 1000; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) + // UnixWare + atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; + +# else + // Safe fallback + atime_nsec = 0; + mtime_nsec = 0; +# endif + + // Construct a structure to hold the timestamps and call appropriate + // function to set the timestamps. +#if defined(HAVE_FUTIMENS) + // Use nanosecond precision. + struct timespec tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_nsec = atime_nsec; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_nsec = mtime_nsec; + + (void)futimens(pair->dest_fd, tv); + +#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) + // Use microsecond precision. + struct timeval tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_usec = atime_nsec / 1000; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_usec = mtime_nsec / 1000; + +# if defined(HAVE_FUTIMES) + (void)futimes(pair->dest_fd, tv); +# elif defined(HAVE_FUTIMESAT) + (void)futimesat(pair->dest_fd, NULL, tv); +# else + // Argh, no function to use a file descriptor to set the timestamp. + (void)utimes(pair->dest_name, tv); +# endif + +#elif defined(HAVE_UTIME) + // Use one-second precision. utime() doesn't support using file + // descriptor either. 
Some systems have broken utime() prototype + // so don't make this const. + struct utimbuf buf = { + .actime = pair->src_st.st_atime, + .modtime = pair->src_st.st_mtime, + }; + + // Avoid warnings. + (void)atime_nsec; + (void)mtime_nsec; + + (void)utime(pair->dest_name, &buf); +#endif + + return; +} + + +/// Opens the source file. Returns false on success, true on error. +static bool +io_open_src_real(file_pair *pair) +{ + // There's nothing to open when reading from stdin. + if (pair->src_name == stdin_filename) { + pair->src_fd = STDIN_FILENO; +#ifdef TUKLIB_DOSLIKE + setmode(STDIN_FILENO, O_BINARY); +#endif + return false; + } + + // Symlinks are not followed unless writing to stdout or --force + // was used. + const bool follow_symlinks = opt_stdout || opt_force; + + // We accept only regular files if we are writing the output + // to disk too. bzip2 allows overriding this with --force but + // gzip and xz don't. + const bool reg_files_only = !opt_stdout; + + // Flags for open() + int flags = O_RDONLY | O_BINARY | O_NOCTTY; + +#ifndef TUKLIB_DOSLIKE + // If we accept only regular files, we need to be careful to avoid + // problems with special files like devices and FIFOs. O_NONBLOCK + // prevents blocking when opening such files. When we want to accept + // special files, we must not use O_NONBLOCK, or otherwise we won't + // block waiting e.g. FIFOs to become readable. + if (reg_files_only) + flags |= O_NONBLOCK; +#endif + +#if defined(O_NOFOLLOW) + if (!follow_symlinks) + flags |= O_NOFOLLOW; +#elif !defined(TUKLIB_DOSLIKE) + // Some POSIX-like systems lack O_NOFOLLOW (it's not required + // by POSIX). Check for symlinks with a separate lstat() on + // these systems. 
+ if (!follow_symlinks) { + struct stat st; + if (lstat(pair->src_name, &st)) { + message_error("%s: %s", pair->src_name, + strerror(errno)); + return true; + + } else if (S_ISLNK(st.st_mode)) { + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + return true; + } + } +#else + // Avoid warnings. + (void)follow_symlinks; +#endif + + // Try to open the file. If we are accepting non-regular files, + // unblock the caught signals so that open() can be interrupted + // if it blocks e.g. due to a FIFO file. + if (!reg_files_only) + signals_unblock(); + + // Maybe this wouldn't need a loop, since all the signal handlers for + // which we don't use SA_RESTART set user_abort to true. But it + // doesn't hurt to have it just in case. + do { + pair->src_fd = open(pair->src_name, flags); + } while (pair->src_fd == -1 && errno == EINTR && !user_abort); + + if (!reg_files_only) + signals_block(); + + if (pair->src_fd == -1) { + // If we were interrupted, don't display any error message. + if (errno == EINTR) { + // All the signals that don't have SA_RESTART + // set user_abort. + assert(user_abort); + return true; + } + +#ifdef O_NOFOLLOW + // Give an understandable error message if the reason + // for failing was that the file was a symbolic link. + // + // Note that at least Linux, OpenBSD, Solaris, and Darwin + // use ELOOP to indicate if O_NOFOLLOW was the reason + // that open() failed. Because there may be + // directories in the pathname, ELOOP may occur also + // because of a symlink loop in the directory part. + // So ELOOP doesn't tell us what actually went wrong. + // + // FreeBSD associates EMLINK with O_NOFOLLOW and + // Tru64 uses ENOTSUP. We use these directly here + // and skip the lstat() call and the associated race. + // I want to hear if there are other kernels that + // fail with something else than ELOOP with O_NOFOLLOW.
+ bool was_symlink = false; + +# if defined(__FreeBSD__) || defined(__DragonFly__) + if (errno == EMLINK) + was_symlink = true; + +# elif defined(__digital__) && defined(__unix__) + if (errno == ENOTSUP) + was_symlink = true; + +# elif defined(__NetBSD__) + // FIXME? As of 2008-11-20, NetBSD doesn't document what + // errno is used with O_NOFOLLOW. It seems to be EFTYPE, + // but since it isn't documented, it may be wrong to rely + // on it here. + if (errno == EFTYPE) + was_symlink = true; + +# else + if (errno == ELOOP && !follow_symlinks) { + const int saved_errno = errno; + struct stat st; + if (lstat(pair->src_name, &st) == 0 + && S_ISLNK(st.st_mode)) + was_symlink = true; + + errno = saved_errno; + } +# endif + + if (was_symlink) + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + else +#endif + // Something else than O_NOFOLLOW failing + // (assuming that the race conditions didn't + // confuse us). + message_error("%s: %s", pair->src_name, + strerror(errno)); + + return true; + } + +#ifndef TUKLIB_DOSLIKE + // Drop O_NONBLOCK, which is used only when we are accepting only + // regular files. After the open() call, we want things to block + // instead of giving EAGAIN. + if (reg_files_only) { + flags = fcntl(pair->src_fd, F_GETFL); + if (flags == -1) + goto error_msg; + + flags &= ~O_NONBLOCK; + + if (fcntl(pair->src_fd, F_SETFL, flags)) + goto error_msg; + } +#endif + + // Stat the source file. We need the result also when we copy + // the permissions, and when unlinking. + if (fstat(pair->src_fd, &pair->src_st)) + goto error_msg; + + if (S_ISDIR(pair->src_st.st_mode)) { + message_warning(_("%s: Is a directory, skipping"), + pair->src_name); + goto error; + } + + if (reg_files_only) { + if (!S_ISREG(pair->src_st.st_mode)) { + message_warning(_("%s: Not a regular file, " + "skipping"), pair->src_name); + goto error; + } + + // These are meaningless on Windows. 
+#ifndef TUKLIB_DOSLIKE + if (!opt_force && (pair->src_st.st_mode & (S_ISUID | S_ISGID))) { + // gzip rejects setuid and setgid files even + // when --force was used. bzip2 doesn't check + // for them, but calls fchown() after fchmod(), + // and many systems automatically drop setuid + // and setgid bits there. + // + // We accept setuid, setgid, sticky, and multi-link + // files if --force was used. We drop these bits + // explicitly in io_copy_attrs(). + message_warning(_("%s: File has setuid or " + "setgid bit set, skipping"), + pair->src_name); + goto error; + } + + if (!opt_force && (pair->src_st.st_mode & S_ISVTX)) { + message_warning(_("%s: File has sticky bit " + "set, skipping"), + pair->src_name); + goto error; + } + + if (!opt_force && pair->src_st.st_nlink > 1) { + message_warning(_("%s: Input file has more " + "than one hard link, " + "skipping"), pair->src_name); + goto error; + } +#endif + } + + return false; + +error_msg: + message_error("%s: %s", pair->src_name, strerror(errno)); +error: + (void)close(pair->src_fd); + return true; +} + + +extern file_pair * +io_open_src(const char *src_name) +{ + if (is_empty_filename(src_name)) + return NULL; + + // Since we have only one file open at a time, we can use + // a statically allocated structure. + static file_pair pair; + + pair = (file_pair){ + .src_name = src_name, + .dest_name = NULL, + .src_fd = -1, + .dest_fd = -1, + .src_eof = false, + .dest_try_sparse = false, + .dest_pending_sparse = 0, + }; + + // Block the signals, for which we have a custom signal handler, so + // that we don't need to worry about EINTR. + signals_block(); + const bool error = io_open_src_real(&pair); + signals_unblock(); + + return error ? NULL : &pair; +} + + +/// \brief Closes source file of the file_pair structure +/// +/// \param pair File whose src_fd should be closed +/// \param success If true, the file will be removed from the disk if +/// closing succeeds and --keep hasn't been used.
+static void +io_close_src(file_pair *pair, bool success) +{ + if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { +#ifdef TUKLIB_DOSLIKE + (void)close(pair->src_fd); +#endif + + // If we are going to unlink(), do it before closing the file. + // This way there's no risk that someone replaces the file and + // happens to get same inode number, which would make us + // unlink() wrong file. + // + // NOTE: DOS-like systems are an exception to this, because + // they don't allow unlinking files that are open. *sigh* + if (success && !opt_keep_original) + io_unlink(pair->src_name, &pair->src_st); + +#ifndef TUKLIB_DOSLIKE + (void)close(pair->src_fd); +#endif + } + + return; +} + + +static bool +io_open_dest_real(file_pair *pair) +{ + if (opt_stdout || pair->src_fd == STDIN_FILENO) { + // We don't modify or free() this. + pair->dest_name = (char *)"(stdout)"; + pair->dest_fd = STDOUT_FILENO; +#ifdef TUKLIB_DOSLIKE + setmode(STDOUT_FILENO, O_BINARY); +#endif + } else { + pair->dest_name = suffix_get_dest_name(pair->src_name); + if (pair->dest_name == NULL) + return true; + + // If --force was used, unlink the target file first. + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error(_("%s: Cannot remove: %s"), + pair->dest_name, strerror(errno)); + free(pair->dest_name); + return true; + } + + // Open the file. + const int flags = O_WRONLY | O_BINARY | O_NOCTTY + | O_CREAT | O_EXCL; + const mode_t mode = S_IRUSR | S_IWUSR; + pair->dest_fd = open(pair->dest_name, flags, mode); + + if (pair->dest_fd == -1) { + message_error("%s: %s", pair->dest_name, + strerror(errno)); + free(pair->dest_name); + return true; + } + } + + // If this really fails... well, we have a safe fallback. 
+ if (fstat(pair->dest_fd, &pair->dest_st)) { +#if defined(__VMS) + pair->dest_st.st_ino[0] = 0; + pair->dest_st.st_ino[1] = 0; + pair->dest_st.st_ino[2] = 0; +#elif !defined(TUKLIB_DOSLIKE) + pair->dest_st.st_dev = 0; + pair->dest_st.st_ino = 0; +#endif +#ifndef TUKLIB_DOSLIKE + } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { + // When writing to standard output, we need to be extra + // careful: + // - It may be connected to something else than + // a regular file. + // - We aren't necessarily writing to a new empty file + // or to the end of an existing file. + // - O_APPEND may be active. + // + // TODO: I'm keeping this disabled for DOS-like systems + // for now. FAT doesn't support sparse files, but NTFS + // does, so maybe this should be enabled on Windows after + // some testing. + if (pair->dest_fd == STDOUT_FILENO) { + if (!S_ISREG(pair->dest_st.st_mode)) + return false; + + const int flags = fcntl(STDOUT_FILENO, F_GETFL); + if (flags == -1) + return false; + + if (flags & O_APPEND) { + // Creating a sparse file is not possible + // when O_APPEND is active (it's used by + // shell's >> redirection). As I understand + // it, it is safe to temporarily disable + // O_APPEND in xz, because if someone + // happened to write to the same file at the + // same time, results would be bad anyway + // (users shouldn't assume that xz uses any + // specific block size when writing data). + // + // The write position may be something else + // than the end of the file, so we must fix + // it to start writing at the end of the file + // to imitate O_APPEND. + if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) + return false; + + if (fcntl(STDOUT_FILENO, F_SETFL, + flags & ~O_APPEND)) + return false; + + // Remember the flags so that io_close_dest() + // can restore them. + stdout_flags = flags; + + } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) + != pair->dest_st.st_size) { + // Writing won't start exactly at the end + // of the file.
We cannot use sparse output, + // because it would probably corrupt the file. + return false; + } + } + + pair->dest_try_sparse = true; +#endif + } + + return false; +} + + +extern bool +io_open_dest(file_pair *pair) +{ + signals_block(); + const bool ret = io_open_dest_real(pair); + signals_unblock(); + return ret; +} + + +/// \brief Closes destination file of the file_pair structure +/// +/// \param pair File whose dest_fd should be closed +/// \param success If false, the file will be removed from the disk. +/// +/// \return False if closing succeeds. On error, true is returned and +/// an error message is printed. +static bool +io_close_dest(file_pair *pair, bool success) +{ +#ifndef TUKLIB_DOSLIKE + // If io_open_dest() has disabled O_APPEND, restore it here. + if (stdout_flags != 0) { + assert(pair->dest_fd == STDOUT_FILENO); + + const int fail = fcntl(STDOUT_FILENO, F_SETFL, stdout_flags); + stdout_flags = 0; + + if (fail) { + message_error(_("Error restoring the O_APPEND flag " + "to standard output: %s"), + strerror(errno)); + return true; + } + } +#endif + + if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) + return false; + + if (close(pair->dest_fd)) { + message_error(_("%s: Closing the file failed: %s"), + pair->dest_name, strerror(errno)); + + // Closing destination file failed, so we cannot trust its + // contents. Get rid of junk: + io_unlink(pair->dest_name, &pair->dest_st); + free(pair->dest_name); + return true; + } + + // If the operation using this file wasn't successful, we get rid + // of the junk file. + if (!success) + io_unlink(pair->dest_name, &pair->dest_st); + + free(pair->dest_name); + + return false; +} + + +extern void +io_close(file_pair *pair, bool success) +{ + // Take care of sparseness at the end of the output file. + if (success && pair->dest_try_sparse + && pair->dest_pending_sparse > 0) { + // Seek forward one byte less than the size of the pending + // hole, then write one zero-byte.
This way the file grows + // to its correct size. An alternative would be to use + // ftruncate() but that isn't portable enough (e.g. it + // doesn't work with FAT on Linux; FAT isn't that important + // since it doesn't support sparse files anyway, but we don't + // want to create corrupt files on it). + if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, + SEEK_CUR) == -1) { + message_error(_("%s: Seeking failed when trying " + "to create a sparse file: %s"), + pair->dest_name, strerror(errno)); + success = false; + } else { + const uint8_t zero[1] = { '\0' }; + if (io_write_buf(pair, zero, 1)) + success = false; + } + } + + signals_block(); + + // Copy the file attributes. We need to skip this if destination + // file isn't open or it is standard output. + if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) + io_copy_attrs(pair); + + // Close the destination first. If it fails, we must not remove + // the source file! + if (io_close_dest(pair, success)) + success = false; + + // Close the source file, and unlink it if the operation using this + // file pair was successful and we haven't requested to keep the + // source file. + io_close_src(pair, success); + + signals_unblock(); + + return; +} + + +extern size_t +io_read(file_pair *pair, io_buf *buf_union, size_t size) +{ + // We use small buffers here. + assert(size < SSIZE_MAX); + + uint8_t *buf = buf_union->u8; + size_t left = size; + + while (left > 0) { + const ssize_t amount = read(pair->src_fd, buf, left); + + if (amount == 0) { + pair->src_eof = true; + break; + } + + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return SIZE_MAX; + + continue; + } + + message_error(_("%s: Read error: %s"), + pair->src_name, strerror(errno)); + + // FIXME Is this needed? 
+ pair->src_eof = true; + + return SIZE_MAX; + } + + buf += (size_t)(amount); + left -= (size_t)(amount); + } + + return size - left; +} + + +extern bool +io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos) +{ + // Using lseek() and read() is more portable than pread() and + // for us it is as good as real pread(). + if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { + message_error(_("%s: Error seeking the file: %s"), + pair->src_name, strerror(errno)); + return true; + } + + const size_t amount = io_read(pair, buf, size); + if (amount == SIZE_MAX) + return true; + + if (amount != size) { + message_error(_("%s: Unexpected end of file"), + pair->src_name); + return true; + } + + return false; +} + + +static bool +is_sparse(const io_buf *buf) +{ + assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); + + for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) + if (buf->u64[i] != 0) + return false; + + return true; +} + + +static bool +io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) +{ + assert(size < SSIZE_MAX); + + while (size > 0) { + const ssize_t amount = write(pair->dest_fd, buf, size); + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return true; + + continue; + } + + // Handle broken pipe specially. gzip and bzip2 + // don't print anything on SIGPIPE. In addition, + // gzip --quiet uses exit status 2 (warning) on + // broken pipe instead of whatever raise(SIGPIPE) + // would make it return. It is there to hide "Broken + // pipe" message on some old shells (probably old + // GNU bash). + // + // We don't do anything special with --quiet, which + // is what bzip2 does too. If we get SIGPIPE, we + // will handle it like other signals by setting + // user_abort, and get EPIPE here.
+ if (errno != EPIPE) + message_error(_("%s: Write error: %s"), + pair->dest_name, strerror(errno)); + + return true; + } + + buf += (size_t)(amount); + size -= (size_t)(amount); + } + + return false; +} + + +extern bool +io_write(file_pair *pair, const io_buf *buf, size_t size) +{ + assert(size <= IO_BUFFER_SIZE); + + if (pair->dest_try_sparse) { + // Check if the block is sparse (contains only zeros). If it + // is sparse, we just store the amount and return. We will take + // care of actually skipping over the hole when we hit the + // next data block or close the file. + // + // Since io_close() requires that dest_pending_sparse > 0 + // if the file ends with sparse block, we must also return + // if size == 0 to avoid doing the lseek(). + if (size == IO_BUFFER_SIZE) { + if (is_sparse(buf)) { + pair->dest_pending_sparse += size; + return false; + } + } else if (size == 0) { + return false; + } + + // This is not a sparse block. If we have a pending hole, + // skip it now. + if (pair->dest_pending_sparse > 0) { + if (lseek(pair->dest_fd, pair->dest_pending_sparse, + SEEK_CUR) == -1) { + message_error(_("%s: Seeking failed when " + "trying to create a sparse " + "file: %s"), pair->dest_name, + strerror(errno)); + return true; + } + + pair->dest_pending_sparse = 0; + } + } + + return io_write_buf(pair, buf->u8, size); +} Index: contrib/xz/src/xz/suffix.h =================================================================== --- contrib/xz/src/xz/suffix.h (revision 0) +++ contrib/xz/src/xz/suffix.h (revision 0) @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.h +/// \brief Checks filename suffix and creates the destination filename +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Get the name of the destination file +/// +/// Depending on the global variable opt_mode, this tries to find a matching +/// counterpart for src_name. If the name can be constructed, it is allocated +/// and returned (caller must free it). On error, a message is printed and +/// NULL is returned. +extern char *suffix_get_dest_name(const char *src_name); + + +/// \brief Set a custom filename suffix +/// +/// This function calls xstrdup() for the given suffix, thus the caller +/// doesn't need to keep the memory allocated. There can be only one custom +/// suffix, thus if this is called multiple times, the old suffixes are freed +/// and forgotten. +extern void suffix_set(const char *suffix); Index: contrib/xz/src/xz/file_io.h =================================================================== --- contrib/xz/src/xz/file_io.h (revision 0) +++ contrib/xz/src/xz/file_io.h (revision 0) @@ -0,0 +1,129 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file file_io.h +/// \brief I/O types and functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. +// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) +#if BUFSIZ <= 1024 +# define IO_BUFFER_SIZE 8192 +#else +# define IO_BUFFER_SIZE (BUFSIZ & ~7U) +#endif + + +/// is_sparse() accesses the buffer as uint64_t for maximum speed. +/// Use a union to make sure that the buffer is properly aligned.
+typedef union { + uint8_t u8[IO_BUFFER_SIZE]; + uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; + uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; +} io_buf; + + +typedef struct { + /// Name of the source filename (as given on the command line) or + /// pointer to static "(stdin)" when reading from standard input. + const char *src_name; + + /// Destination filename converted from src_name or pointer to static + /// "(stdout)" when writing to standard output. + char *dest_name; + + /// File descriptor of the source file + int src_fd; + + /// File descriptor of the target file + int dest_fd; + + /// True once end of the source file has been detected. + bool src_eof; + + /// If true, we look for long chunks of zeros and try to create + /// a sparse file. + bool dest_try_sparse; + + /// This is used only if dest_try_sparse is true. This holds the + /// number of zero bytes we haven't written out, because we plan + /// to make that byte range a sparse chunk. + off_t dest_pending_sparse; + + /// Stat of the source file. + struct stat src_st; + + /// Stat of the destination file. + struct stat dest_st; + +} file_pair; + + +/// \brief Initialize the I/O module +extern void io_init(void); + + +/// \brief Disable creation of sparse files when decompressing +extern void io_no_sparse(void); + + +/// \brief Open the source file +extern file_pair *io_open_src(const char *src_name); + + +/// \brief Open the destination file +extern bool io_open_dest(file_pair *pair); + + +/// \brief Closes the file descriptors and frees possible allocated memory +/// +/// The success argument determines if source or destination file gets +/// unlinked: +/// - false: The destination file is unlinked. +/// - true: The source file is unlinked unless writing to stdout or --keep +/// was used. 
+extern void io_close(file_pair *pair, bool success); + + +/// \brief Reads from the source file to a buffer +/// +/// \param pair File pair having the source file open for reading +/// \param buf Destination buffer to hold the read data +/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX +/// +/// \return On success, number of bytes read is returned. On end of +/// file zero is returned and pair->src_eof set to true. +/// On error, SIZE_MAX is returned and error message printed. +extern size_t io_read(file_pair *pair, io_buf *buf, size_t size); + + +/// \brief Read from source file from given offset to a buffer +/// +/// This is remotely similar to standard pread(). This uses lseek() though, +/// so the read offset is changed on each call. +/// +/// \param pair Seekable source file +/// \param buf Destination buffer +/// \param size Amount of data to read +/// \param pos Offset relative to the beginning of the file, +/// from which the data should be read. +/// +/// \return On success, false is returned. On error, error message +/// is printed and true is returned. +extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos); + + +/// \brief Writes a buffer to the destination file +/// +/// \param pair File pair having the destination file open for writing +/// \param buf Buffer containing the data to be written +/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX +/// +/// \return On success, false is returned. On error, true is returned +/// and error message printed.
+extern bool io_write(file_pair *pair, const io_buf *buf, size_t size); Index: contrib/xz/src/xz/list.c =================================================================== --- contrib/xz/src/xz/list.c (revision 0) +++ contrib/xz/src/xz/list.c (revision 0) @@ -0,0 +1,742 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.c +/// \brief Listing information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "tuklib_integer.h" + + +/// Totals that are displayed if there was more than one file. +/// The "files" counter is also used in print_info_adv() to show +/// the file number. +static struct { + uint64_t files; + uint64_t streams; + uint64_t blocks; + uint64_t compressed_size; + uint64_t uncompressed_size; + uint32_t checks; +} totals = { 0, 0, 0, 0, 0, 0 }; + + +/// \brief Parse the Index(es) from the given .xz file +/// +/// \param idx If decoding is successful, *idx will be set to point +/// to lzma_index containing the decoded information. +/// On error, *idx is not modified. +/// \param pair Input file +/// +/// \return On success, false is returned. On error, true is returned. +/// +// TODO: This function is pretty big. liblzma should have a function that +// takes a callback function to parse the Index(es) from a .xz file to make +// it easy for applications. 
+static bool +parse_indexes(lzma_index **idx, file_pair *pair) +{ + if (pair->src_st.st_size <= 0) { + message_error(_("%s: File is empty"), pair->src_name); + return true; + } + + if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error(_("%s: Too small to be a valid .xz file"), + pair->src_name); + return true; + } + + io_buf buf; + lzma_stream_flags header_flags; + lzma_stream_flags footer_flags; + lzma_ret ret; + + // lzma_stream for the Index decoder + lzma_stream strm = LZMA_STREAM_INIT; + + // All Indexes decoded so far + lzma_index *combined_index = NULL; + + // The Index currently being decoded + lzma_index *this_index = NULL; + + // Current position in the file. We parse the file backwards so + // initialize it to point to the end of the file. + off_t pos = pair->src_st.st_size; + + // Each loop iteration decodes one Index. + do { + // Check that there is enough data left to contain at least + // the Stream Header and Stream Footer. This check cannot + // fail in the first pass of this loop. + if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + pos -= LZMA_STREAM_HEADER_SIZE; + lzma_vli stream_padding = 0; + + // Locate the Stream Footer. There may be Stream Padding which + // we must skip when reading backwards. + while (true) { + if (pos < LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm( + LZMA_DATA_ERROR)); + goto error; + } + + if (io_pread(pair, &buf, + LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + // Stream Padding is always a multiple of four bytes. + int i = 2; + if (buf.u32[i] != 0) + break; + + // To avoid calling io_pread() for every four bytes + // of Stream Padding, take advantage that we read + // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and + // check them too before calling io_pread() again. 
+ do { + stream_padding += 4; + pos -= 4; + --i; + } while (i >= 0 && buf.u32[i] == 0); + } + + // Decode the Stream Footer. + ret = lzma_stream_footer_decode(&footer_flags, buf.u8); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + // Check that the size of the Index field looks sane. + lzma_vli index_size = footer_flags.backward_size; + if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + // Set pos to the beginning of the Index. + pos -= index_size; + + // See how much memory we can use for decoding this Index. + uint64_t memlimit = hardware_memlimit_get(); + uint64_t memused = 0; + if (combined_index != NULL) { + memused = lzma_index_memused(combined_index); + if (memused > memlimit) + message_bug(); + + memlimit -= memused; + } + + // Decode the Index. + ret = lzma_index_decoder(&strm, &this_index, memlimit); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + do { + // Don't give the decoder more input than the + // Index size. + strm.avail_in = MIN(IO_BUFFER_SIZE, index_size); + if (io_pread(pair, &buf, strm.avail_in, pos)) + goto error; + + pos += strm.avail_in; + index_size -= strm.avail_in; + + strm.next_in = buf.u8; + ret = lzma_code(&strm, LZMA_RUN); + + } while (ret == LZMA_OK); + + // If the decoding seems to be successful, check also that + // the Index decoder consumed as much input as indicated + // by the Backward Size field. + if (ret == LZMA_STREAM_END) + if (index_size != 0 || strm.avail_in != 0) + ret = LZMA_DATA_ERROR; + + if (ret != LZMA_STREAM_END) { + // LZMA_BUFFER_ERROR means that the Index decoder + // would have liked more input than what the Index + // size should be according to Stream Footer. + // The message for LZMA_DATA_ERROR makes more + // sense in that case. 
+ if (ret == LZMA_BUF_ERROR) + ret = LZMA_DATA_ERROR; + + message_error("%s: %s", pair->src_name, + message_strm(ret)); + + // If the error was too low memory usage limit, + // show also how much memory would have been needed. + if (ret == LZMA_MEMLIMIT_ERROR) { + uint64_t needed = lzma_memusage(&strm); + if (UINT64_MAX - needed < memused) + needed = UINT64_MAX; + else + needed += memused; + + message_mem_needed(V_ERROR, needed); + } + + goto error; + } + + // Decode the Stream Header and check that its Stream Flags + // match the Stream Footer. + pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; + if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + pos -= lzma_index_total_size(this_index); + if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + ret = lzma_stream_header_decode(&header_flags, buf.u8); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + ret = lzma_stream_flags_compare(&header_flags, &footer_flags); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + // Store the decoded Stream Flags into this_index. This is + // needed so that we can print which Check is used in each + // Stream. + ret = lzma_index_stream_flags(this_index, &footer_flags); + if (ret != LZMA_OK) + message_bug(); + + // Store also the size of the Stream Padding field. It is + // needed to show the offsets of the Streams correctly. + ret = lzma_index_stream_padding(this_index, stream_padding); + if (ret != LZMA_OK) + message_bug(); + + if (combined_index != NULL) { + // Append the earlier decoded Indexes + // after this_index. 
+ ret = lzma_index_cat( + this_index, combined_index, NULL); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + } + + combined_index = this_index; + this_index = NULL; + + } while (pos > 0); + + lzma_end(&strm); + + // All OK. Make combined_index available to the caller. + *idx = combined_index; + return false; + +error: + // Something went wrong, free the allocated memory. + lzma_end(&strm); + lzma_index_end(combined_index, NULL); + lzma_index_end(this_index, NULL); + return true; +} + + +/// \brief Get the compression ratio +/// +/// This has slightly different format than that is used by in message.c. +static const char * +get_ratio(uint64_t compressed_size, uint64_t uncompressed_size) +{ + if (uncompressed_size == 0) + return "---"; + + const double ratio = (double)(compressed_size) + / (double)(uncompressed_size); + if (ratio > 9.999) + return "---"; + + static char buf[6]; + snprintf(buf, sizeof(buf), "%.3f", ratio); + return buf; +} + + +static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = { + "None", + "CRC32", + "Unknown-2", + "Unknown-3", + "CRC64", + "Unknown-5", + "Unknown-6", + "Unknown-7", + "Unknown-8", + "Unknown-9", + "SHA-256", + "Unknown-11", + "Unknown-12", + "Unknown-13", + "Unknown-14", + "Unknown-15", +}; + + +/// \brief Get a comma-separated list of Check names +/// +/// \param checks Bit mask of Checks to print +/// \param space_after_comma +/// It's better to not use spaces in table-like listings, +/// but in more verbose formats a space after a comma +/// is good for readability. +static const char * +get_check_names(uint32_t checks, bool space_after_comma) +{ + assert(checks != 0); + + static char buf[sizeof(check_names)]; + char *pos = buf; + size_t left = sizeof(buf); + + const char *sep = space_after_comma ? 
", " : ","; + bool comma = false; + + for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) { + if (checks & (UINT32_C(1) << i)) { + my_snprintf(&pos, &left, "%s%s", + comma ? sep : "", check_names[i]); + comma = true; + } + } + + return buf; +} + + +/// \brief Read the Check value from the .xz file and print it +/// +/// Since this requires a seek, listing all Check values for all Blocks can +/// be slow. +/// +/// \param pair Input file +/// \param iter Location of the Block whose Check value should +/// be printed. +/// +/// \return False on success, true on I/O error. +static bool +print_check_value(file_pair *pair, const lzma_index_iter *iter) +{ + // Don't read anything from the file if there is no integrity Check. + if (iter->stream.flags->check == LZMA_CHECK_NONE) { + printf("---"); + return false; + } + + // Locate and read the Check field. + const uint32_t size = lzma_check_size(iter->stream.flags->check); + const off_t offset = iter->block.compressed_file_offset + + iter->block.total_size - size; + io_buf buf; + if (io_pread(pair, &buf, size, offset)) + return true; + + // CRC32 and CRC64 are in little endian. Guess that all the future + // 32-bit and 64-bit Check values are little endian too. It shouldn't + // be a too big problem if this guess is wrong. + if (size == 4) { + printf("%08" PRIx32, conv32le(buf.u32[0])); + } else if (size == 8) { + printf("%016" PRIx64, conv64le(buf.u64[0])); + } else { + for (size_t i = 0; i < size; ++i) + printf("%02x", buf.u8[i]); + } + + return false; +} + + +static void +print_info_basic(const lzma_index *idx, file_pair *pair) +{ + static bool headings_displayed = false; + if (!headings_displayed) { + headings_displayed = true; + // TRANSLATORS: These are column titles. From Strms (Streams) + // to Ratio, the columns are right aligned. Check and Filename + // are left aligned. If you need longer words, it's OK to + // use two lines here. Test with xz --list. 
+ puts(_("Strms Blocks Compressed Uncompressed Ratio " + "Check Filename")); + } + + printf("%5s %7s %11s %11s %5s %-7s %s\n", + uint64_to_str(lzma_index_stream_count(idx), 0), + uint64_to_str(lzma_index_block_count(idx), 1), + uint64_to_nicestr(lzma_index_file_size(idx), + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(lzma_index_uncompressed_size(idx), + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx)), + get_check_names(lzma_index_checks(idx), false), + pair->src_name); + + return; +} + + +static void +print_adv_helper(uint64_t stream_count, uint64_t block_count, + uint64_t compressed_size, uint64_t uncompressed_size, + uint32_t checks) +{ + printf(_(" Stream count: %s\n"), + uint64_to_str(stream_count, 0)); + printf(_(" Block count: %s\n"), + uint64_to_str(block_count, 0)); + printf(_(" Compressed size: %s\n"), + uint64_to_nicestr(compressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(_(" Uncompressed size: %s\n"), + uint64_to_nicestr(uncompressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(_(" Ratio: %s\n"), + get_ratio(compressed_size, uncompressed_size)); + printf(_(" Check: %s\n"), + get_check_names(checks, true)); + return; +} + + +static void +print_info_adv(const lzma_index *idx, file_pair *pair) +{ + // Print the overall information. + print_adv_helper(lzma_index_stream_count(idx), + lzma_index_block_count(idx), + lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx), + lzma_index_checks(idx)); + + // TODO: The rest of this function needs some work. Currently + // the offsets are not printed, which could be useful even when + // printed in a less accurate format. On the other hand, maybe + // this should print the information with exact byte values, + // or maybe there should be at least an option to do that. + // + // We could also display some other info. E.g. 
it could be useful + // to quickly see how big is the biggest Block (uncompressed size) + // and if all Blocks have Compressed Size and Uncompressed Size + // fields present, which can be used e.g. for multithreaded + // decompression. + + // Avoid printing Stream and Block lists when they wouldn't be useful. + bool show_blocks = false; + if (lzma_index_stream_count(idx) > 1) { + puts(_(" Streams:")); + puts(_(" Number Blocks Compressed " + "Uncompressed Ratio Check")); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) { + if (iter.stream.block_count > 1) + show_blocks = true; + + printf(" %8s %10s %11s %11s %5s %s\n", + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.stream.block_count, 1), + uint64_to_nicestr( + iter.stream.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr( + iter.stream.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + check_names[iter.stream.flags->check]); + } + } + + if (show_blocks || lzma_index_block_count(idx) + > lzma_index_stream_count(idx) + || message_verbosity_get() >= V_DEBUG) { + puts(_(" Blocks:")); + // FIXME: Number in Stream/file, which one is better? 
+ puts(_(" Stream Number Compressed " + "Uncompressed Ratio Check")); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + printf(" %8s %10s %11s %11s %5s %-7s", + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.block.number_in_stream, 1), + uint64_to_nicestr(iter.block.total_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr( + iter.block.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); + + if (message_verbosity_get() >= V_DEBUG) + if (print_check_value(pair, &iter)) + return; + + putchar('\n'); + } + } +} + + +static void +print_info_robot(const lzma_index *idx, file_pair *pair) +{ + printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%s\n", + lzma_index_stream_count(idx), + lzma_index_block_count(idx), + lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx), + get_ratio(lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx)), + get_check_names(lzma_index_checks(idx), false), + pair->src_name); + + if (message_verbosity_get() >= V_VERBOSE) { + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) + printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%" PRIu64 "\t%s\n", + iter.stream.number, + iter.stream.compressed_offset, + iter.stream.uncompressed_offset, + iter.stream.compressed_size, + iter.stream.uncompressed_size, + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + iter.stream.padding, + check_names[iter.stream.flags->check]); + + lzma_index_iter_rewind(&iter); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s", + iter.stream.number, + 
iter.block.number_in_stream, + iter.block.number_in_file, + iter.block.compressed_file_offset, + iter.block.uncompressed_file_offset, + iter.block.total_size, + iter.block.uncompressed_size, + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); + + if (message_verbosity_get() >= V_DEBUG) { + putchar('\t'); + if (print_check_value(pair, &iter)) + return; + } + + putchar('\n'); + } + } + + return; +} + + +static void +update_totals(const lzma_index *idx) +{ + // TODO: Integer overflow checks + ++totals.files; + totals.streams += lzma_index_stream_count(idx); + totals.blocks += lzma_index_block_count(idx); + totals.compressed_size += lzma_index_file_size(idx); + totals.uncompressed_size += lzma_index_uncompressed_size(idx); + totals.checks |= lzma_index_checks(idx); + return; +} + + +static void +print_totals_basic(void) +{ + // Print a separator line. + char line[80]; + memset(line, '-', sizeof(line)); + line[sizeof(line) - 1] = '\0'; + puts(line); + + // Print the totals except the file count, which needs + // special handling. + printf("%5s %7s %11s %11s %5s %-7s ", + uint64_to_str(totals.streams, 0), + uint64_to_str(totals.blocks, 1), + uint64_to_nicestr(totals.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(totals.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(totals.compressed_size, + totals.uncompressed_size), + get_check_names(totals.checks, false)); + + // Since we print totals only when there are at least two files, + // the English message will always use "%s files". But some other + // languages need different forms for different plurals so we + // have to translate this string still. + // + // TRANSLATORS: This simply indicates the number of files shown + // by --list even though the format string uses %s. + printf(N_("%s file", "%s files\n", + totals.files <= ULONG_MAX ? 
totals.files + : (totals.files % 1000000) + 1000000), + uint64_to_str(totals.files, 0)); + + return; +} + + +static void +print_totals_adv(void) +{ + putchar('\n'); + puts(_("Totals:")); + printf(_(" Number of files: %s\n"), + uint64_to_str(totals.files, 0)); + print_adv_helper(totals.streams, totals.blocks, + totals.compressed_size, totals.uncompressed_size, + totals.checks); + + return; +} + + +static void +print_totals_robot(void) +{ + printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\n", + totals.streams, + totals.blocks, + totals.compressed_size, + totals.uncompressed_size, + get_ratio(totals.compressed_size, + totals.uncompressed_size), + get_check_names(totals.checks, false), + totals.files); + + return; +} + + +extern void +list_totals(void) +{ + if (opt_robot) { + // Always print totals in --robot mode. It can be convenient + // in some cases and doesn't complicate usage of the + // single-file case much. + print_totals_robot(); + + } else if (totals.files > 1) { + // For non-robot mode, totals are printed only if there + // is more than one file. + if (message_verbosity_get() <= V_WARNING) + print_totals_basic(); + else + print_totals_adv(); + } + + return; +} + + +extern void +list_file(const char *filename) +{ + if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO) + message_fatal(_("--list works only on .xz files " + "(--format=xz or --format=auto)")); + + message_filename(filename); + + if (filename == stdin_filename) { + message_error(_("--list does not support reading from " + "standard input")); + return; + } + + // Unset opt_stdout so that io_open_src() won't accept special files. + // Set opt_force so that io_open_src() will follow symlinks. 
+ opt_stdout = false; + opt_force = true; + file_pair *pair = io_open_src(filename); + if (pair == NULL) + return; + + lzma_index *idx; + if (!parse_indexes(&idx, pair)) { + // Update the totals that are displayed after all + // the individual files have been listed. + update_totals(idx); + + // We have three main modes: + // - --robot, which has submodes if --verbose is specified + // once or twice + // - Normal --list without --verbose + // - --list with one or two --verbose + if (opt_robot) + print_info_robot(idx, pair); + else if (message_verbosity_get() <= V_WARNING) + print_info_basic(idx, pair); + else + print_info_adv(idx, pair); + + lzma_index_end(idx, NULL); + } + + io_close(pair, false); + return; +} Index: contrib/xz/src/xz/util.c =================================================================== --- contrib/xz/src/xz/util.c (revision 0) +++ contrib/xz/src/xz/util.c (revision 0) @@ -0,0 +1,314 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.c +/// \brief Miscellaneous utility functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include <stdarg.h> + + +extern void * +xrealloc(void *ptr, size_t size) +{ + assert(size > 0); + + ptr = realloc(ptr, size); + if (ptr == NULL) + message_fatal("%s", strerror(errno)); + + return ptr; +} + + +extern char * +xstrdup(const char *src) +{ + assert(src != NULL); + const size_t size = strlen(src) + 1; + char *dest = xmalloc(size); + return memcpy(dest, src, size); +} + + +extern uint64_t +str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) +{ + uint64_t result = 0; + + // Skip blanks. + while (*value == ' ' || *value == '\t') + ++value; + + // Accept special value "max". Supporting "min" doesn't seem useful.
+ if (strcmp(value, "max") == 0) + return max; + + if (*value < '0' || *value > '9') + message_fatal(_("%s: Value is not a non-negative " + "decimal integer"), value); + + do { + // Don't overflow. + if (result > (UINT64_MAX - 9) / 10) + goto error; + + result *= 10; + result += *value - '0'; + ++value; + } while (*value >= '0' && *value <= '9'); + + if (*value != '\0') { + // Look for suffix. Originally this supported both base-2 + // and base-10, but since there seems to be little need + // for base-10 in this program, treat everything as base-2 + // and also be more relaxed about the case of the first + // letter of the suffix. + uint64_t multiplier = 0; + if (*value == 'k' || *value == 'K') + multiplier = UINT64_C(1) << 10; + else if (*value == 'm' || *value == 'M') + multiplier = UINT64_C(1) << 20; + else if (*value == 'g' || *value == 'G') + multiplier = UINT64_C(1) << 30; + + ++value; + + // Allow also e.g. Ki, KiB, and KB. + if (*value != '\0' && strcmp(value, "i") != 0 + && strcmp(value, "iB") != 0 + && strcmp(value, "B") != 0) + multiplier = 0; + + if (multiplier == 0) { + message(V_ERROR, _("%s: Invalid multiplier suffix"), + value - 1); + message_fatal(_("Valid suffixes are `KiB' (2^10), " + "`MiB' (2^20), and `GiB' (2^30).")); + } + + // Don't overflow here either. + if (result > UINT64_MAX / multiplier) + goto error; + + result *= multiplier; + } + + if (result < min || result > max) + goto error; + + return result; + +error: + message_fatal(_("Value of the option `%s' must be in the range " + "[%" PRIu64 ", %" PRIu64 "]"), + name, min, max); +} + + +extern uint64_t +round_up_to_mib(uint64_t n) +{ + return (n >> 20) + ((n & ((UINT32_C(1) << 20) - 1)) != 0); +} + + +extern const char * +uint64_to_str(uint64_t value, uint32_t slot) +{ + // 2^64 with thousand separators is 26 bytes plus trailing '\0'. 
+ static char bufs[4][32]; + + assert(slot < ARRAY_SIZE(bufs)); + + static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; + if (thousand == UNKNOWN) { + bufs[slot][0] = '\0'; + snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, + UINT64_C(1)); + thousand = bufs[slot][0] == '1' ? WORKS : BROKEN; + } + + if (thousand == WORKS) + snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, value); + else + snprintf(bufs[slot], sizeof(bufs[slot]), "%" PRIu64, value); + + return bufs[slot]; +} + + +extern const char * +uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min, + enum nicestr_unit unit_max, bool always_also_bytes, + uint32_t slot) +{ + assert(unit_min <= unit_max); + assert(unit_max <= NICESTR_TIB); + + enum nicestr_unit unit = NICESTR_B; + const char *str; + + if ((unit_min == NICESTR_B && value < 10000) + || unit_max == NICESTR_B) { + // The value is shown as bytes. + str = uint64_to_str(value, slot); + } else { + // Scale the value to a nicer unit. Unless unit_min and + // unit_max limit us, we will show at most five significant + // digits with one decimal place. 
+ double d = (double)(value); + do { + d /= 1024.0; + ++unit; + } while (unit < unit_min || (d > 9999.9 && unit < unit_max)); + + str = double_to_str(d); + } + + static const char suffix[5][4] = { "B", "KiB", "MiB", "GiB", "TiB" }; + + // Minimum buffer size: + // 26 2^64 with thousand separators + // 4 " KiB" + // 2 " (" + // 26 2^64 with thousand separators + // 3 " B)" + // 1 '\0' + // 62 Total + static char buf[4][64]; + char *pos = buf[slot]; + size_t left = sizeof(buf[slot]); + my_snprintf(&pos, &left, "%s %s", str, suffix[unit]); + + if (always_also_bytes && value >= 10000) + snprintf(pos, left, " (%s B)", uint64_to_str(value, slot)); + + return buf[slot]; +} + + +extern const char * +double_to_str(double value) +{ + static char buf[64]; + + static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; + if (thousand == UNKNOWN) { + buf[0] = '\0'; + snprintf(buf, sizeof(buf), "%'.1f", 2.0); + thousand = buf[0] == '2' ? WORKS : BROKEN; + } + + if (thousand == WORKS) + snprintf(buf, sizeof(buf), "%'.1f", value); + else + snprintf(buf, sizeof(buf), "%.1f", value); + + return buf; +} + + +extern void +my_snprintf(char **pos, size_t *left, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(*pos, *left, fmt, ap); + va_end(ap); + + // If an error occurred, we want the caller to think that the whole + // buffer was used. This way no more data will be written to the + // buffer. We don't need better error handling here. + if (len < 0 || (size_t)(len) >= *left) { + *left = 0; + } else { + *pos += len; + *left -= len; + } + + return; +} + + +/* +/// \brief Simple quoting to get rid of ASCII control characters +/// +/// This is not so cool and locale-dependent, but should be good enough +/// At least we don't print any control characters on the terminal. 
+/// +extern char * +str_quote(const char *str) +{ + size_t dest_len = 0; + bool has_ctrl = false; + + while (str[dest_len] != '\0') + if (*(unsigned char *)(str + dest_len++) < 0x20) + has_ctrl = true; + + char *dest = malloc(dest_len + 1); + if (dest != NULL) { + if (has_ctrl) { + for (size_t i = 0; i < dest_len; ++i) + if (*(unsigned char *)(str + i) < 0x20) + dest[i] = '?'; + else + dest[i] = str[i]; + + dest[dest_len] = '\0'; + + } else { + // Usually there are no control characters, + // so we can optimize. + memcpy(dest, str, dest_len + 1); + } + } + + return dest; +} +*/ + + +extern bool +is_empty_filename(const char *filename) +{ + if (filename[0] == '\0') { + message_error(_("Empty filename, skipping")); + return true; + } + + return false; +} + + +extern bool +is_tty_stdin(void) +{ + const bool ret = isatty(STDIN_FILENO); + + if (ret) + message_error(_("Compressed data cannot be read from " + "a terminal")); + + return ret; +} + + +extern bool +is_tty_stdout(void) +{ + const bool ret = isatty(STDOUT_FILENO); + + if (ret) + message_error(_("Compressed data cannot be written to " + "a terminal")); + + return ret; +} Index: contrib/xz/src/xz/private.h =================================================================== --- contrib/xz/src/xz/private.h (revision 0) +++ contrib/xz/src/xz/private.h (revision 0) @@ -0,0 +1,51 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file private.h +/// \brief Common includes, definitions, and prototypes +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" +#include "mythread.h" +#include "lzma.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <signal.h> +#include <locale.h> +#include <stdio.h> +#include <unistd.h> + +#include "tuklib_gettext.h" +#include "tuklib_progname.h" +#include "tuklib_exit.h" + +#ifndef STDIN_FILENO +# define STDIN_FILENO (fileno(stdin)) +#endif + +#ifndef STDOUT_FILENO +# define STDOUT_FILENO (fileno(stdout)) +#endif + +#ifndef STDERR_FILENO +# define STDERR_FILENO (fileno(stderr)) +#endif + +#include "main.h" +#include "coder.h" +#include "message.h" +#include "args.h" +#include "hardware.h" +#include "file_io.h" +#include "options.h" +#include "signals.h" +#include "suffix.h" +#include "util.h" +#include "list.h" Index: contrib/xz/src/xz/xz.1 =================================================================== --- contrib/xz/src/xz/xz.1 (revision 0) +++ contrib/xz/src/xz/xz.1 (revision 0) @@ -0,0 +1,1351 @@ +'\" t +.\" +.\" Author: Lasse Collin +.\" +.\" This file has been put into the public domain. +.\" You can do whatever you want with this file. +.\" +.TH XZ 1 "2010-03-07" "Tukaani" "XZ Utils" +.SH NAME +xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files +.SH SYNOPSIS +.B xz +.RI [ option ]... +.RI [ file ]... +.PP +.B unxz +is equivalent to +.BR "xz \-\-decompress" . +.br +.B xzcat +is equivalent to +.BR "xz \-\-decompress \-\-stdout" . +.br +.B lzma +is equivalent to +.BR "xz \-\-format=lzma" . +.br +.B unlzma +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress" . +.br +.B lzcat +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress \-\-stdout" . +.PP +When writing scripts that need to decompress files, it is recommended to +always use the name +.B xz +with appropriate arguments +.RB ( "xz \-d" +or +.BR "xz \-dc" ) +instead of the names +.B unxz +and +.BR xzcat .
+.SH DESCRIPTION +.B xz +is a general-purpose data compression tool with command line syntax similar to +.BR gzip (1) +and +.BR bzip2 (1). +The native file format is the +.B .xz +format, but also the legacy +.B .lzma +format and raw compressed streams with no container format headers +are supported. +.PP +.B xz +compresses or decompresses each +.I file +according to the selected operation mode. +If no +.I files +are given or +.I file +is +.BR \- , +.B xz +reads from standard input and writes the processed data to standard output. +.B xz +will refuse (display an error and skip the +.IR file ) +to write compressed data to standard output if it is a terminal. Similarly, +.B xz +will refuse to read compressed data from standard input if it is a terminal. +.PP +Unless +.B \-\-stdout +is specified, +.I files +other than +.B \- +are written to a new file whose name is derived from the source +.I file +name: +.IP \(bu 3 +When compressing, the suffix of the target file format +.RB ( .xz +or +.BR .lzma ) +is appended to the source filename to get the target filename. +.IP \(bu 3 +When decompressing, the +.B .xz +or +.B .lzma +suffix is removed from the filename to get the target filename. +.B xz +also recognizes the suffixes +.B .txz +and +.BR .tlz , +and replaces them with the +.B .tar +suffix. +.PP +If the target file already exists, an error is displayed and the +.I file +is skipped. +.PP +Unless writing to standard output, +.B xz +will display a warning and skip the +.I file +if any of the following applies: +.IP \(bu 3 +.I File +is not a regular file. Symbolic links are not followed, thus they +are not considered to be regular files. +.IP \(bu 3 +.I File +has more than one hard link. +.IP \(bu 3 +.I File +has setuid, setgid, or sticky bit set. 
+.IP \(bu 3 +The operation mode is set to compress, and the +.I file +already has a suffix of the target file format +.RB ( .xz +or +.B .txz +when compressing to the +.B .xz +format, and +.B .lzma +or +.B .tlz +when compressing to the +.B .lzma +format). +.IP \(bu 3 +The operation mode is set to decompress, and the +.I file +doesn't have a suffix of any of the supported file formats +.RB ( .xz , +.BR .txz , +.BR .lzma , +or +.BR .tlz ). +.PP +After successfully compressing or decompressing the +.IR file , +.B xz +copies the owner, group, permissions, access time, and modification time +from the source +.I file +to the target file. If copying the group fails, the permissions are modified +so that the target file doesn't become accessible to users who didn't have +permission to access the source +.IR file . +.B xz +doesn't support copying other metadata like access control lists +or extended attributes yet. +.PP +Once the target file has been successfully closed, the source +.I file +is removed unless +.B \-\-keep +was specified. The source +.I file +is never removed if the output is written to standard output. +.PP +Sending +.B SIGINFO +or +.B SIGUSR1 +to the +.B xz +process makes it print progress information to standard error. +This has only limited use since when standard error is a terminal, using +.B \-\-verbose +will display an automatically updating progress indicator. +.SS "Memory usage" +The memory usage of +.B xz +varies from a few hundred kilobytes to several gigabytes depending on +the compression settings. The settings used when compressing a file +affect also the memory usage of the decompressor. Typically the decompressor +needs only 5\ % to 20\ % of the amount of RAM that the compressor needed when +creating the file. Still, the worst-case memory usage of the decompressor +is several gigabytes. +.PP +To prevent uncomfortable surprises caused by huge memory usage, +.B xz +has a built-in memory usage limiter. 
While some operating systems provide +ways to limit the memory usage of processes, relying on it wasn't deemed +to be flexible enough. The default limit depends on the total amount of +physical RAM: +.IP \(bu 3 +If 40\ % of RAM is at least 80 MiB, 40\ % of RAM is used as the limit. +.IP \(bu 3 +If 80\ % of RAM is over 80 MiB, 80 MiB is used as the limit. +.IP \(bu 3 +Otherwise 80\ % of RAM is used as the limit. +.PP +When compressing, if the selected compression settings exceed the memory +usage limit, the settings are automatically adjusted downwards and a notice +about this is displayed. As an exception, if the memory usage limit is +exceeded when compressing with +.BR \-\-format=raw , +an error is displayed and +.B xz +will exit with exit status +.BR 1 . +.PP +If source +.I file +cannot be decompressed without exceeding the memory usage limit, an error +message is displayed and the file is skipped. Note that compressed files +may contain many blocks, which may have been compressed with different +settings. Typically all blocks will have roughly the same memory requirements, +but it is possible that a block later in the file will exceed the memory usage +limit, and an error about too low memory usage limit gets displayed after some +data has already been decompressed. +.PP +The absolute value of the active memory usage limit can be seen with +.B \-\-info\-memory +or near the bottom of the output of +.BR \-\-long\-help . +The default limit can be overridden with +\fB\-\-memory=\fIlimit\fR. +.SH OPTIONS +.SS "Integer suffixes and special values" +In most places where an integer argument is expected, an optional suffix +is supported to easily indicate large integers. There must be no space +between the integer and the suffix. +.TP +.B KiB +The integer is multiplied by 1,024 (2^10). Also +.BR Ki , +.BR k , +.BR kB , +.BR K , +and +.B KB +are accepted as synonyms for +.BR KiB . +.TP +.B MiB +The integer is multiplied by 1,048,576 (2^20).
Also +.BR Mi , +.BR m , +.BR M , +and +.B MB +are accepted as synonyms for +.BR MiB . +.TP +.B GiB +The integer is multiplied by 1,073,741,824 (2^30). Also +.BR Gi , +.BR g , +.BR G , +and +.B GB +are accepted as synonyms for +.BR GiB . +.PP +A special value +.B max +can be used to indicate the maximum integer value supported by the option. +.SS "Operation mode" +If multiple operation mode options are given, the last one takes effect. +.TP +.BR \-z ", " \-\-compress +Compress. This is the default operation mode when no operation mode option +is specified, and no other operation mode is implied from the command name +(for example, +.B unxz +implies +.BR \-\-decompress ). +.TP +.BR \-d ", " \-\-decompress ", " \-\-uncompress +Decompress. +.TP +.BR \-t ", " \-\-test +Test the integrity of compressed +.IR files . +No files are created or removed. This option is equivalent to +.B "\-\-decompress \-\-stdout" +except that the decompressed data is discarded instead of being +written to standard output. +.TP +.BR \-l ", " \-\-list +View information about the compressed files. No uncompressed output is +produced, and no files are created or removed. In list mode, the program +cannot read the compressed data from standard input or from other +unseekable sources. +.IP +.B "This feature has not been implemented yet." +.SS "Operation modifiers" +.TP +.BR \-k ", " \-\-keep +Keep (don't delete) the input files. +.TP +.BR \-f ", " \-\-force +This option has several effects: +.RS +.IP \(bu 3 +If the target file already exists, delete it before compressing or +decompressing. +.IP \(bu 3 +Compress or decompress even if the input is a symbolic link to a regular file, +has more than one hard link, or has setuid, setgid, or sticky bit set. +The setuid, setgid, and sticky bits are not copied to the target file. 
+.IP \(bu 3 +If combined with +.B \-\-decompress +.BR \-\-stdout +and +.B xz +doesn't recognize the type of the source file, +.B xz +will copy the source file as is to standard output. This allows using +.B xzcat +.B \-\-force +like +.BR cat (1) +for files that have not been compressed with +.BR xz . +Note that in future, +.B xz +might support new compressed file formats, which may make +.B xz +decompress more types of files instead of copying them as is to +standard output. +.BI \-\-format= format +can be used to restrict +.B xz +to decompress only a single file format. +.RE +.TP +.BR \-c ", " \-\-stdout ", " \-\-to\-stdout +Write the compressed or decompressed data to standard output instead of +a file. This implies +.BR \-\-keep . +.TP +.B \-\-no\-sparse +Disable creation of sparse files. By default, if decompressing into +a regular file, +.B xz +tries to make the file sparse if the decompressed data contains long +sequences of binary zeros. It works also when writing to standard output +as long as standard output is connected to a regular file, and certain +additional conditions are met to make it safe. Creating sparse files may +save disk space and speed up the decompression by reducing the amount of +disk I/O. +.TP +\fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf +When compressing, use +.I .suf +as the suffix for the target file instead of +.B .xz +or +.BR .lzma . +If not writing to standard output and the source file already has the suffix +.IR .suf , +a warning is displayed and the file is skipped. +.IP +When decompressing, recognize also files with the suffix +.I .suf +in addition to files with the +.BR .xz , +.BR .txz , +.BR .lzma , +or +.B .tlz +suffix. If the source file has the suffix +.IR .suf , +the suffix is removed to get the target filename. +.IP +When compressing or decompressing raw streams +.RB ( \-\-format=raw ), +the suffix must always be specified unless writing to standard output, +because there is no default suffix for raw streams.
+.TP +\fB\-\-files\fR[\fB=\fIfile\fR] +Read the filenames to process from +.IR file ; +if +.I file +is omitted, filenames are read from standard input. Filenames must be +terminated with the newline character. A dash +.RB ( \- ) +is taken as a regular filename; it doesn't mean standard input. +If filenames are given also as command line arguments, they are +processed before the filenames read from +.IR file . +.TP +\fB\-\-files0\fR[\fB=\fIfile\fR] +This is identical to \fB\-\-files\fR[\fB=\fIfile\fR] except that the +filenames must be terminated with the null character. +.SS "Basic file format and compression options" +.TP +\fB\-F\fR \fIformat\fR, \fB\-\-format=\fIformat +Specify the file format to compress or decompress: +.RS +.IP \(bu 3 +.BR auto : +This is the default. When compressing, +.B auto +is equivalent to +.BR xz . +When decompressing, the format of the input file is automatically detected. +Note that raw streams (created with +.BR \-\-format=raw ) +cannot be auto-detected. +.IP \(bu 3 +.BR xz : +Compress to the +.B .xz +file format, or accept only +.B .xz +files when decompressing. +.IP \(bu 3 +.B lzma +or +.BR alone : +Compress to the legacy +.B .lzma +file format, or accept only +.B .lzma +files when decompressing. The alternative name +.B alone +is provided for backwards compatibility with LZMA Utils. +.IP \(bu 3 +.BR raw : +Compress or uncompress a raw stream (no headers). This is meant for advanced +users only. To decode raw streams, you need to set not only +.B \-\-format=raw +but also specify the filter chain, which would normally be stored in the +container format headers. +.RE +.TP +\fB\-C\fR \fIcheck\fR, \fB\-\-check=\fIcheck +Specify the type of the integrity check, which is calculated from the +uncompressed data. This option has an effect only when compressing into the +.B .xz +format; the +.B .lzma +format doesn't support integrity checks. +The integrity check (if any) is verified when the +.B .xz +file is decompressed. 
+.IP +Supported +.I check +types: +.RS +.IP \(bu 3 +.BR none : +Don't calculate an integrity check at all. This is usually a bad idea. This +can be useful when integrity of the data is verified by other means anyway. +.IP \(bu 3 +.BR crc32 : +Calculate CRC32 using the polynomial from IEEE-802.3 (Ethernet). +.IP \(bu 3 +.BR crc64 : +Calculate CRC64 using the polynomial from ECMA-182. This is the default, since +it is slightly better than CRC32 at detecting damaged files and the speed +difference is negligible. +.IP \(bu 3 +.BR sha256 : +Calculate SHA-256. This is somewhat slower than CRC32 and CRC64. +.RE +.IP +Integrity of the +.B .xz +headers is always verified with CRC32. It is not possible to change or +disable it. +.TP +.BR \-0 " ... " \-9 +Select compression preset. If a preset level is specified multiple times, +the last one takes effect. +.IP +The compression preset levels can be categorised roughly into three +categories: +.RS +.IP "\fB\-0\fR ... \fB\-2" +Fast presets with relatively low memory usage. +.B \-1 +and +.B \-2 +should give compression speed and ratios comparable to +.B "bzip2 \-1" +and +.BR "bzip2 \-9" , +respectively. +Currently +.B \-0 +is not very good (not much faster than +.B \-1 +but much worse compression). In future, +.B \-0 +may indicate some fast algorithm instead of LZMA2. +.IP "\fB\-3\fR ... \fB\-5" +Good compression ratio with low to medium memory usage. +These are significantly slower than levels 0\-2. +.IP "\fB\-6\fR ... \fB\-9" +Excellent compression with medium to high memory usage. These are also +slower than the lower preset levels. The default is +.BR \-6 . +Unless you want to maximize the compression ratio, you probably don't want +a higher preset level than +.B \-7 +due to speed and memory usage. +.RE +.IP +The exact compression settings (filter chain) used by each preset may +vary between +.B xz +versions.
The settings may also vary between files being compressed, if +.B xz +determines that modified settings will probably give better compression +ratio without significantly affecting compression time or memory usage. +.IP +Because the settings may vary, the memory usage may vary too. The following +table lists the maximum memory usage of each preset level, which won't be +exceeded even in future versions of +.BR xz . +.IP +.B "FIXME: The table below is just a rough idea." +.RS +.RS +.TS +tab(;); +c c c +n n n. +Preset;Compression;Decompression +\-0;6 MiB;1 MiB +\-1;6 MiB;1 MiB +\-2;10 MiB;1 MiB +\-3;20 MiB;2 MiB +\-4;30 MiB;3 MiB +\-5;60 MiB;6 MiB +\-6;100 MiB;10 MiB +\-7;200 MiB;20 MiB +\-8;400 MiB;40 MiB +\-9;800 MiB;80 MiB +.TE +.RE +.RE +.IP +When compressing, +.B xz +automatically adjusts the compression settings downwards if +the memory usage limit would be exceeded, so it is safe to specify +a high preset level even on systems that don't have lots of RAM. +.TP +.BR \-\-fast " and " \-\-best +These are somewhat misleading aliases for +.B \-0 +and +.BR \-9 , +respectively. +These are provided only for backwards compatibility with LZMA Utils. +Avoid using these options. +.IP +Especially the name of +.B \-\-best +is misleading, because the definition of best depends on the input data, +and that usually people don't want the very best compression ratio anyway, +because it would be very slow. +.TP +.BR \-e ", " \-\-extreme +Modify the compression preset (\fB\-0\fR ... \fB\-9\fR) so that a little bit +better compression ratio can be achieved without increasing memory usage +of the compressor or decompressor (exception: compressor memory usage may +increase a little with presets \fB\-0\fR ... \fB\-2\fR). The downside is that +the compression time will increase dramatically (it can easily double). +.TP +\fB\-M\fR \fIlimit\fR, \fB\-\-memory=\fIlimit +Set the memory usage limit. If this option is specified multiple times, +the last one takes effect. 
The +.I limit +can be specified in multiple ways: +.RS +.IP \(bu 3 +The +.I limit +can be an absolute value in bytes. Using an integer suffix like +.B MiB +can be useful. Example: +.B "\-\-memory=80MiB" +.IP \(bu 3 +The +.I limit +can be specified as a percentage of physical RAM. Example: +.B "\-\-memory=70%" +.IP \(bu 3 +The +.I limit +can be reset back to its default value by setting it to +.BR 0 . +See the section +.B "Memory usage" +for how the default limit is defined. +.IP \(bu 3 +The memory usage limiting can be effectively disabled by setting +.I limit +to +.BR max . +This isn't recommended. It's usually better to use, for example, +.BR \-\-memory=90% . +.RE +.IP +The current +.I limit +can be seen near the bottom of the output of the +.B \-\-long\-help +option. +.TP +\fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads +Specify the maximum number of worker threads to use. The default is +the number of available CPU cores. You can see the current value of +.I threads +near the end of the output of the +.B \-\-long\-help +option. +.IP +The actual number of worker threads can be less than +.I threads +if using more threads would exceed the memory usage limit. +In addition to CPU-intensive worker threads, +.B xz +may use a few auxiliary threads, which don't use a lot of CPU time. +.IP +.B "Multithreaded compression and decompression are not implemented yet," +.B "so this option has no effect for now." +.SS Custom compressor filter chains +A custom filter chain allows specifying the compression settings in detail +instead of relying on the settings associated to the preset levels. +When a custom filter chain is specified, the compression preset level options +(\fB\-0\fR ... \fB\-9\fR and \fB\-\-extreme\fR) are silently ignored. +.PP +A filter chain is comparable to piping on the UN*X command line. +When compressing, the uncompressed input goes to the first filter, whose +output goes to the next filter (if any).
The output of the last filter +gets written to the compressed file. The maximum number of filters in +the chain is four, but typically a filter chain has only one or two filters. +.PP +Many filters have limitations where they can be in the filter chain: +some filters can work only as the last filter in the chain, some only +as a non-last filter, and some work in any position in the chain. Depending +on the filter, this limitation is either inherent to the filter design or +exists to prevent security issues. +.PP +A custom filter chain is specified by using one or more filter options in +the order they are wanted in the filter chain. That is, the order of filter +options is significant! When decoding raw streams +.RB ( \-\-format=raw ), +the filter chain is specified in the same order as it was specified when +compressing. +.PP +Filters take filter-specific +.I options +as a comma-separated list. Extra commas in +.I options +are ignored. Every option has a default value, so you need to +specify only those you want to change. +.TP +\fB\-\-lzma1\fR[\fB=\fIoptions\fR], \fB\-\-lzma2\fR[\fB=\fIoptions\fR] +Add LZMA1 or LZMA2 filter to the filter chain. These filters can be used +only as the last filter in the chain. +.IP +LZMA1 is a legacy filter, which is supported almost solely due to the legacy +.B .lzma +file format, which supports only LZMA1. LZMA2 is an updated +version of LZMA1 to fix some practical issues of LZMA1. The +.B .xz +format uses LZMA2, and doesn't support LZMA1 at all. Compression speed and +ratios of LZMA1 and LZMA2 are practically the same. +.IP +LZMA1 and LZMA2 share the same set of +.IR options : +.RS +.TP +.BI preset= preset +Reset all LZMA1 or LZMA2 +.I options +to +.IR preset . +.I Preset +consists of an integer, which may be followed by single-letter preset +modifiers. The integer can be from +.B 0 +to +.BR 9 , +matching the command line options \fB\-0\fR ... \fB\-9\fR.
+The only supported modifier is currently +.BR e , +which matches +.BR \-\-extreme . +.IP +The default +.I preset +is +.BR 6 , +from which the default values for the rest of the LZMA1 or LZMA2 +.I options +are taken. +.TP +.BI dict= size +Dictionary (history buffer) size indicates how many bytes of the recently +processed uncompressed data is kept in memory. One method to reduce size of +the uncompressed data is to store distance-length pairs, which +indicate what data to repeat from the dictionary buffer. The bigger +the dictionary, the better the compression ratio usually is, +but dictionaries bigger than the uncompressed data are a waste of RAM. +.IP +Typical dictionary size is from 64 KiB to 64 MiB. The minimum is 4 KiB. +The maximum for compression is currently 1.5 GiB. The decompressor already +supports dictionaries up to one byte less than 4 GiB, which is the +maximum for LZMA1 and LZMA2 stream formats. +.IP +Dictionary size has the biggest effect on compression ratio. +Dictionary size and match finder together determine the memory usage of +the LZMA1 or LZMA2 encoder. The same dictionary size is required +for decompressing that was used when compressing, thus the memory usage of +the decoder is determined by the dictionary size used when compressing. +.TP +.BI lc= lc +Specify the number of literal context bits. The minimum is +.B 0 +and the maximum is +.BR 4 ; +the default is +.BR 3 . +In addition, the sum of +.I lc +and +.I lp +must not exceed +.BR 4 . +.TP +.BI lp= lp +Specify the number of literal position bits. The minimum is +.B 0 +and the maximum is +.BR 4 ; +the default is +.BR 0 . +.TP +.BI pb= pb +Specify the number of position bits. The minimum is +.B 0 +and the maximum is +.BR 4 ; +the default is +.BR 2 . +.TP +.BI mode= mode +Compression +.I mode +specifies the function used to analyze the data produced by the match finder. +Supported +.I modes +are +.B fast +and +.BR normal .
+The default is +.B fast +for +.I presets +.BR 0 \- 2 +and +.B normal +for +.I presets +.BR 3 \- 9 . +.TP +.BI mf= mf +Match finder has a major effect on encoder speed, memory usage, and +compression ratio. Usually Hash Chain match finders are faster than +Binary Tree match finders. Hash Chains are usually used together with +.B mode=fast +and Binary Trees with +.BR mode=normal . +The memory usage formulas are only rough estimates, +which are closest to reality when +.I dict +is a power of two. +.RS +.TP +.B hc3 +Hash Chain with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.I dict +* 7.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 5.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B hc4 +Hash Chain with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.I dict +* 7.5 +.TP +.B bt2 +Binary Tree with 2-byte hashing +.br +Minimum value for +.IR nice : +2 +.br +Memory usage: +.I dict +* 9.5 +.TP +.B bt3 +Binary Tree with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.I dict +* 11.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 9.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B bt4 +Binary Tree with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.I dict +* 11.5 +.RE +.TP +.BI nice= nice +Specify what is considered to be a nice length for a match. Once a match +of at least +.I nice +bytes is found, the algorithm stops looking for possibly better matches. +.IP +.I nice +can be 2\-273 bytes. Higher values tend to give better compression ratio +at expense of speed. The default depends on the +.I preset +level. +.TP +.BI depth= depth +Specify the maximum search depth in the match finder. The default is the +special value +.BR 0 , +which makes the compressor determine a reasonable +.I depth +from +.I mf +and +.IR nice . +.IP +Using very high values for +.I depth +can make the encoder extremely slow with carefully crafted files. 
+Avoid setting the +.I depth +over 1000 unless you are prepared to interrupt the compression in case it +is taking too long. +.RE +.IP +When decoding raw streams +.RB ( \-\-format=raw ), +LZMA2 needs only the value of +.BR dict . +LZMA1 needs also +.BR lc , +.BR lp , +and +.BR pb . +.TP +\fB\-\-x86\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-powerpc\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-ia64\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-arm\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-armthumb\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-sparc\fR[\fB=\fIoptions\fR] +Add a branch/call/jump (BCJ) filter to the filter chain. These filters +can be used only as non-last filter in the filter chain. +.IP +A BCJ filter converts relative addresses in the machine code to their +absolute counterparts. This doesn't change the size of the data, but +it increases redundancy, which allows e.g. LZMA2 to get better +compression ratio. +.IP +The BCJ filters are always reversible, so using a BCJ filter for wrong +type of data doesn't cause any data loss. However, applying a BCJ filter +for wrong type of data is a bad idea, because it tends to make the +compression ratio worse. +.IP +Different instruction sets have different alignment: +.RS +.RS +.TS +tab(;); +l n l +l n l. +Filter;Alignment;Notes +x86;1;32-bit and 64-bit x86 +PowerPC;4;Big endian only +ARM;4;Little endian only +ARM-Thumb;2;Little endian only +IA-64;16;Big or little endian +SPARC;4;Big or little endian +.TE +.RE +.RE +.IP +Since the BCJ-filtered data is usually compressed with LZMA2, the compression +ratio may be improved slightly if the LZMA2 options are set to match the +alignment of the selected BCJ filter. For example, with the IA-64 filter, +it's good to set +.B pb=4 +with LZMA2 (2^4=16). The x86 filter is an exception; it's usually good to +stick to LZMA2's default four-byte alignment when compressing x86 executables.
+.IP +All BCJ filters support the same +.IR options : +.RS +.TP +.BI start= offset +Specify the start +.I offset +that is used when converting between relative and absolute addresses. +The +.I offset +must be a multiple of the alignment of the filter (see the table above). +The default is zero. In practice, the default is good; specifying +a custom +.I offset +is almost never useful. +.IP +Specifying a non-zero start +.I offset +is probably useful only if the executable has multiple sections, and there +are many cross-section jumps or calls. Applying a BCJ filter separately for +each section with proper start offset and then compressing the result as +a single chunk may give some improvement in compression ratio compared +to applying the BCJ filter with the default +.I offset +for the whole executable. +.RE +.TP +\fB\-\-delta\fR[\fB=\fIoptions\fR] +Add Delta filter to the filter chain. The Delta filter +can be used only as non-last filter in the filter chain. +.IP +Currently only simple byte-wise delta calculation is supported. It can +be useful when compressing e.g. uncompressed bitmap images or uncompressed +PCM audio. However, special purpose algorithms may give significantly better +results than Delta + LZMA2. This is true especially with audio, which +compresses faster and better e.g. with FLAC. +.IP +Supported +.IR options : +.RS +.TP +.BI dist= distance +Specify the +.I distance +of the delta calculation as bytes. +.I distance +must be 1\-256. The default is 1. +.IP +For example, with +.B dist=2 +and eight-byte input A1 B1 A2 B3 A3 B5 A4 B7, the output will be +A1 B1 01 02 01 02 01 02. +.RE +.SS "Other options" +.TP +.BR \-q ", " \-\-quiet +Suppress warnings and notices. Specify this twice to suppress errors too. +This option has no effect on the exit status. That is, even if a warning +was suppressed, the exit status to indicate a warning is still used. +.TP +.BR \-v ", " \-\-verbose +Be verbose. 
If standard error is connected to a terminal, +.B xz +will display a progress indicator. +Specifying +.B \-\-verbose +twice will give even more verbose output (useful mostly for debugging). +.IP +The progress indicator shows the following information: +.RS +.IP \(bu 3 +Completion percentage is shown if the size of the input file is known. +That is, percentage cannot be shown in pipes. +.IP \(bu 3 +Amount of compressed data produced (compressing) or consumed (decompressing). +.IP \(bu 3 +Amount of uncompressed data consumed (compressing) or produced +(decompressing). +.IP \(bu 3 +Compression ratio, which is calculated by dividing the amount of +compressed data processed so far by the amount of uncompressed data +processed so far. +.IP \(bu 3 +Compression or decompression speed. This is measured as the amount of +uncompressed data consumed (compression) or produced (decompression) +per second. It is shown once a few seconds have passed since +.B xz +started processing the file. +.IP \(bu 3 +Elapsed time or estimated time remaining. +Elapsed time is displayed in the format M:SS or H:MM:SS. +The estimated remaining time is displayed in a less precise format +which never has colons, for example, 2 min 30 s. The estimate can +be shown only when the size of the input file is known and a couple of +seconds have already passed since +.B xz +started processing the file. +.RE +.IP +When standard error is not a terminal, +.B \-\-verbose +will make +.B xz +print the filename, compressed size, uncompressed size, compression ratio, +speed, and elapsed time on a single line to standard error after +compressing or decompressing the file. If operating took at least a few +seconds, also the speed and elapsed time are printed. If the operation +didn't finish, for example due to user interruption, also the completion +percentage is printed if the size of the input file is known. 
+.TP +.BR \-Q ", " \-\-no\-warn +Don't set the exit status to +.B 2 +even if a condition worth a warning was detected. This option doesn't affect +the verbosity level, thus both +.B \-\-quiet +and +.B \-\-no\-warn +have to be used to not display warnings and to not alter the exit status. +.TP +.B \-\-robot +Print messages in a machine-parsable format. This is intended to ease +writing frontends that want to use +.B xz +instead of liblzma, which may be the case with various scripts. The output +with this option enabled is meant to be stable across +.B xz +releases. Currently +.B \-\-robot +is implemented only for +.B \-\-info\-memory +and +.BR \-\-version , +but the idea is to make it usable for actual compression +and decompression too. +.TP +.BR \-\-info\-memory +Display the current memory usage limit in human-readable format on +a single line, and exit successfully. To see how much RAM +.B xz +thinks your system has, use +.BR "\-\-memory=100% \-\-info\-memory" . +To get machine-parsable output +(memory usage limit as bytes without thousand separators), specify +.B \-\-robot +before +.BR \-\-info\-memory . +.TP +.BR \-h ", " \-\-help +Display a help message describing the most commonly used options, +and exit successfully. +.TP +.BR \-H ", " \-\-long\-help +Display a help message describing all features of +.BR xz , +and exit successfully. +.TP +.BR \-V ", " \-\-version +Display the version number of +.B xz +and liblzma in human readable format. To get machine-parsable output, specify +.B \-\-robot +before +.BR \-\-version . +.SH "EXIT STATUS" +.TP +.B 0 +All is good. +.TP +.B 1 +An error occurred. +.TP +.B 2 +Something worth a warning occurred, but no actual errors occurred. +.PP +Notices (not warnings or errors) printed on standard error don't affect +the exit status. +.SH ENVIRONMENT +.TP +.B XZ_OPT +A space-separated list of options is parsed from +.B XZ_OPT +before parsing the options given on the command line.
Note that only +options are parsed from +.BR XZ_OPT ; +all non-options are silently ignored. Parsing is done with +.BR getopt_long (3) +which is used also for the command line arguments. +.SH "LZMA UTILS COMPATIBILITY" +The command line syntax of +.B xz +is practically a superset of +.BR lzma , +.BR unlzma , +and +.BR lzcat +as found from LZMA Utils 4.32.x. In most cases, it is possible to replace +LZMA Utils with XZ Utils without breaking existing scripts. There are some +incompatibilities though, which may sometimes cause problems. +.SS "Compression preset levels" +The numbering of the compression level presets is not identical in +.B xz +and LZMA Utils. +The most important difference is how dictionary sizes are mapped to different +presets. Dictionary size is roughly equal to the decompressor memory usage. +.RS +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils +\-1;64 KiB;64 KiB +\-2;512 KiB;1 MiB +\-3;1 MiB;512 KiB +\-4;2 MiB;1 MiB +\-5;4 MiB;2 MiB +\-6;8 MiB;4 MiB +\-7;16 MiB;8 MiB +\-8;32 MiB;16 MiB +\-9;64 MiB;32 MiB +.TE +.RE +.PP +The dictionary size differences affect the compressor memory usage too, +but there are some other differences between LZMA Utils and XZ Utils, which +make the difference even bigger: +.RS +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils 4.32.x +\-1;2 MiB;2 MiB +\-2;5 MiB;12 MiB +\-3;13 MiB;12 MiB +\-4;25 MiB;16 MiB +\-5;48 MiB;26 MiB +\-6;94 MiB;45 MiB +\-7;186 MiB;83 MiB +\-8;370 MiB;159 MiB +\-9;674 MiB;311 MiB +.TE +.RE +.PP +The default preset level in LZMA Utils is +.B \-7 +while in XZ Utils it is +.BR \-6 , +so both use 8 MiB dictionary by default. +.SS "Streamed vs. non-streamed .lzma files" +Uncompressed size of the file can be stored in the +.B .lzma +header. LZMA Utils does that when compressing regular files. +The alternative is to mark that uncompressed size is unknown and +use end of payload marker to indicate where the decompressor should stop. 
+LZMA Utils uses this method when uncompressed size isn't known, which is +the case for example in pipes. +.PP +.B xz +supports decompressing +.B .lzma +files with or without end of payload marker, but all +.B .lzma +files created by +.B xz +will use end of payload marker and have uncompressed size marked as unknown +in the +.B .lzma +header. This may be a problem in some (uncommon) situations. For example, a +.B .lzma +decompressor in an embedded device might work only with files that have known +uncompressed size. If you hit this problem, you need to use LZMA Utils or +LZMA SDK to create +.B .lzma +files with known uncompressed size. +.SS "Unsupported .lzma files" +The +.B .lzma +format allows +.I lc +values up to 8, and +.I lp +values up to 4. LZMA Utils can decompress files with any +.I lc +and +.IR lp , +but always creates files with +.B lc=3 +and +.BR lp=0 . +Creating files with other +.I lc +and +.I lp +is possible with +.B xz +and with LZMA SDK. +.PP +The implementation of the LZMA1 filter in liblzma requires +that the sum of +.I lc +and +.I lp +must not exceed 4. Thus, +.B .lzma +files which exceed this limitation, cannot be decompressed with +.BR xz . +.PP +LZMA Utils creates only +.B .lzma +files which have dictionary size of +.RI "2^" n +(a power of 2), but accepts files with any dictionary size. +liblzma accepts only +.B .lzma +files which have dictionary size of +.RI "2^" n +or +.RI "2^" n " + 2^(" n "\-1)." +This is to decrease false positives when detecting +.B .lzma +files. +.PP +These limitations shouldn't be a problem in practice, since practically all +.B .lzma +files have been compressed with settings that liblzma will accept. +.SS "Trailing garbage" +When decompressing, LZMA Utils silently ignore everything after the first +.B .lzma +stream. In most situations, this is a bug. This also means that LZMA Utils +don't support decompressing concatenated +.B .lzma +files. 
+.PP +If there is data left after the first +.B .lzma +stream, +.B xz +considers the file to be corrupt. This may break obscure scripts which have +assumed that trailing garbage is ignored. +.SH NOTES +.SS Compressed output may vary +The exact compressed output produced from the same uncompressed input file +may vary between XZ Utils versions even if compression options are identical. +This is because the encoder can be improved (faster or better compression) +without affecting the file format. The output can vary even between different +builds of the same XZ Utils version, if different build options are used. +.PP +The above means that implementing +.B \-\-rsyncable +to create rsyncable +.B .xz +files is not going to happen without freezing a part of the encoder +implementation, which can then be used with +.BR \-\-rsyncable . +.SS Embedded .xz decompressors +Embedded +.B .xz +decompressor implementations like XZ Embedded don't necessarily support files +created with +.I check +types other than +.B none +and +.BR crc32 . +Since the default is \fB\-\-check=\fIcrc64\fR, you must use +.B \-\-check=none +or +.B \-\-check=crc32 +when creating files for embedded systems. +.PP +Outside embedded systems, all +.B .xz +format decompressors support all the +.I check +types, or at least are able to decompress the file without verifying the +integrity check if the particular +.I check +is not supported. +.PP +XZ Embedded supports BCJ filters, but only with the default start offset. 
+.SH "SEE ALSO" +.BR xzdec (1), +.BR gzip (1), +.BR bzip2 (1) +.PP +XZ Utils: <http://tukaani.org/xz/> +.br +XZ Embedded: <http://tukaani.org/xz/embedded.html> +.br +LZMA SDK: <http://7-zip.org/sdk.html> Index: contrib/xz/src/xz/list.h =================================================================== --- contrib/xz/src/xz/list.h (revision 0) +++ contrib/xz/src/xz/list.h (revision 0) @@ -0,0 +1,18 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.h +/// \brief List information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief List information about the given .xz file +extern void list_file(const char *filename); + + +/// \brief Show the totals after all files have been listed +extern void list_totals(void); Index: contrib/xz/src/xz/util.h =================================================================== --- contrib/xz/src/xz/util.h (revision 0) +++ contrib/xz/src/xz/util.h (revision 0) @@ -0,0 +1,129 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.h +/// \brief Miscellaneous utility functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Safe malloc() that never returns NULL +/// +/// \note xmalloc(), xrealloc(), and xstrdup() must not be used when +/// there are files open for writing, that should be cleaned up +/// before exiting.
+#define xmalloc(size) xrealloc(NULL, size) + + +/// \brief Safe realloc() that never returns NULL +extern void *xrealloc(void *ptr, size_t size); + + +/// \brief Safe strdup() that never returns NULL +extern char *xstrdup(const char *src); + + +/// \brief Fancy version of strtoull() +/// +/// \param name Name of the option to show in case of an error +/// \param value String containing the number to be parsed; may +/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" +/// \param min Minimum valid value +/// \param max Maximum valid value +/// +/// \return Parsed value that is in the range [min, max]. Does not return +/// if an error occurs. +/// +extern uint64_t str_to_uint64(const char *name, const char *value, + uint64_t min, uint64_t max); + + +/// \brief Round an integer up to the next full MiB and convert to MiB +/// +/// This is used when printing memory usage and limit. +extern uint64_t round_up_to_mib(uint64_t n); + + +/// \brief Convert uint64_t to a string +/// +/// Convert the given value to a string with locale-specific thousand +/// separators, if supported by the snprintf() implementation. The string +/// is stored into an internal static buffer indicated by the slot argument. +/// A pointer to the selected buffer is returned. +/// +/// This function exists, because non-POSIX systems don't support thousand +/// separator in format strings. Solving the problem in a simple way doesn't +/// work, because it breaks gettext (specifically, the xgettext tool). +extern const char *uint64_to_str(uint64_t value, uint32_t slot); + + +enum nicestr_unit { + NICESTR_B, + NICESTR_KIB, + NICESTR_MIB, + NICESTR_GIB, + NICESTR_TIB, +}; + + +/// \brief Convert uint64_t to a nice human readable string +/// +/// This is like uint64_to_str() but uses B, KiB, MiB, GiB, or TiB suffix +/// and optionally includes the exact size in parenthesis. +/// +/// \param value Value to be printed +/// \param unit_min Smallest unit to use. This and unit_max are used +/// e.g. 
when showing the progress indicator to force +/// the unit to MiB. +/// \param unit_max Biggest unit to use. assert(unit_min <= unit_max). +/// \param always_also_bytes +/// Show also the exact byte value in parenthesis +/// if the nicely formatted string uses bigger unit +/// than bytes. +/// \param slot Which static buffer to use to hold the string. +/// This is shared with uint64_to_str(). +/// +/// \return Pointer to statically allocated buffer containing the string. +/// +/// \note This uses double_to_str() internally so the static buffer +/// in double_to_str() will be overwritten. +/// +extern const char *uint64_to_nicestr(uint64_t value, + enum nicestr_unit unit_min, enum nicestr_unit unit_max, + bool always_also_bytes, uint32_t slot); + + +/// \brief Convert double to a string with one decimal place +/// +/// This is like uint64_to_str() except that this converts a double and +/// uses exactly one decimal place. +extern const char *double_to_str(double value); + + +/// \brief Wrapper for snprintf() to help constructing a string in pieces +/// +/// A maximum of *left bytes is written starting from *pos. *pos and *left +/// are updated accordingly. +extern void my_snprintf(char **pos, size_t *left, const char *fmt, ...) + lzma_attribute((format(printf, 3, 4))); + + +/// \brief Check if filename is empty and print an error message +extern bool is_empty_filename(const char *filename); + + +/// \brief Test if stdin is a terminal +/// +/// If stdin is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdin(void); + + +/// \brief Test if stdout is a terminal +/// +/// If stdout is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. 
+extern bool is_tty_stdout(void); Index: contrib/xz/src/xz/main.c =================================================================== --- contrib/xz/src/xz/main.c (revision 0) +++ contrib/xz/src/xz/main.c (revision 0) @@ -0,0 +1,272 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.c +/// \brief main() +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include + + +/// Exit status to use. This can be changed with set_exit_status(). +static enum exit_status_type exit_status = E_SUCCESS; + +/// True if --no-warn is specified. When this is true, we don't set +/// the exit status to E_WARNING when something worth a warning happens. +static bool no_warn = false; + + +extern void +set_exit_status(enum exit_status_type new_status) +{ + assert(new_status == E_WARNING || new_status == E_ERROR); + + if (exit_status != E_ERROR) + exit_status = new_status; + + return; +} + + +extern void +set_exit_no_warn(void) +{ + no_warn = true; + return; +} + + +static const char * +read_name(const args_info *args) +{ + // FIXME: Maybe we should have some kind of memory usage limit here + // like the tool has for the actual compression and decompression. + // Giving some huge text file with --files0 makes us to read the + // whole file in RAM. + static char *name = NULL; + static size_t size = 256; + + // Allocate the initial buffer. This is never freed, since after it + // is no longer needed, the program exits very soon. It is safe to + // use xmalloc() and xrealloc() in this function, because while + // executing this function, no files are open for writing, and thus + // there's no need to cleanup anything before exiting. 
+ if (name == NULL) + name = xmalloc(size); + + // Write position in name + size_t pos = 0; + + // Read one character at a time into name. + while (!user_abort) { + const int c = fgetc(args->files_file); + + if (ferror(args->files_file)) { + // Take care of EINTR since we have established + // the signal handlers already. + if (errno == EINTR) + continue; + + message_error(_("%s: Error reading filenames: %s"), + args->files_name, strerror(errno)); + return NULL; + } + + if (feof(args->files_file)) { + if (pos != 0) + message_error(_("%s: Unexpected end of input " + "when reading filenames"), + args->files_name); + + return NULL; + } + + if (c == args->files_delim) { + // We allow consecutive newline (--files) or '\0' + // characters (--files0), and ignore such empty + // filenames. + if (pos == 0) + continue; + + // A non-empty name was read. Terminate it with '\0' + // and return it. + name[pos] = '\0'; + return name; + } + + if (c == '\0') { + // A null character was found when using --files, + // which expects plain text input separated with + // newlines. + message_error(_("%s: Null character found when " + "reading filenames; maybe you meant " + "to use `--files0' instead " + "of `--files'?"), args->files_name); + return NULL; + } + + name[pos++] = c; + + // Allocate more memory if needed. There must always be space + // at least for one character to allow terminating the string + // with '\0'. + if (pos == size) { + size *= 2; + name = xrealloc(name, size); + } + } + + return NULL; +} + + +int +main(int argc, char **argv) +{ + // Set up the progname variable. + tuklib_progname_init(argv); + + // Initialize the file I/O. This makes sure that + // stdin, stdout, and stderr are something valid. + io_init(); + + // Set up the locale and message translations. + tuklib_gettext_init(PACKAGE, LOCALEDIR); + + // Initialize handling of error/warning/other messages. + message_init(); + + // Set hardware-dependent default values. 
These can be overridden + // on the command line, thus this must be done before args_parse(). + hardware_init(); + + // Parse the command line arguments and get an array of filenames. + // This doesn't return if something is wrong with the command line + // arguments. If there are no arguments, one filename ("-") is still + // returned to indicate stdin. + args_info args; + args_parse(&args, argc, argv); + + if (opt_mode != MODE_LIST && opt_robot) + message_fatal(_("Compression and decompression with --robot " + "are not supported yet.")); + + // Tell the message handling code how many input files there are if + // we know it. This way the progress indicator can show it. + if (args.files_name != NULL) + message_set_files(0); + else + message_set_files(args.arg_count); + + // Refuse to write compressed data to standard output if it is + // a terminal. + if (opt_mode == MODE_COMPRESS) { + if (opt_stdout || (args.arg_count == 1 + && strcmp(args.arg_names[0], "-") == 0)) { + if (is_tty_stdout()) { + message_try_help(); + tuklib_exit(E_ERROR, E_ERROR, false); + } + } + } + + // Set up the signal handlers. We don't need these before we + // start the actual action and not in --list mode, so this is + // done after parsing the command line arguments. + // + // It's good to keep signal handlers in normal compression and + // decompression modes even when only writing to stdout, because + // we might need to restore O_APPEND flag on stdout before exiting. + // In --test mode, signal handlers aren't really needed, but let's + // keep them there for consistency with normal decompression. + if (opt_mode != MODE_LIST) + signals_init(); + + // coder_run() handles compression, decompression, and testing. + // list_file() is for --list. + void (*run)(const char *filename) = opt_mode == MODE_LIST + ? &list_file : &coder_run; + + // Process the files given on the command line. Note that if no names + // were given, args_parse() gave us a fake "-" filename.
+ for (size_t i = 0; i < args.arg_count && !user_abort; ++i) { + if (strcmp("-", args.arg_names[i]) == 0) { + // Processing from stdin to stdout. Check that we + // aren't writing compressed data to a terminal or + // reading it from a terminal. + if (opt_mode == MODE_COMPRESS) { + if (is_tty_stdout()) + continue; + } else if (is_tty_stdin()) { + continue; + } + + // It doesn't make sense to compress data from stdin + // if we are supposed to read filenames from stdin + // too (enabled with --files or --files0). + if (args.files_name == stdin_filename) { + message_error(_("Cannot read data from " + "standard input when " + "reading filenames " + "from standard input")); + continue; + } + + // Replace the "-" with a special pointer, which is + // recognized by coder_run() and other things. + // This way error messages get a proper filename + // string and the code still knows that it is + // handling the special case of stdin. + args.arg_names[i] = (char *)stdin_filename; + } + + // Do the actual compression or decompression. + run(args.arg_names[i]); + } + + // If --files or --files0 was used, process the filenames from the + // given file or stdin. Note that here we don't consider "-" to + // indicate stdin like we do with the command line arguments. + if (args.files_name != NULL) { + // read_name() checks for user_abort so we don't need to + // check it as loop termination condition. + while (true) { + const char *name = read_name(&args); + if (name == NULL) + break; + + // read_name() doesn't return empty names. + assert(name[0] != '\0'); + run(name); + } + + if (args.files_name != stdin_filename) + (void)fclose(args.files_file); + } + + // All files have now been handled. If in --list mode, display + // the totals before exiting. We don't have signal handlers + // enabled in --list mode, so we don't need to check user_abort. 
+ if (opt_mode == MODE_LIST) { + assert(!user_abort); + list_totals(); + } + + // If we have got a signal, raise it to kill the program instead + // of calling tuklib_exit(). + signals_exit(); + + // Suppress the exit status indicating a warning if --no-warn + // was specified. + if (exit_status == E_WARNING && no_warn) + exit_status = E_SUCCESS; + + tuklib_exit(exit_status, E_ERROR, + message_verbosity_get() != V_SILENT); +} Index: contrib/xz/src/xz/message.c =================================================================== --- contrib/xz/src/xz/message.c (revision 0) +++ contrib/xz/src/xz/message.c (revision 0) @@ -0,0 +1,1189 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.c +/// \brief Printing messages +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#ifdef HAVE_SYS_TIME_H +# include +#endif + +#include + + +/// Number of the current file +static unsigned int files_pos = 0; + +/// Total number of input files; zero if unknown. +static unsigned int files_total; + +/// Verbosity level +static enum message_verbosity verbosity = V_WARNING; + +/// Filename which we will print with the verbose messages +static const char *filename; + +/// True once a filename has been printed to stderr as part of progress +/// message. If automatic progress updating isn't enabled, this becomes true +/// after the first progress message has been printed due to user sending +/// SIGINFO, SIGUSR1, or SIGALRM. Once this variable is true, we will print +/// an empty line before the next filename to make the output more readable. +static bool first_filename_printed = false; + +/// This is set to true when we have printed the current filename to stderr +/// as part of a progress message.
This variable is useful only if not +/// updating progress automatically: if user sends many SIGINFO, SIGUSR1, or +/// SIGALRM signals, we won't print the name of the same file multiple times. +static bool current_filename_printed = false; + +/// True if we should print progress indicator and update it automatically +/// if also verbose >= V_VERBOSE. +static bool progress_automatic; + +/// True if message_progress_start() has been called but +/// message_progress_end() hasn't been called yet. +static bool progress_started = false; + +/// This is true when a progress message was printed and the cursor is still +/// on the same line with the progress message. In that case, a newline has +/// to be printed before any error messages. +static bool progress_active = false; + +/// Pointer to lzma_stream used to do the encoding or decoding. +static lzma_stream *progress_strm; + +/// Expected size of the input stream is needed to show completion percentage +/// and estimate remaining time. +static uint64_t expected_in_size; + +/// Time when we started processing the file +static uint64_t start_time; + + +// Use alarm() and SIGALRM when they are supported. This has two minor +// advantages over the alternative of polling gettimeofday(): +// - It is possible for the user to send SIGINFO, SIGUSR1, or SIGALRM to +// get intermediate progress information even when --verbose wasn't used +// or stderr is not a terminal. +// - alarm() + SIGALRM seems to have slightly less overhead than polling +// gettimeofday(). +#ifdef SIGALRM + +/// The signal handler for SIGALRM sets this to true. It is set back to false +/// once the progress message has been updated. +static volatile sig_atomic_t progress_needs_updating = false; + +/// Signal handler for SIGALRM +static void +progress_signal_handler(int sig lzma_attribute((unused))) +{ + progress_needs_updating = true; + return; +} + +#else + +/// This is true when progress message printing is wanted. 
Using the same +/// variable name as above to avoid some ifdefs. +static bool progress_needs_updating = false; + +/// Elapsed time when the next progress message update should be done. +static uint64_t progress_next_update; + +#endif + + +/// Get the current time as microseconds since epoch +static uint64_t +my_time(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (uint64_t)(tv.tv_sec) * UINT64_C(1000000) + tv.tv_usec; +} + + +extern void +message_init(void) +{ + // If --verbose is used, we use a progress indicator if and only + // if stderr is a terminal. If stderr is not a terminal, we print + // verbose information only after finishing the file. As a special + // exception, even if --verbose was not used, user can send SIGALRM + // to make us print progress information once without automatic + // updating. + progress_automatic = isatty(STDERR_FILENO); + + // Commented out because COLUMNS is rarely exported to environment. + // Most users have at least 80 columns anyway, let's think something + // fancy here if enough people complain. +/* + if (progress_automatic) { + // stderr is a terminal. Check the COLUMNS environment + // variable to see if the terminal is wide enough. If COLUMNS + // doesn't exist or it has some unparsable value, we assume + // that the terminal is wide enough. + const char *columns_str = getenv("COLUMNS"); + if (columns_str != NULL) { + char *endptr; + const long columns = strtol(columns_str, &endptr, 10); + if (*endptr != '\0' || columns < 80) + progress_automatic = false; + } + } +*/ + +#ifdef SIGALRM + // At least DJGPP lacks SA_RESTART. It's not essential for us (the + // rest of the code can handle interrupted system calls), so just + // define it zero. +# ifndef SA_RESTART +# define SA_RESTART 0 +# endif + // Establish the signal handlers which set a flag to tell us that + // progress info should be updated. Since these signals don't + // require any quick action, we set SA_RESTART. 
+ static const int sigs[] = { +#ifdef SIGALRM + SIGALRM, +#endif +#ifdef SIGINFO + SIGINFO, +#endif +#ifdef SIGUSR1 + SIGUSR1, +#endif + }; + + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sa.sa_handler = &progress_signal_handler; + + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) + if (sigaction(sigs[i], &sa, NULL)) + message_signal_handler(); +#endif + + return; +} + + +extern void +message_verbosity_increase(void) +{ + if (verbosity < V_DEBUG) + ++verbosity; + + return; +} + + +extern void +message_verbosity_decrease(void) +{ + if (verbosity > V_SILENT) + --verbosity; + + return; +} + + +extern enum message_verbosity +message_verbosity_get(void) +{ + return verbosity; +} + + +extern void +message_set_files(unsigned int files) +{ + files_total = files; + return; +} + + +/// Prints the name of the current file if it hasn't been printed already, +/// except if we are processing exactly one stream from stdin to stdout. +/// I think it looks nicer to not print "(stdin)" when --verbose is used +/// in a pipe and no other files are processed. +static void +print_filename(void) +{ + if (files_total != 1 || filename != stdin_filename) { + signals_block(); + + FILE *file = opt_mode == MODE_LIST ? stdout : stderr; + + // If a file was already processed, put an empty line + // before the next filename to improve readability. + if (first_filename_printed) + fputc('\n', file); + + first_filename_printed = true; + current_filename_printed = true; + + // If we don't know how many files there will be due + // to usage of --files or --files0. + if (files_total == 0) + fprintf(file, "%s (%u)\n", filename, + files_pos); + else + fprintf(file, "%s (%u/%u)\n", filename, + files_pos, files_total); + + signals_unblock(); + } + + return; +} + + +extern void +message_filename(const char *src_name) +{ + // Start numbering the files starting from one. 
+ ++files_pos; + filename = src_name; + + if (verbosity >= V_VERBOSE + && (progress_automatic || opt_mode == MODE_LIST)) + print_filename(); + else + current_filename_printed = false; + + return; +} + + +extern void +message_progress_start(lzma_stream *strm, uint64_t in_size) +{ + // Store the pointer to the lzma_stream used to do the coding. + // It is needed to find out the position in the stream. + progress_strm = strm; + + // Store the processing start time of the file and its expected size. + // If we aren't printing any statistics, then these are unused. But + // since it is possible that the user sends us a signal to show + // statistics, we need to have these available anyway. + start_time = my_time(); + expected_in_size = in_size; + + // Indicate that progress info may need to be printed before + // printing error messages. + progress_started = true; + + // If progress indicator is wanted, print the filename and possibly + // the file count now. + if (verbosity >= V_VERBOSE && progress_automatic) { + // Start the timer to display the first progress message + // after one second. An alternative would be to show the + // first message almost immediately, but delaying by one + // second looks better to me, since extremely early + // progress info is pretty much useless. +#ifdef SIGALRM + // First disable a possibly existing alarm. + alarm(0); + progress_needs_updating = false; + alarm(1); +#else + progress_needs_updating = true; + progress_next_update = 1000000; +#endif + } + + return; +} + + +/// Make the string indicating completion percentage. +static const char * +progress_percentage(uint64_t in_pos) +{ + // If the size of the input file is unknown or the size told us is + // clearly wrong since we have processed more data than the alleged + // size of the file, show a static string indicating that we have + // no idea of the completion percentage. 
+ if (expected_in_size == 0 || in_pos > expected_in_size) + return "--- %"; + + // Never show 100.0 % before we actually are finished. + double percentage = (double)(in_pos) / (double)(expected_in_size) + * 99.9; + + static char buf[sizeof("99.9 %")]; + snprintf(buf, sizeof(buf), "%.1f %%", percentage); + + return buf; +} + + +/// Make the string containing the amount of input processed, amount of +/// output produced, and the compression ratio. +static const char * +progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final) +{ + // This is enough to hold sizes up to about 99 TiB if thousand + // separator is used, or about 1 PiB without thousand separator. + // After that the progress indicator will look a bit silly, since + // the compression ratio no longer fits with three decimal places. + static char buf[36]; + + char *pos = buf; + size_t left = sizeof(buf); + + // Print the sizes. If this is the final message, use more reasonable + // units than MiB if the file was small. + const enum nicestr_unit unit_min = final ? NICESTR_B : NICESTR_MIB; + my_snprintf(&pos, &left, "%s / %s", + uint64_to_nicestr(compressed_pos, + unit_min, NICESTR_TIB, false, 0), + uint64_to_nicestr(uncompressed_pos, + unit_min, NICESTR_TIB, false, 1)); + + // Avoid division by zero. If we cannot calculate the ratio, set + // it to some nice number greater than 10.0 so that it gets caught + // in the next if-clause. + const double ratio = uncompressed_pos > 0 + ? (double)(compressed_pos) / (double)(uncompressed_pos) + : 16.0; + + // If the ratio is very bad, just indicate that it is greater than + // 9.999. This way the length of the ratio field stays fixed. + if (ratio > 9.999) + snprintf(pos, left, " > %.3f", 9.999); + else + snprintf(pos, left, " = %.3f", ratio); + + return buf; +} + + +/// Make the string containing the processing speed of uncompressed data.
+static const char * +progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) +{ + // Don't print the speed immediately, since the early values look + // somewhat random. + if (elapsed < 3000000) + return ""; + + static const char unit[][8] = { + "KiB/s", + "MiB/s", + "GiB/s", + }; + + size_t unit_index = 0; + + // Calculate the speed as KiB/s. + double speed = (double)(uncompressed_pos) + / ((double)(elapsed) * (1024.0 / 1e6)); + + // Adjust the unit of the speed if needed. + while (speed > 999.0) { + speed /= 1024.0; + if (++unit_index == ARRAY_SIZE(unit)) + return ""; // Way too fast ;-) + } + + // Use decimal point only if the number is small. Examples: + // - 0.1 KiB/s + // - 9.9 KiB/s + // - 99 KiB/s + // - 999 KiB/s + static char buf[sizeof("999 GiB/s")]; + snprintf(buf, sizeof(buf), "%.*f %s", + speed > 9.9 ? 0 : 1, speed, unit[unit_index]); + return buf; +} + + +/// Make a string indicating elapsed or remaining time. The format is either +/// M:SS or H:MM:SS depending on if the time is an hour or more. +static const char * +progress_time(uint64_t useconds) +{ + // 9999 hours = 416 days + static char buf[sizeof("9999:59:59")]; + + uint32_t seconds = useconds / 1000000; + + // Don't show anything if the time is zero or ridiculously big. + if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59) + return ""; + + uint32_t minutes = seconds / 60; + seconds %= 60; + + if (minutes >= 60) { + const uint32_t hours = minutes / 60; + minutes %= 60; + snprintf(buf, sizeof(buf), + "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, + hours, minutes, seconds); + } else { + snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, + minutes, seconds); + } + + return buf; +} + + +/// Return a string containing estimated remaining time when +/// reasonably possible. +static const char * +progress_remaining(uint64_t in_pos, uint64_t elapsed) +{ + // Don't show the estimated remaining time when it wouldn't + // make sense: + // - Input size is unknown. 
+ // - Input has grown bigger since we started (de)compressing. + // - We haven't processed much data yet, so estimate would be + // too inaccurate. + // - Only a few seconds have passed since we started (de)compressing, + // so estimate would be too inaccurate. + if (expected_in_size == 0 || in_pos > expected_in_size + || in_pos < (UINT64_C(1) << 19) || elapsed < 8000000) + return ""; + + // Calculate the estimate. Don't give an estimate of zero seconds, + // since it is possible that all the input has been already passed + // to the library, but there is still quite a bit of output pending. + uint32_t remaining = (double)(expected_in_size - in_pos) + * ((double)(elapsed) / 1e6) / (double)(in_pos); + if (remaining < 1) + remaining = 1; + + static char buf[sizeof("9 h 55 min")]; + + // Select appropriate precision for the estimated remaining time. + if (remaining <= 10) { + // A maximum of 10 seconds remaining. + // Show the number of seconds as is. + snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); + + } else if (remaining <= 50) { + // A maximum of 50 seconds remaining. + // Round up to the next multiple of five seconds. + remaining = (remaining + 4) / 5 * 5; + snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); + + } else if (remaining <= 590) { + // A maximum of 9 minutes and 50 seconds remaining. + // Round up to the next multiple of ten seconds. + remaining = (remaining + 9) / 10 * 10; + snprintf(buf, sizeof(buf), "%" PRIu32 " min %" PRIu32 " s", + remaining / 60, remaining % 60); + + } else if (remaining <= 59 * 60) { + // A maximum of 59 minutes remaining. + // Round up to the next multiple of a minute. + remaining = (remaining + 59) / 60; + snprintf(buf, sizeof(buf), "%" PRIu32 " min", remaining); + + } else if (remaining <= 9 * 3600 + 50 * 60) { + // A maximum of 9 hours and 50 minutes left. + // Round up to the next multiple of ten minutes.
+ remaining = (remaining + 599) / 600 * 10; + snprintf(buf, sizeof(buf), "%" PRIu32 " h %" PRIu32 " min", + remaining / 60, remaining % 60); + + } else if (remaining <= 23 * 3600) { + // A maximum of 23 hours remaining. + // Round up to the next multiple of an hour. + remaining = (remaining + 3599) / 3600; + snprintf(buf, sizeof(buf), "%" PRIu32 " h", remaining); + + } else if (remaining <= 9 * 24 * 3600 + 23 * 3600) { + // A maximum of 9 days and 23 hours remaining. + // Round up to the next multiple of an hour. + remaining = (remaining + 3599) / 3600; + snprintf(buf, sizeof(buf), "%" PRIu32 " d %" PRIu32 " h", + remaining / 24, remaining % 24); + + } else if (remaining <= 999 * 24 * 3600) { + // A maximum of 999 days remaining. ;-) + // Round up to the next multiple of a day. + remaining = (remaining + 24 * 3600 - 1) / (24 * 3600); + snprintf(buf, sizeof(buf), "%" PRIu32 " d", remaining); + + } else { + // The estimated remaining time is too big. Don't show it. + return ""; + } + + return buf; +} + + +/// Calculate the elapsed time as microseconds. +static uint64_t +progress_elapsed(void) +{ + return my_time() - start_time; +} + + +/// Get information about position in the stream. This is currently simple, +/// but it will become more complicated once we have multithreading support. +static void +progress_pos(uint64_t *in_pos, + uint64_t *compressed_pos, uint64_t *uncompressed_pos) +{ + *in_pos = progress_strm->total_in; + + if (opt_mode == MODE_COMPRESS) { + *compressed_pos = progress_strm->total_out; + *uncompressed_pos = progress_strm->total_in; + } else { + *compressed_pos = progress_strm->total_in; + *uncompressed_pos = progress_strm->total_out; + } + + return; +} + + +extern void +message_progress_update(void) +{ + if (!progress_needs_updating) + return; + + // Calculate how long we have been processing this file. 
+ const uint64_t elapsed = progress_elapsed(); + +#ifndef SIGALRM + if (progress_next_update > elapsed) + return; + + progress_next_update = elapsed + 1000000; +#endif + + // Get our current position in the stream. + uint64_t in_pos; + uint64_t compressed_pos; + uint64_t uncompressed_pos; + progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); + + // Block signals so that fprintf() doesn't get interrupted. + signals_block(); + + // Print the filename if it hasn't been printed yet. + if (!current_filename_printed) + print_filename(); + + // Print the actual progress message. The idea is that there is at + // least three spaces between the fields in typical situations, but + // even in rare situations there is at least one space. + fprintf(stderr, "\r %6s %35s %9s %10s %10s\r", + progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, false), + progress_speed(uncompressed_pos, elapsed), + progress_time(elapsed), + progress_remaining(in_pos, elapsed)); + +#ifdef SIGALRM + // Updating the progress info was finished. Reset + // progress_needs_updating to wait for the next SIGALRM. + // + // NOTE: This has to be done before alarm(1) or with (very) bad + // luck we could be setting this to false after the alarm has already + // been triggered. + progress_needs_updating = false; + + if (verbosity >= V_VERBOSE && progress_automatic) { + // Mark that the progress indicator is active, so if an error + // occurs, the error message gets printed cleanly. + progress_active = true; + + // Restart the timer so that progress_needs_updating gets + // set to true after about one second. + alarm(1); + } else { + // The progress message was printed because user had sent us + // SIGALRM. In this case, each progress message is printed + // on its own line. + fputc('\n', stderr); + } +#else + // When SIGALRM isn't supported and we get here, it's always due to + // automatic progress update. We set progress_active here too like + // described above. 
+ assert(verbosity >= V_VERBOSE); + assert(progress_automatic); + progress_active = true; +#endif + + signals_unblock(); + + return; +} + + +static void +progress_flush(bool finished) +{ + if (!progress_started || verbosity < V_VERBOSE) + return; + + uint64_t in_pos; + uint64_t compressed_pos; + uint64_t uncompressed_pos; + progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); + + // Avoid printing intermediate progress info if some error occurs + // in the beginning of the stream. (If something goes wrong later in + // the stream, it is sometimes useful to tell the user where the + // error approximately occurred, especially if the error occurs + // after a time-consuming operation.) + if (!finished && !progress_active + && (compressed_pos == 0 || uncompressed_pos == 0)) + return; + + progress_active = false; + + const uint64_t elapsed = progress_elapsed(); + + signals_block(); + + // When using the auto-updating progress indicator, the final + // statistics are printed in the same format as the progress + // indicator itself. + if (progress_automatic) { + fprintf(stderr, "\r %6s %35s %9s %10s %10s\n", + finished ? "100 %" : progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, true), + progress_speed(uncompressed_pos, elapsed), + progress_time(elapsed), + finished ? "" : progress_remaining(in_pos, elapsed)); + } else { + // The filename is always printed. + fprintf(stderr, "%s: ", filename); + + // Percentage is printed only if we didn't finish yet. + if (!finished) { + // Don't print the percentage when it isn't known + // (starts with a dash). + const char *percentage = progress_percentage(in_pos); + if (percentage[0] != '-') + fprintf(stderr, "%s, ", percentage); + } + + // Size information is always printed. + fprintf(stderr, "%s", progress_sizes( + compressed_pos, uncompressed_pos, true)); + + // The speed and elapsed time aren't always shown. 
+ const char *speed = progress_speed(uncompressed_pos, elapsed); + if (speed[0] != '\0') + fprintf(stderr, ", %s", speed); + + const char *elapsed_str = progress_time(elapsed); + if (elapsed_str[0] != '\0') + fprintf(stderr, ", %s", elapsed_str); + + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +extern void +message_progress_end(bool success) +{ + assert(progress_started); + progress_flush(success); + progress_started = false; + return; +} + + +static void +vmessage(enum message_verbosity v, const char *fmt, va_list ap) +{ + if (v <= verbosity) { + signals_block(); + + progress_flush(false); + + fprintf(stderr, "%s: ", progname); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + + signals_unblock(); + } + + return; +} + + +extern void +message(enum message_verbosity v, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(v, fmt, ap); + va_end(ap); + return; +} + + +extern void +message_warning(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_WARNING, fmt, ap); + va_end(ap); + + set_exit_status(E_WARNING); + return; +} + + +extern void +message_error(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + set_exit_status(E_ERROR); + return; +} + + +extern void +message_fatal(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + tuklib_exit(E_ERROR, E_ERROR, false); +} + + +extern void +message_bug(void) +{ + message_fatal(_("Internal error (bug)")); +} + + +extern void +message_signal_handler(void) +{ + message_fatal(_("Cannot establish signal handlers")); +} + + +extern const char * +message_strm(lzma_ret code) +{ + switch (code) { + case LZMA_NO_CHECK: + return _("No integrity check; not verifying file integrity"); + + case LZMA_UNSUPPORTED_CHECK: + return _("Unsupported type of integrity check; " + "not verifying file integrity"); + + case LZMA_MEM_ERROR: + return strerror(ENOMEM); + + case LZMA_MEMLIMIT_ERROR: + return _("Memory usage limit reached"); + + case LZMA_FORMAT_ERROR: + return _("File format not recognized"); + + case LZMA_OPTIONS_ERROR: + return _("Unsupported options"); + + case LZMA_DATA_ERROR: + return _("Compressed data is corrupt"); + + case LZMA_BUF_ERROR: + return _("Unexpected end of input"); + + case LZMA_OK: + case LZMA_STREAM_END: + case LZMA_GET_CHECK: + case LZMA_PROG_ERROR: + return _("Internal error (bug)"); + } + + return NULL; +} + + +extern void +message_mem_needed(enum message_verbosity v, uint64_t memusage) +{ + if (v > verbosity) + return; + + // Convert memusage to MiB, rounding up to the next full MiB. + // This way the user can always use the displayed usage as + // the new memory usage limit. (If we rounded to the nearest, + // the user might need to +1 MiB to get high enough limit.) + memusage = round_up_to_mib(memusage); + + // 2^64 with thousand separators + " MiB" suffix + '\0' = 26 + 4 + 1 + char memlimitstr[32]; + + // Show the memory usage limit as MiB unless it is less than 1 MiB. + // This way it's easy to notice errors where one has typed + // --memory=123 instead of --memory=123MiB. 
+ uint64_t memlimit = hardware_memlimit_get(); + if (memlimit < (UINT32_C(1) << 20)) { + snprintf(memlimitstr, sizeof(memlimitstr), "%s B", + uint64_to_str(memlimit, 1)); + } else { + // Round up just like with memusage. If this function is + // called for informational purposes (to just show the + // current usage and limit), we should never show that + // the usage is higher than the limit, which would give + // a false impression that the memory usage limit isn't + // properly enforced. + snprintf(memlimitstr, sizeof(memlimitstr), "%s MiB", + uint64_to_str(round_up_to_mib(memlimit), 1)); + } + + message(v, _("%s MiB of memory is required. The limit is %s."), + uint64_to_str(memusage, 0), memlimitstr); + + return; +} + + +extern void +message_filters(enum message_verbosity v, const lzma_filter *filters) +{ + if (v > verbosity) + return; + + fprintf(stderr, _("%s: Filter chain:"), progname); + + for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + fprintf(stderr, " --"); + + switch (filters[i].id) { + case LZMA_FILTER_LZMA1: + case LZMA_FILTER_LZMA2: { + const lzma_options_lzma *opt = filters[i].options; + const char *mode; + const char *mf; + + switch (opt->mode) { + case LZMA_MODE_FAST: + mode = "fast"; + break; + + case LZMA_MODE_NORMAL: + mode = "normal"; + break; + + default: + mode = "UNKNOWN"; + break; + } + + switch (opt->mf) { + case LZMA_MF_HC3: + mf = "hc3"; + break; + + case LZMA_MF_HC4: + mf = "hc4"; + break; + + case LZMA_MF_BT2: + mf = "bt2"; + break; + + case LZMA_MF_BT3: + mf = "bt3"; + break; + + case LZMA_MF_BT4: + mf = "bt4"; + break; + + default: + mf = "UNKNOWN"; + break; + } + + fprintf(stderr, "lzma%c=dict=%" PRIu32 + ",lc=%" PRIu32 ",lp=%" PRIu32 + ",pb=%" PRIu32 + ",mode=%s,nice=%" PRIu32 ",mf=%s" + ",depth=%" PRIu32, + filters[i].id == LZMA_FILTER_LZMA2 + ? 
'2' : '1', + opt->dict_size, + opt->lc, opt->lp, opt->pb, + mode, opt->nice_len, mf, opt->depth); + break; + } + + case LZMA_FILTER_X86: + fprintf(stderr, "x86"); + break; + + case LZMA_FILTER_POWERPC: + fprintf(stderr, "powerpc"); + break; + + case LZMA_FILTER_IA64: + fprintf(stderr, "ia64"); + break; + + case LZMA_FILTER_ARM: + fprintf(stderr, "arm"); + break; + + case LZMA_FILTER_ARMTHUMB: + fprintf(stderr, "armthumb"); + break; + + case LZMA_FILTER_SPARC: + fprintf(stderr, "sparc"); + break; + + case LZMA_FILTER_DELTA: { + const lzma_options_delta *opt = filters[i].options; + fprintf(stderr, "delta=dist=%" PRIu32, opt->dist); + break; + } + + default: + fprintf(stderr, "UNKNOWN"); + break; + } + } + + fputc('\n', stderr); + return; +} + + +extern void +message_try_help(void) +{ + // Print this with V_WARNING instead of V_ERROR to prevent it from + // showing up when --quiet has been specified. + message(V_WARNING, _("Try `%s --help' for more information."), + progname); + return; +} + + +extern void +message_memlimit(void) +{ + if (opt_robot) + printf("%" PRIu64 "\n", hardware_memlimit_get()); + else + printf(_("%s MiB (%s bytes)\n"), + uint64_to_str( + round_up_to_mib(hardware_memlimit_get()), 0), + uint64_to_str(hardware_memlimit_get(), 1)); + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} + + +extern void +message_version(void) +{ + // It is possible that liblzma version is different than the command + // line tool version, so print both. + if (opt_robot) { + printf("XZ_VERSION=%d\nLIBLZMA_VERSION=%d\n", + LZMA_VERSION, lzma_version_number()); + } else { + printf("xz (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"); + printf("liblzma %s\n", lzma_version_string()); + } + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} + + +extern void +message_help(bool long_help) +{ + printf(_("Usage: %s [OPTION]... 
[FILE]...\n" + "Compress or decompress FILEs in the .xz format.\n\n"), + progname); + + puts(_("Mandatory arguments to long options are mandatory for " + "short options too.\n")); + + if (long_help) + puts(_(" Operation mode:\n")); + + puts(_( +" -z, --compress force compression\n" +" -d, --decompress force decompression\n" +" -t, --test test compressed file integrity\n" +" -l, --list list information about files")); + + if (long_help) + puts(_("\n Operation modifiers:\n")); + + puts(_( +" -k, --keep keep (don't delete) input files\n" +" -f, --force force overwrite of output file and (de)compress links\n" +" -c, --stdout write to standard output and don't delete input files")); + + if (long_help) + puts(_( +" --no-sparse do not create sparse files when decompressing\n" +" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" +" --files=[FILE] read filenames to process from FILE; if FILE is\n" +" omitted, filenames are read from the standard input;\n" +" filenames must be terminated with the newline character\n" +" --files0=[FILE] like --files but use the null character as terminator")); + + if (long_help) { + puts(_("\n Basic file format and compression options:\n")); + puts(_( +" -F, --format=FMT file format to encode or decode; possible values are\n" +" `auto' (default), `xz', `lzma', and `raw'\n" +" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n" +" `sha256', or `none' (use with caution)")); + } + + puts(_( +" -0 .. 
-9 compression preset; 0-2 fast compression, 3-5 good\n" +" compression, 6-9 excellent compression; default is 6")); + + puts(_( +" -e, --extreme use more CPU time when encoding to increase compression\n" +" ratio without increasing memory usage of the decoder")); + + if (long_help) + puts(_( // xgettext:no-c-format +" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n" +" the default setting, which is 40 % of total RAM")); + + if (long_help) { + puts(_( +"\n Custom filter chain for compression (alternative for using presets):")); + +#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ + || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + puts(_( +"\n" +" --lzma1[=OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n" +" --lzma2[=OPTS] more of the following options (valid values; default):\n" +" preset=NUM reset options to preset number NUM (0-9)\n" +" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" +" lc=NUM number of literal context bits (0-4; 3)\n" +" lp=NUM number of literal position bits (0-4; 0)\n" +" pb=NUM number of position bits (0-4; 2)\n" +" mode=MODE compression mode (fast, normal; normal)\n" +" nice=NUM nice length of a match (2-273; 64)\n" +" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" +" depth=NUM maximum search depth; 0=automatic (default)")); +#endif + + puts(_( +"\n" +" --x86[=OPTS] x86 BCJ filter\n" +" --powerpc[=OPTS] PowerPC BCJ filter (big endian only)\n" +" --ia64[=OPTS] IA64 (Itanium) BCJ filter\n" +" --arm[=OPTS] ARM BCJ filter (little endian only)\n" +" --armthumb[=OPTS] ARM-Thumb BCJ filter (little endian only)\n" +" --sparc[=OPTS] SPARC BCJ filter\n" +" Valid OPTS for all BCJ filters:\n" +" start=NUM start offset for conversions (default=0)")); + +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + puts(_( +"\n" +" --delta[=OPTS] Delta filter; valid OPTS (valid values; default):\n" +" dist=NUM distance between bytes being subtracted\n" +" from each 
other (1-256; 1)")); +#endif + +#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK) + puts(_( +"\n" +" --subblock[=OPTS] Subblock filter; valid OPTS (valid values; default):\n" +" size=NUM number of bytes of data per subblock\n" +" (1 - 256Mi; 4Ki)\n" +" rle=NUM run-length encoder chunk size (0-256; 0)")); +#endif + } + + if (long_help) + puts(_("\n Other options:\n")); + + puts(_( +" -q, --quiet suppress warnings; specify twice to suppress errors too\n" +" -v, --verbose be verbose; specify twice for even more verbose")); + + if (long_help) { + puts(_( +" -Q, --no-warn make warnings not affect the exit status")); + puts(_( +" --robot use machine-parsable messages (useful for scripts)")); + puts(""); + puts(_( +" --info-memory display the memory usage limit and exit")); + puts(_( +" -h, --help display the short help (lists only the basic options)\n" +" -H, --long-help display this long help and exit")); + } else { + puts(_( +" -h, --help display this short help and exit\n" +" -H, --long-help display the long help (lists also the advanced options)")); + } + + puts(_( +" -V, --version display the version number and exit")); + + puts(_("\nWith no FILE, or when FILE is -, read standard input.\n")); + + if (long_help) { + printf(_( +"On this system and configuration, this program will use a maximum of roughly\n" +"%s MiB RAM and "), uint64_to_str(round_up_to_mib(hardware_memlimit_get()), 0)); + printf(N_("one thread.\n\n", "%s threads.\n\n", + hardware_threadlimit_get()), + uint64_to_str(hardware_threadlimit_get(), 0)); + } + + // TRANSLATORS: This message indicates the bug reporting address + // for this package. Please add _another line_ saying + // "Report translation bugs to <...>\n" with the email or WWW + // address for translation bugs. Thanks. 
+ printf(_("Report bugs to <%s> (in English or Finnish).\n"), + PACKAGE_BUGREPORT); + printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} Index: contrib/xz/src/xz/message.h =================================================================== --- contrib/xz/src/xz/message.h (revision 0) +++ contrib/xz/src/xz/message.h (revision 0) @@ -0,0 +1,151 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.h +/// \brief Printing messages to stderr +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// Verbosity levels +enum message_verbosity { + V_SILENT, ///< No messages + V_ERROR, ///< Only error messages + V_WARNING, ///< Errors and warnings + V_VERBOSE, ///< Errors, warnings, and verbose statistics + V_DEBUG, ///< Debugging, FIXME remove? +}; + + +/// \brief Initializes the message functions +/// +/// If an error occurs, this function doesn't return. +/// +extern void message_init(void); + + +/// Increase verbosity level by one step unless it was at maximum. +extern void message_verbosity_increase(void); + +/// Decrease verbosity level by one step unless it was at minimum. +extern void message_verbosity_decrease(void); + +/// Get the current verbosity level. +extern enum message_verbosity message_verbosity_get(void); + + +/// \brief Print a message if verbosity level is at least "verbosity" +/// +/// This doesn't touch the exit status. +extern void message(enum message_verbosity verbosity, const char *fmt, ...) + lzma_attribute((format(printf, 2, 3))); + + +/// \brief Prints a warning and possibly sets exit status +/// +/// The message is printed only if verbosity level is at least V_WARNING. +/// The exit status is set to WARNING unless it was already at ERROR. 
+extern void message_warning(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))); + + +/// \brief Prints an error message and sets exit status +/// +/// The message is printed only if verbosity level is at least V_ERROR. +/// The exit status is set to ERROR. +extern void message_error(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))); + + +/// \brief Prints an error message and exits with E_ERROR +/// +/// The message is printed only if verbosity level is at least V_ERROR. +extern void message_fatal(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))) + lzma_attribute((noreturn)); + + +/// Print an error message that an internal error occurred and exit with +/// E_ERROR. +extern void message_bug(void) lzma_attribute((noreturn)); + + +/// Print a message that establishing signal handlers failed, and exit with +/// exit status ERROR. +extern void message_signal_handler(void) lzma_attribute((noreturn)); + + +/// Convert lzma_ret to a string. +extern const char *message_strm(lzma_ret code); + + +/// Display how much memory was needed and how much the limit was. +extern void message_mem_needed(enum message_verbosity v, uint64_t memusage); + + +/// Print the filter chain. +extern void message_filters( + enum message_verbosity v, const lzma_filter *filters); + + +/// Print a message that user should try --help. +extern void message_try_help(void); + + +/// Print the memory usage limit and exit. +extern void message_memlimit(void) lzma_attribute((noreturn)); + + +/// Prints the version number to stdout and exits with exit status SUCCESS. +extern void message_version(void) lzma_attribute((noreturn)); + + +/// Print the short or long help message (selected by long_help) and exit. +extern void message_help(bool long_help) lzma_attribute((noreturn)); + + +/// \brief Set the total number of files to be processed +/// +/// Standard input is counted as a file here. This is used when printing +/// the filename via message_filename().
+extern void message_set_files(unsigned int files); + + +/// \brief Set the name of the current file and possibly print it too +/// +/// The name is printed immediately if --list was used or if --verbose +/// was used and stderr is a terminal. Even when the filename isn't printed, +/// it is stored so that it can be printed later if needed for progress +/// messages. +extern void message_filename(const char *src_name); + + +/// \brief Start progress info handling +/// +/// message_filename() must be called before this function to set +/// the filename. +/// +/// This must be paired with a call to message_progress_end() before the +/// given *strm becomes invalid. +/// +/// \param strm Pointer to lzma_stream used for the coding. +/// \param in_size Size of the input file, or zero if unknown. +/// +extern void message_progress_start(lzma_stream *strm, uint64_t in_size); + + +/// Update the progress info if in verbose mode and enough time has passed +/// since the previous update. This can be called only when +/// message_progress_start() has already been used. +extern void message_progress_update(void); + + +/// \brief Finishes the progress message if we were in verbose mode +/// +/// \param finished True if the whole stream was successfully coded +/// and output written to the output stream. +/// +extern void message_progress_end(bool finished); Index: contrib/xz/src/xz/main.h =================================================================== --- contrib/xz/src/xz/main.h (revision 0) +++ contrib/xz/src/xz/main.h (revision 0) @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.h +/// \brief Miscellaneous declarations +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// Possible exit status values. 
These are the same as used by gzip and bzip2. +enum exit_status_type { + E_SUCCESS = 0, + E_ERROR = 1, + E_WARNING = 2, +}; + + +/// Sets the exit status after a warning or error has occurred. If new_status +/// is E_WARNING and the old exit status was already E_ERROR, the exit +/// status is not changed. +extern void set_exit_status(enum exit_status_type new_status); + + +/// Use E_SUCCESS instead of E_WARNING if something worth a warning occurs +/// but nothing worth an error has occurred. This is called when --no-warn +/// is specified. +extern void set_exit_no_warn(void); Index: contrib/xz/src/xz/options.c =================================================================== --- contrib/xz/src/xz/options.c (revision 0) +++ contrib/xz/src/xz/options.c (revision 0) @@ -0,0 +1,435 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.c +/// \brief Parser for filter-specific options +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/////////////////// +// Generic stuff // +/////////////////// + +typedef struct { + const char *name; + uint64_t id; +} name_id_map; + + +typedef struct { + const char *name; + const name_id_map *map; + uint64_t min; + uint64_t max; +} option_map; + + +/// Parses option=value pairs that are separated with commas: +/// opt=val,opt=val,opt=val +/// +/// Each option is a string, that is converted to an integer using the +/// index where the option string is in the array.
+/// +/// Value can be +/// - a string-id map mapping a list of possible string values to integers +/// (opts[i].map != NULL, opts[i].min and opts[i].max are ignored); +/// - a number with minimum and maximum value limit +/// (opts[i].map == NULL && opts[i].min != UINT64_MAX); +/// - a string that will be parsed by the filter-specific code +/// (opts[i].map == NULL && opts[i].min == UINT64_MAX, opts[i].max ignored) +/// +/// When parsing both option and value succeed, a filter-specific function +/// is called, which should update the given value to filter-specific +/// options structure. +/// +/// \param str String containing the options from the command line +/// \param opts Filter-specific option map +/// \param set Filter-specific function to update filter_options +/// \param filter_options Pointer to filter-specific options structure +/// +/// \return Returns only if no errors occur. +/// +static void +parse_options(const char *str, const option_map *opts, + void (*set)(void *filter_options, + uint32_t key, uint64_t value, const char *valuestr), + void *filter_options) +{ + if (str == NULL || str[0] == '\0') + return; + + char *s = xstrdup(str); + char *name = s; + + while (*name != '\0') { + if (*name == ',') { + ++name; + continue; + } + + char *split = strchr(name, ','); + if (split != NULL) + *split = '\0'; + + char *value = strchr(name, '='); + if (value != NULL) + *value++ = '\0'; + + if (value == NULL || value[0] == '\0') + message_fatal(_("%s: Options must be `name=value' " + "pairs separated with commas"), str); + + // Look for the option name from the option map. + size_t i = 0; + while (true) { + if (opts[i].name == NULL) + message_fatal(_("%s: Invalid option name"), + name); + + if (strcmp(name, opts[i].name) == 0) + break; + + ++i; + } + + // Option was found from the map. See how we should handle it. + if (opts[i].map != NULL) { + // value is a string which we should map + // to an integer. 
+ size_t j; + for (j = 0; opts[i].map[j].name != NULL; ++j) { + if (strcmp(opts[i].map[j].name, value) == 0) + break; + } + + if (opts[i].map[j].name == NULL) + message_fatal(_("%s: Invalid option value"), + value); + + set(filter_options, i, opts[i].map[j].id, value); + + } else if (opts[i].min == UINT64_MAX) { + // value is a special string that will be + // parsed by set(). + set(filter_options, i, 0, value); + + } else { + // value is an integer. + const uint64_t v = str_to_uint64(name, value, + opts[i].min, opts[i].max); + set(filter_options, i, v, value); + } + + // Check if it was the last option. + if (split == NULL) + break; + + name = split + 1; + } + + free(s); + return; +} + + +////////////// +// Subblock // +////////////// + +enum { + OPT_SIZE, + OPT_RLE, + OPT_ALIGN, +}; + + +static void +set_subblock(void *options, uint32_t key, uint64_t value, + const char *valuestr lzma_attribute((unused))) +{ + lzma_options_subblock *opt = options; + + switch (key) { + case OPT_SIZE: + opt->subblock_data_size = value; + break; + + case OPT_RLE: + opt->rle = value; + break; + + case OPT_ALIGN: + opt->alignment = value; + break; + } +} + + +extern lzma_options_subblock * +options_subblock(const char *str) +{ + static const option_map opts[] = { + { "size", NULL, LZMA_SUBBLOCK_DATA_SIZE_MIN, + LZMA_SUBBLOCK_DATA_SIZE_MAX }, + { "rle", NULL, LZMA_SUBBLOCK_RLE_OFF, + LZMA_SUBBLOCK_RLE_MAX }, + { "align",NULL, LZMA_SUBBLOCK_ALIGNMENT_MIN, + LZMA_SUBBLOCK_ALIGNMENT_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_subblock *options + = xmalloc(sizeof(lzma_options_subblock)); + *options = (lzma_options_subblock){ + .allow_subfilters = false, + .alignment = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT, + .subblock_data_size = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT, + .rle = LZMA_SUBBLOCK_RLE_OFF, + }; + + parse_options(str, opts, &set_subblock, options); + + return options; +} + + +/////////// +// Delta // +/////////// + +enum { + OPT_DIST, +}; + + +static void +set_delta(void *options, 
uint32_t key, uint64_t value, + const char *valuestr lzma_attribute((unused))) +{ + lzma_options_delta *opt = options; + switch (key) { + case OPT_DIST: + opt->dist = value; + break; + } +} + + +extern lzma_options_delta * +options_delta(const char *str) +{ + static const option_map opts[] = { + { "dist", NULL, LZMA_DELTA_DIST_MIN, + LZMA_DELTA_DIST_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_delta *options = xmalloc(sizeof(lzma_options_delta)); + *options = (lzma_options_delta){ + // It's hard to give a useful default for this. + .type = LZMA_DELTA_TYPE_BYTE, + .dist = LZMA_DELTA_DIST_MIN, + }; + + parse_options(str, opts, &set_delta, options); + + return options; +} + + +///////// +// BCJ // +///////// + +enum { + OPT_START_OFFSET, +}; + + +static void +set_bcj(void *options, uint32_t key, uint64_t value, + const char *valuestr lzma_attribute((unused))) +{ + lzma_options_bcj *opt = options; + switch (key) { + case OPT_START_OFFSET: + opt->start_offset = value; + break; + } +} + + +extern lzma_options_bcj * +options_bcj(const char *str) +{ + static const option_map opts[] = { + { "start", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_bcj *options = xmalloc(sizeof(lzma_options_bcj)); + *options = (lzma_options_bcj){ + .start_offset = 0, + }; + + parse_options(str, opts, &set_bcj, options); + + return options; +} + + +////////// +// LZMA // +////////// + +enum { + OPT_PRESET, + OPT_DICT, + OPT_LC, + OPT_LP, + OPT_PB, + OPT_MODE, + OPT_NICE, + OPT_MF, + OPT_DEPTH, +}; + + +static void lzma_attribute((noreturn)) +error_lzma_preset(const char *valuestr) +{ + message_fatal(_("Unsupported LZMA1/LZMA2 preset: %s"), valuestr); +} + + +static void +set_lzma(void *options, uint32_t key, uint64_t value, const char *valuestr) +{ + lzma_options_lzma *opt = options; + + switch (key) { + case OPT_PRESET: { + if (valuestr[0] < '0' || valuestr[0] > '9') + error_lzma_preset(valuestr); + + uint32_t preset = valuestr[0] - '0'; + + // Currently only "e" is 
supported as a modifier, + // so keep this simple for now. + if (valuestr[1] != '\0') { + if (valuestr[1] == 'e') + preset |= LZMA_PRESET_EXTREME; + else + error_lzma_preset(valuestr); + + if (valuestr[2] != '\0') + error_lzma_preset(valuestr); + } + + if (lzma_lzma_preset(options, preset)) + error_lzma_preset(valuestr); + + break; + } + + case OPT_DICT: + opt->dict_size = value; + break; + + case OPT_LC: + opt->lc = value; + break; + + case OPT_LP: + opt->lp = value; + break; + + case OPT_PB: + opt->pb = value; + break; + + case OPT_MODE: + opt->mode = value; + break; + + case OPT_NICE: + opt->nice_len = value; + break; + + case OPT_MF: + opt->mf = value; + break; + + case OPT_DEPTH: + opt->depth = value; + break; + } +} + + +extern lzma_options_lzma * +options_lzma(const char *str) +{ + static const name_id_map modes[] = { + { "fast", LZMA_MODE_FAST }, + { "normal", LZMA_MODE_NORMAL }, + { NULL, 0 } + }; + + static const name_id_map mfs[] = { + { "hc3", LZMA_MF_HC3 }, + { "hc4", LZMA_MF_HC4 }, + { "bt2", LZMA_MF_BT2 }, + { "bt3", LZMA_MF_BT3 }, + { "bt4", LZMA_MF_BT4 }, + { NULL, 0 } + }; + + static const option_map opts[] = { + { "preset", NULL, UINT64_MAX, 0 }, + { "dict", NULL, LZMA_DICT_SIZE_MIN, + (UINT32_C(1) << 30) + (UINT32_C(1) << 29) }, + { "lc", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "lp", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "pb", NULL, LZMA_PB_MIN, LZMA_PB_MAX }, + { "mode", modes, 0, 0 }, + { "nice", NULL, 2, 273 }, + { "mf", mfs, 0, 0 }, + { "depth", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_lzma *options = xmalloc(sizeof(lzma_options_lzma)); + *options = (lzma_options_lzma){ + .dict_size = LZMA_DICT_SIZE_DEFAULT, + .preset_dict = NULL, + .preset_dict_size = 0, + .lc = LZMA_LC_DEFAULT, + .lp = LZMA_LP_DEFAULT, + .pb = LZMA_PB_DEFAULT, + .mode = LZMA_MODE_NORMAL, + .nice_len = 64, + .mf = LZMA_MF_BT4, + .depth = 0, + }; + + parse_options(str, opts, &set_lzma, options); + + if (options->lc + options->lp > 
LZMA_LCLP_MAX) + message_fatal(_("The sum of lc and lp must be at " + "maximum of 4")); + + const uint32_t nice_len_min = options->mf & 0x0F; + if (options->nice_len < nice_len_min) + message_fatal(_("The selected match finder requires at " + "least nice=%" PRIu32), nice_len_min); + + return options; +} Index: contrib/xz/src/xz/args.c =================================================================== --- contrib/xz/src/xz/args.c (revision 0) +++ contrib/xz/src/xz/args.c (revision 0) @@ -0,0 +1,549 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.c +/// \brief Argument parsing +/// +/// \note Filter-specific options parsing is in options.c. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include "getopt.h" +#include <ctype.h> + + +bool opt_stdout = false; +bool opt_force = false; +bool opt_keep_original = false; +bool opt_robot = false; + +// We don't modify or free() this, but we need to assign it in some +// non-const pointers.
+const char *stdin_filename = "(stdin)"; + + +static void +parse_real(args_info *args, int argc, char **argv) +{ + enum { + OPT_SUBBLOCK = INT_MIN, + OPT_X86, + OPT_POWERPC, + OPT_IA64, + OPT_ARM, + OPT_ARMTHUMB, + OPT_SPARC, + OPT_DELTA, + OPT_LZMA1, + OPT_LZMA2, + + OPT_NO_SPARSE, + OPT_FILES, + OPT_FILES0, + OPT_INFO_MEMORY, + OPT_ROBOT, + }; + + static const char short_opts[] + = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; + + static const struct option long_opts[] = { + // Operation mode + { "compress", no_argument, NULL, 'z' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "test", no_argument, NULL, 't' }, + { "list", no_argument, NULL, 'l' }, + + // Operation modifiers + { "keep", no_argument, NULL, 'k' }, + { "force", no_argument, NULL, 'f' }, + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, + { "suffix", required_argument, NULL, 'S' }, + // { "recursive", no_argument, NULL, 'r' }, // TODO + { "files", optional_argument, NULL, OPT_FILES }, + { "files0", optional_argument, NULL, OPT_FILES0 }, + + // Basic compression settings + { "format", required_argument, NULL, 'F' }, + { "check", required_argument, NULL, 'C' }, + { "memory", required_argument, NULL, 'M' }, + { "threads", required_argument, NULL, 'T' }, + + { "extreme", no_argument, NULL, 'e' }, + { "fast", no_argument, NULL, '0' }, + { "best", no_argument, NULL, '9' }, + + // Filters + { "lzma1", optional_argument, NULL, OPT_LZMA1 }, + { "lzma2", optional_argument, NULL, OPT_LZMA2 }, + { "x86", optional_argument, NULL, OPT_X86 }, + { "powerpc", optional_argument, NULL, OPT_POWERPC }, + { "ia64", optional_argument, NULL, OPT_IA64 }, + { "arm", optional_argument, NULL, OPT_ARM }, + { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, + { "sparc", optional_argument, NULL, OPT_SPARC }, + { "delta", optional_argument, NULL, OPT_DELTA }, + { "subblock", optional_argument, NULL, 
OPT_SUBBLOCK }, + + // Other options + { "quiet", no_argument, NULL, 'q' }, + { "verbose", no_argument, NULL, 'v' }, + { "no-warn", no_argument, NULL, 'Q' }, + { "robot", no_argument, NULL, OPT_ROBOT }, + { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, + { "help", no_argument, NULL, 'h' }, + { "long-help", no_argument, NULL, 'H' }, + { "version", no_argument, NULL, 'V' }, + + { NULL, 0, NULL, 0 } + }; + + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + // Compression preset (also for decompression if --format=raw) + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + coder_set_preset(c - '0'); + break; + + // --memory + case 'M': { + // Support specifying the limit as a percentage of + // installed physical RAM. + size_t len = strlen(optarg); + if (len > 0 && optarg[len - 1] == '%') { + optarg[len - 1] = '\0'; + hardware_memlimit_set_percentage( + str_to_uint64( + "memory%", optarg, 1, 100)); + } else { + // On 32-bit systems, SIZE_MAX would make more + // sense than UINT64_MAX. But use UINT64_MAX + // still so that scripts that assume > 4 GiB + // values don't break. + hardware_memlimit_set(str_to_uint64( + "memory", optarg, + 0, UINT64_MAX)); + } + + break; + } + + // --suffix + case 'S': + suffix_set(optarg); + break; + + case 'T': + hardware_threadlimit_set(str_to_uint64( + "threads", optarg, 0, UINT32_MAX)); + break; + + // --version + case 'V': + // This doesn't return. + message_version(); + + // --stdout + case 'c': + opt_stdout = true; + break; + + // --decompress + case 'd': + opt_mode = MODE_DECOMPRESS; + break; + + // --extreme + case 'e': + coder_set_extreme(); + break; + + // --force + case 'f': + opt_force = true; + break; + + // --info-memory + case OPT_INFO_MEMORY: + // This doesn't return. + message_memlimit(); + + // --help + case 'h': + // This doesn't return. 
+ message_help(false); + + // --long-help + case 'H': + // This doesn't return. + message_help(true); + + // --list + case 'l': + opt_mode = MODE_LIST; + break; + + // --keep + case 'k': + opt_keep_original = true; + break; + + // --quiet + case 'q': + message_verbosity_decrease(); + break; + + case 'Q': + set_exit_no_warn(); + break; + + case 't': + opt_mode = MODE_TEST; + break; + + // --verbose + case 'v': + message_verbosity_increase(); + break; + + // --robot + case OPT_ROBOT: + opt_robot = true; + + // This is to make sure that floating point numbers + // always have a dot as decimal separator. + setlocale(LC_NUMERIC, "C"); + break; + + case 'z': + opt_mode = MODE_COMPRESS; + break; + + // Filter setup + + case OPT_SUBBLOCK: + coder_add_filter(LZMA_FILTER_SUBBLOCK, + options_subblock(optarg)); + break; + + case OPT_X86: + coder_add_filter(LZMA_FILTER_X86, + options_bcj(optarg)); + break; + + case OPT_POWERPC: + coder_add_filter(LZMA_FILTER_POWERPC, + options_bcj(optarg)); + break; + + case OPT_IA64: + coder_add_filter(LZMA_FILTER_IA64, + options_bcj(optarg)); + break; + + case OPT_ARM: + coder_add_filter(LZMA_FILTER_ARM, + options_bcj(optarg)); + break; + + case OPT_ARMTHUMB: + coder_add_filter(LZMA_FILTER_ARMTHUMB, + options_bcj(optarg)); + break; + + case OPT_SPARC: + coder_add_filter(LZMA_FILTER_SPARC, + options_bcj(optarg)); + break; + + case OPT_DELTA: + coder_add_filter(LZMA_FILTER_DELTA, + options_delta(optarg)); + break; + + case OPT_LZMA1: + coder_add_filter(LZMA_FILTER_LZMA1, + options_lzma(optarg)); + break; + + case OPT_LZMA2: + coder_add_filter(LZMA_FILTER_LZMA2, + options_lzma(optarg)); + break; + + // Other + + // --format + case 'F': { + // Just in case, support both "lzma" and "alone" since + // the latter was used for forward compatibility in + // LZMA Utils 4.32.x. 
+ static const struct { + char str[8]; + enum format_type format; + } types[] = { + { "auto", FORMAT_AUTO }, + { "xz", FORMAT_XZ }, + { "lzma", FORMAT_LZMA }, + { "alone", FORMAT_LZMA }, + // { "gzip", FORMAT_GZIP }, + // { "gz", FORMAT_GZIP }, + { "raw", FORMAT_RAW }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown file " + "format type"), + optarg); + + opt_format = types[i].format; + break; + } + + // --check + case 'C': { + static const struct { + char str[8]; + lzma_check check; + } types[] = { + { "none", LZMA_CHECK_NONE }, + { "crc32", LZMA_CHECK_CRC32 }, + { "crc64", LZMA_CHECK_CRC64 }, + { "sha256", LZMA_CHECK_SHA256 }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) { + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unsupported " + "integrity " + "check type"), optarg); + } + + // Use a separate check in case we are using different + // liblzma than what was used to compile us. + if (!lzma_check_is_supported(types[i].check)) + message_fatal(_("%s: Unsupported integrity " + "check type"), optarg); + + coder_set_check(types[i].check); + break; + } + + case OPT_NO_SPARSE: + io_no_sparse(); + break; + + case OPT_FILES: + args->files_delim = '\n'; + + // Fall through + + case OPT_FILES0: + if (args->files_name != NULL) + message_fatal(_("Only one file can be " + "specified with `--files' " + "or `--files0'.")); + + if (optarg == NULL) { + args->files_name = (char *)stdin_filename; + args->files_file = stdin; + } else { + args->files_name = optarg; + args->files_file = fopen(optarg, + c == OPT_FILES ? 
"r" : "rb"); + if (args->files_file == NULL) + message_fatal("%s: %s", optarg, + strerror(errno)); + } + + break; + + default: + message_try_help(); + tuklib_exit(E_ERROR, E_ERROR, false); + } + } + + return; +} + + +static void +parse_environment(args_info *args, char *argv0) +{ + char *env = getenv("XZ_OPT"); + if (env == NULL) + return; + + // We modify the string, so make a copy of it. + env = xstrdup(env); + + // Calculate the number of arguments in env. argc stats at one + // to include space for the program name. + int argc = 1; + bool prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + // NOTE: Cast to unsigned char is needed so that correct + // value gets passed to isspace(), which expects + // unsigned char cast to int. Casting to int is done + // automatically due to integer promotion, but we need to + // force char to unsigned char manually. Otherwise 8-bit + // characters would get promoted to wrong value if + // char is signed. + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + } else if (prev_was_space) { + prev_was_space = false; + + // Keep argc small enough to fit into a singed int + // and to keep it usable for memory allocation. + if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *))) + message_fatal(_("The environment variable " + "XZ_OPT contains too many " + "arguments")); + } + } + + // Allocate memory to hold pointers to the arguments. Add one to get + // space for the terminating NULL (if some systems happen to need it). + char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); + argv[0] = argv0; + argv[argc] = NULL; + + // Go through the string again. Split the arguments using '\0' + // characters and add pointers to the resulting strings to argv. 
+ argc = 1; + prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + env[i] = '\0'; + } else if (prev_was_space) { + prev_was_space = false; + argv[argc++] = env + i; + } + } + + // Parse the argument list we got from the environment. All non-option + // arguments i.e. filenames are ignored. + parse_real(args, argc, argv); + + // Reset the state of the getopt_long() so that we can parse the + // command line options too. There are two incompatible ways to + // do it. +#ifdef HAVE_OPTRESET + // BSD + optind = 1; + optreset = 1; +#else + // GNU, Solaris + optind = 0; +#endif + + // We don't need the argument list from environment anymore. + free(argv); + free(env); + + return; +} + + +extern void +args_parse(args_info *args, int argc, char **argv) +{ + // Initialize those parts of *args that we need later. + args->files_name = NULL; + args->files_file = NULL; + args->files_delim = '\0'; + + // Check how we were called. + { + // Remove the leading path name, if any. + const char *name = strrchr(argv[0], '/'); + if (name == NULL) + name = argv[0]; + else + ++name; + + // NOTE: It's possible that name[0] is now '\0' if argv[0] + // is weird, but it doesn't matter here. + + // Look for full command names instead of substrings like + // "un", "cat", and "lz" to reduce possibility of false + // positives when the programs have been renamed. 
+ if (strstr(name, "xzcat") != NULL) { + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unxz") != NULL) { + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzcat") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unlzma") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzma") != NULL) { + opt_format = FORMAT_LZMA; + } + } + + // First the flags from environment + parse_environment(args, argv[0]); + + // Then from the command line + parse_real(args, argc, argv); + + // Never remove the source file when the destination is not on disk. + // In test mode the data is written nowhere, but setting opt_stdout + // will make the rest of the code behave well. + if (opt_stdout || opt_mode == MODE_TEST) { + opt_keep_original = true; + opt_stdout = true; + } + + // When compressing, if no --format flag was used, or it + // was --format=auto, we compress to the .xz format. + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) + opt_format = FORMAT_XZ; + + // Compression settings need to be validated (options themselves and + // their memory usage) when compressing to any file format. It has to + // be done also when uncompressing raw data, since for raw decoding + // the options given on the command line are used to know what kind + // of raw data we are supposed to decode. + if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) + coder_set_compression_settings(); + + // If no filenames are given, use stdin. + if (argv[optind] == NULL && args->files_name == NULL) { + // We don't modify or free() the "-" constant. The caller + // modifies this so don't make the struct itself const. + static char *names_stdin[2] = { (char *)"-", NULL }; + args->arg_names = names_stdin; + args->arg_count = 1; + } else { + // We got at least one filename from the command line, or + // --files or --files0 was specified. 
+ args->arg_names = argv + optind; + args->arg_count = argc - optind; + } + + return; +} Index: contrib/xz/src/xz/coder.c =================================================================== --- contrib/xz/src/xz/coder.c (revision 0) +++ contrib/xz/src/xz/coder.c (revision 0) @@ -0,0 +1,659 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file coder.c +/// \brief Compresses or uncompresses a file +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/// Return value type for coder_init(). +enum coder_init_ret { + CODER_INIT_NORMAL, + CODER_INIT_PASSTHRU, + CODER_INIT_ERROR, +}; + + +enum operation_mode opt_mode = MODE_COMPRESS; + +enum format_type opt_format = FORMAT_AUTO; + + +/// Stream used to communicate with liblzma +static lzma_stream strm = LZMA_STREAM_INIT; + +/// Filters needed for all encoding all formats, and also decoding in raw data +static lzma_filter filters[LZMA_FILTERS_MAX + 1]; + +/// Input and output buffers +static io_buf in_buf; +static io_buf out_buf; + +/// Number of filters. Zero indicates that we are using a preset. +static size_t filters_count = 0; + +/// Number of the preset (0-9) +static size_t preset_number = 6; + +/// True if we should auto-adjust the compression settings to use less memory +/// if memory usage limit is too low for the original settings. +static bool auto_adjust = true; + +/// Indicate if no preset has been explicitly given. In that case, if we need +/// to auto-adjust for lower memory usage, we won't print a warning. +static bool preset_default = true; + +/// If a preset is used (no custom filter chain) and preset_extreme is true, +/// a significantly slower compression is used to achieve slightly better +/// compression ratio. 
+static bool preset_extreme = false; + +/// Integrity check type +static lzma_check check; + +/// This becomes false if the --check=CHECK option is used. +static bool check_default = true; + + +extern void +coder_set_check(lzma_check new_check) +{ + check = new_check; + check_default = false; + return; +} + + +extern void +coder_set_preset(size_t new_preset) +{ + preset_number = new_preset; + preset_default = false; + return; +} + + +extern void +coder_set_extreme(void) +{ + preset_extreme = true; + return; +} + + +extern void +coder_add_filter(lzma_vli id, void *options) +{ + if (filters_count == LZMA_FILTERS_MAX) + message_fatal(_("Maximum number of filters is four")); + + filters[filters_count].id = id; + filters[filters_count].options = options; + ++filters_count; + + return; +} + + +static void lzma_attribute((noreturn)) +memlimit_too_small(uint64_t memory_usage) +{ + message(V_ERROR, _("Memory usage limit is too low for the given " + "filter setup.")); + message_mem_needed(V_ERROR, memory_usage); + tuklib_exit(E_ERROR, E_ERROR, false); +} + + +extern void +coder_set_compression_settings(void) +{ + // Options for LZMA1 or LZMA2 in case we are using a preset. + static lzma_options_lzma opt_lzma; + + if (filters_count == 0) { + // We are using a preset. This is not a good idea in raw mode + // except when playing around with things. Different versions + // of this software may use different options in presets, and + // thus make uncompressing the raw data difficult. + if (opt_format == FORMAT_RAW) { + // The message is shown only if warnings are allowed + // but the exit status isn't changed. + message(V_WARNING, _("Using a preset in raw mode " + "is discouraged.")); + message(V_WARNING, _("The exact options of the " + "presets may vary between software " + "versions.")); + } + + // Get the preset for LZMA1 or LZMA2. 
+ if (preset_extreme) + preset_number |= LZMA_PRESET_EXTREME; + + if (lzma_lzma_preset(&opt_lzma, preset_number)) + message_bug(); + + // Use LZMA2 except with --format=lzma we use LZMA1. + filters[0].id = opt_format == FORMAT_LZMA + ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; + filters[0].options = &opt_lzma; + filters_count = 1; + } else { + preset_default = false; + } + + // Terminate the filter options array. + filters[filters_count].id = LZMA_VLI_UNKNOWN; + + // If we are using the .lzma format, allow exactly one filter + // which has to be LZMA1. + if (opt_format == FORMAT_LZMA && (filters_count != 1 + || filters[0].id != LZMA_FILTER_LZMA1)) + message_fatal(_("The .lzma format supports only " + "the LZMA1 filter")); + + // If we are using the .xz format, make sure that there is no LZMA1 + // filter to prevent LZMA_PROG_ERROR. + if (opt_format == FORMAT_XZ) + for (size_t i = 0; i < filters_count; ++i) + if (filters[i].id == LZMA_FILTER_LZMA1) + message_fatal(_("LZMA1 cannot be used " + "with the .xz format")); + + // Print the selected filter chain. + message_filters(V_DEBUG, filters); + + // If using --format=raw, we can be decoding. The memusage function + // also validates the filter chain and the options used for the + // filters. + const uint64_t memory_limit = hardware_memlimit_get(); + uint64_t memory_usage; + if (opt_mode == MODE_COMPRESS) + memory_usage = lzma_raw_encoder_memusage(filters); + else + memory_usage = lzma_raw_decoder_memusage(filters); + + if (memory_usage == UINT64_MAX) + message_fatal(_("Unsupported filter chain or filter options")); + + // Print memory usage info before possible dictionary + // size auto-adjusting. + message_mem_needed(V_DEBUG, memory_usage); + + if (memory_usage > memory_limit) { + // If --no-auto-adjust was used or we didn't find LZMA1 or + // LZMA2 as the last filter, give an error immediately. + // --format=raw implies --no-auto-adjust. 
+ if (!auto_adjust || opt_format == FORMAT_RAW) + memlimit_too_small(memory_usage); + + assert(opt_mode == MODE_COMPRESS); + + // Look for the last filter if it is LZMA2 or LZMA1, so + // we can make it use less RAM. With other filters we don't + // know what to do. + size_t i = 0; + while (filters[i].id != LZMA_FILTER_LZMA2 + && filters[i].id != LZMA_FILTER_LZMA1) { + if (filters[i].id == LZMA_VLI_UNKNOWN) + memlimit_too_small(memory_usage); + + ++i; + } + + // Decrease the dictionary size until we meet the memory + // usage limit. First round down to full mebibytes. + lzma_options_lzma *opt = filters[i].options; + const uint32_t orig_dict_size = opt->dict_size; + opt->dict_size &= ~((UINT32_C(1) << 20) - 1); + while (true) { + // If it is below 1 MiB, auto-adjusting failed. We + // could be more sophisticated and scale it down even + // more, but let's see if many complain about this + // version. + // + // FIXME: Displays the scaled memory usage instead + // of the original. + if (opt->dict_size < (UINT32_C(1) << 20)) + memlimit_too_small(memory_usage); + + memory_usage = lzma_raw_encoder_memusage(filters); + if (memory_usage == UINT64_MAX) + message_bug(); + + // Accept it if it is low enough. + if (memory_usage <= memory_limit) + break; + + // Otherwise 1 MiB down and try again. I hope this + // isn't too slow method for cases where the original + // dict_size is very big. + opt->dict_size -= UINT32_C(1) << 20; + } + + // Tell the user that we decreased the dictionary size. + // However, omit the message if no preset or custom chain + // was given. FIXME: Always warn? + if (!preset_default) + message(V_WARNING, _("Adjusted LZMA%c dictionary size " + "from %s MiB to %s MiB to not exceed " + "the memory usage limit of %s MiB"), + filters[i].id == LZMA_FILTER_LZMA2 + ? 
'2' : '1', + uint64_to_str(orig_dict_size >> 20, 0), + uint64_to_str(opt->dict_size >> 20, 1), + uint64_to_str(round_up_to_mib( + memory_limit), 2)); + } + +/* + // Limit the number of worker threads so that memory usage + // limit isn't exceeded. + assert(memory_usage > 0); + size_t thread_limit = memory_limit / memory_usage; + if (thread_limit == 0) + thread_limit = 1; + + if (opt_threads > thread_limit) + opt_threads = thread_limit; +*/ + + if (check_default) { + // The default check type is CRC64, but fallback to CRC32 + // if CRC64 isn't supported by the copy of liblzma we are + // using. CRC32 is always supported. + check = LZMA_CHECK_CRC64; + if (!lzma_check_is_supported(check)) + check = LZMA_CHECK_CRC32; + } + + return; +} + + +/// Return true if the data in in_buf seems to be in the .xz format. +static bool +is_format_xz(void) +{ + return strm.avail_in >= 6 && memcmp(in_buf.u8, "\3757zXZ", 6) == 0; +} + + +/// Return true if the data in in_buf seems to be in the .lzma format. +static bool +is_format_lzma(void) +{ + // The .lzma header is 13 bytes. + if (strm.avail_in < 13) + return false; + + // Decode the LZMA1 properties. + lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; + if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) + return false; + + // A hack to ditch tons of false positives: We allow only dictionary + // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone + // created only files with 2^n, but accepts any dictionary size. + // If someone complains, this will be reconsidered. + lzma_options_lzma *opt = filter.options; + const uint32_t dict_size = opt->dict_size; + free(opt); + + if (dict_size != UINT32_MAX) { + uint32_t d = dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + if (d != dict_size || dict_size == 0) + return false; + } + + // Another hack to ditch false positives: Assume that if the + // uncompressed size is known, it must be less than 256 GiB. 
+ // Again, if someone complains, this will be reconsidered. + uint64_t uncompressed_size = 0; + for (size_t i = 0; i < 8; ++i) + uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8); + + if (uncompressed_size != UINT64_MAX + && uncompressed_size > (UINT64_C(1) << 38)) + return false; + + return true; +} + + +/// Detect the input file type (for now, this is done only when decompressing), +/// and initialize an appropriate coder. Return value indicates if a normal +/// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru +/// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred +/// (CODER_INIT_ERROR). +static enum coder_init_ret +coder_init(file_pair *pair) +{ + lzma_ret ret = LZMA_PROG_ERROR; + + if (opt_mode == MODE_COMPRESS) { + switch (opt_format) { + case FORMAT_AUTO: + // args.c ensures this. + assert(0); + break; + + case FORMAT_XZ: + ret = lzma_stream_encoder(&strm, filters, check); + break; + + case FORMAT_LZMA: + ret = lzma_alone_encoder(&strm, filters[0].options); + break; + + case FORMAT_RAW: + ret = lzma_raw_encoder(&strm, filters); + break; + } + } else { + const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK + | LZMA_CONCATENATED; + + // We abuse FORMAT_AUTO to indicate unknown file format, + // for which we may consider passthru mode. + enum format_type init_format = FORMAT_AUTO; + + switch (opt_format) { + case FORMAT_AUTO: + if (is_format_xz()) + init_format = FORMAT_XZ; + else if (is_format_lzma()) + init_format = FORMAT_LZMA; + break; + + case FORMAT_XZ: + if (is_format_xz()) + init_format = FORMAT_XZ; + break; + + case FORMAT_LZMA: + if (is_format_lzma()) + init_format = FORMAT_LZMA; + break; + + case FORMAT_RAW: + init_format = FORMAT_RAW; + break; + } + + switch (init_format) { + case FORMAT_AUTO: + // Unknown file format. If --decompress --stdout + // --force have been given, then we copy the input + // as is to stdout. 
Checking for MODE_DECOMPRESS + // is needed, because we don't want to do use + // passthru mode with --test. + if (opt_mode == MODE_DECOMPRESS + && opt_stdout && opt_force) + return CODER_INIT_PASSTHRU; + + ret = LZMA_FORMAT_ERROR; + break; + + case FORMAT_XZ: + ret = lzma_stream_decoder(&strm, + hardware_memlimit_get(), flags); + break; + + case FORMAT_LZMA: + ret = lzma_alone_decoder(&strm, + hardware_memlimit_get()); + break; + + case FORMAT_RAW: + // Memory usage has already been checked in + // coder_set_compression_settings(). + ret = lzma_raw_decoder(&strm, filters); + break; + } + + // Try to decode the headers. This will catch too low + // memory usage limit in case it happens in the first + // Block of the first Stream, which is where it very + // probably will happen if it is going to happen. + if (ret == LZMA_OK && init_format != FORMAT_RAW) { + strm.next_out = NULL; + strm.avail_out = 0; + ret = lzma_code(&strm, LZMA_RUN); + } + } + + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, message_strm(ret)); + if (ret == LZMA_MEMLIMIT_ERROR) + message_mem_needed(V_ERROR, lzma_memusage(&strm)); + + return CODER_INIT_ERROR; + } + + return CODER_INIT_NORMAL; +} + + +/// Compress or decompress using liblzma. +static bool +coder_normal(file_pair *pair) +{ + // Encoder needs to know when we have given all the input to it. + // The decoders need to know it too when we are using + // LZMA_CONCATENATED. We need to check for src_eof here, because + // the first input chunk has been already read, and that may + // have been the only chunk we will read. + lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; + + lzma_ret ret; + + // Assume that something goes wrong. + bool success = false; + + strm.next_out = out_buf.u8; + strm.avail_out = IO_BUFFER_SIZE; + + while (!user_abort) { + // Fill the input buffer if it is empty and we haven't reached + // end of file yet. 
+ if (strm.avail_in == 0 && !pair->src_eof) { + strm.next_in = in_buf.u8; + strm.avail_in = io_read( + pair, &in_buf, IO_BUFFER_SIZE); + + if (strm.avail_in == SIZE_MAX) + break; + + if (pair->src_eof) + action = LZMA_FINISH; + } + + // Let liblzma do the actual work. + ret = lzma_code(&strm, action); + + // Write out if the output buffer became full. + if (strm.avail_out == 0) { + if (opt_mode != MODE_TEST && io_write(pair, &out_buf, + IO_BUFFER_SIZE - strm.avail_out)) + break; + + strm.next_out = out_buf.u8; + strm.avail_out = IO_BUFFER_SIZE; + } + + if (ret != LZMA_OK) { + // Determine if the return value indicates that we + // won't continue coding. + const bool stop = ret != LZMA_NO_CHECK + && ret != LZMA_UNSUPPORTED_CHECK; + + if (stop) { + // Write the remaining bytes even if something + // went wrong, because that way the user gets + // as much data as possible, which can be good + // when trying to get at least some useful + // data out of damaged files. + if (opt_mode != MODE_TEST && io_write(pair, + &out_buf, IO_BUFFER_SIZE + - strm.avail_out)) + break; + } + + if (ret == LZMA_STREAM_END) { + // Check that there is no trailing garbage. + // This is needed for LZMA_Alone and raw + // streams. + if (strm.avail_in == 0 && !pair->src_eof) { + // Try reading one more byte. + // Hopefully we don't get any more + // input, and thus pair->src_eof + // becomes true. + strm.avail_in = io_read( + pair, &in_buf, 1); + if (strm.avail_in == SIZE_MAX) + break; + + assert(strm.avail_in == 0 + || strm.avail_in == 1); + } + + if (strm.avail_in == 0) { + assert(pair->src_eof); + success = true; + break; + } + + // We hadn't reached the end of the file. + ret = LZMA_DATA_ERROR; + assert(stop); + } + + // If we get here and stop is true, something went + // wrong and we print an error. Otherwise it's just + // a warning and coding can continue. 
+ if (stop) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + } else { + message_warning("%s: %s", pair->src_name, + message_strm(ret)); + + // When compressing, all possible errors set + // stop to true. + assert(opt_mode != MODE_COMPRESS); + } + + if (ret == LZMA_MEMLIMIT_ERROR) { + // Display how much memory it would have + // actually needed. + message_mem_needed(V_ERROR, + lzma_memusage(&strm)); + } + + if (stop) + break; + } + + // Show progress information under certain conditions. + message_progress_update(); + } + + return success; +} + + +/// Copy from input file to output file without processing the data in any +/// way. This is used only when trying to decompress unrecognized files +/// with --decompress --stdout --force, so the output is always stdout. +static bool +coder_passthru(file_pair *pair) +{ + while (strm.avail_in != 0) { + if (user_abort) + return false; + + if (io_write(pair, &in_buf, strm.avail_in)) + return false; + + strm.total_in += strm.avail_in; + strm.total_out = strm.total_in; + message_progress_update(); + + strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + if (strm.avail_in == SIZE_MAX) + return false; + } + + return true; +} + + +extern void +coder_run(const char *filename) +{ + // Set and possibly print the filename for the progress message. + message_filename(filename); + + // Try to open the input file. + file_pair *pair = io_open_src(filename); + if (pair == NULL) + return; + + // Assume that something goes wrong. + bool success = false; + + // Read the first chunk of input data. This is needed to detect + // the input file type (for now, only for decompression). + strm.next_in = in_buf.u8; + strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + + if (strm.avail_in != SIZE_MAX) { + // Initialize the coder. This will detect the file format + // and, in decompression or testing mode, check the memory + // usage of the first Block too. 
This way we don't try to + // open the destination file if we see that coding wouldn't + // work at all anyway. This also avoids deleting the old + // "target" file if --force was used. + const enum coder_init_ret init_ret = coder_init(pair); + + if (init_ret != CODER_INIT_ERROR && !user_abort) { + // Don't open the destination file when --test + // is used. + if (opt_mode == MODE_TEST || !io_open_dest(pair)) { + // Initialize the progress indicator. + const uint64_t in_size + = pair->src_st.st_size <= 0 + ? 0 : pair->src_st.st_size; + message_progress_start(&strm, in_size); + + // Do the actual coding or passthru. + if (init_ret == CODER_INIT_NORMAL) + success = coder_normal(pair); + else + success = coder_passthru(pair); + + message_progress_end(success); + } + } + } + + // Close the file pair. It needs to know if coding was successful to + // know if the source or target file should be unlinked. + io_close(pair, success); + + return; +} Index: contrib/xz/src/xz/hardware.c =================================================================== --- contrib/xz/src/xz/hardware.c (revision 0) +++ contrib/xz/src/xz/hardware.c (revision 0) @@ -0,0 +1,112 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.c +/// \brief Detection of available hardware resources +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "tuklib_cpucores.h" + + +/// Maximum number of free *coder* threads. This can be set with +/// the --threads=NUM command line option. 
+static uint32_t threadlimit; + +/// Memory usage limit +static uint64_t memlimit; + +/// Total amount of physical RAM +static uint64_t total_ram; + + +extern void +hardware_threadlimit_set(uint32_t new_threadlimit) +{ + if (new_threadlimit == 0) { + // The default is the number of available CPU cores. + threadlimit = tuklib_cpucores(); + if (threadlimit == 0) + threadlimit = 1; + } else { + threadlimit = new_threadlimit; + } + + return; +} + + +extern uint32_t +hardware_threadlimit_get(void) +{ + return threadlimit; +} + + +extern void +hardware_memlimit_set(uint64_t new_memlimit) +{ + if (new_memlimit != 0) { + memlimit = new_memlimit; + } else { + // The default depends on the amount of RAM but so that + // on "low-memory" systems the relative limit is higher + // to make it more likely that files created with "xz -9" + // will still decompress without overriding the limit + // manually. + // + // If 40 % of RAM is 80 MiB or more, use 40 % of RAM as + // the limit. + memlimit = 40 * total_ram / 100; + if (memlimit < UINT64_C(80) * 1024 * 1024) { + // If 80 % of RAM is less than 80 MiB, + // use 80 % of RAM as the limit. + memlimit = 80 * total_ram / 100; + if (memlimit > UINT64_C(80) * 1024 * 1024) { + // Otherwise use 80 MiB as the limit. + memlimit = UINT64_C(80) * 1024 * 1024; + } + } + } + + return; +} + + +extern void +hardware_memlimit_set_percentage(uint32_t percentage) +{ + assert(percentage > 0); + assert(percentage <= 100); + + memlimit = percentage * total_ram / 100; + return; +} + + +extern uint64_t +hardware_memlimit_get(void) +{ + return memlimit; +} + + +extern void +hardware_init(void) +{ + // Get the amount of RAM. If we cannot determine it, + // use the assumption defined by the configure script. + total_ram = lzma_physmem(); + if (total_ram == 0) + total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024; + + // Set the defaults. 
+ hardware_memlimit_set(0); + hardware_threadlimit_set(0); + return; +} Index: contrib/xz/src/common/tuklib_physmem.h =================================================================== --- contrib/xz/src/common/tuklib_physmem.h (revision 0) +++ contrib/xz/src/common/tuklib_physmem.h (revision 0) @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_physmem.h +/// \brief Get the amount of physical memory +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_PHYSMEM_H +#define TUKLIB_PHYSMEM_H + +#include "tuklib_common.h" +TUKLIB_DECLS_BEGIN + +#define tuklib_physmem TUKLIB_SYMBOL(tuklib_physmem) +extern uint64_t tuklib_physmem(void); +///< +/// \brief Get the amount of physical memory in bytes +/// +/// \return Amount of physical memory in bytes. On error, zero is +/// returned. + +TUKLIB_DECLS_END +#endif Index: contrib/xz/src/common/tuklib_cpucores.h =================================================================== --- contrib/xz/src/common/tuklib_cpucores.h (revision 0) +++ contrib/xz/src/common/tuklib_cpucores.h (revision 0) @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_cpucores.h +/// \brief Get the number of CPU cores online +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_CPUCORES_H
+#define TUKLIB_CPUCORES_H
+
+#include "tuklib_common.h"
+TUKLIB_DECLS_BEGIN
+
+#define tuklib_cpucores TUKLIB_SYMBOL(tuklib_cpucores)
+extern uint32_t tuklib_cpucores(void);
+
+TUKLIB_DECLS_END
+#endif
Index: contrib/xz/src/common/tuklib_open_stdxxx.h
===================================================================
--- contrib/xz/src/common/tuklib_open_stdxxx.h	(revision 0)
+++ contrib/xz/src/common/tuklib_open_stdxxx.h	(revision 0)
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_open_stdxxx.h
+/// \brief      Make sure that file descriptors 0, 1, and 2 are open
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_OPEN_STDXXX_H
+#define TUKLIB_OPEN_STDXXX_H
+
+#include "tuklib_common.h"
+TUKLIB_DECLS_BEGIN
+
+#define tuklib_open_stdxxx TUKLIB_SYMBOL(tuklib_open_stdxxx)
+extern void tuklib_open_stdxxx(int err_status);
+
+TUKLIB_DECLS_END
+#endif
Index: contrib/xz/src/common/tuklib_gettext.h
===================================================================
--- contrib/xz/src/common/tuklib_gettext.h	(revision 0)
+++ contrib/xz/src/common/tuklib_gettext.h	(revision 0)
@@ -0,0 +1,44 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_gettext.h
+/// \brief      Wrapper for gettext and friends
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_GETTEXT_H
+#define TUKLIB_GETTEXT_H
+
+#include "tuklib_common.h"
+#include <locale.h>
+
+#ifndef TUKLIB_GETTEXT
+#	ifdef ENABLE_NLS
+#		define TUKLIB_GETTEXT 1
+#	else
+#		define TUKLIB_GETTEXT 0
+#	endif
+#endif
+
+#if TUKLIB_GETTEXT
+#	include <libintl.h>
+#	define tuklib_gettext_init(package, localedir) \
+		do { \
+			setlocale(LC_ALL, ""); \
+			bindtextdomain(package, localedir); \
+			textdomain(package); \
+		} while (0)
+#	define _(msgid) gettext(msgid)
+#	define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n)
+#else
+#	define tuklib_gettext_init(package, localedir) \
+		setlocale(LC_ALL, "")
+#	define _(msgid) (msgid)
+#	define N_(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2))
+#endif
+
+#endif
Index: contrib/xz/src/common/tuklib_common.h
===================================================================
--- contrib/xz/src/common/tuklib_common.h	(revision 0)
+++ contrib/xz/src/common/tuklib_common.h	(revision 0)
@@ -0,0 +1,71 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_common.h
+/// \brief      Common definitions for tuklib modules
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_COMMON_H
+#define TUKLIB_COMMON_H
+
+// The config file may be replaced by a package-specific file.
+// It should include at least stddef.h, inttypes.h, and limits.h.
+#include "tuklib_config.h"
+
+// TUKLIB_SYMBOL_PREFIX is prefixed to all symbols exported by
+// the tuklib modules. If you use a tuklib module in a library,
+// you should use TUKLIB_SYMBOL_PREFIX to make sure that there
+// are no symbol conflicts in case someone links your library
+// into an application that also uses the same tuklib module.
+#ifndef TUKLIB_SYMBOL_PREFIX
+#	define TUKLIB_SYMBOL_PREFIX
+#endif
+
+#define TUKLIB_CAT_X(a, b) a ## b
+#define TUKLIB_CAT(a, b) TUKLIB_CAT_X(a, b)
+
+#ifndef TUKLIB_SYMBOL
+#	define TUKLIB_SYMBOL(sym) TUKLIB_CAT(TUKLIB_SYMBOL_PREFIX, sym)
+#endif
+
+#ifndef TUKLIB_DECLS_BEGIN
+#	ifdef __cplusplus
+#		define TUKLIB_DECLS_BEGIN extern "C" {
+#	else
+#		define TUKLIB_DECLS_BEGIN
+#	endif
+#endif
+
+#ifndef TUKLIB_DECLS_END
+#	ifdef __cplusplus
+#		define TUKLIB_DECLS_END }
+#	else
+#		define TUKLIB_DECLS_END
+#	endif
+#endif
+
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+#	define TUKLIB_GNUC_REQ(major, minor) \
+		((__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) \
+			|| __GNUC__ > (major))
+#else
+#	define TUKLIB_GNUC_REQ(major, minor) 0
+#endif
+
+#if TUKLIB_GNUC_REQ(2, 5)
+#	define tuklib_attr_noreturn __attribute__((__noreturn__))
+#else
+#	define tuklib_attr_noreturn
+#endif
+
+#if (defined(_WIN32) && !defined(__CYGWIN__)) \
+		|| defined(__OS2__) || defined(__MSDOS__)
+#	define TUKLIB_DOSLIKE 1
+#endif
+
+#endif
Index: contrib/xz/src/common/tuklib_integer.h
===================================================================
--- contrib/xz/src/common/tuklib_integer.h	(revision 0)
+++ contrib/xz/src/common/tuklib_integer.h	(revision 0)
@@ -0,0 +1,523 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_integer.h
+/// \brief      Various integer and bit operations
+///
+/// This file provides macros or functions to do some basic integer and bit
+/// operations.
+///
+/// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
+///   - Byte swapping: bswapXX(num)
+///   - Byte order conversions to/from native: convXXYe(num)
+///   - Aligned reads: readXXYe(ptr)
+///   - Aligned writes: writeXXYe(ptr, num)
+///   - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
+///   - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
+///
+/// Since they can be macros, the arguments should have no side effects
+/// because they may be evaluated more than once.
+///
+/// \todo       PowerPC and possibly some other architectures support
+///             byte swapping load and store instructions. This file
+///             doesn't take advantage of those instructions.
+///
+/// Bit scan operations for non-zero 32-bit integers:
+///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
+///   - Count leading zeros: clz32(num)
+///   - Count trailing zeros: ctz32(num)
+///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
+///
+/// The above bit scan operations return 0-31. If num is zero,
+/// the result is undefined.
+//
+//  Authors:    Lasse Collin
+//              Joachim Henke
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_INTEGER_H
+#define TUKLIB_INTEGER_H
+
+#include "tuklib_common.h"
+
+
+////////////////////////////////////////
+// Operating system specific features //
+////////////////////////////////////////
+
+#if defined(HAVE_BYTESWAP_H)
+	// glibc, uClibc, dietlibc
+#	include <byteswap.h>
+#	ifdef HAVE_BSWAP_16
+#		define bswap16(num) bswap_16(num)
+#	endif
+#	ifdef HAVE_BSWAP_32
+#		define bswap32(num) bswap_32(num)
+#	endif
+#	ifdef HAVE_BSWAP_64
+#		define bswap64(num) bswap_64(num)
+#	endif
+
+#elif defined(HAVE_SYS_ENDIAN_H)
+	// *BSDs and Darwin
+#	include <sys/endian.h>
+
+#elif defined(HAVE_SYS_BYTEORDER_H)
+	// Solaris
+#	include <sys/byteorder.h>
+#	ifdef BSWAP_16
+#		define bswap16(num) BSWAP_16(num)
+#	endif
+#	ifdef BSWAP_32
+#		define bswap32(num) BSWAP_32(num)
+#	endif
+#	ifdef BSWAP_64
+#		define bswap64(num) BSWAP_64(num)
+#	endif
+#	ifdef BE_16
+#		define conv16be(num) BE_16(num)
+#	endif
+#	ifdef BE_32
+#		define conv32be(num) BE_32(num)
+#	endif
+#	ifdef BE_64
+#		define conv64be(num) BE_64(num)
+#	endif
+#	ifdef LE_16
+#		define conv16le(num) LE_16(num)
+#	endif
+#	ifdef LE_32
+#		define conv32le(num) LE_32(num)
+#	endif
+#	ifdef LE_64
+#		define conv64le(num) LE_64(num)
+#	endif
+#endif
+
+
+///////////////////
+// Byte swapping //
+///////////////////
+
+#ifndef bswap16
+#	define bswap16(num) \
+		((uint16_t)(((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8)))
+#endif
+
+#ifndef bswap32
+#	define bswap32(num) \
+		( (((uint32_t)(num) << 24)                       ) \
+		| (((uint32_t)(num) <<  8) & UINT32_C(0x00FF0000)) \
+		| (((uint32_t)(num) >>  8) & UINT32_C(0x0000FF00)) \
+		| (((uint32_t)(num) >> 24)                       ) )
+#endif
+
+#ifndef bswap64
+#	define bswap64(num) \
+		( (((uint64_t)(num) << 56)                               ) \
+		| (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
+		| (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
+		| (((uint64_t)(num) <<  8) & UINT64_C(0x000000FF00000000)) \
+		| (((uint64_t)(num) >>  8) & UINT64_C(0x00000000FF000000)) \
+		| (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
+		| (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
+		| (((uint64_t)(num) >> 56)                               ) )
+#endif
+
+// Define conversion macros using the basic byte swapping macros.
+#ifdef WORDS_BIGENDIAN
+#	ifndef conv16be
+#		define conv16be(num) ((uint16_t)(num))
+#	endif
+#	ifndef conv32be
+#		define conv32be(num) ((uint32_t)(num))
+#	endif
+#	ifndef conv64be
+#		define conv64be(num) ((uint64_t)(num))
+#	endif
+#	ifndef conv16le
+#		define conv16le(num) bswap16(num)
+#	endif
+#	ifndef conv32le
+#		define conv32le(num) bswap32(num)
+#	endif
+#	ifndef conv64le
+#		define conv64le(num) bswap64(num)
+#	endif
+#else
+#	ifndef conv16be
+#		define conv16be(num) bswap16(num)
+#	endif
+#	ifndef conv32be
+#		define conv32be(num) bswap32(num)
+#	endif
+#	ifndef conv64be
+#		define conv64be(num) bswap64(num)
+#	endif
+#	ifndef conv16le
+#		define conv16le(num) ((uint16_t)(num))
+#	endif
+#	ifndef conv32le
+#		define conv32le(num) ((uint32_t)(num))
+#	endif
+#	ifndef conv64le
+#		define conv64le(num) ((uint64_t)(num))
+#	endif
+#endif
+
+
+//////////////////////////////
+// Aligned reads and writes //
+//////////////////////////////
+
+static inline uint16_t
+read16be(const uint8_t *buf)
+{
+	uint16_t num = *(const uint16_t *)buf;
+	return conv16be(num);
+}
+
+
+static inline uint16_t
+read16le(const uint8_t *buf)
+{
+	uint16_t num = *(const uint16_t *)buf;
+	return conv16le(num);
+}
+
+
+static inline uint32_t
+read32be(const uint8_t *buf)
+{
+	uint32_t num = *(const uint32_t *)buf;
+	return conv32be(num);
+}
+
+
+static inline uint32_t
+read32le(const uint8_t *buf)
+{
+	uint32_t num = *(const uint32_t *)buf;
+	return conv32le(num);
+}
+
+
+static inline uint64_t
+read64be(const uint8_t *buf)
+{
+	uint64_t num = *(const uint64_t *)buf;
+	return conv64be(num);
+}
+
+
+static inline uint64_t
+read64le(const uint8_t *buf)
+{
+	uint64_t num = *(const uint64_t *)buf;
+	return conv64le(num);
+}
+
+
+// NOTE: Possible byte swapping must be done in a macro to allow GCC
+// to optimize byte swapping of constants when using glibc's or *BSD's
+// byte swapping macros. The actual write is done in an inline function
+// to make type checking of the buf pointer possible similarly to readXXYe()
+// functions.
+
+#define write16be(buf, num) write16ne((buf), conv16be(num))
+#define write16le(buf, num) write16ne((buf), conv16le(num))
+#define write32be(buf, num) write32ne((buf), conv32be(num))
+#define write32le(buf, num) write32ne((buf), conv32le(num))
+#define write64be(buf, num) write64ne((buf), conv64be(num))
+#define write64le(buf, num) write64ne((buf), conv64le(num))
+
+
+static inline void
+write16ne(uint8_t *buf, uint16_t num)
+{
+	*(uint16_t *)buf = num;
+	return;
+}
+
+
+static inline void
+write32ne(uint8_t *buf, uint32_t num)
+{
+	*(uint32_t *)buf = num;
+	return;
+}
+
+
+static inline void
+write64ne(uint8_t *buf, uint64_t num)
+{
+	*(uint64_t *)buf = num;
+	return;
+}
+
+
+////////////////////////////////
+// Unaligned reads and writes //
+////////////////////////////////
+
+// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
+// 32-bit unaligned integer loads and stores. It's possible that 64-bit
+// unaligned access doesn't work or is slower than byte-by-byte access.
+// Since unaligned 64-bit is probably not needed as often as 16-bit or
+// 32-bit, we simply don't support 64-bit unaligned access for now.
+#ifdef TUKLIB_FAST_UNALIGNED_ACCESS
+#	define unaligned_read16be read16be
+#	define unaligned_read16le read16le
+#	define unaligned_read32be read32be
+#	define unaligned_read32le read32le
+#	define unaligned_write16be write16be
+#	define unaligned_write16le write16le
+#	define unaligned_write32be write32be
+#	define unaligned_write32le write32le
+
+#else
+
+static inline uint16_t
+unaligned_read16be(const uint8_t *buf)
+{
+	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
+	return num;
+}
+
+
+static inline uint16_t
+unaligned_read16le(const uint8_t *buf)
+{
+	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
+	return num;
+}
+
+
+static inline uint32_t
+unaligned_read32be(const uint8_t *buf)
+{
+	uint32_t num = (uint32_t)buf[0] << 24;
+	num |= (uint32_t)buf[1] << 16;
+	num |= (uint32_t)buf[2] << 8;
+	num |= (uint32_t)buf[3];
+	return num;
+}
+
+
+static inline uint32_t
+unaligned_read32le(const uint8_t *buf)
+{
+	uint32_t num = (uint32_t)buf[0];
+	num |= (uint32_t)buf[1] << 8;
+	num |= (uint32_t)buf[2] << 16;
+	num |= (uint32_t)buf[3] << 24;
+	return num;
+}
+
+
+static inline void
+unaligned_write16be(uint8_t *buf, uint16_t num)
+{
+	buf[0] = num >> 8;
+	buf[1] = num;
+	return;
+}
+
+
+static inline void
+unaligned_write16le(uint8_t *buf, uint16_t num)
+{
+	buf[0] = num;
+	buf[1] = num >> 8;
+	return;
+}
+
+
+static inline void
+unaligned_write32be(uint8_t *buf, uint32_t num)
+{
+	buf[0] = num >> 24;
+	buf[1] = num >> 16;
+	buf[2] = num >> 8;
+	buf[3] = num;
+	return;
+}
+
+
+static inline void
+unaligned_write32le(uint8_t *buf, uint32_t num)
+{
+	buf[0] = num;
+	buf[1] = num >> 8;
+	buf[2] = num >> 16;
+	buf[3] = num >> 24;
+	return;
+}
+
+#endif
+
+
+static inline uint32_t
+bsr32(uint32_t n)
+{
+	// Check for ICC first, since it tends to define __GNUC__ too.
+#if defined(__INTEL_COMPILER)
+	return _bit_scan_reverse(n);
+
+#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
+	// GCC >= 3.4 has __builtin_clz(), which gives good results on
+	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
+	// either plain BSR (so the XOR gets optimized away) or LZCNT and
+	// XOR (if -march indicates that SSE4a instructions are supported).
+	return __builtin_clz(n) ^ 31U;
+
+#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+	uint32_t i;
+	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
+	return i;
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400
+	// MSVC isn't supported by tuklib, but since this code exists,
+	// it doesn't hurt to have it here anyway.
+	uint32_t i;
+	_BitScanReverse((DWORD *)&i, n);
+	return i;
+
+#else
+	uint32_t i = 31;
+
+	if ((n & UINT32_C(0xFFFF0000)) == 0) {
+		n <<= 16;
+		i = 15;
+	}
+
+	if ((n & UINT32_C(0xFF000000)) == 0) {
+		n <<= 8;
+		i -= 8;
+	}
+
+	if ((n & UINT32_C(0xF0000000)) == 0) {
+		n <<= 4;
+		i -= 4;
+	}
+
+	if ((n & UINT32_C(0xC0000000)) == 0) {
+		n <<= 2;
+		i -= 2;
+	}
+
+	if ((n & UINT32_C(0x80000000)) == 0)
+		--i;
+
+	return i;
+#endif
+}
+
+
+static inline uint32_t
+clz32(uint32_t n)
+{
+#if defined(__INTEL_COMPILER)
+	return _bit_scan_reverse(n) ^ 31U;
+
+#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
+	return __builtin_clz(n);
+
+#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+	uint32_t i;
+	__asm__("bsrl %1, %0\n\t"
+		"xorl $31, %0"
+		: "=r" (i) : "rm" (n));
+	return i;
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400
+	uint32_t i;
+	_BitScanReverse((DWORD *)&i, n);
+	return i ^ 31U;
+
+#else
+	uint32_t i = 0;
+
+	if ((n & UINT32_C(0xFFFF0000)) == 0) {
+		n <<= 16;
+		i = 16;
+	}
+
+	if ((n & UINT32_C(0xFF000000)) == 0) {
+		n <<= 8;
+		i += 8;
+	}
+
+	if ((n & UINT32_C(0xF0000000)) == 0) {
+		n <<= 4;
+		i += 4;
+	}
+
+	if ((n & UINT32_C(0xC0000000)) == 0) {
+		n <<= 2;
+		i += 2;
+	}
+
+	if ((n & UINT32_C(0x80000000)) == 0)
+		++i;
+
+	return i;
+#endif
+}
+
+
+static inline uint32_t
+ctz32(uint32_t n)
+{
+#if defined(__INTEL_COMPILER)
+	return _bit_scan_forward(n);
+
+#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
+	return __builtin_ctz(n);
+
+#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+	uint32_t i;
+	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
+	return i;
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400
+	uint32_t i;
+	_BitScanForward((DWORD *)&i, n);
+	return i;
+
+#else
+	uint32_t i = 0;
+
+	if ((n & UINT32_C(0x0000FFFF)) == 0) {
+		n >>= 16;
+		i = 16;
+	}
+
+	if ((n & UINT32_C(0x000000FF)) == 0) {
+		n >>= 8;
+		i += 8;
+	}
+
+	if ((n & UINT32_C(0x0000000F)) == 0) {
+		n >>= 4;
+		i += 4;
+	}
+
+	if ((n & UINT32_C(0x00000003)) == 0) {
+		n >>= 2;
+		i += 2;
+	}
+
+	if ((n & UINT32_C(0x00000001)) == 0)
+		++i;
+
+	return i;
+#endif
+}
+
+#define bsf32 ctz32
+
+#endif
Index: contrib/xz/src/common/tuklib_progname.c
===================================================================
--- contrib/xz/src/common/tuklib_progname.c	(revision 0)
+++ contrib/xz/src/common/tuklib_progname.c	(revision 0)
@@ -0,0 +1,50 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_progname.c
+/// \brief      Program name to be displayed in messages
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tuklib_progname.h"
+#include <string.h>
+
+
+#if !HAVE_DECL_PROGRAM_INVOCATION_NAME
+char *progname = NULL;
+#endif
+
+
+extern void
+tuklib_progname_init(char **argv)
+{
+#ifdef TUKLIB_DOSLIKE
+	// On these systems, argv[0] always has the full path and .exe
+	// suffix even if the user just types the plain program name.
+	// We modify argv[0] to make it nicer to read.
+
+	// Strip the leading path.
+	char *p = argv[0] + strlen(argv[0]);
+	while (argv[0] < p && p[-1] != '/' && p[-1] != '\\')
+		--p;
+
+	argv[0] = p;
+
+	// Strip the .exe suffix.
+	p = strrchr(p, '.');
+	if (p != NULL)
+		*p = '\0';
+
+	// Make it lowercase.
+	for (p = argv[0]; *p != '\0'; ++p)
+		if (*p >= 'A' && *p <= 'Z')
+			*p = *p - 'A' + 'a';
+#endif
+
+	progname = argv[0];
+	return;
+}
Index: contrib/xz/src/common/mythread.h
===================================================================
--- contrib/xz/src/common/mythread.h	(revision 0)
+++ contrib/xz/src/common/mythread.h	(revision 0)
@@ -0,0 +1,42 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       mythread.h
+/// \brief      Wrappers for threads
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "sysdefs.h"
+
+
+#ifdef HAVE_PTHREAD
+#	include <pthread.h>
+
+#	define mythread_once(func) \
+	do { \
+		static pthread_once_t once_ = PTHREAD_ONCE_INIT; \
+		pthread_once(&once_, &func); \
+	} while (0)
+
+#	define mythread_sigmask(how, set, oset) \
+	pthread_sigmask(how, set, oset)
+
+#else
+
+#	define mythread_once(func) \
+	do { \
+		static bool once_ = false; \
+		if (!once_) { \
+			func(); \
+			once_ = true; \
+		} \
+	} while (0)
+
+#	define mythread_sigmask(how, set, oset) \
+	sigprocmask(how, set, oset)
+
+#endif
Index: contrib/xz/src/common/tuklib_exit.c
===================================================================
--- contrib/xz/src/common/tuklib_exit.c	(revision 0)
+++ contrib/xz/src/common/tuklib_exit.c	(revision 0)
@@ -0,0 +1,57 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_exit.c
+/// \brief      Close stdout and stderr, and exit
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tuklib_common.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "tuklib_gettext.h"
+#include "tuklib_progname.h"
+#include "tuklib_exit.h"
+
+
+extern void
+tuklib_exit(int status, int err_status, int show_error)
+{
+	if (status != err_status) {
+		// Close stdout. If something goes wrong,
+		// print an error message to stderr.
+		const int ferror_err = ferror(stdout);
+		const int fclose_err = fclose(stdout);
+		if (ferror_err || fclose_err) {
+			status = err_status;
+
+			// If it was fclose() that failed, we have the reason
+			// in errno. If only ferror() indicated an error,
+			// we have no idea what the reason was.
+			if (show_error)
+				fprintf(stderr, "%s: %s: %s\n", progname,
+						_("Writing to standard "
+							"output failed"),
+						fclose_err ? strerror(errno)
+							: _("Unknown error"));
+		}
+	}
+
+	if (status != err_status) {
+		// Close stderr. If something goes wrong, there's
+		// nothing where we could print an error message.
+		// Just set the exit status.
+		const int ferror_err = ferror(stderr);
+		const int fclose_err = fclose(stderr);
+		if (fclose_err || ferror_err)
+			status = err_status;
+	}
+
+	exit(status);
+}
Index: contrib/xz/src/common/tuklib_config.h
===================================================================
--- contrib/xz/src/common/tuklib_config.h	(revision 0)
+++ contrib/xz/src/common/tuklib_config.h	(revision 0)
@@ -0,0 +1,7 @@
+#ifdef HAVE_CONFIG_H
+#	include "sysdefs.h"
+#else
+#	include <stddef.h>
+#	include <inttypes.h>
+#	include <limits.h>
+#endif
Index: contrib/xz/src/common/sysdefs.h
===================================================================
--- contrib/xz/src/common/sysdefs.h	(revision 0)
+++ contrib/xz/src/common/sysdefs.h	(revision 0)
@@ -0,0 +1,171 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       sysdefs.h
+/// \brief      Common includes, definitions, system-specific things etc.
+///
+/// This file is used also by the lzma command line tool, that's why this
+/// file is separate from common.h.
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_SYSDEFS_H
+#define LZMA_SYSDEFS_H
+
+//////////////
+// Includes //
+//////////////
+
+#ifdef HAVE_CONFIG_H
+#	include <config.h>
+#endif
+
+// size_t and NULL
+#include <stddef.h>
+
+#ifdef HAVE_INTTYPES_H
+#	include <inttypes.h>
+#endif
+
+// C99 says that inttypes.h always includes stdint.h, but some systems
+// don't do that, and require including stdint.h separately.
+#ifdef HAVE_STDINT_H
+#	include <stdint.h>
+#endif
+
+// Some pre-C99 systems have SIZE_MAX in limits.h instead of stdint.h. The
+// limits are also used to figure out some macros missing from pre-C99 systems.
+#ifdef HAVE_LIMITS_H
+#	include <limits.h>
+#endif
+
+// Be more compatible with systems that have non-conforming inttypes.h.
+// We assume that int is 32-bit and that long is either 32-bit or 64-bit.
+// Full Autoconf test could be more correct, but this should work well enough.
+// Note that this duplicates some code from lzma.h, but this is better since
+// we can work without inttypes.h thanks to Autoconf tests.
+#ifndef UINT32_C
+#	if UINT_MAX != 4294967295U
+#		error UINT32_C is not defined and unsigned int is not 32-bit.
+#	endif
+#	define UINT32_C(n) n ## U
+#endif
+#ifndef UINT32_MAX
+#	define UINT32_MAX UINT32_C(4294967295)
+#endif
+#ifndef PRIu32
+#	define PRIu32 "u"
+#endif
+#ifndef PRIX32
+#	define PRIX32 "X"
+#endif
+
+#if ULONG_MAX == 4294967295UL
+#	ifndef UINT64_C
+#		define UINT64_C(n) n ## ULL
+#	endif
+#	ifndef PRIu64
+#		define PRIu64 "llu"
+#	endif
+#	ifndef PRIX64
+#		define PRIX64 "llX"
+#	endif
+#else
+#	ifndef UINT64_C
+#		define UINT64_C(n) n ## UL
+#	endif
+#	ifndef PRIu64
+#		define PRIu64 "lu"
+#	endif
+#	ifndef PRIX64
+#		define PRIX64 "lX"
+#	endif
+#endif
+#ifndef UINT64_MAX
+#	define UINT64_MAX UINT64_C(18446744073709551615)
+#endif
+
+// Interix has broken header files, which typedef size_t to unsigned long,
+// but a few lines later define SIZE_MAX to INT32_MAX.
+#ifdef __INTERIX
+#	undef SIZE_MAX
+#endif
+
+// The code currently assumes that size_t is either 32-bit or 64-bit.
+#ifndef SIZE_MAX
+#	if SIZEOF_SIZE_T == 4
+#		define SIZE_MAX UINT32_MAX
+#	elif SIZEOF_SIZE_T == 8
+#		define SIZE_MAX UINT64_MAX
+#	else
+#		error sizeof(size_t) is not 32-bit or 64-bit
+#	endif
+#endif
+#if SIZE_MAX != UINT32_MAX && SIZE_MAX != UINT64_MAX
+#	error sizeof(size_t) is not 32-bit or 64-bit
+#endif
+
+#include <stdlib.h>
+#include <assert.h>
+
+// Pre-C99 systems lack stdbool.h. All the code in LZMA Utils must be written
+// so that it works with fake bool type, for example:
+//
+//    bool foo = (flags & 0x100) != 0;
+//    bool bar = !!(flags & 0x100);
+//
+// This works with the real C99 bool but breaks with fake bool:
+//
+//    bool baz = (flags & 0x100);
+//
+#ifdef HAVE_STDBOOL_H
+#	include <stdbool.h>
+#else
+#	if ! HAVE__BOOL
+typedef unsigned char _Bool;
+#	endif
+#	define bool _Bool
+#	define false 0
+#	define true 1
+#	define __bool_true_false_are_defined 1
+#endif
+
+// string.h should be enough but let's include strings.h and memory.h too if
+// they exist, since that shouldn't do any harm, but may improve portability.
+#ifdef HAVE_STRING_H
+#	include <string.h>
+#endif
+
+#ifdef HAVE_STRINGS_H
+#	include <strings.h>
+#endif
+
+#ifdef HAVE_MEMORY_H
+#	include <memory.h>
+#endif
+
+
+////////////
+// Macros //
+////////////
+
+#undef memzero
+#define memzero(s, n) memset(s, 0, n)
+
+#ifndef MIN
+#	define MIN(x, y) ((x) < (y) ? (x) : (y))
+#endif
+
+#ifndef MAX
+#	define MAX(x, y) ((x) > (y) ? (x) : (y))
+#endif
+
+#ifndef ARRAY_SIZE
+#	define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+#endif
+
+#endif
Index: contrib/xz/src/common/tuklib_progname.h
===================================================================
--- contrib/xz/src/common/tuklib_progname.h	(revision 0)
+++ contrib/xz/src/common/tuklib_progname.h	(revision 0)
@@ -0,0 +1,32 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_progname.h
+/// \brief      Program name to be displayed in messages
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_PROGNAME_H
+#define TUKLIB_PROGNAME_H
+
+#include "tuklib_common.h"
+#include <errno.h>
+
+TUKLIB_DECLS_BEGIN
+
+#if HAVE_DECL_PROGRAM_INVOCATION_NAME
+#	define progname program_invocation_name
+#else
+#	define progname TUKLIB_SYMBOL(tuklib_progname)
+	extern char *progname;
+#endif
+
+#define tuklib_progname_init TUKLIB_SYMBOL(tuklib_progname_init)
+extern void tuklib_progname_init(char **argv);
+
+TUKLIB_DECLS_END
+#endif
Index: contrib/xz/src/common/tuklib_exit.h
===================================================================
--- contrib/xz/src/common/tuklib_exit.h	(revision 0)
+++ contrib/xz/src/common/tuklib_exit.h	(revision 0)
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_exit.h
+/// \brief      Close stdout and stderr, and exit
+/// \note       Requires tuklib_progname and tuklib_gettext modules
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_EXIT_H
+#define TUKLIB_EXIT_H
+
+#include "tuklib_common.h"
+TUKLIB_DECLS_BEGIN
+
+#define tuklib_exit TUKLIB_SYMBOL(tuklib_exit)
+extern void tuklib_exit(int status, int err_status, int show_error)
+		tuklib_attr_noreturn;
+
+TUKLIB_DECLS_END
+#endif
Index: contrib/xz/src/common/tuklib_physmem.c
===================================================================
--- contrib/xz/src/common/tuklib_physmem.c	(revision 0)
+++ contrib/xz/src/common/tuklib_physmem.c	(revision 0)
@@ -0,0 +1,165 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_physmem.c
+/// \brief      Get the amount of physical memory
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tuklib_physmem.h"
+
+// We want to use Windows-specific code on Cygwin, which also has memory
+// information available via sysconf(), but on Cygwin 1.5 and older it
+// gives wrong results (from our point of view).
+#if defined(_WIN32) || defined(__CYGWIN__)
+#	ifndef _WIN32_WINNT
+#		define _WIN32_WINNT 0x0500
+#	endif
+#	include <windows.h>
+
+#elif defined(__OS2__)
+#	define INCL_DOSMISC
+#	include <os2.h>
+
+#elif defined(__DJGPP__)
+#	include <dpmi.h>
+
+#elif defined(__VMS)
+#	include <lib$routines.h>
+#	include <syidef.h>
+#	include <ssdef.h>
+
+#elif defined(TUKLIB_PHYSMEM_SYSCONF)
+#	include <unistd.h>
+
+#elif defined(TUKLIB_PHYSMEM_SYSCTL)
+#	ifdef HAVE_SYS_PARAM_H
+#		include <sys/param.h>
+#	endif
+#	include <sys/sysctl.h>
+
+// IRIX
+#elif defined(TUKLIB_PHYSMEM_GETINVENT_R)
+#	include <invent.h>
+
+// This sysinfo() is Linux-specific.
+#elif defined(TUKLIB_PHYSMEM_SYSINFO)
+#	include <sys/sysinfo.h>
+#endif
+
+
+extern uint64_t
+tuklib_physmem(void)
+{
+	uint64_t ret = 0;
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+	if ((GetVersion() & 0xFF) >= 5) {
+		// Windows 2000 and later have GlobalMemoryStatusEx() which
+		// supports reporting values greater than 4 GiB. To keep the
+		// code working also on older Windows versions, use
+		// GlobalMemoryStatusEx() conditionally.
+		HMODULE kernel32 = GetModuleHandle("kernel32.dll");
+		if (kernel32 != NULL) {
+			BOOL (WINAPI *gmse)(LPMEMORYSTATUSEX) = GetProcAddress(
+					kernel32, "GlobalMemoryStatusEx");
+			if (gmse != NULL) {
+				MEMORYSTATUSEX meminfo;
+				meminfo.dwLength = sizeof(meminfo);
+				if (gmse(&meminfo))
+					ret = meminfo.ullTotalPhys;
+			}
+		}
+	}
+
+	if (ret == 0) {
+		// GlobalMemoryStatus() is supported by Windows 95 and later,
+		// so it is fine to link against it unconditionally. Note that
+		// GlobalMemoryStatus() has no return value.
+		MEMORYSTATUS meminfo;
+		meminfo.dwLength = sizeof(meminfo);
+		GlobalMemoryStatus(&meminfo);
+		ret = meminfo.dwTotalPhys;
+	}
+
+#elif defined(__OS2__)
+	unsigned long mem;
+	if (DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM,
+			&mem, sizeof(mem)) == 0)
+		ret = mem;
+
+#elif defined(__DJGPP__)
+	__dpmi_free_mem_info meminfo;
+	if (__dpmi_get_free_memory_information(&meminfo) == 0
+			&& meminfo.total_number_of_physical_pages
+				!= (unsigned long)-1)
+		ret = (uint64_t)meminfo.total_number_of_physical_pages * 4096;
+
+#elif defined(__VMS)
+	int vms_mem;
+	int val = SYI$_MEMSIZE;
+	if (LIB$GETSYI(&val, &vms_mem, 0, 0, 0, 0) == SS$_NORMAL)
+		ret = (uint64_t)vms_mem * 8192;
+
+#elif defined(TUKLIB_PHYSMEM_SYSCONF)
+	const long pagesize = sysconf(_SC_PAGESIZE);
+	const long pages = sysconf(_SC_PHYS_PAGES);
+	if (pagesize != -1 && pages != -1)
+		// According to docs, pagesize * pages can overflow.
+		// Simple case is 32-bit box with 4 GiB or more RAM,
+		// which may report exactly 4 GiB of RAM, and "long"
+		// being 32-bit will overflow. Casting to uint64_t
+		// hopefully avoids overflows in the near future.
+		ret = (uint64_t)pagesize * (uint64_t)pages;
+
+#elif defined(TUKLIB_PHYSMEM_SYSCTL)
+	int name[2] = {
+		CTL_HW,
+#ifdef HW_PHYSMEM64
+		HW_PHYSMEM64
+#else
+		HW_PHYSMEM
+#endif
+	};
+	union {
+		uint32_t u32;
+		uint64_t u64;
+	} mem;
+	size_t mem_ptr_size = sizeof(mem.u64);
+	if (sysctl(name, 2, &mem.u64, &mem_ptr_size, NULL, 0) != -1) {
+		// IIRC, 64-bit "return value" is possible on some 64-bit
+		// BSD systems even with HW_PHYSMEM (instead of HW_PHYSMEM64),
+		// so support both.
+		if (mem_ptr_size == sizeof(mem.u64))
+			ret = mem.u64;
+		else if (mem_ptr_size == sizeof(mem.u32))
+			ret = mem.u32;
+	}
+
+#elif defined(TUKLIB_PHYSMEM_GETINVENT_R)
+	inv_state_t *st = NULL;
+	if (setinvent_r(&st) != -1) {
+		inventory_t *i;
+		while ((i = getinvent_r(st)) != NULL) {
+			if (i->inv_class == INV_MEMORY
+					&& i->inv_type == INV_MAIN_MB) {
+				ret = (uint64_t)i->inv_state << 20;
+				break;
+			}
+		}
+
+		endinvent_r(st);
+	}
+
+#elif defined(TUKLIB_PHYSMEM_SYSINFO)
+	struct sysinfo si;
+	if (sysinfo(&si) == 0)
+		ret = (uint64_t)si.totalram * si.mem_unit;
+#endif
+
+	return ret;
+}
Index: contrib/xz/src/common/tuklib_cpucores.c
===================================================================
--- contrib/xz/src/common/tuklib_cpucores.c	(revision 0)
+++ contrib/xz/src/common/tuklib_cpucores.c	(revision 0)
@@ -0,0 +1,52 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_cpucores.c
+/// \brief      Get the number of CPU cores online
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tuklib_cpucores.h"
+
+#if defined(TUKLIB_CPUCORES_SYSCTL)
+#	ifdef HAVE_SYS_PARAM_H
+#		include <sys/param.h>
+#	endif
+#	include <sys/sysctl.h>
+
+#elif defined(TUKLIB_CPUCORES_SYSCONF)
+#	include <unistd.h>
+#endif
+
+
+extern uint32_t
+tuklib_cpucores(void)
+{
+	uint32_t ret = 0; // Returned as is if no method below succeeds.
+
+#if defined(TUKLIB_CPUCORES_SYSCTL)
+	int name[2] = { CTL_HW, HW_NCPU };
+	int cpus;
+	size_t cpus_size = sizeof(cpus);
+	if (sysctl(name, 2, &cpus, &cpus_size, NULL, 0) != -1
+			&& cpus_size == sizeof(cpus) && cpus > 0)
+		ret = (uint32_t)cpus;
+
+#elif defined(TUKLIB_CPUCORES_SYSCONF)
+#	ifdef _SC_NPROCESSORS_ONLN
+	// Most systems
+	const long cpus = sysconf(_SC_NPROCESSORS_ONLN);
+#	else
+	// IRIX
+	const long cpus = sysconf(_SC_NPROC_ONLN);
+#	endif
+	if (cpus > 0)
+		ret = (uint32_t)cpus;
+#endif
+
+	return ret;
+}
Index: contrib/xz/src/common/tuklib_open_stdxxx.c
===================================================================
--- contrib/xz/src/common/tuklib_open_stdxxx.c	(revision 0)
+++ contrib/xz/src/common/tuklib_open_stdxxx.c	(revision 0)
@@ -0,0 +1,66 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_open_stdxxx.c
+/// \brief      Make sure that file descriptors 0, 1, and 2 are open
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tuklib_open_stdxxx.h"
+
+#ifndef TUKLIB_DOSLIKE
+#	include <stdlib.h>
+#	include <errno.h>
+#	include <fcntl.h>
+#	include <unistd.h>
+#endif
+
+
+/// Make sure that file descriptors 0, 1, and 2 are open. Each one that is
+/// found closed is opened on /dev/null: write-only for descriptor 0 and
+/// read-only for descriptors 1 and 2.
+///
+/// \param      err_status  Exit status to use if /dev/null cannot be
+///                         opened at the expected descriptor number
+extern void
+tuklib_open_stdxxx(int err_status)
+{
+#ifdef TUKLIB_DOSLIKE
+	// Do nothing, just silence warnings.
+	(void)err_status;
+
+#else
+	for (int i = 0; i <= 2; ++i) {
+		// We use fcntl() to check if the file descriptor is open.
+		if (fcntl(i, F_GETFD) == -1 && errno == EBADF) {
+			// With stdin, we could use /dev/full so that
+			// writing to stdin would fail. However, /dev/full
+			// is Linux specific, and if the program tries to
+			// write to stdin, there's already a problem anyway.
+			const int fd = open("/dev/null", O_NOCTTY
+					| (i == 0 ? O_WRONLY : O_RDONLY));
+
+			if (fd != i) {
+				// Something went wrong. Exit with the
+				// exit status we were given. Don't try
+				// to print an error message, since stderr
+				// may very well be non-existent. This
+				// error should be extremely rare.
+				//
+				// fd is -1 if open() itself failed; there
+				// is nothing to close in that case.
+				if (fd != -1)
+					(void)close(fd);
+
+				exit(err_status);
+			}
+		}
+	}
+#endif
+
+	return;
+}
Index: contrib/xz/src/liblzma/lzma/lzma_encoder_private.h
===================================================================
--- contrib/xz/src/liblzma/lzma/lzma_encoder_private.h	(revision 0)
+++ contrib/xz/src/liblzma/lzma/lzma_encoder_private.h	(revision 0)
@@ -0,0 +1,148 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma_encoder_private.h
+/// \brief      Private definitions for LZMA encoder
+///
+//  Authors:    Igor Pavlov
+//              Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_LZMA_ENCODER_PRIVATE_H
+#define LZMA_LZMA_ENCODER_PRIVATE_H
+
+#include "lz_encoder.h"
+#include "range_encoder.h"
+#include "lzma_common.h"
+#include "lzma_encoder.h"
+
+
+// Macro to compare if the first two bytes in two buffers differ. This is
+// needed in lzma_lzma_optimum_*() to test if the match is at least
+// MATCH_LEN_MIN bytes. Unaligned access gives tiny gain so there's no
+// reason to not use it when it is supported.
+#ifdef TUKLIB_FAST_UNALIGNED_ACCESS
+#	define not_equal_16(a, b) \
+		(*(const uint16_t *)(a) != *(const uint16_t *)(b))
+#else
+#	define not_equal_16(a, b) \
+		((a)[0] != (b)[0] || (a)[1] != (b)[1])
+#endif
+
+
+// Optimal - Number of entries in the optimum array.
+#define OPTS (1 << 12)
+
+
+typedef struct {
+	probability choice;
+	probability choice2;
+	probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
+	probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
+	probability high[LEN_HIGH_SYMBOLS];
+
+	uint32_t prices[POS_STATES_MAX][LEN_SYMBOLS];
+	uint32_t table_size; ///< Entries in use per prices[] row
+	uint32_t counters[POS_STATES_MAX]; ///< Countdown to price update
+
+} lzma_length_encoder;
+
+
+typedef struct {
+	lzma_lzma_state state;
+
+	bool prev_1_is_literal;
+	bool prev_2;
+
+	uint32_t pos_prev_2;
+	uint32_t back_prev_2;
+
+	uint32_t price;
+	uint32_t pos_prev;  // pos_next;
+	uint32_t back_prev;
+
+	uint32_t backs[REP_DISTANCES];
+
+} lzma_optimal;
+
+
+struct lzma_coder_s {
+	/// Range encoder
+	lzma_range_encoder rc;
+
+	/// State
+	lzma_lzma_state state;
+
+	/// The four most recent match distances
+	uint32_t reps[REP_DISTANCES];
+
+	/// Array of match candidates
+	lzma_match matches[MATCH_LEN_MAX + 1];
+
+	/// Number of match candidates in matches[]
+	uint32_t matches_count;
+
+	/// Variable to hold the length of the longest match between calls
+	/// to lzma_lzma_optimum_*().
+	uint32_t longest_match_length;
+
+	/// True if using getoptimumfast
+	bool fast_mode;
+
+	/// True if the encoder has been initialized by encoding the first
+	/// byte as a literal.
+	bool is_initialized;
+
+	/// True if the range encoder has been flushed, but not all bytes
+	/// have been written to the output buffer yet.
+	bool is_flushed;
+
+	uint32_t pos_mask;             ///< (1 << pos_bits) - 1
+	uint32_t literal_context_bits; ///< lc
+	uint32_t literal_pos_mask;     ///< (1 << lp) - 1
+
+	// These are the same as in lzma_decoder.c. See comments there.
+	probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
+	probability is_match[STATES][POS_STATES_MAX];
+	probability is_rep[STATES];
+	probability is_rep0[STATES];
+	probability is_rep1[STATES];
+	probability is_rep2[STATES];
+	probability is_rep0_long[STATES][POS_STATES_MAX];
+	probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];
+	probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];
+	probability pos_align[ALIGN_TABLE_SIZE];
+
+	// These are the same as in lzma_decoder.c except that the encoders
+	// include also price tables.
+	lzma_length_encoder match_len_encoder;
+	lzma_length_encoder rep_len_encoder;
+
+	// Price tables
+	uint32_t pos_slot_prices[LEN_TO_POS_STATES][POS_SLOTS];
+	uint32_t distances_prices[LEN_TO_POS_STATES][FULL_DISTANCES];
+	uint32_t dist_table_size;   ///< 2 * log2(dict_size rounded up to 2^n)
+	uint32_t match_price_count; ///< Incremented by every normal match
+
+	uint32_t align_prices[ALIGN_TABLE_SIZE];
+	uint32_t align_price_count; ///< Incremented when align bits are coded
+
+	// Optimal
+	uint32_t opts_end_index;
+	uint32_t opts_current_index;
+	lzma_optimal opts[OPTS];
+};
+
+
+extern void lzma_lzma_optimum_fast(
+		lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint32_t *restrict back_res, uint32_t *restrict len_res);
+
+extern void lzma_lzma_optimum_normal(lzma_coder *restrict coder,
+		lzma_mf *restrict mf, uint32_t *restrict back_res,
+		uint32_t *restrict len_res, uint32_t position);
+
+#endif
Index: contrib/xz/src/liblzma/lzma/lzma_encoder.c
===================================================================
--- contrib/xz/src/liblzma/lzma/lzma_encoder.c	(revision 0)
+++ contrib/xz/src/liblzma/lzma/lzma_encoder.c	(revision 0)
@@ -0,0 +1,741 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma_encoder.c
+/// \brief      LZMA encoder
+///
+//  Authors:    Igor Pavlov
+//              Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma2_encoder.h"
+#include "lzma_encoder_private.h"
+#include "fastpos.h"
+
+
+/////////////
+// Literal //
+/////////////
+
+/// Encode a literal that follows a match. The probability for each bit is
+/// selected with the corresponding bit of match_byte until the first bit
+/// where symbol and match_byte differ; after that offset stays zero.
+static inline void
+literal_matched(lzma_range_encoder *rc, probability *subcoder,
+		uint32_t match_byte, uint32_t symbol)
+{
+	uint32_t offset = 0x100;
+	symbol += UINT32_C(1) << 8;
+
+	do {
+		match_byte <<= 1;
+		const uint32_t match_bit = match_byte & offset;
+		const uint32_t subcoder_index
+				= offset + match_bit + (symbol >> 8);
+		const uint32_t bit = (symbol >> 7) & 1;
+		rc_bit(rc, &subcoder[subcoder_index], bit);
+
+		symbol <<= 1;
+		offset &= ~(match_byte ^ symbol);
+
+	} while (symbol < (UINT32_C(1) << 16));
+}
+
+
+/// Encode the byte at mf->read_pos - mf->read_ahead as a literal and
+/// update coder->state. The is_match bit is encoded by the caller.
+static inline void
+literal(lzma_coder *coder, lzma_mf *mf, uint32_t position)
+{
+	// Locate the literal byte to be encoded and the subcoder.
+	const uint8_t cur_byte = mf->buffer[
+			mf->read_pos - mf->read_ahead];
+	probability *subcoder = literal_subcoder(coder->literal,
+			coder->literal_context_bits, coder->literal_pos_mask,
+			position, mf->buffer[mf->read_pos - mf->read_ahead - 1]);
+
+	if (is_literal_state(coder->state)) {
+		// Previous LZMA-symbol was a literal. Encode a normal
+		// literal without a match byte.
+		rc_bittree(&coder->rc, subcoder, 8, cur_byte);
+	} else {
+		// Previous LZMA-symbol was a match. Use the last byte of
+		// the match as a "match byte". That is, compare the bits
+		// of the current literal and the match byte.
+		const uint8_t match_byte = mf->buffer[
+				mf->read_pos - coder->reps[0] - 1
+				- mf->read_ahead];
+		literal_matched(&coder->rc, subcoder, match_byte, cur_byte);
+	}
+
+	update_literal(coder->state);
+}
+
+
+//////////////////
+// Match length //
+//////////////////
+
+/// Recalculate the first lc->table_size entries of lc->prices[pos_state]
+/// and restart the countdown in lc->counters[pos_state].
+static void
+length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state)
+{
+	const uint32_t table_size = lc->table_size;
+	lc->counters[pos_state] = table_size;
+
+	const uint32_t a0 = rc_bit_0_price(lc->choice);
+	const uint32_t a1 = rc_bit_1_price(lc->choice);
+	const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2);
+	const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2);
+	uint32_t *const prices = lc->prices[pos_state];
+
+	uint32_t i;
+	for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i)
+		prices[i] = a0 + rc_bittree_price(lc->low[pos_state],
+				LEN_LOW_BITS, i);
+
+	for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i)
+		prices[i] = b0 + rc_bittree_price(lc->mid[pos_state],
+				LEN_MID_BITS, i - LEN_LOW_SYMBOLS);
+
+	for (; i < table_size; ++i)
+		prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS,
+				i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);
+
+	return;
+}
+
+
+/// Encode a match length. len is the real length (MATCH_LEN_MIN is
+/// subtracted here). Unless fast_mode is set, the price table of pos_state
+/// is recalculated every time its countdown reaches zero.
+static inline void
+length(lzma_range_encoder *rc, lzma_length_encoder *lc,
+		const uint32_t pos_state, uint32_t len, const bool fast_mode)
+{
+	assert(len <= MATCH_LEN_MAX);
+	len -= MATCH_LEN_MIN;
+
+	if (len < LEN_LOW_SYMBOLS) {
+		rc_bit(rc, &lc->choice, 0);
+		rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len);
+	} else {
+		rc_bit(rc, &lc->choice, 1);
+		len -= LEN_LOW_SYMBOLS;
+
+		if (len < LEN_MID_SYMBOLS) {
+			rc_bit(rc, &lc->choice2, 0);
+			rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len);
+		} else {
+			rc_bit(rc, &lc->choice2, 1);
+			len -= LEN_MID_SYMBOLS;
+			rc_bittree(rc, lc->high, LEN_HIGH_BITS, len);
+		}
+	}
+
+	// Only getoptimum uses the prices so don't update the table when
+	// in fast mode.
+	if (!fast_mode)
+		if (--lc->counters[pos_state] == 0)
+			length_update_prices(lc, pos_state);
+}
+
+
+///////////
+// Match //
+///////////
+
+/// Encode the length and the distance of a normal (non-repeated) match
+/// and push distance to the front of coder->reps[].
+static inline void
+match(lzma_coder *coder, const uint32_t pos_state,
+		const uint32_t distance, const uint32_t len)
+{
+	update_match(coder->state);
+
+	length(&coder->rc, &coder->match_len_encoder, pos_state, len,
+			coder->fast_mode);
+
+	const uint32_t pos_slot = get_pos_slot(distance);
+	const uint32_t len_to_pos_state = get_len_to_pos_state(len);
+	rc_bittree(&coder->rc, coder->pos_slot[len_to_pos_state],
+			POS_SLOT_BITS, pos_slot);
+
+	if (pos_slot >= START_POS_MODEL_INDEX) {
+		const uint32_t footer_bits = (pos_slot >> 1) - 1;
+		const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
+		const uint32_t pos_reduced = distance - base;
+
+		if (pos_slot < END_POS_MODEL_INDEX) {
+			// Careful here: base - pos_slot - 1 can be -1, but
+			// rc_bittree_reverse starts at probs[1], not probs[0].
+			rc_bittree_reverse(&coder->rc,
+				coder->pos_special + base - pos_slot - 1,
+				footer_bits, pos_reduced);
+		} else {
+			rc_direct(&coder->rc, pos_reduced >> ALIGN_BITS,
+					footer_bits - ALIGN_BITS);
+			rc_bittree_reverse(
+					&coder->rc, coder->pos_align,
+					ALIGN_BITS, pos_reduced & ALIGN_MASK);
+			++coder->align_price_count;
+		}
+	}
+
+	coder->reps[3] = coder->reps[2];
+	coder->reps[2] = coder->reps[1];
+	coder->reps[1] = coder->reps[0];
+	coder->reps[0] = distance;
+	++coder->match_price_count;
+}
+
+
+////////////////////
+// Repeated match //
+////////////////////
+
+/// Encode a match that reuses the distance coder->reps[rep] and move that
+/// distance to the front of coder->reps[]. With rep == 0 the is_rep0_long
+/// bit tells whether len is 1; no length is encoded when len is 1.
+static inline void
+rep_match(lzma_coder *coder, const uint32_t pos_state,
+		const uint32_t rep, const uint32_t len)
+{
+	if (rep == 0) {
+		rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0);
+		rc_bit(&coder->rc,
+			&coder->is_rep0_long[coder->state][pos_state],
+			len != 1);
+	} else {
+		const uint32_t distance = coder->reps[rep];
+		rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1);
+
+		if (rep == 1) {
+			rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0);
+		} else {
+			rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1);
+			rc_bit(&coder->rc, &coder->is_rep2[coder->state],
+					rep - 2);
+
+			if (rep == 3)
+				coder->reps[3] = coder->reps[2];
+
+			coder->reps[2] = coder->reps[1];
+		}
+
+		coder->reps[1] = coder->reps[0];
+		coder->reps[0] = distance;
+	}
+
+	if (len == 1) {
+		update_short_rep(coder->state);
+	} else {
+		length(&coder->rc, &coder->rep_len_encoder, pos_state, len,
+				coder->fast_mode);
+		update_long_rep(coder->state);
+	}
+}
+
+
+//////////
+// Main //
+//////////
+
+/// Encode one LZMA symbol: a literal if back is UINT32_MAX, a repeated
+/// match if back < REP_DISTANCES, and otherwise a normal match with the
+/// distance back - REP_DISTANCES. len is subtracted from mf->read_ahead.
+static void
+encode_symbol(lzma_coder *coder, lzma_mf *mf,
+		uint32_t back, uint32_t len, uint32_t position)
+{
+	const uint32_t pos_state = position & coder->pos_mask;
+
+	if (back == UINT32_MAX) {
+		// Literal i.e. eight-bit byte
+		assert(len == 1);
+		rc_bit(&coder->rc,
+				&coder->is_match[coder->state][pos_state], 0);
+		literal(coder, mf, position);
+	} else {
+		// Some type of match
+		rc_bit(&coder->rc,
+			&coder->is_match[coder->state][pos_state], 1);
+
+		if (back < REP_DISTANCES) {
+			// It's a repeated match i.e. the same distance
+			// has been used earlier.
+			rc_bit(&coder->rc, &coder->is_rep[coder->state], 1);
+			rep_match(coder, pos_state, back, len);
+		} else {
+			// Normal match
+			rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
+			match(coder, pos_state, back - REP_DISTANCES, len);
+		}
+	}
+
+	assert(mf->read_ahead >= len);
+	mf->read_ahead -= len;
+}
+
+
+/// Encode the first byte of the input as a literal if there is any input.
+///
+/// \return     false if there is no input yet and the action is LZMA_RUN;
+///             otherwise true, and coder->is_initialized has been set.
+static bool
+encode_init(lzma_coder *coder, lzma_mf *mf)
+{
+	assert(mf_position(mf) == 0);
+
+	if (mf->read_pos == mf->read_limit) {
+		if (mf->action == LZMA_RUN)
+			return false; // We cannot do anything.
+
+		// We are finishing (we cannot get here when flushing).
+		assert(mf->write_pos == mf->read_pos);
+		assert(mf->action == LZMA_FINISH);
+	} else {
+		// Do the actual initialization. The first LZMA symbol must
+		// always be a literal.
+		mf_skip(mf, 1);
+		mf->read_ahead = 0;
+		rc_bit(&coder->rc, &coder->is_match[0][0], 0);
+		rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]);
+	}
+
+	// Initialization is done (except if empty file).
+	coder->is_initialized = true;
+
+	return true;
+}
+
+
+/// Encode the end of payload marker (EOPM): a normal match with the
+/// distance UINT32_MAX and the length MATCH_LEN_MIN.
+static void
+encode_eopm(lzma_coder *coder, uint32_t position)
+{
+	const uint32_t pos_state = position & coder->pos_mask;
+	rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1);
+	rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
+	match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN);
+}
+
+
+/// Number of bytes that a single encoding loop in lzma_lzma_encode() can
+/// consume from the dictionary. This limit comes from lzma_lzma_optimum()
+/// and may need to be updated if that function is significantly modified.
+#define LOOP_INPUT_MAX (OPTS + 1)
+
+
+/// Encode input from the match finder into out[].
+///
+/// \param      limit   UINT32_MAX for plain LZMA. Otherwise (LZMA2) the
+///                     loop stops when mf->read_pos - mf->read_ahead
+///                     reaches limit or the output gets close to
+///                     LZMA2_CHUNK_MAX, and no end marker is encoded.
+///
+/// \return     LZMA_OK if more input or more output space is needed,
+///             LZMA_STREAM_END when the stream or the chunk is finished.
+extern lzma_ret
+lzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size, uint32_t limit)
+{
+	// Initialize the stream if no data has been encoded yet.
+	if (!coder->is_initialized && !encode_init(coder, mf))
+		return LZMA_OK;
+
+	// Get the lowest bits of the uncompressed offset from the LZ layer.
+	uint32_t position = mf_position(mf);
+
+	while (true) {
+		// Encode pending bits, if any. Calling this before encoding
+		// the next symbol is needed only with plain LZMA, since
+		// LZMA2 always provides big enough buffer to flush
+		// everything out from the range encoder. For the same reason,
+		// rc_encode() never returns true when this function is used
+		// as part of LZMA2 encoder.
+		if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+			assert(limit == UINT32_MAX);
+			return LZMA_OK;
+		}
+
+		// With LZMA2 we need to take care that compressed size of
+		// a chunk doesn't get too big.
+		// TODO
+		if (limit != UINT32_MAX
+				&& (mf->read_pos - mf->read_ahead >= limit
+					|| *out_pos + rc_pending(&coder->rc)
+						>= LZMA2_CHUNK_MAX
+							- LOOP_INPUT_MAX))
+			break;
+
+		// Check that there is some input to process.
+		if (mf->read_pos >= mf->read_limit) {
+			if (mf->action == LZMA_RUN)
+				return LZMA_OK;
+
+			if (mf->read_ahead == 0)
+				break;
+		}
+
+		// Get optimal match (repeat position and length).
+		// Value ranges for pos:
+		//   - [0, REP_DISTANCES): repeated match
+		//   - [REP_DISTANCES, UINT32_MAX):
+		//     match at (pos - REP_DISTANCES)
+		//   - UINT32_MAX: not a match but a literal
+		// Value ranges for len:
+		//   - [MATCH_LEN_MIN, MATCH_LEN_MAX]
+		uint32_t len;
+		uint32_t back;
+
+		if (coder->fast_mode)
+			lzma_lzma_optimum_fast(coder, mf, &back, &len);
+		else
+			lzma_lzma_optimum_normal(
+					coder, mf, &back, &len, position);
+
+		encode_symbol(coder, mf, back, len, position);
+
+		position += len;
+	}
+
+	if (!coder->is_flushed) {
+		coder->is_flushed = true;
+
+		// We don't support encoding plain LZMA streams without EOPM,
+		// and LZMA2 doesn't use EOPM at LZMA level.
+		if (limit == UINT32_MAX)
+			encode_eopm(coder, position);
+
+		// Flush the remaining bytes from the range encoder.
+		rc_flush(&coder->rc);
+
+		// Copy the remaining bytes to the output buffer. If there
+		// isn't enough output space, we will copy out the remaining
+		// bytes on the next call to this function by using
+		// the rc_encode() call in the encoding loop above.
+		if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+			assert(limit == UINT32_MAX);
+			return LZMA_OK;
+		}
+	}
+
+	// Make it ready for the next LZMA2 chunk.
+	coder->is_flushed = false;
+
+	return LZMA_STREAM_END;
+}
+
+
+/// Coding function of the plain LZMA encoder (installed as lz->code).
+static lzma_ret
+lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size)
+{
+	// Plain LZMA has no support for sync-flushing.
+	if (unlikely(mf->action == LZMA_SYNC_FLUSH))
+		return LZMA_OPTIONS_ERROR;
+
+	return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX);
+}
+
+
+////////////////////
+// Initialization //
+////////////////////
+
+/// Check lc/lp/pb, nice_len, and mode. The other options (for example
+/// dict_size) are not checked by this function.
+static bool
+is_options_valid(const lzma_options_lzma *options)
+{
+	// Validate some of the options. LZ encoder validates nice_len too
+	// but we need a valid value here earlier.
+	return is_lclppb_valid(options)
+			&& options->nice_len >= MATCH_LEN_MIN
+			&& options->nice_len <= MATCH_LEN_MAX
+			&& (options->mode == LZMA_MODE_FAST
+				|| options->mode == LZMA_MODE_NORMAL);
+}
+
+
+/// Fill in the options of the LZ layer from the LZMA options.
+static void
+set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options)
+{
+	// LZ encoder initialization does the validation for these so we
+	// don't need to validate here.
+	lz_options->before_size = OPTS;
+	lz_options->dict_size = options->dict_size;
+	lz_options->after_size = LOOP_INPUT_MAX;
+	lz_options->match_len_max = MATCH_LEN_MAX;
+	lz_options->nice_len = options->nice_len;
+	lz_options->match_finder = options->mf;
+	lz_options->depth = options->depth;
+	lz_options->preset_dict = options->preset_dict;
+	lz_options->preset_dict_size = options->preset_dict_size;
+	return;
+}
+
+
+/// Reset the probabilities of a length encoder for the first
+/// num_pos_states position states and, unless fast_mode is set,
+/// recalculate their price tables (this reads lencoder->table_size).
+static void
+length_encoder_reset(lzma_length_encoder *lencoder,
+		const uint32_t num_pos_states, const bool fast_mode)
+{
+	bit_reset(lencoder->choice);
+	bit_reset(lencoder->choice2);
+
+	for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
+		bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS);
+		bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS);
+	}
+
+	bittree_reset(lencoder->high, LEN_HIGH_BITS);
+
+	if (!fast_mode)
+		for (size_t pos_state = 0; pos_state < num_pos_states;
+				++pos_state)
+			length_update_prices(lencoder, pos_state);
+
+	return;
+}
+
+
+/// Validate options and reset the encoder: range encoder, state, rep
+/// distances, probabilities, and price counts. coder->fast_mode and the
+/// table_size of the length encoders are read, so they must be set first.
+extern lzma_ret
+lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options)
+{
+	if (!is_options_valid(options))
+		return LZMA_OPTIONS_ERROR;
+
+	coder->pos_mask = (1U << options->pb) - 1;
+	coder->literal_context_bits = options->lc;
+	coder->literal_pos_mask = (1U << options->lp) - 1;
+
+	// Range coder
+	rc_reset(&coder->rc);
+
+	// State
+	coder->state = STATE_LIT_LIT;
+	for (size_t i = 0; i < REP_DISTANCES; ++i)
+		coder->reps[i] = 0;
+
+	literal_init(coder->literal, options->lc, options->lp);
+
+	// Bit encoders
+	for (size_t i = 0; i < STATES; ++i) {
+		for (size_t j = 0; j <= coder->pos_mask; ++j) {
+			bit_reset(coder->is_match[i][j]);
+			bit_reset(coder->is_rep0_long[i][j]);
+		}
+
+		bit_reset(coder->is_rep[i]);
+		bit_reset(coder->is_rep0[i]);
+		bit_reset(coder->is_rep1[i]);
+		bit_reset(coder->is_rep2[i]);
+	}
+
+	for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
+		bit_reset(coder->pos_special[i]);
+
+	// Bit tree encoders
+	for (size_t i = 0; i < LEN_TO_POS_STATES; ++i)
+		bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);
+
+	bittree_reset(coder->pos_align, ALIGN_BITS);
+
+	// Length encoders
+	length_encoder_reset(&coder->match_len_encoder,
+			1U << options->pb, coder->fast_mode);
+
+	length_encoder_reset(&coder->rep_len_encoder,
+			1U << options->pb, coder->fast_mode);
+
+	// Price counts are incremented every time appropriate probabilities
+	// are changed. Price counts are set to zero when the price tables
+	// are updated, which is done when the appropriate price counts have
+	// big enough value, and lzma_mf.read_ahead == 0 which happens at
+	// least every OPTS (a few thousand) possible price count increments.
+	//
+	// By resetting price counts to UINT32_MAX / 2, we make sure that the
+	// price tables will be initialized before they will be used (since
+	// the value is definitely big enough), and that it is OK to increment
+	// price counts without risk of integer overflow (since UINT32_MAX / 2
+	// is small enough). The current code doesn't increment price counts
+	// before initializing price tables, but it may be done in future if
+	// we add support for saving the state between LZMA2 chunks.
+	coder->match_price_count = UINT32_MAX / 2;
+	coder->align_price_count = UINT32_MAX / 2;
+
+	coder->opts_end_index = 0;
+	coder->opts_current_index = 0;
+
+	return LZMA_OK;
+}
+
+
+/// Allocate *coder_ptr if it is NULL, set the mode dependent fields, fill
+/// in lz_options, and reset the encoder with lzma_lzma_encoder_reset().
+extern lzma_ret
+lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
+		const lzma_options_lzma *options, lzma_lz_options *lz_options)
+{
+	// Allocate lzma_coder if it wasn't already allocated.
+	if (*coder_ptr == NULL) {
+		*coder_ptr = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (*coder_ptr == NULL)
+			return LZMA_MEM_ERROR;
+	}
+
+	lzma_coder *coder = *coder_ptr;
+
+	// Set compression mode. We haven't validated the options yet,
+	// but it's OK here, since nothing bad happens with invalid
+	// options in the code below (dict_size is checked before use),
+	// and they will get rejected by the lzma_lzma_encoder_reset()
+	// call at the end of this function.
+	switch (options->mode) {
+		case LZMA_MODE_FAST:
+			coder->fast_mode = true;
+			break;
+
+		case LZMA_MODE_NORMAL: {
+			coder->fast_mode = false;
+
+			// Set dist_table_size.
+			// Round the dictionary size up to next 2^n.
+			//
+			// 2^31 is the biggest power of two that fits in
+			// uint32_t. With a bigger dict_size the loop below
+			// would end up shifting by 32 bits, which is
+			// undefined behavior and can make the loop spin
+			// forever. The options haven't been validated yet,
+			// so the check has to be done here.
+			if (options->dict_size > (UINT32_C(1) << 31))
+				return LZMA_OPTIONS_ERROR;
+
+			uint32_t log_size = 0;
+			while ((UINT32_C(1) << log_size) < options->dict_size)
+				++log_size;
+
+			coder->dist_table_size = log_size * 2;
+
+			// Length encoders' price table size
+			coder->match_len_encoder.table_size
+				= options->nice_len + 1 - MATCH_LEN_MIN;
+			coder->rep_len_encoder.table_size
+				= options->nice_len + 1 - MATCH_LEN_MIN;
+			break;
+		}
+
+		default:
+			return LZMA_OPTIONS_ERROR;
+	}
+
+	// We don't need to write the first byte as literal if there is
+	// a non-empty preset dictionary. encode_init() wouldn't even work
+	// if there is a non-empty preset dictionary, because encode_init()
+	// assumes that position is zero and previous byte is also zero.
+	coder->is_initialized = options->preset_dict != NULL
+			&& options->preset_dict_size > 0;
+	coder->is_flushed = false;
+
+	set_lz_options(lz_options, options);
+
+	return lzma_lzma_encoder_reset(coder, options);
+}
+
+
+/// Initialization callback that lzma_lzma_encoder_init() gives to the LZ layer.
+static lzma_ret
+lzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
+		const void *options, lzma_lz_options *lz_options)
+{
+	lz->code = &lzma_encode;
+	return lzma_lzma_encoder_create(
+			&lz->coder, allocator, options, lz_options);
+}
+
+
+/// Initialize next as a plain LZMA encoder via lzma_lz_encoder_init().
+extern lzma_ret
+lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	return lzma_lz_encoder_init(
+			next, allocator, filters, &lzma_encoder_init);
+}
+
+
+/// \return     sizeof(lzma_coder) plus the memory usage of the LZ encoder,
+///             or UINT64_MAX if the options are not valid
+extern uint64_t
+lzma_lzma_encoder_memusage(const void *options)
+{
+	if (!is_options_valid(options))
+		return UINT64_MAX;
+
+	lzma_lz_options lz_options;
+	set_lz_options(&lz_options, options);
+
+	const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options);
+	if (lz_memusage == UINT64_MAX)
+		return UINT64_MAX;
+
+	return (uint64_t)(sizeof(lzma_coder)) + lz_memusage;
+}
+
+
+/// Pack lc/lp/pb into *byte as (pb * 5 + lp) * 9 + lc.
+///
+/// \return     false on success, true if lc/lp/pb are not valid
+extern bool
+lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte)
+{
+	if (!is_lclppb_valid(options))
+		return true;
+
+	*byte = (options->pb * 5 + options->lp) * 9 + options->lc;
+	assert(*byte <= (4 * 5 + 4) * 9 + 8);
+
+	return false;
+}
+
+
+#ifdef HAVE_ENCODER_LZMA1
+/// Write the filter properties to out[]: the lc/lp/pb byte followed by
+/// dict_size as a 32-bit little endian integer (five bytes in total).
+extern lzma_ret
+lzma_lzma_props_encode(const void *options, uint8_t *out)
+{
+	const lzma_options_lzma *const opt = options;
+
+	if (lzma_lzma_lclppb_encode(opt, out))
+		return LZMA_PROG_ERROR;
+
+	unaligned_write32le(out + 1, opt->dict_size);
+
+	return LZMA_OK;
+}
+#endif
+
+
+/// \return     true if mode is LZMA_MODE_FAST or LZMA_MODE_NORMAL
+extern LZMA_API(lzma_bool)
+lzma_mode_is_supported(lzma_mode mode)
+{
+	return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL;
+}
Index: contrib/xz/src/liblzma/lzma/lzma_encoder.h
===================================================================
--- contrib/xz/src/liblzma/lzma/lzma_encoder.h	(revision 0)
+++
contrib/xz/src/liblzma/lzma/lzma_encoder.h (revision 0) @@ -0,0 +1,54 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder.h +/// \brief LZMA encoder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_ENCODER_H +#define LZMA_LZMA_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern uint64_t lzma_lzma_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_encode(const void *options, uint8_t *out); + + +/// Encodes lc/lp/pb into one byte. Returns false on success and true on error. +extern bool lzma_lzma_lclppb_encode( + const lzma_options_lzma *options, uint8_t *byte); + + +#ifdef LZMA_LZ_ENCODER_H + +/// Initializes raw LZMA encoder; this is used by LZMA2. +extern lzma_ret lzma_lzma_encoder_create( + lzma_coder **coder_ptr, lzma_allocator *allocator, + const lzma_options_lzma *options, lzma_lz_options *lz_options); + + +/// Resets an already initialized LZMA encoder; this is used by LZMA2. 
+extern lzma_ret lzma_lzma_encoder_reset( + lzma_coder *coder, const lzma_options_lzma *options); + + +extern lzma_ret lzma_lzma_encode(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + uint32_t read_limit); + +#endif + +#endif Index: contrib/xz/src/liblzma/lzma/lzma_encoder_presets.c =================================================================== --- contrib/xz/src/liblzma/lzma/lzma_encoder_presets.c (revision 0) +++ contrib/xz/src/liblzma/lzma/lzma_encoder_presets.c (revision 0) @@ -0,0 +1,52 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_presets.c +/// \brief Encoder presets +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(lzma_bool) +lzma_lzma_preset(lzma_options_lzma *options, uint32_t preset) +{ + const uint32_t level = preset & LZMA_PRESET_LEVEL_MASK; + const uint32_t flags = preset & ~LZMA_PRESET_LEVEL_MASK; + const uint32_t supported_flags = LZMA_PRESET_EXTREME; + + if (level > 9 || (flags & ~supported_flags)) + return true; + + const uint32_t dict_shift = level <= 1 ? 16 : level + 17; + options->dict_size = UINT32_C(1) << dict_shift; + + options->preset_dict = NULL; + options->preset_dict_size = 0; + + options->lc = LZMA_LC_DEFAULT; + options->lp = LZMA_LP_DEFAULT; + options->pb = LZMA_PB_DEFAULT; + + options->mode = level <= 2 ? LZMA_MODE_FAST : LZMA_MODE_NORMAL; + + options->nice_len = level == 0 ? 8 : level <= 5 ? 32 : 64; + options->mf = level <= 1 ? LZMA_MF_HC3 : level <= 2 ? LZMA_MF_HC4 + : LZMA_MF_BT4; + options->depth = 0; + + if (flags & LZMA_PRESET_EXTREME) { + options->lc = 4; // FIXME? 
+ options->mode = LZMA_MODE_NORMAL; + options->mf = LZMA_MF_BT4; + options->nice_len = 273; + options->depth = 512; + } + + return false; +} Index: contrib/xz/src/liblzma/lzma/lzma2_decoder.c =================================================================== --- contrib/xz/src/liblzma/lzma/lzma2_decoder.c (revision 0) +++ contrib/xz/src/liblzma/lzma/lzma2_decoder.c (revision 0) @@ -0,0 +1,305 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.c +/// \brief LZMA2 decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma2_decoder.h" +#include "lz_decoder.h" +#include "lzma_decoder.h" + + +struct lzma_coder_s { + enum sequence { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA, + SEQ_COPY, + } sequence; + + /// Sequence after the size fields have been decoded. + enum sequence next_sequence; + + /// LZMA decoder + lzma_lz_decoder lzma; + + /// Uncompressed size of LZMA chunk + size_t uncompressed_size; + + /// Compressed size of the chunk (naturally equals to uncompressed + /// size of uncompressed chunk) + size_t compressed_size; + + /// True if properties are needed. This is false before the + /// first LZMA chunk. + bool need_properties; + + /// True if dictionary reset is needed. This is false before the + /// first chunk (LZMA or uncompressed). + bool need_dictionary_reset; + + lzma_options_lzma options; +}; + + +static lzma_ret +lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + // With SEQ_LZMA it is possible that no new input is needed to do + // some progress. 
The rest of the sequences assume that there is + // at least one byte of input. + while (*in_pos < in_size || coder->sequence == SEQ_LZMA) + switch (coder->sequence) { + case SEQ_CONTROL: { + const uint32_t control = in[*in_pos]; + ++*in_pos; + + if (control >= 0xE0 || control == 1) { + // Dictionary reset implies that next LZMA chunk has + // to set new properties. + coder->need_properties = true; + coder->need_dictionary_reset = true; + } else if (coder->need_dictionary_reset) { + return LZMA_DATA_ERROR; + } + + if (control >= 0x80) { + // LZMA chunk. The highest five bits of the + // uncompressed size are taken from the control byte. + coder->uncompressed_size = (control & 0x1F) << 16; + coder->sequence = SEQ_UNCOMPRESSED_1; + + // See if there are new properties or if we need to + // reset the state. + if (control >= 0xC0) { + // When there are new properties, state reset + // is done at SEQ_PROPERTIES. + coder->need_properties = false; + coder->next_sequence = SEQ_PROPERTIES; + + } else if (coder->need_properties) { + return LZMA_DATA_ERROR; + + } else { + coder->next_sequence = SEQ_LZMA; + + // If only state reset is wanted with old + // properties, do the resetting here for + // simplicity. + if (control >= 0xA0) + coder->lzma.reset(coder->lzma.coder, + &coder->options); + } + } else { + // End marker + if (control == 0x00) + return LZMA_STREAM_END; + + // Invalid control values + if (control > 2) + return LZMA_DATA_ERROR; + + // It's uncompressed chunk + coder->sequence = SEQ_COMPRESSED_0; + coder->next_sequence = SEQ_COPY; + } + + if (coder->need_dictionary_reset) { + // Finish the dictionary reset and let the caller + // flush the dictionary to the actual output buffer. 
+ coder->need_dictionary_reset = false; + dict_reset(dict); + return LZMA_OK; + } + + break; + } + + case SEQ_UNCOMPRESSED_1: + coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + coder->uncompressed_size += in[(*in_pos)++] + 1; + coder->sequence = SEQ_COMPRESSED_0; + coder->lzma.set_uncompressed(coder->lzma.coder, + coder->uncompressed_size); + break; + + case SEQ_COMPRESSED_0: + coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + coder->compressed_size += in[(*in_pos)++] + 1; + coder->sequence = coder->next_sequence; + break; + + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) + return LZMA_DATA_ERROR; + + coder->lzma.reset(coder->lzma.coder, &coder->options); + + coder->sequence = SEQ_LZMA; + break; + + case SEQ_LZMA: { + // Store the start offset so that we can update + // coder->compressed_size later. + const size_t in_start = *in_pos; + + // Decode from in[] to *dict. + const lzma_ret ret = coder->lzma.code(coder->lzma.coder, + dict, in, in_pos, in_size); + + // Validate and update coder->compressed_size. + const size_t in_used = *in_pos - in_start; + if (in_used > coder->compressed_size) + return LZMA_DATA_ERROR; + + coder->compressed_size -= in_used; + + // Return if we didn't finish the chunk, or an error occurred. + if (ret != LZMA_STREAM_END) + return ret; + + // The LZMA decoder must have consumed the whole chunk now. + // We don't need to worry about uncompressed size since it + // is checked by the LZMA decoder. + if (coder->compressed_size != 0) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_CONTROL; + break; + } + + case SEQ_COPY: { + // Copy from input to the dictionary as is. + // FIXME Can copy too much? 
+ dict_write(dict, in, in_pos, in_size, &coder->compressed_size); + if (coder->compressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_CONTROL; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + assert(coder->lzma.end == NULL); + lzma_free(coder->lzma.coder, allocator); + + lzma_free(coder, allocator); + + return; +} + + +static lzma_ret +lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, lzma_lz_options *lz_options) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma2_decode; + lz->end = &lzma2_decoder_end; + + lz->coder->lzma = LZMA_LZ_DECODER_INIT; + } + + const lzma_options_lzma *options = opt; + + lz->coder->sequence = SEQ_CONTROL; + lz->coder->need_properties = true; + lz->coder->need_dictionary_reset = options->preset_dict == NULL + || options->preset_dict_size == 0; + + return lzma_lzma_decoder_create(&lz->coder->lzma, + allocator, options, lz_options); +} + + +extern lzma_ret +lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA2 can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma2_decoder_init); +} + + +extern uint64_t +lzma_lzma2_decoder_memusage(const void *options) +{ + return sizeof(lzma_coder) + + lzma_lzma_decoder_memusage_nocheck(options); +} + + +extern lzma_ret +lzma_lzma2_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return LZMA_OPTIONS_ERROR; + + // Check that reserved bits are unset. + if (props[0] & 0xC0) + return LZMA_OPTIONS_ERROR; + + // Decode the dictionary size. 
+ if (props[0] > 40) + return LZMA_OPTIONS_ERROR; + + lzma_options_lzma *opt = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (props[0] == 40) { + opt->dict_size = UINT32_MAX; + } else { + opt->dict_size = 2 | (props[0] & 1); + opt->dict_size <<= props[0] / 2 + 11; + } + + opt->preset_dict = NULL; + opt->preset_dict_size = 0; + + *options = opt; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/lzma/lzma2_decoder.h =================================================================== --- contrib/xz/src/liblzma/lzma/lzma2_decoder.h (revision 0) +++ contrib/xz/src/liblzma/lzma/lzma2_decoder.h (revision 0) @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.h +/// \brief LZMA2 decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_DECODER_H +#define LZMA_LZMA2_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_lzma2_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif Index: contrib/xz/src/liblzma/lzma/lzma_common.h =================================================================== --- contrib/xz/src/liblzma/lzma/lzma_common.h (revision 0) +++ contrib/xz/src/liblzma/lzma/lzma_common.h (revision 0) @@ -0,0 +1,223 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_common.h +/// \brief Private definitions common to LZMA encoder and decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into 
the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_COMMON_H +#define LZMA_LZMA_COMMON_H + +#include "common.h" +#include "range_common.h" + + +/////////////////// +// Miscellaneous // +/////////////////// + +/// Maximum number of position states. A position state is the value of the +/// lowest pb bits of the current uncompressed offset. In some places there +/// are different sets of probabilities for different pos states. +#define POS_STATES_MAX (1 << LZMA_PB_MAX) + + +/// Validates lc, lp, and pb: each must be within its maximum, and lc + lp together must not exceed LZMA_LCLP_MAX. +static inline bool +is_lclppb_valid(const lzma_options_lzma *options) +{ + return options->lc <= LZMA_LCLP_MAX && options->lp <= LZMA_LCLP_MAX + && options->lc + options->lp <= LZMA_LCLP_MAX + && options->pb <= LZMA_PB_MAX; +} + + +/////////// +// State // +/////////// + +/// This enum is used to track which events have occurred most recently and +/// in which order. This information is used to predict the next event. +/// +/// Events: +/// - Literal: One 8-bit byte +/// - Match: Repeat a chunk of data at some distance +/// - Long repeat: Multi-byte match at a recently seen distance +/// - Short repeat: One-byte repeat at a recently seen distance +/// +/// The event names are of the form STATE_oldest_older_previous. REP means +/// either short or long repeated match, and NONLIT means any non-literal. +typedef enum { + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_LIT_MATCH, + STATE_LIT_LONGREP, + STATE_LIT_SHORTREP, + STATE_NONLIT_MATCH, + STATE_NONLIT_REP, +} lzma_lzma_state; + + +/// Total number of states +#define STATES 12 + +/// The lowest 7 states indicate that the previous state was a literal. +#define LIT_STATES 7 + + +/// Indicate that the latest state was a literal.
+#define update_literal(state) \ + state = ((state) <= STATE_SHORTREP_LIT_LIT \ + ? STATE_LIT_LIT \ + : ((state) <= STATE_LIT_SHORTREP \ + ? (state) - 3 \ + : (state) - 6)) + +/// Indicate that the latest state was a match. +#define update_match(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH) + +/// Indicate that the latest state was a long repeated match. +#define update_long_rep(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP) + +/// Indicate that the latest state was a short repeated match. +#define update_short_rep(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP) + +/// Test if the previous state was a literal. +#define is_literal_state(state) \ + ((state) < LIT_STATES) + + +///////////// +// Literal // +///////////// + +/// Each literal coder is divided in three sections: +/// - 0x001-0x0FF: Without match byte +/// - 0x101-0x1FF: With match byte; match bit is 0 +/// - 0x201-0x2FF: With match byte; match bit is 1 +/// +/// Match byte is used when the previous LZMA symbol was something other than +/// a literal (that is, it was some kind of match). +#define LITERAL_CODER_SIZE 0x300 + +/// Maximum number of literal coders +#define LITERAL_CODERS_MAX (1 << LZMA_LCLP_MAX) + +/// Locate the literal coder for the next literal byte. The choice depends on +/// - the lowest literal_pos_bits bits of the position of the current +/// byte; and +/// - the highest literal_context_bits bits of the previous byte.
+#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \ + ((probs)[(((pos) & lp_mask) << lc) + ((prev_byte) >> (8 - lc))]) + + +static inline void +literal_init(probability (*probs)[LITERAL_CODER_SIZE], + uint32_t lc, uint32_t lp) +{ + assert(lc + lp <= LZMA_LCLP_MAX); + + const uint32_t coders = 1U << (lc + lp); + + for (uint32_t i = 0; i < coders; ++i) + for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j) + bit_reset(probs[i][j]); + + return; +} + + +////////////////// +// Match length // +////////////////// + +// Minimum length of a match is two bytes. +#define MATCH_LEN_MIN 2 + +// Match length is encoded with 4, 5, or 10 bits. +// +// Length Bits +// 2-9 4 = Choice=0 + 3 bits +// 10-17 5 = Choice=1 + Choice2=0 + 3 bits +// 18-273 10 = Choice=1 + Choice2=1 + 8 bits +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +// Maximum length of a match is 273 which is a result of the encoding +// described above. +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + + +//////////////////// +// Match distance // +//////////////////// + +// Different set of probabilities is used for match distances that have very +// short match length: Lengths of 2, 3, and 4 bytes have a separate set of +// probabilities for each length. The matches with longer length use a shared +// set of probabilities. +#define LEN_TO_POS_STATES 4 + +// Macro to get the index of the appropriate probability array. +#define get_len_to_pos_state(len) \ + ((len) < LEN_TO_POS_STATES + MATCH_LEN_MIN \ + ? (len) - MATCH_LEN_MIN \ + : LEN_TO_POS_STATES - 1) + +// The highest two bits of a match distance (pos slot) are encoded using six +// bits. See fastpos.h for more explanation. 
+#define POS_SLOT_BITS 6 +#define POS_SLOTS (1 << POS_SLOT_BITS) + +// Match distances up to 127 are fully encoded using probabilities. Since +// the highest two bits (pos slot) are always encoded using six bits, the +// distances 0-3 don't need any additional bits to encode, since the pos +// slot itself is the same as the actual distance. START_POS_MODEL_INDEX +// indicates the first pos slot where at least one additional bit is needed. +#define START_POS_MODEL_INDEX 4 + +// Match distances greater than 127 are encoded in three pieces: +// - pos slot: the highest two bits +// - direct bits: 2-26 bits below the highest two bits +// - alignment bits: four lowest bits +// +// Direct bits don't use any probabilities. +// +// The pos slot value of 14 is for distances 128-191 (see the table in +// fastpos.h to understand why). +#define END_POS_MODEL_INDEX 14 + +// Distances <= 127 are the fully encoded ones: 7 bits, 128 values. +#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +// For match distances greater than 127, only the highest two bits and the +// lowest four bits (alignment) are encoded using probabilities. +#define ALIGN_BITS 4 +#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1) + +// LZMA remembers the four most recent match distances. Reusing these distances +// tends to take less space than re-encoding the actual distance value.
+#define REP_DISTANCES 4 + +#endif Index: contrib/xz/src/liblzma/lzma/fastpos_tablegen.c =================================================================== --- contrib/xz/src/liblzma/lzma/fastpos_tablegen.c (revision 0) +++ contrib/xz/src/liblzma/lzma/fastpos_tablegen.c (revision 0) @@ -0,0 +1,56 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file fastpos_tablegen.c +/// \brief Generates the lzma_fastpos[] lookup table +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include <sys/types.h> +#include <inttypes.h> +#include <stdio.h> +#include "fastpos.h" + + +int +main(void) +{ + uint8_t fastpos[1 << FASTPOS_BITS]; + + const uint8_t fast_slots = 2 * FASTPOS_BITS; + uint32_t c = 2; + + fastpos[0] = 0; + fastpos[1] = 1; + + for (uint8_t slot_fast = 2; slot_fast < fast_slots; ++slot_fast) { + const uint32_t k = 1 << ((slot_fast >> 1) - 1); + for (uint32_t j = 0; j < k; ++j, ++c) + fastpos[c] = slot_fast; + } + + printf("/* This file has been automatically generated " + "by fastpos_tablegen.c.
*/\n\n" + "#include \"common.h\"\n" + "#include \"fastpos.h\"\n\n" + "const uint8_t lzma_fastpos[1 << FASTPOS_BITS] = {"); + + for (size_t i = 0; i < (1 << FASTPOS_BITS); ++i) { + if (i % 16 == 0) + printf("\n\t"); + + printf("%3u", (unsigned int)(fastpos[i])); + + if (i != (1 << FASTPOS_BITS) - 1) + printf(","); + } + + printf("\n};\n"); + + return 0; +} Index: contrib/xz/src/liblzma/lzma/fastpos.h =================================================================== --- contrib/xz/src/liblzma/lzma/fastpos.h (revision 0) +++ contrib/xz/src/liblzma/lzma/fastpos.h (revision 0) @@ -0,0 +1,140 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file fastpos.h +/// \brief Kind of two-bit version of bit scan reverse +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FASTPOS_H +#define LZMA_FASTPOS_H + +// LZMA encodes match distances (positions) by storing the highest two +// bits using a six-bit value [0, 63], and then the missing lower bits. +// Dictionary size is also stored using this encoding in the new .lzma +// file format header. +// +// fastpos.h provides a way to quickly find out the correct six-bit +// values. The following table gives some examples of this encoding: +// +// pos return +// 0 0 +// 1 1 +// 2 2 +// 3 3 +// 4 4 +// 5 4 +// 6 5 +// 7 5 +// 8 6 +// 11 6 +// 12 7 +// ... ... +// 15 7 +// 16 8 +// 17 8 +// ... ... +// 23 8 +// 24 9 +// 25 9 +// ... ... +// +// +// Provided functions or macros +// ---------------------------- +// +// get_pos_slot(pos) is the basic version. get_pos_slot_2(pos) +// assumes that pos >= FULL_DISTANCES, thus the result is at least +// FULL_DISTANCES_BITS * 2. 
Using get_pos_slot(pos) instead of +// get_pos_slot_2(pos) would give the same result, but get_pos_slot_2(pos) +// should be tiny bit faster due to the assumption being made. +// +// +// Size vs. speed +// -------------- +// +// With some CPUs that have fast BSR (bit scan reverse) instruction, the +// size optimized version is slightly faster than the bigger table based +// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III +// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that +// would still have speed roughly comparable to the table version. Older +// x86 CPUs like the original Pentium have very slow BSR; on those systems +// the table version is a lot faster. +// +// On some CPUs, the table version is a lot faster when using position +// dependent code, but with position independent code the size optimized +// version is slightly faster. This occurs at least on 32-bit SPARC (no +// ASM optimizations). +// +// I'm making the table version the default, because that has good speed +// on all systems I have tried. The size optimized version is sometimes +// slightly faster, but sometimes it is a lot slower. + +#ifdef HAVE_SMALL +# define get_pos_slot(pos) ((pos) <= 4 ? (pos) : get_pos_slot_2(pos)) + +static inline uint32_t +get_pos_slot_2(uint32_t pos) +{ + const uint32_t i = bsr32(pos); + return (i + i) + ((pos >> (i - 1)) & 1); +} + + +#else + +#define FASTPOS_BITS 13 + +extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS]; + + +#define fastpos_shift(extra, n) \ + ((extra) + (n) * (FASTPOS_BITS - 1)) + +#define fastpos_limit(extra, n) \ + (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n))) + +#define fastpos_result(pos, extra, n) \ + lzma_fastpos[(pos) >> fastpos_shift(extra, n)] \ + + 2 * fastpos_shift(extra, n) + + +static inline uint32_t +get_pos_slot(uint32_t pos) +{ + // If it is small enough, we can pick the result directly from + // the precalculated table. 
+ if (pos < fastpos_limit(0, 0)) + return lzma_fastpos[pos]; + + if (pos < fastpos_limit(0, 1)) + return fastpos_result(pos, 0, 1); + + return fastpos_result(pos, 0, 2); +} + + +#ifdef FULL_DISTANCES_BITS +static inline uint32_t +get_pos_slot_2(uint32_t pos) +{ + assert(pos >= FULL_DISTANCES); + + if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0)) + return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0); + + if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 1)) + return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 1); + + return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 2); +} +#endif + +#endif + +#endif Index: contrib/xz/src/liblzma/lzma/lzma_encoder_optimum_normal.c =================================================================== --- contrib/xz/src/liblzma/lzma/lzma_encoder_optimum_normal.c (revision 0) +++ contrib/xz/src/liblzma/lzma/lzma_encoder_optimum_normal.c (revision 0) @@ -0,0 +1,868 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_normal.c +// +// Author: Igor Pavlov +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" +#include "fastpos.h" + + +//////////// +// Prices // +//////////// + +static uint32_t +get_literal_price(const lzma_coder *const coder, const uint32_t pos, + const uint32_t prev_byte, const bool match_mode, + uint32_t match_byte, uint32_t symbol) +{ + const probability *const subcoder = literal_subcoder(coder->literal, + coder->literal_context_bits, coder->literal_pos_mask, + pos, prev_byte); + + uint32_t price = 0; + + if (!match_mode) { + price = rc_bittree_price(subcoder, 8, symbol); + } else { + uint32_t offset = 0x100; + symbol += UINT32_C(1) << 8; + + do { + match_byte <<= 1; + + const uint32_t match_bit = match_byte & offset; + const uint32_t subcoder_index + = offset + match_bit + (symbol >> 8); + const uint32_t bit = (symbol >> 7) & 1; + price += rc_bit_price(subcoder[subcoder_index], bit); + + symbol <<= 1; + offset &= ~(match_byte ^ symbol); + + } while (symbol < (UINT32_C(1) << 16)); + } + + return price; +} + + +static inline uint32_t +get_len_price(const lzma_length_encoder *const lencoder, + const uint32_t len, const uint32_t pos_state) +{ + // NOTE: Unlike the other price tables, length prices are updated + // in lzma_encoder.c + return lencoder->prices[pos_state][len - MATCH_LEN_MIN]; +} + + +static inline uint32_t +get_short_rep_price(const lzma_coder *const coder, + const lzma_lzma_state state, const uint32_t pos_state) +{ + return rc_bit_0_price(coder->is_rep0[state]) + + rc_bit_0_price(coder->is_rep0_long[state][pos_state]); +} + + +static inline uint32_t +get_pure_rep_price(const lzma_coder *const coder, const uint32_t rep_index, + const lzma_lzma_state state, uint32_t pos_state) +{ + uint32_t price; + + if (rep_index == 0) { + price = rc_bit_0_price(coder->is_rep0[state]); + price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]); + } else { + price = rc_bit_1_price(coder->is_rep0[state]); + + if (rep_index == 1) { 
+ price += rc_bit_0_price(coder->is_rep1[state]); + } else { + price += rc_bit_1_price(coder->is_rep1[state]); + price += rc_bit_price(coder->is_rep2[state], + rep_index - 2); + } + } + + return price; +} + + +static inline uint32_t +get_rep_price(const lzma_coder *const coder, const uint32_t rep_index, + const uint32_t len, const lzma_lzma_state state, + const uint32_t pos_state) +{ + return get_len_price(&coder->rep_len_encoder, len, pos_state) + + get_pure_rep_price(coder, rep_index, state, pos_state); +} + + +static inline uint32_t +get_pos_len_price(const lzma_coder *const coder, const uint32_t pos, + const uint32_t len, const uint32_t pos_state) +{ + const uint32_t len_to_pos_state = get_len_to_pos_state(len); + uint32_t price; + + if (pos < FULL_DISTANCES) { + price = coder->distances_prices[len_to_pos_state][pos]; + } else { + const uint32_t pos_slot = get_pos_slot_2(pos); + price = coder->pos_slot_prices[len_to_pos_state][pos_slot] + + coder->align_prices[pos & ALIGN_MASK]; + } + + price += get_len_price(&coder->match_len_encoder, len, pos_state); + + return price; +} + + +static void +fill_distances_prices(lzma_coder *coder) +{ + for (uint32_t len_to_pos_state = 0; + len_to_pos_state < LEN_TO_POS_STATES; + ++len_to_pos_state) { + + uint32_t *const pos_slot_prices + = coder->pos_slot_prices[len_to_pos_state]; + + // Price to encode the pos_slot. + for (uint32_t pos_slot = 0; + pos_slot < coder->dist_table_size; ++pos_slot) + pos_slot_prices[pos_slot] = rc_bittree_price( + coder->pos_slot[len_to_pos_state], + POS_SLOT_BITS, pos_slot); + + // For matches with distance >= FULL_DISTANCES, add the price + // of the direct bits part of the match distance. (Align bits + // are handled by fill_align_prices()). 
+ for (uint32_t pos_slot = END_POS_MODEL_INDEX; + pos_slot < coder->dist_table_size; ++pos_slot) + pos_slot_prices[pos_slot] += rc_direct_price( + ((pos_slot >> 1) - 1) - ALIGN_BITS); + + // Distances in the range [0, 3] are fully encoded with + // pos_slot, so they are used for coder->distances_prices + // as is. + for (uint32_t i = 0; i < START_POS_MODEL_INDEX; ++i) + coder->distances_prices[len_to_pos_state][i] + = pos_slot_prices[i]; + } + + // Distances in the range [4, 127] depend on pos_slot and pos_special. + // We do this in a loop separate from the above loop to avoid + // redundant calls to get_pos_slot(). + for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) { + const uint32_t pos_slot = get_pos_slot(i); + const uint32_t footer_bits = ((pos_slot >> 1) - 1); + const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; + const uint32_t price = rc_bittree_reverse_price( + coder->pos_special + base - pos_slot - 1, + footer_bits, i - base); + + for (uint32_t len_to_pos_state = 0; + len_to_pos_state < LEN_TO_POS_STATES; + ++len_to_pos_state) + coder->distances_prices[len_to_pos_state][i] + = price + coder->pos_slot_prices[ + len_to_pos_state][pos_slot]; + } + + coder->match_price_count = 0; + return; +} + + +static void +fill_align_prices(lzma_coder *coder) +{ + for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i) + coder->align_prices[i] = rc_bittree_reverse_price( + coder->pos_align, ALIGN_BITS, i); + + coder->align_price_count = 0; + return; +} + + +///////////// +// Optimal // +///////////// + +static inline void +make_literal(lzma_optimal *optimal) +{ + optimal->back_prev = UINT32_MAX; + optimal->prev_1_is_literal = false; +} + + +static inline void +make_short_rep(lzma_optimal *optimal) +{ + optimal->back_prev = 0; + optimal->prev_1_is_literal = false; +} + + +#define is_short_rep(optimal) \ + ((optimal).back_prev == 0) + + +static void +backward(lzma_coder *restrict coder, uint32_t *restrict len_res, + uint32_t *restrict back_res, uint32_t 
cur) +{ + coder->opts_end_index = cur; + + uint32_t pos_mem = coder->opts[cur].pos_prev; + uint32_t back_mem = coder->opts[cur].back_prev; + + do { + if (coder->opts[cur].prev_1_is_literal) { + make_literal(&coder->opts[pos_mem]); + coder->opts[pos_mem].pos_prev = pos_mem - 1; + + if (coder->opts[cur].prev_2) { + coder->opts[pos_mem - 1].prev_1_is_literal + = false; + coder->opts[pos_mem - 1].pos_prev + = coder->opts[cur].pos_prev_2; + coder->opts[pos_mem - 1].back_prev + = coder->opts[cur].back_prev_2; + } + } + + const uint32_t pos_prev = pos_mem; + const uint32_t back_cur = back_mem; + + back_mem = coder->opts[pos_prev].back_prev; + pos_mem = coder->opts[pos_prev].pos_prev; + + coder->opts[pos_prev].back_prev = back_cur; + coder->opts[pos_prev].pos_prev = cur; + cur = pos_prev; + + } while (cur != 0); + + coder->opts_current_index = coder->opts[0].pos_prev; + *len_res = coder->opts[0].pos_prev; + *back_res = coder->opts[0].back_prev; + + return; +} + + +////////// +// Main // +////////// + +static inline uint32_t +helper1(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res, + uint32_t position) +{ + const uint32_t nice_len = mf->nice_len; + + uint32_t len_main; + uint32_t matches_count; + + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX); + if (buf_avail < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + const uint8_t *const buf = mf_ptr(mf) - 1; + + uint32_t rep_lens[REP_DISTANCES]; + uint32_t rep_max_index = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + if (not_equal_16(buf, buf_back)) { + rep_lens[i] = 0; + continue; + } + + uint32_t len_test; + for (len_test = 2; len_test < 
buf_avail + && buf[len_test] == buf_back[len_test]; + ++len_test) ; + + rep_lens[i] = len_test; + if (len_test > rep_lens[rep_max_index]) + rep_max_index = i; + } + + if (rep_lens[rep_max_index] >= nice_len) { + *back_res = rep_max_index; + *len_res = rep_lens[rep_max_index]; + mf_skip(mf, *len_res - 1); + return UINT32_MAX; + } + + + if (len_main >= nice_len) { + *back_res = coder->matches[matches_count - 1].dist + + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return UINT32_MAX; + } + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - coder->reps[0] - 1); + + if (len_main < 2 && current_byte != match_byte + && rep_lens[rep_max_index] < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[0].state = coder->state; + + const uint32_t pos_state = position & coder->pos_mask; + + coder->opts[1].price = rc_bit_0_price( + coder->is_match[coder->state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(coder->state), + match_byte, current_byte); + + make_literal(&coder->opts[1]); + + const uint32_t match_price = rc_bit_1_price( + coder->is_match[coder->state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[coder->state]); + + if (match_byte == current_byte) { + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price( + coder, coder->state, pos_state); + + if (short_rep_price < coder->opts[1].price) { + coder->opts[1].price = short_rep_price; + make_short_rep(&coder->opts[1]); + } + } + + const uint32_t len_end = MAX(len_main, rep_lens[rep_max_index]); + + if (len_end < 2) { + *back_res = coder->opts[1].back_prev; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[1].pos_prev = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->opts[0].backs[i] = coder->reps[i]; + + uint32_t len = len_end; + do { + coder->opts[len].price = RC_INFINITY_PRICE; + } while (--len >= 2); + + + for (uint32_t i 
= 0; i < REP_DISTANCES; ++i) { + uint32_t rep_len = rep_lens[i]; + if (rep_len < 2) + continue; + + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, i, coder->state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price( + &coder->rep_len_encoder, + rep_len, pos_state); + + if (cur_and_len_price < coder->opts[rep_len].price) { + coder->opts[rep_len].price = cur_and_len_price; + coder->opts[rep_len].pos_prev = 0; + coder->opts[rep_len].back_prev = i; + coder->opts[rep_len].prev_1_is_literal = false; + } + } while (--rep_len >= 2); + } + + + const uint32_t normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[coder->state]); + + len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2; + if (len <= len_main) { + uint32_t i = 0; + while (len > coder->matches[i].len) + ++i; + + for(; ; ++len) { + const uint32_t dist = coder->matches[i].dist; + const uint32_t cur_and_len_price = normal_match_price + + get_pos_len_price(coder, + dist, len, pos_state); + + if (cur_and_len_price < coder->opts[len].price) { + coder->opts[len].price = cur_and_len_price; + coder->opts[len].pos_prev = 0; + coder->opts[len].back_prev + = dist + REP_DISTANCES; + coder->opts[len].prev_1_is_literal = false; + } + + if (len == coder->matches[i].len) + if (++i == matches_count) + break; + } + } + + return len_end; +} + + +static inline uint32_t +helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf, + uint32_t len_end, uint32_t position, const uint32_t cur, + const uint32_t nice_len, const uint32_t buf_avail_full) +{ + uint32_t matches_count = coder->matches_count; + uint32_t new_len = coder->longest_match_length; + uint32_t pos_prev = coder->opts[cur].pos_prev; + lzma_lzma_state state; + + if (coder->opts[cur].prev_1_is_literal) { + --pos_prev; + + if (coder->opts[cur].prev_2) { + state = coder->opts[coder->opts[cur].pos_prev_2].state; + + if (coder->opts[cur].back_prev_2 < REP_DISTANCES) + update_long_rep(state); + else + 
update_match(state); + + } else { + state = coder->opts[pos_prev].state; + } + + update_literal(state); + + } else { + state = coder->opts[pos_prev].state; + } + + if (pos_prev == cur - 1) { + if (is_short_rep(coder->opts[cur])) + update_short_rep(state); + else + update_literal(state); + } else { + uint32_t pos; + if (coder->opts[cur].prev_1_is_literal + && coder->opts[cur].prev_2) { + pos_prev = coder->opts[cur].pos_prev_2; + pos = coder->opts[cur].back_prev_2; + update_long_rep(state); + } else { + pos = coder->opts[cur].back_prev; + if (pos < REP_DISTANCES) + update_long_rep(state); + else + update_match(state); + } + + if (pos < REP_DISTANCES) { + reps[0] = coder->opts[pos_prev].backs[pos]; + + uint32_t i; + for (i = 1; i <= pos; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + + for (; i < REP_DISTANCES; ++i) + reps[i] = coder->opts[pos_prev].backs[i]; + + } else { + reps[0] = pos - REP_DISTANCES; + + for (uint32_t i = 1; i < REP_DISTANCES; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + } + } + + coder->opts[cur].state = state; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->opts[cur].backs[i] = reps[i]; + + const uint32_t cur_price = coder->opts[cur].price; + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - reps[0] - 1); + + const uint32_t pos_state = position & coder->pos_mask; + + const uint32_t cur_and_1_price = cur_price + + rc_bit_0_price(coder->is_match[state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(state), match_byte, current_byte); + + bool next_is_literal = false; + + if (cur_and_1_price < coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = cur_and_1_price; + coder->opts[cur + 1].pos_prev = cur; + make_literal(&coder->opts[cur + 1]); + next_is_literal = true; + } + + const uint32_t match_price = cur_price + + rc_bit_1_price(coder->is_match[state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[state]); + + if 
(match_byte == current_byte + && !(coder->opts[cur + 1].pos_prev < cur + && coder->opts[cur + 1].back_prev == 0)) { + + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price(coder, state, pos_state); + + if (short_rep_price <= coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = short_rep_price; + coder->opts[cur + 1].pos_prev = cur; + make_short_rep(&coder->opts[cur + 1]); + next_is_literal = true; + } + } + + if (buf_avail_full < 2) + return len_end; + + const uint32_t buf_avail = MIN(buf_avail_full, nice_len); + + if (!next_is_literal && match_byte != current_byte) { // speed optimization + // try literal + rep0 + const uint8_t *const buf_back = buf - reps[0] - 1; + const uint32_t limit = MIN(buf_avail_full, nice_len + 1); + + uint32_t len_test = 1; + while (len_test < limit && buf[len_test] == buf_back[len_test]) + ++len_test; + + --len_test; + + if (len_test >= 2) { + lzma_lzma_state state_2 = state; + update_literal(state_2); + + const uint32_t pos_state_next = (position + 1) & coder->pos_mask; + const uint32_t next_rep_match_price = cur_and_1_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for (; len_test >= 2; --len_test) { + const uint32_t offset = cur + 1 + len_test; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = false; + } + //} + } + } + + + uint32_t start_len = 2; // speed optimization + + for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) { + const uint8_t *const buf_back = buf - reps[rep_index] - 1; + if (not_equal_16(buf, 
buf_back)) + continue; + + uint32_t len_test; + for (len_test = 2; len_test < buf_avail + && buf[len_test] == buf_back[len_test]; + ++len_test) ; + + while (len_end < cur + len_test) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t len_test_temp = len_test; + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, rep_index, state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev = rep_index; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + } while (--len_test >= 2); + + len_test = len_test_temp; + + if (rep_index == 0) + start_len = len_test + 1; + + + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(buf_avail_full, + len_test_2 + nice_len); + for (; len_test_2 < limit + && buf[len_test_2] == buf_back[len_test_2]; + ++len_test_2) ; + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + lzma_lzma_state state_2 = state; + update_long_rep(state_2); + + uint32_t pos_state_next = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state) + + rc_bit_0_price(coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, position + len_test, + buf[len_test - 1], true, + buf_back[len_test], buf[len_test]); + + update_literal(state_2); + + pos_state_next = (position + len_test + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price = cur_and_len_literal_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for(; len_test_2 >= 2; len_test_2--) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + 
coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 = rep_index; + } + //} + } + } + + + //for (uint32_t len_test = 2; len_test <= new_len; ++len_test) + if (new_len > buf_avail) { + new_len = buf_avail; + + matches_count = 0; + while (new_len > coder->matches[matches_count].len) + ++matches_count; + + coder->matches[matches_count++].len = new_len; + } + + + if (new_len >= start_len) { + const uint32_t normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[state]); + + while (len_end < cur + new_len) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + uint32_t i = 0; + while (start_len > coder->matches[i].len) + ++i; + + for (uint32_t len_test = start_len; ; ++len_test) { + const uint32_t cur_back = coder->matches[i].dist; + uint32_t cur_and_len_price = normal_match_price + + get_pos_len_price(coder, + cur_back, len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev + = cur_back + REP_DISTANCES; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + + if (len_test == coder->matches[i].len) { + // Try Match + Literal + Rep0 + const uint8_t *const buf_back = buf - cur_back - 1; + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(buf_avail_full, + len_test_2 + nice_len); + + for (; len_test_2 < limit && + buf[len_test_2] == buf_back[len_test_2]; + ++len_test_2) ; + + len_test_2 -= 
len_test + 1; + + if (len_test_2 >= 2) { + lzma_lzma_state state_2 = state; + update_match(state_2); + uint32_t pos_state_next + = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = cur_and_len_price + + rc_bit_0_price( + coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, + position + len_test, + buf[len_test - 1], + true, + buf_back[len_test], + buf[len_test]); + + update_literal(state_2); + pos_state_next = (pos_state_next + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price + = cur_and_len_literal_price + + rc_bit_1_price( + coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + // for(; len_test_2 >= 2; --len_test_2) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 + = cur_back + REP_DISTANCES; + } + //} + } + + if (++i == matches_count) + break; + } + } + } + + return len_end; +}
+
+
+/// Pick the next symbol to encode and store it in *back_res / *len_res.
+///
+/// Either hands out a symbol that is still queued in coder->opts[] from
+/// an earlier call, or runs helper1()/helper2() over the upcoming input
+/// and lets backward() pick the result.
+extern void
+lzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint32_t *restrict back_res, uint32_t *restrict len_res,
+		uint32_t position)
+{
+	// Symbols are still pending: return the next one and step
+	// opts_current_index along its pos_prev link.
+	if (coder->opts_current_index != coder->opts_end_index) {
+		assert(mf->read_ahead > 0);
+
+		*len_res = coder->opts[coder->opts_current_index].pos_prev
+				- coder->opts_current_index;
+		*back_res = coder->opts[coder->opts_current_index].back_prev;
+		coder->opts_current_index
+				= coder->opts[coder->opts_current_index].pos_prev;
+		return;
+	}
+
+	// Refresh the price tables. LZMA SDK <= 4.60 (and possibly later)
+	// did this both in the initialization function and in the main
+	// loop; liblzma does it only here, and only while the match finder
+	// has not read ahead.
+	const bool no_read_ahead = mf->read_ahead == 0;
+
+	if (no_read_ahead && coder->match_price_count >= (1 << 7))
+		fill_distances_prices(coder);
+
+	if (no_read_ahead && coder->align_price_count >= ALIGN_TABLE_SIZE)
+		fill_align_prices(coder);
+
+	// TODO: This still needs quite a bit of cleaning. Splitting the
+	// original function into helper1() and helper2() at least makes it
+	// a little more readable, since the two parts share few variables.
+
+	// UINT32_MAX from helper1() means there is nothing more to do here
+	// (presumably the result was already stored; helper1() is not
+	// visible from this part of the file).
+	uint32_t len_end = helper1(coder, mf, back_res, len_res, position);
+	if (len_end == UINT32_MAX)
+		return;
+
+	// Work on a private copy of the repeat distances.
+	uint32_t reps[REP_DISTANCES];
+	memcpy(reps, coder->reps, sizeof(reps));
+
+	// Extend the price table one position at a time. helper2() may
+	// push len_end further out; a match of at least nice_len bytes
+	// ends the search early.
+	uint32_t cur = 1;
+	while (cur < len_end) {
+		assert(cur < OPTS);
+
+		coder->longest_match_length = mf_find(
+				mf, &coder->matches_count, coder->matches);
+
+		if (coder->longest_match_length >= mf->nice_len)
+			break;
+
+		len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end,
+				position + cur, cur, mf->nice_len,
+				MIN(mf_avail(mf) + 1, OPTS - 1 - cur));
+		++cur;
+	}
+
+	backward(coder, len_res, back_res, cur);
+}
Index: contrib/xz/src/liblzma/lzma/lzma2_encoder.c
===================================================================
--- contrib/xz/src/liblzma/lzma/lzma2_encoder.c	(revision 0)
+++ contrib/xz/src/liblzma/lzma/lzma2_encoder.c	(revision 0)
@@ -0,0 +1,393 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzma2_encoder.c
+/// \brief LZMA2 encoder
+///
+// Authors: Igor Pavlov
+//
Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lz_encoder.h"
+#include "lzma_encoder.h"
+#include "fastpos.h"
+#include "lzma2_encoder.h"
+
+
+/// State of the LZMA2 encoder. One chunk at a time is assembled in buf[]
+/// and then copied out by lzma2_encode().
+struct lzma_coder_s {
+	/// Current step of the state machine in lzma2_encode()
+	enum {
+		SEQ_INIT,
+		SEQ_LZMA_ENCODE,
+		SEQ_LZMA_COPY,
+		SEQ_UNCOMPRESSED_HEADER,
+		SEQ_UNCOMPRESSED_COPY,
+	} sequence;
+
+	/// LZMA encoder
+	lzma_coder *lzma;
+
+	/// LZMA options currently in use.
+	lzma_options_lzma opt_cur;
+
+	/// True if the next LZMA chunk header must include the encoded
+	/// lc/lp/pb properties.
+	bool need_properties;
+
+	/// True if the LZMA encoder has to be reset before the next chunk
+	/// is started.
+	bool need_state_reset;
+
+	/// True if the next chunk header must indicate a dictionary reset.
+	bool need_dictionary_reset;
+
+	/// Uncompressed size of a chunk
+	size_t uncompressed_size;
+
+	/// Compressed size of a chunk (excluding headers); this is also used
+	/// to indicate the end of buf[] in SEQ_LZMA_COPY.
+	size_t compressed_size;
+
+	/// Read position in buf[]
+	size_t buf_pos;
+
+	/// Buffer to hold the chunk header and LZMA compressed data
+	uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX];
+};
+
+
+/// Write the header of an LZMA-compressed chunk into coder->buf[].
+///
+/// The header starts at buf[0] when the properties are included and at
+/// buf[1] otherwise; coder->buf_pos is set to that start position. All
+/// three need_* flags are cleared, and coder->compressed_size is turned
+/// into the end position of the chunk in buf[].
+static void
+lzma2_header_lzma(lzma_coder *coder)
+{
+	assert(coder->uncompressed_size > 0);
+	assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
+	assert(coder->compressed_size > 0);
+	assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
+
+	size_t pos;
+
+	// Control byte: 0x80 plus a two-bit reset level stored in bits 5-6
+	// (3 = properties + dictionary reset, 2 = properties,
+	// 1 = state reset, 0 = nothing).
+	if (coder->need_properties) {
+		pos = 0;
+
+		if (coder->need_dictionary_reset)
+			coder->buf[pos] = 0x80 + (3 << 5);
+		else
+			coder->buf[pos] = 0x80 + (2 << 5);
+	} else {
+		pos = 1;
+
+		if (coder->need_state_reset)
+			coder->buf[pos] = 0x80 + (1 << 5);
+		else
+			coder->buf[pos] = 0x80;
+	}
+
+	// Set the start position for copying.
+	coder->buf_pos = pos;
+
+	// Uncompressed size minus one; its highest bits are added to the
+	// control byte, the low 16 bits follow in big endian order.
+	size_t size = coder->uncompressed_size - 1;
+	coder->buf[pos++] += size >> 16;
+	coder->buf[pos++] = (size >> 8) & 0xFF;
+	coder->buf[pos++] = size & 0xFF;
+
+	// Compressed size minus one, 16 bits in big endian order
+	size = coder->compressed_size - 1;
+	coder->buf[pos++] = size >> 8;
+	coder->buf[pos++] = size & 0xFF;
+
+	// Properties, if needed
+	if (coder->need_properties)
+		lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos);
+
+	coder->need_properties = false;
+	coder->need_state_reset = false;
+	coder->need_dictionary_reset = false;
+
+	// The copying code uses coder->compressed_size to indicate the end
+	// of coder->buf[], so we need to add the maximum size of the header
+	// here.
+	coder->compressed_size += LZMA2_HEADER_MAX;
+
+	return;
+}
+
+
+/// Write the three-byte header of an uncompressed chunk to the beginning
+/// of coder->buf[] and clear need_dictionary_reset.
+static void
+lzma2_header_uncompressed(lzma_coder *coder)
+{
+	assert(coder->uncompressed_size > 0);
+	assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX);
+
+	// If this is the first chunk, we need to include dictionary
+	// reset indicator.
+	if (coder->need_dictionary_reset)
+		coder->buf[0] = 1;
+	else
+		coder->buf[0] = 2;
+
+	coder->need_dictionary_reset = false;
+
+	// "Compressed" size: the uncompressed size minus one, 16 bits in
+	// big endian order
+	coder->buf[1] = (coder->uncompressed_size - 1) >> 8;
+	coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF;
+
+	// Set the start position for copying.
+	coder->buf_pos = 0;
+	return;
+}
+
+
+/// Encode data from the match finder into LZMA2 chunks.
+///
+/// Runs the state machine until the output buffer is full, the current
+/// chunk needs more input, or there is nothing left to encode.
+///
+/// \return     LZMA_STREAM_END when no unencoded input remains and
+///             mf->action is not LZMA_RUN; otherwise LZMA_OK. A failing
+///             lzma_lzma_encoder_reset() goes through return_if_error()
+///             (presumably that returns its error code to the caller).
+static lzma_ret
+lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size)
+{
+	while (*out_pos < out_size)
+	switch (coder->sequence) {
+	case SEQ_INIT:
+		// If there's no input left and we are flushing or finishing,
+		// don't start a new chunk.
+		if (mf_unencoded(mf) == 0) {
+			// Write end of payload marker if finishing.
+			if (mf->action == LZMA_FINISH)
+				out[(*out_pos)++] = 0;
+
+			return mf->action == LZMA_RUN
+					? LZMA_OK : LZMA_STREAM_END;
+		}
+
+		if (coder->need_state_reset)
+			return_if_error(lzma_lzma_encoder_reset(
+					coder->lzma, &coder->opt_cur));
+
+		coder->uncompressed_size = 0;
+		coder->compressed_size = 0;
+		coder->sequence = SEQ_LZMA_ENCODE;
+
+	// Fall through
+
+	case SEQ_LZMA_ENCODE: {
+		// Calculate how much more uncompressed data this chunk
+		// could accept.
+		const uint32_t left = LZMA2_UNCOMPRESSED_MAX
+				- coder->uncompressed_size;
+		uint32_t limit;
+
+		if (left < mf->match_len_max) {
+			// Must flush immediately since the next LZMA symbol
+			// could make the uncompressed size of the chunk too
+			// big.
+			limit = 0;
+		} else {
+			// Calculate maximum read_limit that is OK from point
+			// of view of LZMA2 chunk size.
+			limit = mf->read_pos - mf->read_ahead
+					+ left - mf->match_len_max;
+		}
+
+		// Save the start position so that we can update
+		// coder->uncompressed_size.
+		const uint32_t read_start = mf->read_pos - mf->read_ahead;
+
+		// Call the LZMA encoder until the chunk is finished. The
+		// payload goes after the space reserved for the header.
+		const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf,
+				coder->buf + LZMA2_HEADER_MAX,
+				&coder->compressed_size,
+				LZMA2_CHUNK_MAX, limit);
+
+		coder->uncompressed_size += mf->read_pos - mf->read_ahead
+				- read_start;
+
+		assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
+		assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
+
+		// Anything else than LZMA_STREAM_END means the chunk isn't
+		// finished yet, so wait for the next call.
+		if (ret != LZMA_STREAM_END)
+			return LZMA_OK;
+
+		// See if the chunk compressed. If it didn't, we encode it
+		// as uncompressed chunk. This saves a few bytes of space
+		// and makes decoding faster.
+		if (coder->compressed_size >= coder->uncompressed_size) {
+			coder->uncompressed_size += mf->read_ahead;
+			assert(coder->uncompressed_size
+					<= LZMA2_UNCOMPRESSED_MAX);
+			mf->read_ahead = 0;
+			lzma2_header_uncompressed(coder);
+			coder->need_state_reset = true;
+			coder->sequence = SEQ_UNCOMPRESSED_HEADER;
+			break;
+		}
+
+		// The chunk did compress at least by one byte, so we store
+		// the chunk as LZMA.
+		lzma2_header_lzma(coder);
+
+		coder->sequence = SEQ_LZMA_COPY;
+	}
+
+	// Fall through
+
+	case SEQ_LZMA_COPY:
+		// Copy the compressed chunk along its headers to the
+		// output buffer.
+		lzma_bufcpy(coder->buf, &coder->buf_pos,
+				coder->compressed_size,
+				out, out_pos, out_size);
+		if (coder->buf_pos != coder->compressed_size)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_INIT;
+		break;
+
+	case SEQ_UNCOMPRESSED_HEADER:
+		// Copy the three-byte header to indicate uncompressed chunk.
+		lzma_bufcpy(coder->buf, &coder->buf_pos,
+				LZMA2_HEADER_UNCOMPRESSED,
+				out, out_pos, out_size);
+		if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_UNCOMPRESSED_COPY;
+
+	// Fall through
+
+	case SEQ_UNCOMPRESSED_COPY:
+		// Copy the uncompressed data as is from the dictionary
+		// to the output buffer.
+		mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size);
+		if (coder->uncompressed_size != 0)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_INIT;
+		break;
+	}
+
+	return LZMA_OK;
+}
+
+
+/// Free the nested LZMA encoder and then the LZMA2 coder itself.
+static void
+lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	lzma_free(coder->lzma, allocator);
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+/// Take new lc/lp/pb values into use between chunks.
+///
+/// \return     LZMA_PROG_ERROR if filter->options is NULL or a chunk is
+///             in progress, LZMA_OPTIONS_ERROR if the new values are out
+///             of range, otherwise LZMA_OK.
+static lzma_ret
+lzma2_encoder_options_update(lzma_coder *coder, const lzma_filter *filter)
+{
+	// New options can be set only when there is no incomplete chunk.
+	// This is the case at the beginning of the raw stream and right
+	// after LZMA_SYNC_FLUSH.
+	if (filter->options == NULL || coder->sequence != SEQ_INIT)
+		return LZMA_PROG_ERROR;
+
+	// Look if there are new options. At least for now,
+	// only lc/lp/pb can be changed.
+	const lzma_options_lzma *opt = filter->options;
+	if (coder->opt_cur.lc != opt->lc || coder->opt_cur.lp != opt->lp
+			|| coder->opt_cur.pb != opt->pb) {
+		// Validate the options.
+		if (opt->lc > LZMA_LCLP_MAX || opt->lp > LZMA_LCLP_MAX
+				|| opt->lc + opt->lp > LZMA_LCLP_MAX
+				|| opt->pb > LZMA_PB_MAX)
+			return LZMA_OPTIONS_ERROR;
+
+		// The new options will be used when the encoder starts
+		// a new LZMA2 chunk.
+		coder->opt_cur.lc = opt->lc;
+		coder->opt_cur.lp = opt->lp;
+		coder->opt_cur.pb = opt->pb;
+		coder->need_properties = true;
+		coder->need_state_reset = true;
+	}
+
+	return LZMA_OK;
+}
+
+
+/// Allocate (on first use) and initialize the LZMA2 coder inside *lz,
+/// then create or reinitialize the nested LZMA encoder.
+///
+/// \return     LZMA_PROG_ERROR if options is NULL, LZMA_MEM_ERROR if the
+///             coder cannot be allocated, otherwise LZMA_OK unless
+///             lzma_lzma_encoder_create() fails (see return_if_error()).
+static lzma_ret
+lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
+		const void *options, lzma_lz_options *lz_options)
+{
+	if (options == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (lz->coder == NULL) {
+		lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (lz->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		lz->code = &lzma2_encode;
+		lz->end = &lzma2_encoder_end;
+		lz->options_update = &lzma2_encoder_options_update;
+
+		lz->coder->lzma = NULL;
+	}
+
+	lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
+
+	lz->coder->sequence = SEQ_INIT;
+	lz->coder->need_properties = true;
+	lz->coder->need_state_reset = false;
+
+	// A dictionary reset is signalled in the first chunk only when no
+	// preset dictionary is in use.
+	lz->coder->need_dictionary_reset
+			= lz->coder->opt_cur.preset_dict == NULL
+			|| lz->coder->opt_cur.preset_dict_size == 0;
+
+	// Initialize LZMA encoder
+	return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
+			&lz->coder->opt_cur, lz_options));
+
+	// Make sure that we will always have enough history available in
+	// case we need to use uncompressed chunks. They are used when the
+	// compressed size of a chunk is not smaller than the uncompressed
+	// size, so we need to have at least LZMA2_CHUNK_MAX bytes of
+	// history available.
+	if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX)
+		lz_options->before_size
+				= LZMA2_CHUNK_MAX - lz_options->dict_size;
+
+	return LZMA_OK;
+}
+
+
+/// Initialize *next as an LZMA2 encoder by handing lzma2_encoder_init()
+/// to the generic LZ encoder initializer.
+extern lzma_ret
+lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	return lzma_lz_encoder_init(
+			next, allocator, filters, &lzma2_encoder_init);
+}
+
+
+/// Memory usage of the LZMA2 encoder: the LZMA encoder's usage plus
+/// sizeof(lzma_coder). UINT64_MAX from the LZMA encoder is passed
+/// through unchanged.
+extern uint64_t
+lzma_lzma2_encoder_memusage(const void *options)
+{
+	const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options);
+	if (lzma_mem == UINT64_MAX)
+		return UINT64_MAX;
+
+	return sizeof(lzma_coder) + lzma_mem;
+}
+
+
+/// Encode the dictionary size from *options into the single byte out[0].
+///
+/// \return     LZMA_PROG_ERROR if options is NULL, otherwise LZMA_OK.
+extern lzma_ret
+lzma_lzma2_props_encode(const void *options, uint8_t *out)
+{
+	// Reject missing options instead of dereferencing a NULL pointer;
+	// lzma2_encoder_init() handles NULL options the same way.
+	if (options == NULL)
+		return LZMA_PROG_ERROR;
+
+	const lzma_options_lzma *const opt = options;
+	uint32_t d = MAX(opt->dict_size, LZMA_DICT_SIZE_MIN);
+
+	// Round up to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending
+	// on which one is the next:
+	--d;
+	d |= d >> 2;
+	d |= d >> 3;
+	d |= d >> 4;
+	d |= d >> 8;
+	d |= d >> 16;
+
+	// Get the highest two bits using the proper encoding:
+	if (d == UINT32_MAX)
+		out[0] = 40;
+	else
+		out[0] = get_pos_slot(d + 1) - 24;
+
+	return LZMA_OK;
+}
Index: contrib/xz/src/liblzma/lzma/fastpos_table.c
===================================================================
--- contrib/xz/src/liblzma/lzma/fastpos_table.c	(revision 0)
+++ contrib/xz/src/liblzma/lzma/fastpos_table.c	(revision 0)
@@ -0,0 +1,519 @@
+/* This file has been automatically generated by fastpos_tablegen.c.
*/ + +#include "common.h" +#include "fastpos.h" + +const uint8_t lzma_fastpos[1 << FASTPOS_BITS] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 
22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25 +}; Index: contrib/xz/src/liblzma/lzma/lzma_decoder.c =================================================================== --- 
contrib/xz/src/liblzma/lzma/lzma_decoder.c (revision 0) +++ contrib/xz/src/liblzma/lzma/lzma_decoder.c (revision 0) @@ -0,0 +1,1057 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_decoder.c +/// \brief LZMA decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_decoder.h" +#include "lzma_common.h" +#include "lzma_decoder.h" +#include "range_decoder.h" + + +#ifdef HAVE_SMALL + +// Macros for (somewhat) size-optimized code. +#define seq_4(seq) seq + +#define seq_6(seq) seq + +#define seq_8(seq) seq + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _CHOICE2, \ + seq ## _BITTREE + +#define len_decode(target, ld, pos_state, seq) \ +do { \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + probs = ld.low[pos_state];\ + limit = LEN_LOW_SYMBOLS; \ + target = MATCH_LEN_MIN; \ + } else { \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + probs = ld.mid[pos_state]; \ + limit = LEN_MID_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ + } else { \ + rc_update_1(ld.choice2); \ + probs = ld.high; \ + limit = LEN_HIGH_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ + + LEN_MID_SYMBOLS; \ + } \ + } \ + symbol = 1; \ +case seq ## _BITTREE: \ + do { \ + rc_bit(probs[symbol], , , seq ## _BITTREE); \ + } while (symbol < limit); \ + target += symbol - limit; \ +} while (0) + +#else // HAVE_SMALL + +// Unrolled versions +#define seq_4(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3 + +#define seq_6(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5 + +#define seq_8(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + 
seq ## 5, \ + seq ## 6, \ + seq ## 7 + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _LOW0, \ + seq ## _LOW1, \ + seq ## _LOW2, \ + seq ## _CHOICE2, \ + seq ## _MID0, \ + seq ## _MID1, \ + seq ## _MID2, \ + seq ## _HIGH0, \ + seq ## _HIGH1, \ + seq ## _HIGH2, \ + seq ## _HIGH3, \ + seq ## _HIGH4, \ + seq ## _HIGH5, \ + seq ## _HIGH6, \ + seq ## _HIGH7 + +#define len_decode(target, ld, pos_state, seq) \ +do { \ + symbol = 1; \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ + target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ + } else { \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID0); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID1); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID2); \ + target = symbol - LEN_MID_SYMBOLS \ + + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ + } else { \ + rc_update_1(ld.choice2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ + target = symbol - LEN_HIGH_SYMBOLS \ + + MATCH_LEN_MIN \ + + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ + } \ + } \ +} while (0) + +#endif // HAVE_SMALL + + +/// Length decoder probabilities; see comments in lzma_common.h. 
+typedef struct { + probability choice; + probability choice2; + probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + probability high[LEN_HIGH_SYMBOLS]; +} lzma_length_decoder; + + +struct lzma_coder_s { + /////////////////// + // Probabilities // + /////////////////// + + /// Literals; see comments in lzma_common.h. + probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; + + /// If 1, it's a match. Otherwise it's a single 8-bit literal. + probability is_match[STATES][POS_STATES_MAX]; + + /// If 1, it's a repeated match. The distance is one of rep0 .. rep3. + probability is_rep[STATES]; + + /// If 0, distance of a repeated match is rep0. + /// Otherwise check is_rep1. + probability is_rep0[STATES]; + + /// If 0, distance of a repeated match is rep1. + /// Otherwise check is_rep2. + probability is_rep1[STATES]; + + /// If 0, distance of a repeated match is rep2. Otherwise it is rep3. + probability is_rep2[STATES]; + + /// If 0, the repeated match has length of one byte. Otherwise + /// the length is decoded from rep_len_decoder. + probability is_rep0_long[STATES][POS_STATES_MAX]; + + /// Probability tree for the highest two bits of the match distance. + /// There is a separate probability tree for match lengths of + /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS]; + + /// Probability trees for additional bits for match distance when the + /// distance is in the range [4, 127]. + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX]; + + /// Probability tree for the lowest four bits of a match distance + /// that is equal to or greater than 128. 
+ probability pos_align[ALIGN_TABLE_SIZE]; + + /// Length of a normal match + lzma_length_decoder match_len_decoder; + + /// Length of a repeated match + lzma_length_decoder rep_len_decoder; + + /////////////////// + // Decoder state // + /////////////////// + + // Range coder + lzma_range_decoder rc; + + // Types of the most recently seen LZMA symbols + lzma_lzma_state state; + + uint32_t rep0; ///< Distance of the latest match + uint32_t rep1; ///< Distance of second latest match + uint32_t rep2; ///< Distance of third latest match + uint32_t rep3; ///< Distance of fourth latest match + + uint32_t pos_mask; // (1U << pb) - 1 + uint32_t literal_context_bits; + uint32_t literal_pos_mask; + + /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of + /// payload marker is expected. + lzma_vli uncompressed_size; + + //////////////////////////////// + // State of incomplete symbol // + //////////////////////////////// + + /// Position where to continue the decoder loop + enum { + SEQ_NORMALIZE, + SEQ_IS_MATCH, + seq_8(SEQ_LITERAL), + seq_8(SEQ_LITERAL_MATCHED), + SEQ_LITERAL_WRITE, + SEQ_IS_REP, + seq_len(SEQ_MATCH_LEN), + seq_6(SEQ_POS_SLOT), + SEQ_POS_MODEL, + SEQ_DIRECT, + seq_4(SEQ_ALIGN), + SEQ_EOPM, + SEQ_IS_REP0, + SEQ_SHORTREP, + SEQ_IS_REP0_LONG, + SEQ_IS_REP1, + SEQ_IS_REP2, + seq_len(SEQ_REP_LEN), + SEQ_COPY, + } sequence; + + /// Base of the current probability tree + probability *probs; + + /// Symbol being decoded. This is also used as an index variable in + /// bittree decoders: probs[symbol] + uint32_t symbol; + + /// Used as a loop termination condition on bittree decoders and + /// direct bits decoder. + uint32_t limit; + + /// Matched literal decoder: 0x100 or 0 to help avoid branches. + /// Bittree reverse decoders: Offset of the next bit: 1 << offset + uint32_t offset; + + /// If decoding a literal: match byte. + /// If decoding a match: length of the match. 
+ uint32_t len; +}; + + +static lzma_ret +lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr, + const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + //////////////////// + // Initialization // + //////////////////// + + if (!rc_read_init(&coder->rc, in, in_pos, in_size)) + return LZMA_OK; + + /////////////// + // Variables // + /////////////// + + // Making local copies of often-used variables improves both + // speed and readability. + + lzma_dict dict = *dictptr; + + const size_t dict_start = dict.pos; + + // Range decoder + rc_to_local(coder->rc, *in_pos); + + // State + uint32_t state = coder->state; + uint32_t rep0 = coder->rep0; + uint32_t rep1 = coder->rep1; + uint32_t rep2 = coder->rep2; + uint32_t rep3 = coder->rep3; + + const uint32_t pos_mask = coder->pos_mask; + + // These variables are actually needed only if we last time ran + // out of input in the middle of the decoder loop. + probability *probs = coder->probs; + uint32_t symbol = coder->symbol; + uint32_t limit = coder->limit; + uint32_t offset = coder->offset; + uint32_t len = coder->len; + + const uint32_t literal_pos_mask = coder->literal_pos_mask; + const uint32_t literal_context_bits = coder->literal_context_bits; + + // Temporary variables + uint32_t pos_state = dict.pos & pos_mask; + + lzma_ret ret = LZMA_OK; + + // If uncompressed size is known, there must be no end of payload + // marker. + const bool no_eopm = coder->uncompressed_size + != LZMA_VLI_UNKNOWN; + if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos) + dict.limit = dict.pos + (size_t)(coder->uncompressed_size); + + // The main decoder loop. The "switch" is used to restart the decoder at + // correct location. Once restarted, the "switch" is no longer used. + switch (coder->sequence) + while (true) { + // Calculate new pos_state. This is skipped on the first loop + // since we already calculated it when setting up the local + // variables. 
+ pos_state = dict.pos & pos_mask; + + case SEQ_NORMALIZE: + case SEQ_IS_MATCH: + if (unlikely(no_eopm && dict.pos == dict.limit)) + break; + + rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) { + rc_update_0(coder->is_match[state][pos_state]); + + // It's a literal i.e. a single 8-bit byte. + + probs = literal_subcoder(coder->literal, + literal_context_bits, literal_pos_mask, + dict.pos, dict_get(&dict, 0)); + symbol = 1; + + if (is_literal_state(state)) { + // Decode literal without match byte. +#ifdef HAVE_SMALL + case SEQ_LITERAL: + do { + rc_bit(probs[symbol], , , SEQ_LITERAL); + } while (symbol < (1 << 8)); +#else + rc_bit_case(probs[symbol], , , SEQ_LITERAL0); + rc_bit_case(probs[symbol], , , SEQ_LITERAL1); + rc_bit_case(probs[symbol], , , SEQ_LITERAL2); + rc_bit_case(probs[symbol], , , SEQ_LITERAL3); + rc_bit_case(probs[symbol], , , SEQ_LITERAL4); + rc_bit_case(probs[symbol], , , SEQ_LITERAL5); + rc_bit_case(probs[symbol], , , SEQ_LITERAL6); + rc_bit_case(probs[symbol], , , SEQ_LITERAL7); +#endif + } else { + // Decode literal with match byte. + // + // We store the byte we compare against + // ("match byte") to "len" to minimize the + // number of variables we need to store + // between decoder calls. + len = dict_get(&dict, rep0) << 1; + + // The usage of "offset" allows omitting some + // branches, which should give tiny speed + // improvement on some CPUs. "offset" gets + // set to zero if match_bit didn't match. + offset = 0x100; + +#ifdef HAVE_SMALL + case SEQ_LITERAL_MATCHED: + do { + const uint32_t match_bit + = len & offset; + const uint32_t subcoder_index + = offset + match_bit + + symbol; + + rc_bit(probs[subcoder_index], + offset &= ~match_bit, + offset &= match_bit, + SEQ_LITERAL_MATCHED); + + // It seems to be faster to do this + // here instead of putting it to the + // beginning of the loop and then + // putting the "case" in the middle + // of the loop. + len <<= 1; + + } while (symbol < (1 << 8)); +#else + // Unroll the loop. 
+ uint32_t match_bit; + uint32_t subcoder_index; + +# define d(seq) \ + case seq: \ + match_bit = len & offset; \ + subcoder_index = offset + match_bit + symbol; \ + rc_bit(probs[subcoder_index], \ + offset &= ~match_bit, \ + offset &= match_bit, \ + seq) + + d(SEQ_LITERAL_MATCHED0); + len <<= 1; + d(SEQ_LITERAL_MATCHED1); + len <<= 1; + d(SEQ_LITERAL_MATCHED2); + len <<= 1; + d(SEQ_LITERAL_MATCHED3); + len <<= 1; + d(SEQ_LITERAL_MATCHED4); + len <<= 1; + d(SEQ_LITERAL_MATCHED5); + len <<= 1; + d(SEQ_LITERAL_MATCHED6); + len <<= 1; + d(SEQ_LITERAL_MATCHED7); +# undef d +#endif + } + + //update_literal(state); + // Use a lookup table to update to literal state, + // since compared to other state updates, this would + // need two branches. + static const lzma_lzma_state next_state[] = { + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT + }; + state = next_state[state]; + + case SEQ_LITERAL_WRITE: + if (unlikely(dict_put(&dict, symbol))) { + coder->sequence = SEQ_LITERAL_WRITE; + goto out; + } + + continue; + } + + // Instead of a new byte we are going to get a byte range + // (distance and length) which will be repeated from our + // output history. + + rc_update_1(coder->is_match[state][pos_state]); + + case SEQ_IS_REP: + rc_if_0(coder->is_rep[state], SEQ_IS_REP) { + // Not a repeated match + rc_update_0(coder->is_rep[state]); + update_match(state); + + // The latest three match distances are kept in + // memory in case there are repeated matches. + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + // Decode the length of the match. + len_decode(len, coder->match_len_decoder, + pos_state, SEQ_MATCH_LEN); + + // Prepare to decode the highest two bits of the + // match distance. 
+ probs = coder->pos_slot[get_len_to_pos_state(len)]; + symbol = 1; + +#ifdef HAVE_SMALL + case SEQ_POS_SLOT: + do { + rc_bit(probs[symbol], , , SEQ_POS_SLOT); + } while (symbol < POS_SLOTS); +#else + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5); +#endif + // Get rid of the highest bit that was needed for + // indexing of the probability array. + symbol -= POS_SLOTS; + assert(symbol <= 63); + + if (symbol < START_POS_MODEL_INDEX) { + // Match distances [0, 3] have only two bits. + rep0 = symbol; + } else { + // Decode the lowest [1, 29] bits of + // the match distance. + limit = (symbol >> 1) - 1; + assert(limit >= 1 && limit <= 30); + rep0 = 2 + (symbol & 1); + + if (symbol < END_POS_MODEL_INDEX) { + // Prepare to decode the low bits for + // a distance of [4, 127]. + assert(limit <= 5); + rep0 <<= limit; + assert(rep0 <= 96); + // -1 is fine, because we start + // decoding at probs[1], not probs[0]. + // NOTE: This violates the C standard, + // since we are doing pointer + // arithmetic past the beginning of + // the array. 
+ assert((int32_t)(rep0 - symbol - 1) + >= -1); + assert((int32_t)(rep0 - symbol - 1) + <= 82); + probs = coder->pos_special + rep0 + - symbol - 1; + symbol = 1; + offset = 0; + case SEQ_POS_MODEL: +#ifdef HAVE_SMALL + do { + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } while (++offset < limit); +#else + switch (limit) { + case 5: + assert(offset == 0); + rc_bit(probs[symbol], , + rep0 += 1, + SEQ_POS_MODEL); + ++offset; + --limit; + case 4: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 3: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 2: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 1: + // We need "symbol" only for + // indexing the probability + // array, thus we can use + // rc_bit_last() here to omit + // the unneeded updating of + // "symbol". + rc_bit_last(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } +#endif + } else { + // The distance is >= 128. Decode the + // lower bits without probabilities + // except the lowest four bits. + assert(symbol >= 14); + assert(limit >= 6); + limit -= ALIGN_BITS; + assert(limit >= 2); + case SEQ_DIRECT: + // Not worth manual unrolling + do { + rc_direct(rep0, SEQ_DIRECT); + } while (--limit > 0); + + // Decode the lowest four bits using + // probabilities. 
+ rep0 <<= ALIGN_BITS; + symbol = 1; +#ifdef HAVE_SMALL + offset = 0; + case SEQ_ALIGN: + do { + rc_bit(coder->pos_align[ + symbol], , + rep0 += 1 << offset, + SEQ_ALIGN); + } while (++offset < ALIGN_BITS); +#else + case SEQ_ALIGN0: + rc_bit(coder->pos_align[symbol], , + rep0 += 1, SEQ_ALIGN0); + case SEQ_ALIGN1: + rc_bit(coder->pos_align[symbol], , + rep0 += 2, SEQ_ALIGN1); + case SEQ_ALIGN2: + rc_bit(coder->pos_align[symbol], , + rep0 += 4, SEQ_ALIGN2); + case SEQ_ALIGN3: + // Like in SEQ_POS_MODEL, we don't + // need "symbol" for anything else + // than indexing the probability array. + rc_bit_last(coder->pos_align[symbol], , + rep0 += 8, SEQ_ALIGN3); +#endif + + if (rep0 == UINT32_MAX) { + // End of payload marker was + // found. It must not be + // present if uncompressed + // size is known. + if (coder->uncompressed_size + != LZMA_VLI_UNKNOWN) { + ret = LZMA_DATA_ERROR; + goto out; + } + + case SEQ_EOPM: + // TODO Comment + rc_normalize(SEQ_EOPM); + ret = LZMA_STREAM_END; + goto out; + } + } + } + + // Validate the distance we just decoded. + if (unlikely(!dict_is_distance_valid(&dict, rep0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + } else { + rc_update_1(coder->is_rep[state]); + + // Repeated match + // + // The match distance is a value that we have had + // earlier. The latest four match distances are + // available as rep0, rep1, rep2 and rep3. We will + // now decode which of them is the new distance. + // + // There cannot be a match if we haven't produced + // any output, so check that first. + if (unlikely(!dict_is_distance_valid(&dict, 0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + case SEQ_IS_REP0: + rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) { + rc_update_0(coder->is_rep0[state]); + // The distance is rep0. 
+ + case SEQ_IS_REP0_LONG: + rc_if_0(coder->is_rep0_long[state][pos_state], + SEQ_IS_REP0_LONG) { + rc_update_0(coder->is_rep0_long[ + state][pos_state]); + + update_short_rep(state); + + case SEQ_SHORTREP: + if (unlikely(dict_put(&dict, dict_get( + &dict, rep0)))) { + coder->sequence = SEQ_SHORTREP; + goto out; + } + + continue; + } + + // Repeating more than one byte at + // distance of rep0. + rc_update_1(coder->is_rep0_long[ + state][pos_state]); + + } else { + rc_update_1(coder->is_rep0[state]); + + case SEQ_IS_REP1: + // The distance is rep1, rep2 or rep3. Once + // we find out which one of these three, it + // is stored to rep0 and rep1, rep2 and rep3 + // are updated accordingly. + rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) { + rc_update_0(coder->is_rep1[state]); + + const uint32_t distance = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep1[state]); + case SEQ_IS_REP2: + rc_if_0(coder->is_rep2[state], + SEQ_IS_REP2) { + rc_update_0(coder->is_rep2[ + state]); + + const uint32_t distance = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep2[ + state]); + + const uint32_t distance = rep3; + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + } + } + } + + update_long_rep(state); + + // Decode the length of the repeated match. + len_decode(len, coder->rep_len_decoder, + pos_state, SEQ_REP_LEN); + } + + ///////////////////////////////// + // Repeat from history buffer. // + ///////////////////////////////// + + // The length is always between these limits. There is no way + // to trigger the algorithm to set len outside this range. + assert(len >= MATCH_LEN_MIN); + assert(len <= MATCH_LEN_MAX); + + case SEQ_COPY: + // Repeat len bytes from distance of rep0. 
+ if (unlikely(dict_repeat(&dict, rep0, &len))) { + coder->sequence = SEQ_COPY; + goto out; + } + } + + rc_normalize(SEQ_NORMALIZE); + coder->sequence = SEQ_IS_MATCH; + +out: + // Save the local copies back to the coder so that decoding can + // resume from coder->sequence on the next call. + + // NOTE: Must not copy dict.limit. The local copy may have been lowered + // above to honor a known uncompressed size. + dictptr->pos = dict.pos; + dictptr->full = dict.full; + + rc_from_local(coder->rc, *in_pos); + + coder->state = state; + coder->rep0 = rep0; + coder->rep1 = rep1; + coder->rep2 = rep2; + coder->rep3 = rep3; + + coder->probs = probs; + coder->symbol = symbol; + coder->limit = limit; + coder->offset = offset; + coder->len = len; + + // Update the remaining amount of uncompressed data if uncompressed + // size was known. + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) { + coder->uncompressed_size -= dict.pos - dict_start; + + // Since there cannot be an end of payload marker if the + // uncompressed size was known, we check here if we + // finished decoding. Stopping anywhere else than at the + // start of a new symbol (SEQ_IS_MATCH) means corrupt data. + if (coder->uncompressed_size == 0 && ret == LZMA_OK + && coder->sequence != SEQ_NORMALIZE) + ret = coder->sequence == SEQ_IS_MATCH + ? LZMA_STREAM_END : LZMA_DATA_ERROR; + } + + // We can do an additional check in the range decoder to catch some + // corrupted files. + if (ret == LZMA_STREAM_END) { + if (!rc_is_finished(coder->rc)) + ret = LZMA_DATA_ERROR; + + // Reset the range decoder so that it is ready to reinitialize + // for a new LZMA2 chunk. + rc_reset(coder->rc); + } + + return ret; +} + + + +/// Sets the uncompressed size used by lzma_decode(). LZMA_VLI_UNKNOWN +/// means the size is not known and an end of payload marker is allowed. +static void +lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) +{ + coder->uncompressed_size = uncompressed_size; +} + +/* +extern void +lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size) +{ + // This is a hack. + // NOTE(review): this disabled code dereferences `coder`, but the + // parameter is named `coder_ptr`; it would not compile if re-enabled + // as is. + (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size; +} +*/ + +/// Resets the decoder state (probabilities, rep distances and the resume +/// point) using lc/lp/pb from the lzma_options_lzma pointed to by opt. +static void +lzma_decoder_reset(lzma_coder *coder, const void *opt) +{ + const lzma_options_lzma *options = opt; + + // NOTE: We assume that lc/lp/pb are valid since they were + // successfully decoded with lzma_lzma_decode_properties(). + // FIXME?
+ + // Calculate pos_mask. We don't need pos_bits as is for anything. + coder->pos_mask = (1U << options->pb) - 1; + + // Initialize the literal decoder. + literal_init(coder->literal, options->lc, options->lp); + + coder->literal_context_bits = options->lc; + coder->literal_pos_mask = (1U << options->lp) - 1; + + // State + coder->state = STATE_LIT_LIT; + coder->rep0 = 0; + coder->rep1 = 0; + coder->rep2 = 0; + coder->rep3 = 0; + // NOTE(review): pos_mask was already set to this same value above; + // this second assignment looks redundant. + coder->pos_mask = (1U << options->pb) - 1; + + // Range decoder + rc_reset(coder->rc); + + // Bit and bittree decoders + for (uint32_t i = 0; i < STATES; ++i) { + // Only the pos_mask + 1 position states selectable with the + // current pb are reset. + for (uint32_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); + } + + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); + } + + for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i) + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); + + for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) + bit_reset(coder->pos_special[i]); + + bittree_reset(coder->pos_align, ALIGN_BITS); + + // Len decoders (also bit/bittree) + const uint32_t num_pos_states = 1U << options->pb; + bit_reset(coder->match_len_decoder.choice); + bit_reset(coder->match_len_decoder.choice2); + bit_reset(coder->rep_len_decoder.choice); + bit_reset(coder->rep_len_decoder.choice2); + + for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { + bittree_reset(coder->match_len_decoder.low[pos_state], + LEN_LOW_BITS); + bittree_reset(coder->match_len_decoder.mid[pos_state], + LEN_MID_BITS); + + bittree_reset(coder->rep_len_decoder.low[pos_state], + LEN_LOW_BITS); + bittree_reset(coder->rep_len_decoder.mid[pos_state], + LEN_MID_BITS); + } + + bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); + + // Start decoding from the top of the main loop and clear the + // variables that only matter when resuming in the middle of a symbol. + coder->sequence = SEQ_IS_MATCH; + coder->probs = NULL; + coder->symbol = 0; + coder->limit = 0; + 
coder->offset = 0; + coder->len = 0; + + return; +} + + +extern lzma_ret +lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, lzma_lz_options *lz_options) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma_decode; + lz->reset = &lzma_decoder_reset; + lz->set_uncompressed = &lzma_decoder_uncompressed; + } + + // All dictionary sizes are OK here. LZ decoder will take care of + // the special cases. + const lzma_options_lzma *options = opt; + lz_options->dict_size = options->dict_size; + lz_options->preset_dict = options->preset_dict; + lz_options->preset_dict_size = options->preset_dict_size; + + return LZMA_OK; +} + + +/// Allocate and initialize LZMA decoder. This is used only via LZ +/// initialization (lzma_lzma_decoder_init() passes function pointer to +/// the LZ initialization). +static lzma_ret +lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *options, lzma_lz_options *lz_options) +{ + if (!is_lclppb_valid(options)) + return LZMA_PROG_ERROR; + + return_if_error(lzma_lzma_decoder_create( + lz, allocator, options, lz_options)); + + lzma_decoder_reset(lz->coder, options); + lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN); + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma_decoder_init); +} + + +extern bool +lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) +{ + if (byte > (4 * 5 + 4) * 9 + 8) + return true; + + // See the file format specification to understand this. 
+ options->pb = byte / (9 * 5); + byte -= options->pb * 9 * 5; + options->lp = byte / 9; + options->lc = byte - options->lp * 9; + + return options->lc + options->lp > LZMA_LCLP_MAX; +} + + +extern uint64_t +lzma_lzma_decoder_memusage_nocheck(const void *options) +{ + const lzma_options_lzma *const opt = options; + return sizeof(lzma_coder) + lzma_lz_decoder_memusage(opt->dict_size); +} + + +extern uint64_t +lzma_lzma_decoder_memusage(const void *options) +{ + if (!is_lclppb_valid(options)) + return UINT64_MAX; + + return lzma_lzma_decoder_memusage_nocheck(options); +} + + +extern lzma_ret +lzma_lzma_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 5) + return LZMA_OPTIONS_ERROR; + + lzma_options_lzma *opt + = lzma_alloc(sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (lzma_lzma_lclppb_decode(opt, props[0])) + goto error; + + // All dictionary sizes are accepted, including zero. LZ decoder + // will automatically use a dictionary at least a few KiB even if + // a smaller dictionary is requested. + opt->dict_size = unaligned_read32le(props + 1); + + opt->preset_dict = NULL; + opt->preset_dict_size = 0; + + *options = opt; + + return LZMA_OK; + +error: + lzma_free(opt, allocator); + return LZMA_OPTIONS_ERROR; +} Index: contrib/xz/src/liblzma/lzma/lzma_encoder_optimum_fast.c =================================================================== --- contrib/xz/src/liblzma/lzma/lzma_encoder_optimum_fast.c (revision 0) +++ contrib/xz/src/liblzma/lzma/lzma_encoder_optimum_fast.c (revision 0) @@ -0,0 +1,179 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_fast.c +// +// Author: Igor Pavlov +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" + + +#define change_pair(small_dist, big_dist) \ + (((big_dist) >> 7) > (small_dist)) + + +extern void +lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res) +{ + const uint32_t nice_len = mf->nice_len; + + uint32_t len_main; + uint32_t matches_count; + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint8_t *buf = mf_ptr(mf) - 1; + const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX); + + if (buf_avail < 2) { + // There's not enough input left to encode a match. + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + + // Look for repeated matches; scan the previous four match distances + uint32_t rep_len = 0; + uint32_t rep_index = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + // Pointer to the beginning of the match candidate + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + // If the first two bytes (2 == MATCH_LEN_MIN) do not match, + // this rep is not useful. + if (not_equal_16(buf, buf_back)) + continue; + + // The first two bytes matched. + // Calculate the length of the match. + uint32_t len; + for (len = 2; len < buf_avail + && buf[len] == buf_back[len]; ++len) ; + + // If we have found a repeated match that is at least + // nice_len long, return it immediately. + if (len >= nice_len) { + *back_res = i; + *len_res = len; + mf_skip(mf, len - 1); + return; + } + + if (len > rep_len) { + rep_index = i; + rep_len = len; + } + } + + // We didn't find a long enough repeated match. Encode it as a normal + // match if the match length is at least nice_len. 
+ if (len_main >= nice_len) { + *back_res = coder->matches[matches_count - 1].dist + + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return; + } + + uint32_t back_main = 0; + if (len_main >= 2) { + back_main = coder->matches[matches_count - 1].dist; + + while (matches_count > 1 && len_main == + coder->matches[matches_count - 2].len + 1) { + if (!change_pair(coder->matches[ + matches_count - 2].dist, + back_main)) + break; + + --matches_count; + len_main = coder->matches[matches_count - 1].len; + back_main = coder->matches[matches_count - 1].dist; + } + + if (len_main == 2 && back_main >= 0x80) + len_main = 1; + } + + if (rep_len >= 2) { + if (rep_len + 1 >= len_main + || (rep_len + 2 >= len_main + && back_main > (UINT32_C(1) << 9)) + || (rep_len + 3 >= len_main + && back_main > (UINT32_C(1) << 15))) { + *back_res = rep_index; + *len_res = rep_len; + mf_skip(mf, rep_len - 1); + return; + } + } + + if (len_main < 2 || buf_avail <= 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + + // Get the matches for the next byte. If we find a better match, + // the current byte is encoded as a literal. + coder->longest_match_length = mf_find(mf, + &coder->matches_count, coder->matches); + + if (coder->longest_match_length >= 2) { + const uint32_t new_dist = coder->matches[ + coder->matches_count - 1].dist; + + if ((coder->longest_match_length >= len_main + && new_dist < back_main) + || (coder->longest_match_length == len_main + 1 + && !change_pair(back_main, new_dist)) + || (coder->longest_match_length > len_main + 1) + || (coder->longest_match_length + 1 >= len_main + && len_main >= 3 + && change_pair(new_dist, back_main))) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + } + + // In contrast to LZMA SDK, dictionary could not have been moved + // between mf_find() calls, thus it is safe to just increment + // the old buf pointer instead of recalculating it with mf_ptr(). 
+ ++buf; + + const uint32_t limit = len_main - 1; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + if (not_equal_16(buf, buf_back)) + continue; + + uint32_t len; + for (len = 2; len < limit + && buf[len] == buf_back[len]; ++len) ; + + if (len >= limit) { + *back_res = UINT32_MAX; + *len_res = 1; + return; + } + } + + *back_res = back_main + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 2); + return; +} Index: contrib/xz/src/liblzma/lzma/lzma2_encoder.h =================================================================== --- contrib/xz/src/liblzma/lzma/lzma2_encoder.h (revision 0) +++ contrib/xz/src/liblzma/lzma/lzma2_encoder.h (revision 0) @@ -0,0 +1,41 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_encoder.h +/// \brief LZMA2 encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_ENCODER_H +#define LZMA_LZMA2_ENCODER_H + +#include "common.h" + + +/// Maximum number of bytes of actual data per chunk (no headers) +#define LZMA2_CHUNK_MAX (UINT32_C(1) << 16) + +/// Maximum uncompressed size of LZMA chunk (no headers) +#define LZMA2_UNCOMPRESSED_MAX (UINT32_C(1) << 21) + +/// Maximum size of LZMA2 headers +#define LZMA2_HEADER_MAX 6 + +/// Size of a header for uncompressed chunk +#define LZMA2_HEADER_UNCOMPRESSED 3 + + +extern lzma_ret lzma_lzma2_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_encode(const void *options, uint8_t *out); + +#endif Index: contrib/xz/src/liblzma/lzma/lzma_decoder.h =================================================================== --- contrib/xz/src/liblzma/lzma/lzma_decoder.h (revision 0) +++ contrib/xz/src/liblzma/lzma/lzma_decoder.h (revision 0) @@ -0,0 +1,52 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_decoder.h +/// \brief LZMA decoder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_DECODER_H +#define LZMA_LZMA_DECODER_H + +#include "common.h" + + +/// Allocates and initializes LZMA decoder +extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern uint64_t lzma_lzma_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + + +/// \brief Decodes the LZMA Properties byte (lc/lp/pb) +/// +/// \return true if error occurred, false on success +/// +extern bool lzma_lzma_lclppb_decode( + lzma_options_lzma *options, uint8_t byte); + + +#ifdef LZMA_LZ_DECODER_H +/// Allocate and setup function pointers only. This is used by LZMA1 and +/// LZMA2 decoders. +extern lzma_ret lzma_lzma_decoder_create( + lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, lzma_lz_options *lz_options); + +/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2 +/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb. +extern uint64_t lzma_lzma_decoder_memusage_nocheck(const void *options); + +#endif + +#endif Index: contrib/xz/src/liblzma/subblock/subblock_decoder.h =================================================================== --- contrib/xz/src/liblzma/subblock/subblock_decoder.h (revision 0) +++ contrib/xz/src/liblzma/subblock/subblock_decoder.h (revision 0) @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder.h +/// \brief Decoder of the Subblock filter +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SUBBLOCK_DECODER_H +#define LZMA_SUBBLOCK_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_subblock_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif Index: contrib/xz/src/liblzma/subblock/subblock_encoder.c =================================================================== --- contrib/xz/src/liblzma/subblock/subblock_encoder.c (revision 0) +++ contrib/xz/src/liblzma/subblock/subblock_encoder.c (revision 0) @@ -0,0 +1,984 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_encoder.c +/// \brief Encoder of the Subblock filter +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "subblock_encoder.h" +#include "filter_encoder.h" + + +/// Maximum number of repeats that a single Repeating Data can indicate. +/// This is directly from the file format specification. +#define REPEAT_COUNT_MAX (1U << 28) + +/// Number of bytes the data chunk (not including the header part) must be +/// before we care about alignment. This is somewhat arbitrary. It just +/// doesn't make sense to waste bytes for alignment when the data chunk +/// is very small. +#define MIN_CHUNK_SIZE_FOR_ALIGN 4 + +/// Number of bytes of the header part of Subblock Type `Data'. This is +/// used as the `skew' argument for subblock_align(). +#define ALIGN_SKEW_DATA 4 + +/// Like above but for Repeating Data. +#define ALIGN_SKEW_REPEATING_DATA 5 + +/// Writes one byte to output buffer and updates the alignment counter. 
+#define write_byte(b) \ +do { \ + assert(*out_pos < out_size); \ + out[*out_pos] = b; \ + ++*out_pos; \ + ++coder->alignment.out_pos; \ +} while (0) + + +struct lzma_coder_s { + lzma_next_coder next; + bool next_finished; + + enum { + SEQ_FILL, + SEQ_FLUSH, + SEQ_RLE_COUNT_0, + SEQ_RLE_COUNT_1, + SEQ_RLE_COUNT_2, + SEQ_RLE_COUNT_3, + SEQ_RLE_SIZE, + SEQ_RLE_DATA, + SEQ_DATA_SIZE_0, + SEQ_DATA_SIZE_1, + SEQ_DATA_SIZE_2, + SEQ_DATA_SIZE_3, + SEQ_DATA, + SEQ_SUBFILTER_INIT, + SEQ_SUBFILTER_FLAGS, + } sequence; + + /// Pointer to the options given by the application. This is used + /// for two-way communication with the application. + lzma_options_subblock *options; + + /// Position in various arrays. + size_t pos; + + /// Holds subblock.size - 1 or rle.size - 1 when encoding size + /// of Data or Repeat Count. + uint32_t tmp; + + struct { + /// This is a copy of options->alignment, or + /// LZMA_SUBBLOCK_ALIGNMENT_DEFAULT if options is NULL. + uint32_t multiple; + + /// Number of input bytes which we have processed and started + /// writing out. 32-bit integer is enough since we care only + /// about the lowest bits when fixing alignment. + uint32_t in_pos; + + /// Number of bytes written out. + uint32_t out_pos; + } alignment; + + struct { + /// Pointer to allocated buffer holding the Data field + /// of Subblock Type "Data". + uint8_t *data; + + /// Number of bytes in the buffer. + size_t size; + + /// Allocated size of the buffer. + size_t limit; + + /// Number of input bytes that we have already read but + /// not yet started writing out. This can be different + /// to `size' when using Subfilter. That's why we track + /// in_pending separately for RLE (see below). + uint32_t in_pending; + } subblock; + + struct { + /// Buffer to hold the data that may be coded with + /// Subblock Type `Repeating Data'. + uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX]; + + /// Number of bytes in buffer[]. 
+ size_t size; + + /// Number of times the first `size' bytes of buffer[] + /// will be repeated. + uint64_t count; + + /// Like subblock.in_pending above, but for RLE. + uint32_t in_pending; + } rle; + + struct { + enum { + SUB_NONE, + SUB_SET, + SUB_RUN, + SUB_FLUSH, + SUB_FINISH, + SUB_END_MARKER, + } mode; + + /// This is a copy of options->allow_subfilters. We use + /// this to verify that the application doesn't change + /// the value of allow_subfilters. + bool allow; + + /// When this is true, application is not allowed to modify + /// options->subblock_mode. We may still modify it here. + bool mode_locked; + + /// True if we have encoded at least one byte of data with + /// the Subfilter. + bool got_input; + + /// Track the amount of input available once + /// LZMA_SUBFILTER_FINISH has been enabled. + /// This is needed for sanity checking (kind + /// of duplicating what common/code.c does). + size_t in_avail; + + /// Buffer for the Filter Flags field written after + /// the `Set Subfilter' indicator. + uint8_t *flags; + + /// Size of Filter Flags field. + uint32_t flags_size; + + /// Pointers to Subfilter. + lzma_next_coder subcoder; + + } subfilter; + + /// Temporary buffer used when we are not the last filter in the chain. + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; +}; + + +/// \brief Aligns the output buffer +/// +/// Aligns the output buffer so that after skew bytes the output position is +/// a multiple of coder->alignment.multiple. +static bool +subblock_align(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + size_t chunk_size, uint32_t skew) +{ + assert(*out_pos < out_size); + + // Fix the alignment only if it makes sense at least a little. 
+ if (chunk_size >= MIN_CHUNK_SIZE_FOR_ALIGN) { + // NOTE(review): the modulo below assumes that + // coder->alignment.multiple is non-zero; confirm where it is set. + const uint32_t target = coder->alignment.in_pos + % coder->alignment.multiple; + + while ((coder->alignment.out_pos + skew) + % coder->alignment.multiple != target) { + // Zero indicates padding. + write_byte(0x00); + + // Check if output buffer got full and indicate it to + // the caller. + if (*out_pos == out_size) + return true; + } + } + + // Output buffer is not full. + return false; +} + + +/// \brief Checks if buffer contains repeated data +/// +/// \param needle Buffer containing a single repeat chunk +/// \param needle_size Size of needle in bytes +/// \param buf Buffer to search for repeated needles +/// \param buf_chunks Buffer size is buf_chunks * needle_size. +/// +/// \return True if the whole buf is filled with repeated needles. +/// This is also true when buf_chunks is zero. +/// +static bool +is_repeating(const uint8_t *restrict needle, size_t needle_size, + const uint8_t *restrict buf, size_t buf_chunks) +{ + // Compare one needle-sized chunk at a time and stop at the first + // chunk that differs. + while (buf_chunks-- != 0) { + if (memcmp(buf, needle, needle_size) != 0) + return false; + + buf += needle_size; + } + + return true; +} + + +/// \brief Optimizes the repeating style and updates coder->sequence +static void +subblock_rle_flush(lzma_coder *coder) +{ + // The Subblock decoder can use memset() when the size of the data + // being repeated is one byte, so we check if the RLE buffer is + // filled with a single repeating byte. + if (coder->rle.size > 1) { + const uint8_t b = coder->rle.buffer[0]; + size_t i = 0; + while (true) { + if (coder->rle.buffer[i] != b) + break; + + if (++i == coder->rle.size) { + // Every byte equals b: describe the same + // data as a one-byte chunk repeated + // count * size times. + // TODO Integer overflow check maybe, + // although this needs at least 2**63 bytes + // of input until it gets triggered... + coder->rle.count *= coder->rle.size; + coder->rle.size = 1; + break; + } + } + } + + if (coder->rle.count == 1) { + // The buffer should be repeated only once. It is a + // waste of space to use Repeating Data. Instead, + // write a regular Data Subblock.
See SEQ_RLE_COUNT_0 + // in subblock_buffer() for more info. + coder->tmp = coder->rle.size - 1; + } else if (coder->rle.count > REPEAT_COUNT_MAX) { + // There's so much to repeat that it doesn't fit into + // a 28-bit integer. We will write two or more Subblocks + // of type Repeating Data. + coder->tmp = REPEAT_COUNT_MAX - 1; + } else { + coder->tmp = coder->rle.count - 1; + } + + coder->sequence = SEQ_RLE_COUNT_0; + + return; +} + + +/// \brief Resizes coder->subblock.data for a new size limit +/// +/// The old buffer is freed and a new one allocated, so the previous +/// contents are not preserved. +/// +/// \return LZMA_OK on success, LZMA_OPTIONS_ERROR if new_limit is out of +/// range, or LZMA_MEM_ERROR if the allocation fails. +static lzma_ret +subblock_data_size(lzma_coder *coder, lzma_allocator *allocator, + size_t new_limit) +{ + // Verify that the new limit is valid. + if (new_limit < LZMA_SUBBLOCK_DATA_SIZE_MIN + || new_limit > LZMA_SUBBLOCK_DATA_SIZE_MAX) + return LZMA_OPTIONS_ERROR; + + // If the new limit is different than the previous one, we need + // to reallocate the data buffer. + if (new_limit != coder->subblock.limit) { + lzma_free(coder->subblock.data, allocator); + coder->subblock.data = lzma_alloc(new_limit, allocator); + // NOTE(review): on failure subblock.data is NULL while + // subblock.limit still holds the old value; presumably the + // coder is not used again after LZMA_MEM_ERROR - confirm. + if (coder->subblock.data == NULL) + return LZMA_MEM_ERROR; + } + + coder->subblock.limit = new_limit; + + return LZMA_OK; +} + + +static lzma_ret +subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Changing allow_subfilters is not allowed. + if (coder->options != NULL && coder->subfilter.allow + != coder->options->allow_subfilters) + return LZMA_PROG_ERROR; + + // Check if we need to do something special with the Subfilter. + if (coder->subfilter.allow) { + assert(coder->options != NULL); + + // See if subfilter_mode has been changed.
+ switch (coder->options->subfilter_mode) { + case LZMA_SUBFILTER_NONE: + if (coder->subfilter.mode != SUB_NONE) + return LZMA_PROG_ERROR; + break; + + case LZMA_SUBFILTER_SET: + if (coder->subfilter.mode_locked + || coder->subfilter.mode != SUB_NONE) + return LZMA_PROG_ERROR; + + coder->subfilter.mode = SUB_SET; + coder->subfilter.got_input = false; + + if (coder->sequence == SEQ_FILL) + coder->sequence = SEQ_FLUSH; + + break; + + case LZMA_SUBFILTER_RUN: + if (coder->subfilter.mode != SUB_RUN) + return LZMA_PROG_ERROR; + + break; + + case LZMA_SUBFILTER_FINISH: { + const size_t in_avail = in_size - *in_pos; + + if (coder->subfilter.mode == SUB_RUN) { + if (coder->subfilter.mode_locked) + return LZMA_PROG_ERROR; + + coder->subfilter.mode = SUB_FINISH; + coder->subfilter.in_avail = in_avail; + + } else if (coder->subfilter.mode != SUB_FINISH + || coder->subfilter.in_avail + != in_avail) { + return LZMA_PROG_ERROR; + } + + break; + } + + default: + return LZMA_OPTIONS_ERROR; + } + + // If we are sync-flushing or finishing, the application may + // no longer change subfilter_mode. Note that this check is + // done after checking the new subfilter_mode above; this + // way the application may e.g. set LZMA_SUBFILTER_SET and + // LZMA_SYNC_FLUSH at the same time, but it cannot modify + // subfilter_mode on the later lzma_code() calls before + // we have returned LZMA_STREAM_END. + if (action != LZMA_RUN) + coder->subfilter.mode_locked = true; + } + + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_FILL: + // Grab the new Subblock Data Size and reallocate the buffer. 
+ if (coder->subblock.size == 0 && coder->options != NULL + && coder->options->subblock_data_size + != coder->subblock.limit) + return_if_error(subblock_data_size(coder, + allocator, coder->options + ->subblock_data_size)); + + if (coder->subfilter.mode == SUB_NONE) { + assert(coder->subfilter.subcoder.code == NULL); + + // No Subfilter is enabled, just copy the data as is. + coder->subblock.in_pending += lzma_bufcpy( + in, in_pos, in_size, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit); + + // If we ran out of input before the whole buffer + // was filled, return to application. + if (coder->subblock.size < coder->subblock.limit + && action == LZMA_RUN) + return LZMA_OK; + + } else { + assert(coder->options->subfilter_mode + != LZMA_SUBFILTER_SET); + + // Using LZMA_FINISH automatically toggles + // LZMA_SUBFILTER_FINISH. + // + // NOTE: It is possible that application had set + // LZMA_SUBFILTER_SET and LZMA_FINISH at the same + // time. In that case it is possible that we will + // cycle to LZMA_SUBFILTER_RUN, LZMA_SUBFILTER_FINISH, + // and back to LZMA_SUBFILTER_NONE in a single + // Subblock encoder function call. + if (action == LZMA_FINISH) { + coder->options->subfilter_mode + = LZMA_SUBFILTER_FINISH; + coder->subfilter.mode = SUB_FINISH; + } + + const size_t in_start = *in_pos; + + const lzma_ret ret = coder->subfilter.subcoder.code( + coder->subfilter.subcoder.coder, + allocator, in, in_pos, in_size, + coder->subblock.data, + &coder->subblock.size, + coder->subblock.limit, + coder->subfilter.mode == SUB_FINISH + ? LZMA_FINISH : action); + + const size_t in_used = *in_pos - in_start; + coder->subblock.in_pending += in_used; + if (in_used > 0) + coder->subfilter.got_input = true; + + coder->subfilter.in_avail = in_size - *in_pos; + + if (ret == LZMA_STREAM_END) { + // All currently available input must have + // been processed. + assert(*in_pos == in_size); + + // Flush now. 
Even if coder->subblock.size + // happened to be zero, we still need to go + // to SEQ_FLUSH to possibly finish RLE or + // write the Subfilter Unset indicator. + coder->sequence = SEQ_FLUSH; + + if (coder->subfilter.mode == SUB_RUN) { + // Flushing with Subfilter enabled. + assert(action == LZMA_SYNC_FLUSH); + coder->subfilter.mode = SUB_FLUSH; + break; + } + + // Subfilter finished its job. + assert(coder->subfilter.mode == SUB_FINISH + || action == LZMA_FINISH); + + // At least one byte of input must have been + // encoded with the Subfilter. This is + // required by the file format specification. + if (!coder->subfilter.got_input) + return LZMA_PROG_ERROR; + + // We don't strictly need to do this, but + // doing it sounds like a good idea, because + // otherwise the Subfilter's memory could be + // left allocated for long time, and would + // just waste memory. + lzma_next_end(&coder->subfilter.subcoder, + allocator); + + // We need to flush the currently buffered + // data and write Unset Subfilter marker. + // Note that we cannot set + // coder->options->subfilter_mode to + // LZMA_SUBFILTER_NONE yet, because we + // haven't written the Unset Subfilter + // marker yet. + coder->subfilter.mode = SUB_END_MARKER; + coder->sequence = SEQ_FLUSH; + break; + } + + // Return if we couldn't fill the buffer or + // if an error occurred. + if (coder->subblock.size < coder->subblock.limit + || ret != LZMA_OK) + return ret; + } + + coder->sequence = SEQ_FLUSH; + + // SEQ_FILL doesn't produce any output so falling through + // to SEQ_FLUSH is safe. + assert(*out_pos < out_size); + + // Fall through + + case SEQ_FLUSH: + if (coder->options != NULL) { + // Update the alignment variable. 
+ coder->alignment.multiple = coder->options->alignment; + if (coder->alignment.multiple + < LZMA_SUBBLOCK_ALIGNMENT_MIN + || coder->alignment.multiple + > LZMA_SUBBLOCK_ALIGNMENT_MAX) + return LZMA_OPTIONS_ERROR; + + // Run-length encoder + // + // First check if there is some data pending and we + // have an obvious need to flush it immediately. + if (coder->rle.count > 0 + && (coder->rle.size + != coder->options->rle + || coder->subblock.size + % coder->rle.size)) { + subblock_rle_flush(coder); + break; + } + + // Grab the (possibly new) RLE chunk size and + // validate it. + coder->rle.size = coder->options->rle; + if (coder->rle.size > LZMA_SUBBLOCK_RLE_MAX) + return LZMA_OPTIONS_ERROR; + + if (coder->subblock.size != 0 + && coder->rle.size + != LZMA_SUBBLOCK_RLE_OFF + && coder->subblock.size + % coder->rle.size == 0) { + + // Initialize coder->rle.buffer if we don't + // have RLE already running. + if (coder->rle.count == 0) + memcpy(coder->rle.buffer, + coder->subblock.data, + coder->rle.size); + + // Test if coder->subblock.data is repeating. + // If coder->rle.count would overflow, we + // force flushing. Forced flushing shouldn't + // really happen in real-world situations. + const size_t count = coder->subblock.size + / coder->rle.size; + if (UINT64_MAX - count > coder->rle.count + && is_repeating( + coder->rle.buffer, + coder->rle.size, + coder->subblock.data, + count)) { + coder->rle.count += count; + coder->rle.in_pending += coder + ->subblock.in_pending; + coder->subblock.in_pending = 0; + coder->subblock.size = 0; + + } else if (coder->rle.count > 0) { + // It's not repeating or at least not + // with the same byte sequence as the + // earlier Subblock Data buffers. We + // have some data pending in the RLE + // buffer already, so do a flush. + // Once flushed, we will check again + // if the Subblock Data happens to + // contain a different repeating + // sequence. 
+ subblock_rle_flush(coder); + break; + } + } + } + + // If we now have some data left in coder->subblock, the RLE + // buffer is empty and we must write a regular Subblock Data. + if (coder->subblock.size > 0) { + assert(coder->rle.count == 0); + coder->tmp = coder->subblock.size - 1; + coder->sequence = SEQ_DATA_SIZE_0; + break; + } + + // Check if we should enable Subfilter. + if (coder->subfilter.mode == SUB_SET) { + if (coder->rle.count > 0) + subblock_rle_flush(coder); + else + coder->sequence = SEQ_SUBFILTER_INIT; + break; + } + + // Check if we have just finished Subfiltering. + if (coder->subfilter.mode == SUB_END_MARKER) { + if (coder->rle.count > 0) { + subblock_rle_flush(coder); + break; + } + + coder->options->subfilter_mode = LZMA_SUBFILTER_NONE; + coder->subfilter.mode = SUB_NONE; + + write_byte(0x50); + if (*out_pos == out_size) + return LZMA_OK; + } + + // Check if we have already written everything. + if (action != LZMA_RUN && *in_pos == in_size + && (coder->subfilter.mode == SUB_NONE + || coder->subfilter.mode == SUB_FLUSH)) { + if (coder->rle.count > 0) { + subblock_rle_flush(coder); + break; + } + + if (action == LZMA_SYNC_FLUSH) { + if (coder->subfilter.mode == SUB_FLUSH) + coder->subfilter.mode = SUB_RUN; + + coder->subfilter.mode_locked = false; + coder->sequence = SEQ_FILL; + + } else { + assert(action == LZMA_FINISH); + + // Write EOPM. + // NOTE: No need to use write_byte() here + // since we are finishing. + out[*out_pos] = 0x10; + ++*out_pos; + } + + return LZMA_STREAM_END; + } + + // Otherwise we have more work to do. + coder->sequence = SEQ_FILL; + break; + + case SEQ_RLE_COUNT_0: + assert(coder->rle.count > 0); + + if (coder->rle.count == 1) { + // The buffer should be repeated only once. Fix + // the alignment and write the first byte of + // Subblock Type `Data'. 
+ if (subblock_align(coder, out, out_pos, out_size, + coder->rle.size, ALIGN_SKEW_DATA)) + return LZMA_OK; + + write_byte(0x20 | (coder->tmp & 0x0F)); + + } else { + // We have something to actually repeat, which should + // mean that it takes less space with run-length + // encoding. + if (subblock_align(coder, out, out_pos, out_size, + coder->rle.size, + ALIGN_SKEW_REPEATING_DATA)) + return LZMA_OK; + + write_byte(0x30 | (coder->tmp & 0x0F)); + } + + // NOTE: If we have to write more than one Repeating Data + // due to rle.count > REPEAT_COUNT_MAX, the subsequent + // Repeating Data Subblocks may get wrong alignment, because + // we add rle.in_pending to alignment.in_pos at once instead + // of adding only as much as this particular Repeating Data + // consumed input data. Correct alignment is always restored + // after all the required Repeating Data Subblocks have been + // written. This problem occurs in such a weird cases that + // it's not worth fixing. + coder->alignment.out_pos += coder->rle.size; + coder->alignment.in_pos += coder->rle.in_pending; + coder->rle.in_pending = 0; + + coder->sequence = SEQ_RLE_COUNT_1; + break; + + case SEQ_RLE_COUNT_1: + write_byte(coder->tmp >> 4); + coder->sequence = SEQ_RLE_COUNT_2; + break; + + case SEQ_RLE_COUNT_2: + write_byte(coder->tmp >> 12); + coder->sequence = SEQ_RLE_COUNT_3; + break; + + case SEQ_RLE_COUNT_3: + write_byte(coder->tmp >> 20); + + // Again, see if we are writing regular Data or Repeating Data. + // In the former case, we skip SEQ_RLE_SIZE. 
+ if (coder->rle.count == 1) + coder->sequence = SEQ_RLE_DATA; + else + coder->sequence = SEQ_RLE_SIZE; + + if (coder->rle.count > REPEAT_COUNT_MAX) + coder->rle.count -= REPEAT_COUNT_MAX; + else + coder->rle.count = 0; + + break; + + case SEQ_RLE_SIZE: + assert(coder->rle.size >= LZMA_SUBBLOCK_RLE_MIN); + assert(coder->rle.size <= LZMA_SUBBLOCK_RLE_MAX); + write_byte(coder->rle.size - 1); + coder->sequence = SEQ_RLE_DATA; + break; + + case SEQ_RLE_DATA: + lzma_bufcpy(coder->rle.buffer, &coder->pos, coder->rle.size, + out, out_pos, out_size); + if (coder->pos < coder->rle.size) + return LZMA_OK; + + coder->pos = 0; + coder->sequence = SEQ_FLUSH; + break; + + case SEQ_DATA_SIZE_0: + // We need four bytes for the Size field. + if (subblock_align(coder, out, out_pos, out_size, + coder->subblock.size, ALIGN_SKEW_DATA)) + return LZMA_OK; + + coder->alignment.out_pos += coder->subblock.size; + coder->alignment.in_pos += coder->subblock.in_pending; + coder->subblock.in_pending = 0; + + write_byte(0x20 | (coder->tmp & 0x0F)); + coder->sequence = SEQ_DATA_SIZE_1; + break; + + case SEQ_DATA_SIZE_1: + write_byte(coder->tmp >> 4); + coder->sequence = SEQ_DATA_SIZE_2; + break; + + case SEQ_DATA_SIZE_2: + write_byte(coder->tmp >> 12); + coder->sequence = SEQ_DATA_SIZE_3; + break; + + case SEQ_DATA_SIZE_3: + write_byte(coder->tmp >> 20); + coder->sequence = SEQ_DATA; + break; + + case SEQ_DATA: + lzma_bufcpy(coder->subblock.data, &coder->pos, + coder->subblock.size, out, out_pos, out_size); + if (coder->pos < coder->subblock.size) + return LZMA_OK; + + coder->subblock.size = 0; + coder->pos = 0; + coder->sequence = SEQ_FLUSH; + break; + + case SEQ_SUBFILTER_INIT: { + assert(coder->subblock.size == 0); + assert(coder->subblock.in_pending == 0); + assert(coder->rle.count == 0); + assert(coder->rle.in_pending == 0); + assert(coder->subfilter.mode == SUB_SET); + assert(coder->options != NULL); + + // There must be a filter specified. 
+ if (coder->options->subfilter_options.id == LZMA_VLI_UNKNOWN) + return LZMA_OPTIONS_ERROR; + + // Initialize a raw encoder to work as a Subfilter. + lzma_filter options[2]; + options[0] = coder->options->subfilter_options; + options[1].id = LZMA_VLI_UNKNOWN; + + return_if_error(lzma_raw_encoder_init( + &coder->subfilter.subcoder, allocator, + options)); + + // Encode the Filter Flags field into a buffer. This should + // never fail since we have already successfully initialized + // the Subfilter itself. Check it still, and return + // LZMA_PROG_ERROR instead of whatever the ret would say. + lzma_ret ret = lzma_filter_flags_size( + &coder->subfilter.flags_size, options); + assert(ret == LZMA_OK); + if (ret != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->subfilter.flags = lzma_alloc( + coder->subfilter.flags_size, allocator); + if (coder->subfilter.flags == NULL) + return LZMA_MEM_ERROR; + + // Now we have a big-enough buffer. Encode the Filter Flags. + // Like above, this should never fail. + size_t dummy = 0; + ret = lzma_filter_flags_encode(options, coder->subfilter.flags, + &dummy, coder->subfilter.flags_size); + assert(ret == LZMA_OK); + assert(dummy == coder->subfilter.flags_size); + if (ret != LZMA_OK || dummy != coder->subfilter.flags_size) + return LZMA_PROG_ERROR; + + // Write a Subblock indicating a new Subfilter. + write_byte(0x40); + + coder->options->subfilter_mode = LZMA_SUBFILTER_RUN; + coder->subfilter.mode = SUB_RUN; + coder->alignment.out_pos += coder->subfilter.flags_size; + coder->sequence = SEQ_SUBFILTER_FLAGS; + + // It is safe to fall through because SEQ_SUBFILTER_FLAGS + // uses lzma_bufcpy() which doesn't write unless there is + // output space. + } + + // Fall through + + case SEQ_SUBFILTER_FLAGS: + // Copy the Filter Flags to the output stream. 
+ lzma_bufcpy(coder->subfilter.flags, &coder->pos, + coder->subfilter.flags_size, + out, out_pos, out_size); + if (coder->pos < coder->subfilter.flags_size) + return LZMA_OK; + + lzma_free(coder->subfilter.flags, allocator); + coder->subfilter.flags = NULL; + + coder->pos = 0; + coder->sequence = SEQ_FILL; + break; + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static lzma_ret +subblock_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + if (coder->next.code == NULL) + return subblock_buffer(coder, allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + while (*out_pos < out_size + && (*in_pos < in_size || action != LZMA_RUN)) { + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + coder->temp.buffer, &coder->temp.size, + LZMA_BUFFER_SIZE, action); + if (ret == LZMA_STREAM_END) { + assert(action != LZMA_RUN); + coder->next_finished = true; + } else if (coder->temp.size == 0 || ret != LZMA_OK) { + return ret; + } + } + + const lzma_ret ret = subblock_buffer(coder, allocator, + coder->temp.buffer, &coder->temp.pos, + coder->temp.size, out, out_pos, out_size, + coder->next_finished ? 
LZMA_FINISH : LZMA_RUN); + if (ret == LZMA_STREAM_END) { + assert(action != LZMA_RUN); + assert(coder->next_finished); + return LZMA_STREAM_END; + } + + if (ret != LZMA_OK) + return ret; + } + + return LZMA_OK; +} + + +static void +subblock_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_next_end(&coder->subfilter.subcoder, allocator); + lzma_free(coder->subblock.data, allocator); + lzma_free(coder->subfilter.flags, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &subblock_encode; + next->end = &subblock_encoder_end; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->subblock.data = NULL; + next->coder->subblock.limit = 0; + next->coder->subfilter.subcoder = LZMA_NEXT_CODER_INIT; + } else { + lzma_next_end(&next->coder->subfilter.subcoder, + allocator); + lzma_free(next->coder->subfilter.flags, allocator); + } + + next->coder->subfilter.flags = NULL; + + next->coder->next_finished = false; + next->coder->sequence = SEQ_FILL; + next->coder->options = filters[0].options; + next->coder->pos = 0; + + next->coder->alignment.in_pos = 0; + next->coder->alignment.out_pos = 0; + next->coder->subblock.size = 0; + next->coder->subblock.in_pending = 0; + next->coder->rle.count = 0; + next->coder->rle.in_pending = 0; + next->coder->subfilter.mode = SUB_NONE; + next->coder->subfilter.mode_locked = false; + + next->coder->temp.pos = 0; + next->coder->temp.size = 0; + + // Grab some values from the options structure if it is available. 
+ size_t subblock_size_limit; + if (next->coder->options != NULL) { + if (next->coder->options->alignment + < LZMA_SUBBLOCK_ALIGNMENT_MIN + || next->coder->options->alignment + > LZMA_SUBBLOCK_ALIGNMENT_MAX) { + // No free: next->coder must stay valid for next->end. + return LZMA_OPTIONS_ERROR; + } + next->coder->alignment.multiple + = next->coder->options->alignment; + next->coder->subfilter.allow + = next->coder->options->allow_subfilters; + subblock_size_limit = next->coder->options->subblock_data_size; + } else { + next->coder->alignment.multiple + = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT; + next->coder->subfilter.allow = false; + subblock_size_limit = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT; + } + + return_if_error(subblock_data_size(next->coder, allocator, + subblock_size_limit)); + + return lzma_next_filter_init( + &next->coder->next, allocator, filters + 1); +} Index: contrib/xz/src/liblzma/subblock/subblock_decoder_helper.c =================================================================== --- contrib/xz/src/liblzma/subblock/subblock_decoder_helper.c (revision 0) +++ contrib/xz/src/liblzma/subblock/subblock_decoder_helper.c (revision 0) @@ -0,0 +1,70 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder_helper.c +/// \brief Helper filter for the Subblock decoder +/// +/// This filter is used to indicate End of Input for subfilters needing it. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "subblock_decoder_helper.h" + + +struct lzma_coder_s { + const lzma_options_subblock_helper *options; +}; + + +static lzma_ret +helper_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action lzma_attribute((unused))) +{ + // If end_was_reached is true, we cannot have any input. + assert(!coder->options->end_was_reached || *in_pos == in_size); + + // We can safely copy as much as possible, because we are never + // given more data than a single Subblock Data field. + lzma_bufcpy(in, in_pos, in_size, out, out_pos, out_size); + + // Return LZMA_STREAM_END when instructed so by the Subblock decoder. + return coder->options->end_was_reached ? LZMA_STREAM_END : LZMA_OK; +} + + +static void +helper_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_subblock_decoder_helper_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + // This is always the last filter in the chain. 
+ assert(filters[1].init == NULL); + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &helper_decode; + next->end = &helper_end; + } + + next->coder->options = filters[0].options; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/subblock/subblock_encoder.h =================================================================== --- contrib/xz/src/liblzma/subblock/subblock_encoder.h (revision 0) +++ contrib/xz/src/liblzma/subblock/subblock_encoder.h (revision 0) @@ -0,0 +1,21 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_encoder.h +/// \brief Encoder of the Subblock filter +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SUBBLOCK_ENCODER_H +#define LZMA_SUBBLOCK_ENCODER_H + +#include "common.h" + +extern lzma_ret lzma_subblock_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif Index: contrib/xz/src/liblzma/subblock/subblock_decoder.c =================================================================== --- contrib/xz/src/liblzma/subblock/subblock_decoder.c (revision 0) +++ contrib/xz/src/liblzma/subblock/subblock_decoder.c (revision 0) @@ -0,0 +1,630 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder.c +/// \brief Decoder of the Subblock filter +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "subblock_decoder.h" +#include "subblock_decoder_helper.h" +#include "filter_decoder.h" + + +/// Maximum number of consecutive Subblocks with Subblock Type Padding +#define PADDING_MAX 31 + + +struct lzma_coder_s { + lzma_next_coder next; + + enum { + // These require that there is at least one input + // byte available. + SEQ_FLAGS, + SEQ_FILTER_FLAGS, + SEQ_FILTER_END, + SEQ_REPEAT_COUNT_1, + SEQ_REPEAT_COUNT_2, + SEQ_REPEAT_COUNT_3, + SEQ_REPEAT_SIZE, + SEQ_REPEAT_READ_DATA, + SEQ_SIZE_1, + SEQ_SIZE_2, + SEQ_SIZE_3, // This must be right before SEQ_DATA. + + // These don't require any input to be available. + SEQ_DATA, + SEQ_REPEAT_FAST, + SEQ_REPEAT_NORMAL, + } sequence; + + /// Number of bytes left in the current Subblock Data field. + size_t size; + + /// Number of consecutive Subblocks with Subblock Type Padding + uint32_t padding; + + /// True when .next.code() has returned LZMA_STREAM_END. + bool next_finished; + + /// True when the Subblock decoder has detected End of Payload Marker. + /// This may become true before next_finished becomes true. + bool this_finished; + + /// True if Subfilters are allowed. + bool allow_subfilters; + + /// Indicates if at least one byte of decoded output has been + /// produced after enabling Subfilter. + bool got_output_with_subfilter; + + /// Possible subfilter + lzma_next_coder subfilter; + + /// Filter Flags decoder is needed to parse the ID and Properties + /// of the subfilter. + lzma_next_coder filter_flags_decoder; + + /// The filter_flags_decoder stores its results here. + lzma_filter filter_flags; + + /// Options for the Subblock decoder helper. This is used to tell + /// the helper when it should return LZMA_STREAM_END to the subfilter. 
+ lzma_options_subblock_helper helper; + + struct { + /// How many times buffer should be repeated + size_t count; + + /// Size of the buffer + size_t size; + + /// Position in the buffer + size_t pos; + + /// Buffer to hold the data to be repeated + uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX]; + } repeat; + + /// Temporary buffer needed when the Subblock filter is not the last + /// filter in the chain. The output of the next filter is first + /// decoded into buffer[], which is then used as input for the actual + /// Subblock decoder. + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; +}; + + +/// Values of valid Subblock Flags +enum { + FLAG_PADDING, + FLAG_EOPM, + FLAG_DATA, + FLAG_REPEAT, + FLAG_SET_SUBFILTER, + FLAG_END_SUBFILTER, +}; + + +/// Calls the subfilter's code() function and returns its result unchanged. +static lzma_ret +subfilter_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + assert(coder->subfilter.code != NULL); + + // Call the subfilter. + const lzma_ret ret = coder->subfilter.code( + coder->subfilter.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, action); + + return ret; +} + + +static lzma_ret +decode_buffer(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + while (*out_pos < out_size && (*in_pos < in_size + || coder->sequence >= SEQ_DATA)) + switch (coder->sequence) { + case SEQ_FLAGS: { + // Do the correct action depending on the Subblock Type. + switch (in[*in_pos] >> 4) { + case FLAG_PADDING: + // Only check that reserved bits are zero. + if (++coder->padding > PADDING_MAX + || in[*in_pos] & 0x0F) + return LZMA_DATA_ERROR; + ++*in_pos; + break; + + case FLAG_EOPM: + // There must be no Padding before EOPM. 
+ if (coder->padding != 0) + return LZMA_DATA_ERROR; + + // Check that reserved bits are zero. + if (in[*in_pos] & 0x0F) + return LZMA_DATA_ERROR; + + // There must be no Subfilter enabled. + if (coder->subfilter.code != NULL) + return LZMA_DATA_ERROR; + + ++*in_pos; + return LZMA_STREAM_END; + + case FLAG_DATA: + // First four bits of the Subblock Data size. + coder->size = in[*in_pos] & 0x0F; + ++*in_pos; + coder->got_output_with_subfilter = true; + coder->sequence = SEQ_SIZE_1; + break; + + case FLAG_REPEAT: + // First four bits of the Repeat Count. We use + // coder->size as a temporary place for it. + coder->size = in[*in_pos] & 0x0F; + ++*in_pos; + coder->got_output_with_subfilter = true; + coder->sequence = SEQ_REPEAT_COUNT_1; + break; + + case FLAG_SET_SUBFILTER: { + if (coder->padding != 0 || (in[*in_pos] & 0x0F) + || coder->subfilter.code != NULL + || !coder->allow_subfilters) + return LZMA_DATA_ERROR; + + assert(coder->filter_flags.options == NULL); + abort(); +// return_if_error(lzma_filter_flags_decoder_init( +// &coder->filter_flags_decoder, +// allocator, &coder->filter_flags)); + + coder->got_output_with_subfilter = false; + + ++*in_pos; + coder->sequence = SEQ_FILTER_FLAGS; + break; + } + + case FLAG_END_SUBFILTER: { + if (coder->padding != 0 || (in[*in_pos] & 0x0F) + || coder->subfilter.code == NULL + || !coder->got_output_with_subfilter) + return LZMA_DATA_ERROR; + + // Tell the helper filter to indicate End of Input + // to our subfilter. + coder->helper.end_was_reached = true; + + size_t dummy = 0; + const lzma_ret ret = subfilter_decode(coder, allocator, + NULL, &dummy, 0, out, out_pos,out_size, + action); + + // If we didn't reach the end of the subfilter's output + // yet, return to the application. On the next call we + // will get to this same switch-case again, because we + // haven't updated *in_pos yet. + if (ret != LZMA_STREAM_END) + return ret; + + // Free Subfilter's memory. 
This is a bit debatable, + // since we could avoid some malloc()/free() calls + // if the same Subfilter gets used soon again. But + // if Subfilter isn't used again, we could leave + // a memory-hogging filter dangling until someone + // frees Subblock filter itself. + lzma_next_end(&coder->subfilter, allocator); + + // Free memory used for subfilter options. This is + // safe, because we don't support any Subfilter that + // would allow pointers in the options structure. + lzma_free(coder->filter_flags.options, allocator); + coder->filter_flags.options = NULL; + + ++*in_pos; + + break; + } + + default: + return LZMA_DATA_ERROR; + } + + break; + } + + case SEQ_FILTER_FLAGS: { + const lzma_ret ret = coder->filter_flags_decoder.code( + coder->filter_flags_decoder.coder, allocator, + in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret == LZMA_OPTIONS_ERROR + ? LZMA_DATA_ERROR : ret; + + // Don't free the filter_flags_decoder. It doesn't take much + // memory and we may need it again. + + // Initialize the Subfilter. Subblock and Copy filters are + // not allowed. + if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK) + return LZMA_DATA_ERROR; + + coder->helper.end_was_reached = false; + + lzma_filter filters[3] = { + { + .id = coder->filter_flags.id, + .options = coder->filter_flags.options, + }, { + .id = LZMA_FILTER_SUBBLOCK_HELPER, + .options = &coder->helper, + }, { + .id = LZMA_VLI_UNKNOWN, + .options = NULL, + } + }; + + // Optimization: We know that LZMA uses End of Payload Marker + // (not End of Input), so we can omit the helper filter. + if (filters[0].id == LZMA_FILTER_LZMA1) + filters[1].id = LZMA_VLI_UNKNOWN; + + return_if_error(lzma_raw_decoder_init( + &coder->subfilter, allocator, filters)); + + coder->sequence = SEQ_FLAGS; + break; + } + + case SEQ_FILTER_END: + // We are in the beginning of a Subblock. The next Subblock + // whose type is not Padding, must indicate end of Subfilter. 
+ if (in[*in_pos] == (FLAG_PADDING << 4)) { + ++*in_pos; + break; + } + + if (in[*in_pos] != (FLAG_END_SUBFILTER << 4)) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_FLAGS; + break; + + case SEQ_REPEAT_COUNT_1: + case SEQ_SIZE_1: + // We use the same code to parse + // - the Size (28 bits) in Subblocks of type Data; and + // - the Repeat count (28 bits) in Subblocks of type + // Repeating Data. + coder->size |= (size_t)(in[*in_pos]) << 4; + ++*in_pos; + ++coder->sequence; + break; + + case SEQ_REPEAT_COUNT_2: + case SEQ_SIZE_2: + coder->size |= (size_t)(in[*in_pos]) << 12; + ++*in_pos; + ++coder->sequence; + break; + + case SEQ_REPEAT_COUNT_3: + case SEQ_SIZE_3: + coder->size |= (size_t)(in[*in_pos]) << 20; + ++*in_pos; + + // The real value is the stored value plus one. + ++coder->size; + + // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why + // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence. + ++coder->sequence; + break; + + case SEQ_REPEAT_SIZE: + // Move the Repeat Count to the correct variable and parse + // the Size of the Data to be repeated. + coder->repeat.count = coder->size; + coder->repeat.size = (size_t)(in[*in_pos]) + 1; + coder->repeat.pos = 0; + + // The size of the Data field must be bigger than the number + // of Padding bytes before this Subblock. + if (coder->repeat.size <= coder->padding) + return LZMA_DATA_ERROR; + + ++*in_pos; + coder->padding = 0; + coder->sequence = SEQ_REPEAT_READ_DATA; + break; + + case SEQ_REPEAT_READ_DATA: { + // Fill coder->repeat.buffer[]. 
+ const size_t in_avail = in_size - *in_pos; + const size_t out_avail + = coder->repeat.size - coder->repeat.pos; + const size_t copy_size = MIN(in_avail, out_avail); + + memcpy(coder->repeat.buffer + coder->repeat.pos, + in + *in_pos, copy_size); + *in_pos += copy_size; + coder->repeat.pos += copy_size; + + if (coder->repeat.pos == coder->repeat.size) { + coder->repeat.pos = 0; + + if (coder->repeat.size == 1 + && coder->subfilter.code == NULL) + coder->sequence = SEQ_REPEAT_FAST; + else + coder->sequence = SEQ_REPEAT_NORMAL; + } + + break; + } + + case SEQ_DATA: { + // The size of the Data field must be bigger than the number + // of Padding bytes before this Subblock. + assert(coder->size > 0); + if (coder->size <= coder->padding) + return LZMA_DATA_ERROR; + + coder->padding = 0; + + // Limit the amount of input to match the available + // Subblock Data size. + size_t in_limit; + if (in_size - *in_pos > coder->size) + in_limit = *in_pos + coder->size; + else + in_limit = in_size; + + if (coder->subfilter.code == NULL) { + const size_t copy_size = lzma_bufcpy( + in, in_pos, in_limit, + out, out_pos, out_size); + + coder->size -= copy_size; + } else { + const size_t in_start = *in_pos; + const lzma_ret ret = subfilter_decode( + coder, allocator, + in, in_pos, in_limit, + out, out_pos, out_size, + action); + + // Update the number of unprocessed bytes left in + // this Subblock. This assert() is true because + // in_limit prevents *in_pos getting too big. + assert(*in_pos - in_start <= coder->size); + coder->size -= *in_pos - in_start; + + if (ret == LZMA_STREAM_END) { + // End of Subfilter can occur only at + // a Subblock boundary. + if (coder->size != 0) + return LZMA_DATA_ERROR; + + // We need a Subblock with Unset + // Subfilter before more data. + coder->sequence = SEQ_FILTER_END; + break; + } + + if (ret != LZMA_OK) + return ret; + } + + // If we couldn't process the whole Subblock Data yet, return. 
+ if (coder->size > 0) + return LZMA_OK; + + coder->sequence = SEQ_FLAGS; + break; + } + + case SEQ_REPEAT_FAST: { + // Optimization for cases when there is only one byte to + // repeat and no Subfilter. + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = MIN(coder->repeat.count, out_avail); + + memset(out + *out_pos, coder->repeat.buffer[0], copy_size); + + *out_pos += copy_size; + coder->repeat.count -= copy_size; + + if (coder->repeat.count != 0) + return LZMA_OK; + + coder->sequence = SEQ_FLAGS; + break; + } + + case SEQ_REPEAT_NORMAL: + do { + // Cycle the repeat buffer if needed. + if (coder->repeat.pos == coder->repeat.size) { + if (--coder->repeat.count == 0) { + coder->sequence = SEQ_FLAGS; + break; + } + + coder->repeat.pos = 0; + } + + if (coder->subfilter.code == NULL) { + lzma_bufcpy(coder->repeat.buffer, + &coder->repeat.pos, + coder->repeat.size, + out, out_pos, out_size); + } else { + const lzma_ret ret = subfilter_decode( + coder, allocator, + coder->repeat.buffer, + &coder->repeat.pos, + coder->repeat.size, + out, out_pos, out_size, + action); + + if (ret == LZMA_STREAM_END) { + // End of Subfilter can occur only at + // a Subblock boundary. + if (coder->repeat.pos + != coder->repeat.size + || --coder->repeat + .count != 0) + return LZMA_DATA_ERROR; + + // We need a Subblock with Unset + // Subfilter before more data. 
+ coder->sequence = SEQ_FILTER_END; + break; + + } else if (ret != LZMA_OK) { + return ret; + } + } + } while (*out_pos < out_size); + + break; + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static lzma_ret +subblock_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + if (coder->next.code == NULL) + return decode_buffer(coder, allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + while (*out_pos < out_size) { + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + + const lzma_ret ret = coder->next.code( + coder->next.coder, + allocator, in, in_pos, in_size, + coder->temp.buffer, &coder->temp.size, + LZMA_BUFFER_SIZE, action); + + if (ret == LZMA_STREAM_END) + coder->next_finished = true; + else if (coder->temp.size == 0 || ret != LZMA_OK) + return ret; + } + + if (coder->this_finished) { + if (coder->temp.pos != coder->temp.size) + return LZMA_DATA_ERROR; + + if (coder->next_finished) + return LZMA_STREAM_END; + + return LZMA_OK; + } + + const lzma_ret ret = decode_buffer(coder, allocator, + coder->temp.buffer, &coder->temp.pos, + coder->temp.size, + out, out_pos, out_size, action); + + if (ret == LZMA_STREAM_END) + // The next coder in the chain hasn't finished + // yet. If the input data is valid, there + // must be no more output coming, but the + // next coder may still need a little more + // input to detect End of Payload Marker.
+ coder->this_finished = true; + else if (ret != LZMA_OK) + return ret; + else if (coder->next_finished && *out_pos < out_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} + + +static void +subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_next_end(&coder->subfilter, allocator); + lzma_next_end(&coder->filter_flags_decoder, allocator); + lzma_free(coder->filter_flags.options, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &subblock_decode; + next->end = &subblock_decoder_end; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->subfilter = LZMA_NEXT_CODER_INIT; + next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT; + + } else { + lzma_next_end(&next->coder->subfilter, allocator); + lzma_free(next->coder->filter_flags.options, allocator); + } + + next->coder->filter_flags.options = NULL; + + next->coder->sequence = SEQ_FLAGS; + next->coder->padding = 0; + next->coder->next_finished = false; + next->coder->this_finished = false; + next->coder->temp.pos = 0; + next->coder->temp.size = 0; + + if (filters[0].options != NULL) + next->coder->allow_subfilters = ((lzma_options_subblock *)( + filters[0].options))->allow_subfilters; + else + next->coder->allow_subfilters = false; + + return lzma_next_filter_init( + &next->coder->next, allocator, filters + 1); +} Index: contrib/xz/src/liblzma/subblock/subblock_decoder_helper.h =================================================================== --- contrib/xz/src/liblzma/subblock/subblock_decoder_helper.h (revision 0) +++ contrib/xz/src/liblzma/subblock/subblock_decoder_helper.h (revision 0) @@ -0,0 +1,29 @@ 
+/////////////////////////////////////////////////////////////////////////////// +// +/// \file subblock_decoder_helper.h +/// \brief Helper filter for the Subblock decoder +/// +/// This filter is used to indicate End of Input for subfilters needing it. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SUBBLOCK_DECODER_HELPER_H +#define LZMA_SUBBLOCK_DECODER_HELPER_H + +#include "common.h" + + +typedef struct { + bool end_was_reached; +} lzma_options_subblock_helper; + + +extern lzma_ret lzma_subblock_decoder_helper_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif Index: contrib/xz/src/liblzma/lz/lz_decoder.h =================================================================== --- contrib/xz/src/liblzma/lz/lz_decoder.h (revision 0) +++ contrib/xz/src/liblzma/lz/lz_decoder.h (revision 0) @@ -0,0 +1,234 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.h +/// \brief LZ out window +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_DECODER_H +#define LZMA_LZ_DECODER_H + +#include "common.h" + + +typedef struct { + /// Pointer to the dictionary buffer. It can be an allocated buffer + /// internal to liblzma, or it can be a buffer given by the + /// application when in single-call mode (not implemented yet). + uint8_t *buf; + + /// Write position in dictionary. The next byte will be written to + /// buf[pos]. + size_t pos; + + /// Indicates how full the dictionary is.
This is used by + /// dict_is_distance_valid() to detect corrupt files that would + /// read beyond the beginning of the dictionary. + size_t full; + + /// Write limit + size_t limit; + + /// Size of the dictionary + size_t size; + + /// True when dictionary should be reset before decoding more data. + bool need_reset; + +} lzma_dict; + + +typedef struct { + size_t dict_size; + const uint8_t *preset_dict; + size_t preset_dict_size; +} lzma_lz_options; + + +typedef struct { + /// Data specific to the LZ-based decoder + lzma_coder *coder; + + /// Function to decode from in[] to *dict + lzma_ret (*code)(lzma_coder *restrict coder, + lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size); + + void (*reset)(lzma_coder *coder, const void *options); + + /// Set the uncompressed size + void (*set_uncompressed)(lzma_coder *coder, + lzma_vli uncompressed_size); + + /// Free allocated resources + void (*end)(lzma_coder *coder, lzma_allocator *allocator); + +} lzma_lz_decoder; + + +#define LZMA_LZ_DECODER_INIT \ + (lzma_lz_decoder){ \ + .coder = NULL, \ + .code = NULL, \ + .reset = NULL, \ + .set_uncompressed = NULL, \ + .end = NULL, \ + } + + +extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)); + +extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); + +extern void lzma_lz_decoder_uncompressed( + lzma_coder *coder, lzma_vli uncompressed_size); + + +////////////////////// +// Inline functions // +////////////////////// + +/// Get a byte from the history buffer. +static inline uint8_t +dict_get(const lzma_dict *const dict, const uint32_t distance) +{ + return dict->buf[dict->pos - distance - 1 + + (distance < dict->pos ? 0 : dict->size)]; +} + + +/// Test if dictionary is empty. 
+static inline bool +dict_is_empty(const lzma_dict *const dict) +{ + return dict->full == 0; +} + + +/// Validate the match distance +static inline bool +dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) +{ + return dict->full > distance; +} + + +/// Repeat *len bytes at distance. +static inline bool +dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) +{ + // Don't write past the end of the dictionary. + const size_t dict_avail = dict->limit - dict->pos; + uint32_t left = MIN(dict_avail, *len); + *len -= left; + + // Repeat a block of data from the history. Because memcpy() is faster + // than copying byte by byte in a loop, the copying process gets split + // into three cases. + if (distance < left) { + // Source and target areas overlap, thus we can't use + // memcpy() nor even memmove() safely. + do { + dict->buf[dict->pos] = dict_get(dict, distance); + ++dict->pos; + } while (--left > 0); + + } else if (distance < dict->pos) { + // The easiest and fastest case + memcpy(dict->buf + dict->pos, + dict->buf + dict->pos - distance - 1, + left); + dict->pos += left; + + } else { + // The bigger the dictionary, the more rare this + // case occurs. We need to "wrap" the dict, thus + // we might need two memcpy() to copy all the data. + assert(dict->full == dict->size); + const uint32_t copy_pos + = dict->pos - distance - 1 + dict->size; + uint32_t copy_size = dict->size - copy_pos; + + if (copy_size < left) { + memmove(dict->buf + dict->pos, dict->buf + copy_pos, + copy_size); + dict->pos += copy_size; + copy_size = left - copy_size; + memcpy(dict->buf + dict->pos, dict->buf, copy_size); + dict->pos += copy_size; + } else { + memmove(dict->buf + dict->pos, dict->buf + copy_pos, + left); + dict->pos += left; + } + } + + // Update how full the dictionary is. + if (dict->full < dict->pos) + dict->full = dict->pos; + + return unlikely(*len != 0); +} + + +/// Puts one byte into the dictionary. 
Returns true if the dictionary was +/// already full and the byte couldn't be added. +static inline bool +dict_put(lzma_dict *dict, uint8_t byte) +{ + if (unlikely(dict->pos == dict->limit)) + return true; + + dict->buf[dict->pos++] = byte; + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return false; +} + + +/// Copies arbitrary amount of data into the dictionary. +static inline void +dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, + size_t *restrict left) +{ + // NOTE: If we are being given more data than the size of the + // dictionary, it could be possible to optimize the LZ decoder + // so that not everything needs to go through the dictionary. + // This shouldn't be a very common thing in practice though, and + // the slowdown of one extra memcpy() isn't bad compared to how + // much time it would have taken if the data were compressed. + + if (in_size - *in_pos > *left) + in_size = *in_pos + *left; + + *left -= lzma_bufcpy(in, in_pos, in_size, + dict->buf, &dict->pos, dict->limit); + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return; +} + + +static inline void +dict_reset(lzma_dict *dict) +{ + dict->need_reset = true; + return; +} + +#endif Index: contrib/xz/src/liblzma/lz/lz_encoder_hash_table.h =================================================================== --- contrib/xz/src/liblzma/lz/lz_encoder_hash_table.h (revision 0) +++ contrib/xz/src/liblzma/lz/lz_encoder_hash_table.h (revision 0) @@ -0,0 +1,68 @@ +/* This file has been automatically generated by crc32_tablegen.c.
*/ + +const uint32_t lzma_lz_hash_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 
+ 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D +}; Index: contrib/xz/src/liblzma/lz/lz_encoder.c =================================================================== --- contrib/xz/src/liblzma/lz/lz_encoder.c (revision 0) +++ contrib/xz/src/liblzma/lz/lz_encoder.c (revision 0) @@ -0,0 +1,578 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.c +/// \brief LZ in window +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lz_encoder_hash.h" + +// See lz_encoder_hash.h. 
This is a bit hackish but avoids making +// endianness a conditional in makefiles. +#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL) +# include "lz_encoder_hash_table.h" +#endif + + +struct lzma_coder_s { + /// LZ-based encoder e.g. LZMA + lzma_lz_encoder lz; + + /// History buffer and match finder + lzma_mf mf; + + /// Next coder in the chain + lzma_next_coder next; +}; + + +/// \brief Moves the data in the input window to free space for new data +/// +/// mf->buffer is a sliding input window, which keeps mf->keep_size_before +/// bytes of input history available all the time. Now and then we need to +/// "slide" the buffer to make space for the new data to the end of the +/// buffer. At the same time, data older than keep_size_before is dropped. +/// +static void +move_window(lzma_mf *mf) +{ + // Align the move to a multiple of 16 bytes. Some LZ-based encoders + // like LZMA use the lowest bits of mf->read_pos to know the + // alignment of the uncompressed data. We also get better speed + // for memmove() with aligned buffers. + assert(mf->read_pos > mf->keep_size_before); + const uint32_t move_offset + = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15); + + assert(mf->write_pos > move_offset); + const size_t move_size = mf->write_pos - move_offset; + + assert(move_offset + move_size <= mf->size); + + memmove(mf->buffer, mf->buffer + move_offset, move_size); + + mf->offset += move_offset; + mf->read_pos -= move_offset; + mf->read_limit -= move_offset; + mf->write_pos -= move_offset; + + return; +} + + +/// \brief Tries to fill the input window (mf->buffer) +/// +/// If we are the last encoder in the chain, our input data is in in[]. +/// Otherwise we call the next filter in the chain to process in[] and +/// write its output to mf->buffer. +/// +/// This function must not be called once it has returned LZMA_STREAM_END. 
+/// +static lzma_ret +fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, + size_t *in_pos, size_t in_size, lzma_action action) +{ + assert(coder->mf.read_pos <= coder->mf.write_pos); + + // Move the sliding window if needed. + if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after) + move_window(&coder->mf); + + // Maybe this is ugly, but lzma_mf uses uint32_t for most things + // (which I find cleanest), but we need size_t here when filling + // the history window. + size_t write_pos = coder->mf.write_pos; + lzma_ret ret; + if (coder->next.code == NULL) { + // Not using a filter, simply memcpy() as much as possible. + lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer, + &write_pos, coder->mf.size); + + ret = action != LZMA_RUN && *in_pos == in_size + ? LZMA_STREAM_END : LZMA_OK; + + } else { + ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, + coder->mf.buffer, &write_pos, + coder->mf.size, action); + } + + coder->mf.write_pos = write_pos; + + // If end of stream has been reached or flushing completed, we allow + // the encoder to process all the input (that is, read_pos is allowed + // to reach write_pos). Otherwise we keep keep_size_after bytes + // available as prebuffer. + if (ret == LZMA_STREAM_END) { + assert(*in_pos == in_size); + ret = LZMA_OK; + coder->mf.action = action; + coder->mf.read_limit = coder->mf.write_pos; + + } else if (coder->mf.write_pos > coder->mf.keep_size_after) { + // This needs to be done conditionally, because if we got + // only little new input, there may be too little input + // to do any encoding yet. + coder->mf.read_limit = coder->mf.write_pos + - coder->mf.keep_size_after; + } + + // Restart the match finder after finished LZMA_SYNC_FLUSH. + if (coder->mf.pending > 0 + && coder->mf.read_pos < coder->mf.read_limit) { + // Match finder may update coder->pending and expects it to + // start from zero, so use a temporary variable. 
+ const size_t pending = coder->mf.pending; + coder->mf.pending = 0; + + // Rewind read_pos so that the match finder can hash + // the pending bytes. + assert(coder->mf.read_pos >= pending); + coder->mf.read_pos -= pending; + + // Call the skip function directly instead of using + // mf_skip(), since we don't want to touch mf->read_ahead. + coder->mf.skip(&coder->mf, pending); + } + + return ret; +} + + +static lzma_ret +lz_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action) +{ + while (*out_pos < out_size + && (*in_pos < in_size || action != LZMA_RUN)) { + // Read more data to coder->mf.buffer if needed. + if (coder->mf.action == LZMA_RUN && coder->mf.read_pos + >= coder->mf.read_limit) + return_if_error(fill_window(coder, allocator, + in, in_pos, in_size, action)); + + // Encode + const lzma_ret ret = coder->lz.code(coder->lz.coder, + &coder->mf, out, out_pos, out_size); + if (ret != LZMA_OK) { + // Setting this to LZMA_RUN for cases when we are + // flushing. It doesn't matter when finishing or if + // an error occurred. + coder->mf.action = LZMA_RUN; + return ret; + } + } + + return LZMA_OK; +} + + +static bool +lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator, + const lzma_lz_options *lz_options) +{ + // For now, the dictionary size is limited to 1.5 GiB. This may grow + // in the future if needed, but it needs a little more work than just + // changing this check. + if (lz_options->dict_size < LZMA_DICT_SIZE_MIN + || lz_options->dict_size + > (UINT32_C(1) << 30) + (UINT32_C(1) << 29) + || lz_options->nice_len > lz_options->match_len_max) + return true; + + mf->keep_size_before = lz_options->before_size + lz_options->dict_size; + + mf->keep_size_after = lz_options->after_size + + lz_options->match_len_max; + + // To avoid constant memmove()s, allocate some extra space. 
Since + // memmove()s become more expensive when the size of the buffer + // increases, we reserve more space when a large dictionary is + // used to make the memmove() calls rarer. + // + // This works with dictionaries up to about 3 GiB. If bigger + // dictionary is wanted, some extra work is needed: + // - Several variables in lzma_mf have to be changed from uint32_t + // to size_t. + // - Memory usage calculation needs something too, e.g. use uint64_t + // for mf->size. + uint32_t reserve = lz_options->dict_size / 2; + if (reserve > (UINT32_C(1) << 30)) + reserve /= 2; + + reserve += (lz_options->before_size + lz_options->match_len_max + + lz_options->after_size) / 2 + (UINT32_C(1) << 19); + + const uint32_t old_size = mf->size; + mf->size = mf->keep_size_before + reserve + mf->keep_size_after; + + // Deallocate the old history buffer if it exists but has different + // size than what is needed now. + if (mf->buffer != NULL && old_size != mf->size) { + lzma_free(mf->buffer, allocator); + mf->buffer = NULL; + } + + // Match finder options + mf->match_len_max = lz_options->match_len_max; + mf->nice_len = lz_options->nice_len; + + // cyclic_size has to stay smaller than 2 Gi. Note that this doesn't + // mean limiting dictionary size to less than 2 GiB. With a match + // finder that uses multibyte resolution (hashes start at e.g. every + // fourth byte), cyclic_size would stay below 2 Gi even when + // dictionary size is greater than 2 GiB. + // + // It would be possible to allow cyclic_size >= 2 Gi, but then we + // would need to be careful to use 64-bit types in various places + // (size_t could do since we would need bigger than 32-bit address + // space anyway). It would also require either zeroing a multigigabyte + // buffer at initialization (waste of time and RAM) or allow + // normalization in lz_encoder_mf.c to access uninitialized + // memory to keep the code simpler. 
The current way is simple and + // still allows pretty big dictionaries, so I don't expect these + // limits to change. + mf->cyclic_size = lz_options->dict_size + 1; + + // Validate the match finder ID and setup the function pointers. + switch (lz_options->match_finder) { +#ifdef HAVE_MF_HC3 + case LZMA_MF_HC3: + mf->find = &lzma_mf_hc3_find; + mf->skip = &lzma_mf_hc3_skip; + break; +#endif +#ifdef HAVE_MF_HC4 + case LZMA_MF_HC4: + mf->find = &lzma_mf_hc4_find; + mf->skip = &lzma_mf_hc4_skip; + break; +#endif +#ifdef HAVE_MF_BT2 + case LZMA_MF_BT2: + mf->find = &lzma_mf_bt2_find; + mf->skip = &lzma_mf_bt2_skip; + break; +#endif +#ifdef HAVE_MF_BT3 + case LZMA_MF_BT3: + mf->find = &lzma_mf_bt3_find; + mf->skip = &lzma_mf_bt3_skip; + break; +#endif +#ifdef HAVE_MF_BT4 + case LZMA_MF_BT4: + mf->find = &lzma_mf_bt4_find; + mf->skip = &lzma_mf_bt4_skip; + break; +#endif + + default: + return true; + } + + // Calculate the sizes of mf->hash and mf->son and check that + // nice_len is big enough for the selected match finder. + const uint32_t hash_bytes = lz_options->match_finder & 0x0F; + if (hash_bytes > mf->nice_len) + return true; + + const bool is_bt = (lz_options->match_finder & 0x10) != 0; + uint32_t hs; + + if (hash_bytes == 2) { + hs = 0xFFFF; + } else { + // Round dictionary size up to the next 2^n - 1 so it can + // be used as a hash mask. + hs = lz_options->dict_size - 1; + hs |= hs >> 1; + hs |= hs >> 2; + hs |= hs >> 4; + hs |= hs >> 8; + hs >>= 1; + hs |= 0xFFFF; + + if (hs > (UINT32_C(1) << 24)) { + if (hash_bytes == 3) + hs = (UINT32_C(1) << 24) - 1; + else + hs >>= 1; + } + } + + mf->hash_mask = hs; + + ++hs; + if (hash_bytes > 2) + hs += HASH_2_SIZE; + if (hash_bytes > 3) + hs += HASH_3_SIZE; +/* + No match finder uses this at the moment. + if (mf->hash_bytes > 4) + hs += HASH_4_SIZE; +*/ + + // If the above code calculating hs is modified, make sure that + // this assertion stays valid (UINT32_MAX / 5 is not strictly the + // exact limit). 
If it doesn't, you need to calculate that + // hash_size_sum + sons_count cannot overflow. + assert(hs < UINT32_MAX / 5); + + const uint32_t old_count = mf->hash_size_sum + mf->sons_count; + mf->hash_size_sum = hs; + mf->sons_count = mf->cyclic_size; + if (is_bt) + mf->sons_count *= 2; + + const uint32_t new_count = mf->hash_size_sum + mf->sons_count; + + // Deallocate the old hash array if it exists and has different size + // than what is needed now. + if (mf->hash != NULL && old_count != new_count) { + lzma_free(mf->hash, allocator); + mf->hash = NULL; + } + + // Maximum number of match finder cycles + mf->depth = lz_options->depth; + if (mf->depth == 0) { + mf->depth = 16 + (mf->nice_len / 2); + if (!is_bt) + mf->depth /= 2; + } + + return false; +} + + +static bool +lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator, + const lzma_lz_options *lz_options) +{ + // Allocate the history buffer. + if (mf->buffer == NULL) { + mf->buffer = lzma_alloc(mf->size, allocator); + if (mf->buffer == NULL) + return true; + } + + // Use cyclic_size as initial mf->offset. This allows + // avoiding a few branches in the match finders. The downside is + // that match finder needs to be normalized more often, which may + // hurt performance with huge dictionaries. + mf->offset = mf->cyclic_size; + mf->read_pos = 0; + mf->read_ahead = 0; + mf->read_limit = 0; + mf->write_pos = 0; + mf->pending = 0; + + // Allocate match finder's hash array. + const size_t alloc_count = mf->hash_size_sum + mf->sons_count; + +#if UINT32_MAX >= SIZE_MAX / 4 + // Check for integer overflow. (Huge dictionaries are not + // possible on 32-bit CPU.) + if (alloc_count > SIZE_MAX / sizeof(uint32_t)) + return true; +#endif + + if (mf->hash == NULL) { + mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t), + allocator); + if (mf->hash == NULL) + return true; + } + + mf->son = mf->hash + mf->hash_size_sum; + mf->cyclic_pos = 0; + + // Initialize the hash table. 
Since EMPTY_HASH_VALUE is zero, we + // can use memset(). +/* + for (uint32_t i = 0; i < hash_size_sum; ++i) + mf->hash[i] = EMPTY_HASH_VALUE; +*/ + memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t)); + + // We don't need to initialize mf->son, but not doing that will + // make Valgrind complain in normalization (see normalize() in + // lz_encoder_mf.c). + // + // Skipping this initialization is *very* good when big dictionary is + // used but only small amount of data gets actually compressed: most + // of the mf->hash won't get actually allocated by the kernel, so + // we avoid wasting RAM and improve initialization speed a lot. + //memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t)); + + // Handle preset dictionary. + if (lz_options->preset_dict != NULL + && lz_options->preset_dict_size > 0) { + // If the preset dictionary is bigger than the actual + // dictionary, use only the tail. + mf->write_pos = MIN(lz_options->preset_dict_size, mf->size); + memcpy(mf->buffer, lz_options->preset_dict + + lz_options->preset_dict_size - mf->write_pos, + mf->write_pos); + mf->action = LZMA_SYNC_FLUSH; + mf->skip(mf, mf->write_pos); + } + + mf->action = LZMA_RUN; + + return false; +} + + +extern uint64_t +lzma_lz_encoder_memusage(const lzma_lz_options *lz_options) +{ + // Old buffers must not exist when calling lz_encoder_prepare(). + lzma_mf mf = { + .buffer = NULL, + .hash = NULL, + }; + + // Setup the size information into mf. + if (lz_encoder_prepare(&mf, NULL, lz_options)) + return UINT64_MAX; + + // Calculate the memory usage. 
+ return (uint64_t)(mf.hash_size_sum + mf.sons_count) + * sizeof(uint32_t) + + (uint64_t)(mf.size) + sizeof(lzma_coder); +} + + +static void +lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + + lzma_free(coder->mf.hash, allocator); + lzma_free(coder->mf.buffer, allocator); + + if (coder->lz.end != NULL) + coder->lz.end(coder->lz.coder, allocator); + else + lzma_free(coder->lz.coder, allocator); + + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters_null lzma_attribute((unused)), + const lzma_filter *reversed_filters) +{ + if (coder->lz.options_update == NULL) + return LZMA_PROG_ERROR; + + return_if_error(coder->lz.options_update( + coder->lz.coder, reversed_filters)); + + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters + 1); +} + + +extern lzma_ret +lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_encoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)) +{ +#ifdef HAVE_SMALL + // We need that the CRC32 table has been initialized. + lzma_crc32_init(); +#endif + + // Allocate and initialize the base data structure. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &lz_encode; + next->end = &lz_encoder_end; + next->update = &lz_encoder_update; + + next->coder->lz.coder = NULL; + next->coder->lz.code = NULL; + next->coder->lz.end = NULL; + + next->coder->mf.buffer = NULL; + next->coder->mf.hash = NULL; + + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Initialize the LZ-based encoder. 
+ lzma_lz_options lz_options; + return_if_error(lz_init(&next->coder->lz, allocator, + filters[0].options, &lz_options)); + + // Setup the size information into next->coder->mf and deallocate + // old buffers if they have wrong size. + if (lz_encoder_prepare(&next->coder->mf, allocator, &lz_options)) + return LZMA_OPTIONS_ERROR; + + // Allocate new buffers if needed, and do the rest of + // the initialization. + if (lz_encoder_init(&next->coder->mf, allocator, &lz_options)) + return LZMA_MEM_ERROR; + + // Initialize the next filter in the chain, if any. + return lzma_next_filter_init(&next->coder->next, allocator, + filters + 1); +} + + +extern LZMA_API(lzma_bool) +lzma_mf_is_supported(lzma_match_finder mf) +{ + bool ret = false; + +#ifdef HAVE_MF_HC3 + if (mf == LZMA_MF_HC3) + ret = true; +#endif + +#ifdef HAVE_MF_HC4 + if (mf == LZMA_MF_HC4) + ret = true; +#endif + +#ifdef HAVE_MF_BT2 + if (mf == LZMA_MF_BT2) + ret = true; +#endif + +#ifdef HAVE_MF_BT3 + if (mf == LZMA_MF_BT3) + ret = true; +#endif + +#ifdef HAVE_MF_BT4 + if (mf == LZMA_MF_BT4) + ret = true; +#endif + + return ret; +} Index: contrib/xz/src/liblzma/lz/lz_encoder_mf.c =================================================================== --- contrib/xz/src/liblzma/lz/lz_encoder_mf.c (revision 0) +++ contrib/xz/src/liblzma/lz/lz_encoder_mf.c (revision 0) @@ -0,0 +1,753 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_mf.c +/// \brief Match finders +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lz_encoder_hash.h" + + +/// \brief Find matches starting from the current byte +/// +/// \return The length of the longest match found +extern uint32_t +lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) +{ + // Call the match finder. It returns the number of length-distance + // pairs found. + // FIXME: Minimum count is zero, what _exactly_ is the maximum? + const uint32_t count = mf->find(mf, matches); + + // Length of the longest match; assume that no matches were found + // and thus the maximum length is zero. + uint32_t len_best = 0; + + if (count > 0) { +#ifndef NDEBUG + // Validate the matches. + for (uint32_t i = 0; i < count; ++i) { + assert(matches[i].len <= mf->nice_len); + assert(matches[i].dist < mf->read_pos); + assert(memcmp(mf_ptr(mf) - 1, + mf_ptr(mf) - matches[i].dist - 2, + matches[i].len) == 0); + } +#endif + + // The last used element in the array contains + // the longest match. + len_best = matches[count - 1].len; + + // If a match of maximum search length was found, try to + // extend the match to maximum possible length. + if (len_best == mf->nice_len) { + // The limit for the match length is either the + // maximum match length supported by the LZ-based + // encoder or the number of bytes left in the + // dictionary, whichever is smaller. + uint32_t limit = mf_avail(mf) + 1; + if (limit > mf->match_len_max) + limit = mf->match_len_max; + + // Pointer to the byte we just ran through + // the match finder. + const uint8_t *p1 = mf_ptr(mf) - 1; + + // Pointer to the beginning of the match. We need -1 + // here because the match distances are zero based. 
+ const uint8_t *p2 = p1 - matches[count - 1].dist - 1; + + while (len_best < limit + && p1[len_best] == p2[len_best]) + ++len_best; + } + } + + *count_ptr = count; + + // Finally update the read position to indicate that match finder was + // run for this dictionary offset. + ++mf->read_ahead; + + return len_best; +} + + +/// Hash value to indicate unused element in the hash. Since we start the +/// positions from dict_size + 1, zero is always too far to qualify +/// as usable match position. +#define EMPTY_HASH_VALUE 0 + + +/// Normalization must be done when lzma_mf.offset + lzma_mf.read_pos +/// reaches MUST_NORMALIZE_POS. +#define MUST_NORMALIZE_POS UINT32_MAX + + +/// \brief Normalizes hash values +/// +/// The hash arrays store positions of match candidates. The positions are +/// relative to an arbitrary offset that is not the same as the absolute +/// offset in the input stream. The relative position of the current byte +/// is lzma_mf.offset + lzma_mf.read_pos. The distances of the matches are +/// the differences of the current read position and the position found from +/// the hash. +/// +/// To prevent integer overflows of the offsets stored in the hash arrays, +/// we need to "normalize" the stored values now and then. During the +/// normalization, we drop values that indicate distance greater than the +/// dictionary size, thus making space for new values. +static void +normalize(lzma_mf *mf) +{ + assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); + + // In future we may not want to touch the lowest bits, because there + // may be match finders that use larger resolution than one byte. + const uint32_t subvalue + = (MUST_NORMALIZE_POS - mf->cyclic_size); + // & (~(UINT32_C(1) << 10) - 1); + + const uint32_t count = mf->hash_size_sum + mf->sons_count; + uint32_t *hash = mf->hash; + + for (uint32_t i = 0; i < count; ++i) { + // If the distance is greater than the dictionary size, + // we can simply mark the hash element as empty. 
+ // + // NOTE: Only the first mf->hash_size_sum elements are + // initialized for sure. There may be uninitialized elements + // in mf->son. Since we go through both mf->hash and + // mf->son here in normalization, Valgrind may complain + // that the "if" below depends on uninitialized value. In + // this case it is safe to ignore the warning. See also the + // comments in lz_encoder_init() in lz_encoder.c. + if (hash[i] <= subvalue) + hash[i] = EMPTY_HASH_VALUE; + else + hash[i] -= subvalue; + } + + // Update offset to match the new locations. + mf->offset -= subvalue; + + return; +} + + +/// Mark the current byte as processed from point of view of the match finder. +static void +move_pos(lzma_mf *mf) +{ + if (++mf->cyclic_pos == mf->cyclic_size) + mf->cyclic_pos = 0; + + ++mf->read_pos; + assert(mf->read_pos <= mf->write_pos); + + if (unlikely(mf->read_pos + mf->offset == UINT32_MAX)) + normalize(mf); +} + + +/// When flushing, we cannot run the match finder unless there is nice_len +/// bytes available in the dictionary. Instead, we skip running the match +/// finder (indicating that no match was found), and count how many bytes we +/// have ignored this way. +/// +/// When new data is given after the flushing was completed, the match finder +/// is restarted by rewinding mf->read_pos backwards by mf->pending. Then +/// the missed bytes are added to the hash using the match finder's skip +/// function (with small amount of input, it may start using mf->pending +/// again if flushing). +/// +/// Due to this rewinding, we don't touch cyclic_pos or test for +/// normalization. It will be done when the match finder's skip function +/// catches up after a flush. +static void +move_pending(lzma_mf *mf) +{ + ++mf->read_pos; + assert(mf->read_pos <= mf->write_pos); + ++mf->pending; +} + + +/// Calculate len_limit and determine if there is enough input to run +/// the actual match finder code. Sets up "cur" and "pos". 
This macro +/// is used by all find functions and binary tree skip functions. Hash +/// chain skip function doesn't need len_limit so a simpler code is used +/// in them. +#define header(is_bt, len_min, ret_op) \ + uint32_t len_limit = mf_avail(mf); \ + if (mf->nice_len <= len_limit) { \ + len_limit = mf->nice_len; \ + } else if (len_limit < (len_min) \ + || (is_bt && mf->action == LZMA_SYNC_FLUSH)) { \ + assert(mf->action != LZMA_RUN); \ + move_pending(mf); \ + ret_op; \ + } \ + const uint8_t *cur = mf_ptr(mf); \ + const uint32_t pos = mf->read_pos + mf->offset + + +/// Header for find functions. "return 0" indicates that zero matches +/// were found. +#define header_find(is_bt, len_min) \ + header(is_bt, len_min, return 0); \ + uint32_t matches_count = 0 + + +/// Header for a loop in a skip function. "continue" tells to skip the rest +/// of the code in the loop. +#define header_skip(is_bt, len_min) \ + header(is_bt, len_min, continue) + + +/// Calls hc_find_func() or bt_find_func() and calculates the total number +/// of matches found. Updates the dictionary position and returns the number +/// of matches found. +#define call_find(func, len_best) \ +do { \ + matches_count = func(len_limit, pos, cur, cur_match, mf->depth, \ + mf->son, mf->cyclic_pos, mf->cyclic_size, \ + matches + matches_count, len_best) \ + - matches; \ + move_pos(mf); \ + return matches_count; \ +} while (0) + + +//////////////// +// Hash Chain // +//////////////// + +#if defined(HAVE_MF_HC3) || defined(HAVE_MF_HC4) +/// +/// +/// \param len_limit Don't look for matches longer than len_limit. +/// \param pos lzma_mf.read_pos + lzma_mf.offset +/// \param cur Pointer to current byte (mf_ptr(mf)) +/// \param cur_match Start position of the current match candidate +/// \param depth Maximum length of the hash chain +/// \param son lzma_mf.son (contains the hash chain) +/// \param cyclic_pos +/// \param cyclic_size +/// \param matches Array to hold the matches. 
+/// \param len_best The length of the longest match found so far. +static lzma_match * +hc_find_func( + const uint32_t len_limit, + const uint32_t pos, + const uint8_t *const cur, + uint32_t cur_match, + uint32_t depth, + uint32_t *const son, + const uint32_t cyclic_pos, + const uint32_t cyclic_size, + lzma_match *matches, + uint32_t len_best) +{ + son[cyclic_pos] = cur_match; + + while (true) { + const uint32_t delta = pos - cur_match; + if (depth-- == 0 || delta >= cyclic_size) + return matches; + + const uint8_t *const pb = cur - delta; + cur_match = son[cyclic_pos - delta + + (delta > cyclic_pos ? cyclic_size : 0)]; + + if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { + uint32_t len = 0; + while (++len != len_limit) + if (pb[len] != cur[len]) + break; + + if (len_best < len) { + len_best = len; + matches->len = len; + matches->dist = delta - 1; + ++matches; + + if (len == len_limit) + return matches; + } + } + } +} + + +#define hc_find(len_best) \ + call_find(hc_find_func, len_best) + + +#define hc_skip() \ +do { \ + mf->son[mf->cyclic_pos] = cur_match; \ + move_pos(mf); \ +} while (0) + +#endif + + +#ifdef HAVE_MF_HC3 +extern uint32_t +lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(false, 3); + + hash_3_calc(); + + const uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 2; + + if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { + for ( ; len_best != len_limit; ++len_best) + if (*(cur + len_best - delta2) != cur[len_best]) + break; + + matches[0].len = len_best; + matches[0].dist = delta2 - 1; + matches_count = 1; + + if (len_best == len_limit) { + hc_skip(); + return 1; // matches_count + } + } + + hc_find(len_best); +} + + +extern void +lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount) +{ + do { + if (mf_avail(mf) < 3) { + move_pending(mf); + continue; + } 
+ + const uint8_t *cur = mf_ptr(mf); + const uint32_t pos = mf->read_pos + mf->offset; + + hash_3_calc(); + + const uint32_t cur_match + = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + hc_skip(); + + } while (--amount != 0); +} +#endif + + +#ifdef HAVE_MF_HC4 +extern uint32_t +lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(false, 4); + + hash_4_calc(); + + uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t delta3 + = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; + const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value ] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 1; + + if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { + len_best = 2; + matches[0].len = 2; + matches[0].dist = delta2 - 1; + matches_count = 1; + } + + if (delta2 != delta3 && delta3 < mf->cyclic_size + && *(cur - delta3) == *cur) { + len_best = 3; + matches[matches_count++].dist = delta3 - 1; + delta2 = delta3; + } + + if (matches_count != 0) { + for ( ; len_best != len_limit; ++len_best) + if (*(cur + len_best - delta2) != cur[len_best]) + break; + + matches[matches_count - 1].len = len_best; + + if (len_best == len_limit) { + hc_skip(); + return matches_count; + } + } + + if (len_best < 3) + len_best = 3; + + hc_find(len_best); +} + + +extern void +lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount) +{ + do { + if (mf_avail(mf) < 4) { + move_pending(mf); + continue; + } + + const uint8_t *cur = mf_ptr(mf); + const uint32_t pos = mf->read_pos + mf->offset; + + hash_4_calc(); + + const uint32_t cur_match + = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + hc_skip(); + + } while (--amount != 0); +} +#endif + + +///////////////// +// 
Binary Tree // +///////////////// + +#if defined(HAVE_MF_BT2) || defined(HAVE_MF_BT3) || defined(HAVE_MF_BT4) +static lzma_match * +bt_find_func( + const uint32_t len_limit, + const uint32_t pos, + const uint8_t *const cur, + uint32_t cur_match, + uint32_t depth, + uint32_t *const son, + const uint32_t cyclic_pos, + const uint32_t cyclic_size, + lzma_match *matches, + uint32_t len_best) +{ + uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; + uint32_t *ptr1 = son + (cyclic_pos << 1); + + uint32_t len0 = 0; + uint32_t len1 = 0; + + while (true) { + const uint32_t delta = pos - cur_match; + if (depth-- == 0 || delta >= cyclic_size) { + *ptr0 = EMPTY_HASH_VALUE; + *ptr1 = EMPTY_HASH_VALUE; + return matches; + } + + uint32_t *const pair = son + ((cyclic_pos - delta + + (delta > cyclic_pos ? cyclic_size : 0)) + << 1); + + const uint8_t *const pb = cur - delta; + uint32_t len = MIN(len0, len1); + + if (pb[len] == cur[len]) { + while (++len != len_limit) + if (pb[len] != cur[len]) + break; + + if (len_best < len) { + len_best = len; + matches->len = len; + matches->dist = delta - 1; + ++matches; + + if (len == len_limit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return matches; + } + } + } + + if (pb[len] < cur[len]) { + *ptr1 = cur_match; + ptr1 = pair + 1; + cur_match = *ptr1; + len1 = len; + } else { + *ptr0 = cur_match; + ptr0 = pair; + cur_match = *ptr0; + len0 = len; + } + } +} + + +static void +bt_skip_func( + const uint32_t len_limit, + const uint32_t pos, + const uint8_t *const cur, + uint32_t cur_match, + uint32_t depth, + uint32_t *const son, + const uint32_t cyclic_pos, + const uint32_t cyclic_size) +{ + uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; + uint32_t *ptr1 = son + (cyclic_pos << 1); + + uint32_t len0 = 0; + uint32_t len1 = 0; + + while (true) { + const uint32_t delta = pos - cur_match; + if (depth-- == 0 || delta >= cyclic_size) { + *ptr0 = EMPTY_HASH_VALUE; + *ptr1 = EMPTY_HASH_VALUE; + return; + } + + uint32_t *pair = son + ((cyclic_pos - delta + + 
(delta > cyclic_pos ? cyclic_size : 0)) + << 1); + const uint8_t *pb = cur - delta; + uint32_t len = MIN(len0, len1); + + if (pb[len] == cur[len]) { + while (++len != len_limit) + if (pb[len] != cur[len]) + break; + + if (len == len_limit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + + if (pb[len] < cur[len]) { + *ptr1 = cur_match; + ptr1 = pair + 1; + cur_match = *ptr1; + len1 = len; + } else { + *ptr0 = cur_match; + ptr0 = pair; + cur_match = *ptr0; + len0 = len; + } + } +} + + +#define bt_find(len_best) \ + call_find(bt_find_func, len_best) + +#define bt_skip() \ +do { \ + bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, \ + mf->son, mf->cyclic_pos, \ + mf->cyclic_size); \ + move_pos(mf); \ +} while (0) + +#endif + + +#ifdef HAVE_MF_BT2 +extern uint32_t +lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(true, 2); + + hash_2_calc(); + + const uint32_t cur_match = mf->hash[hash_value]; + mf->hash[hash_value] = pos; + + bt_find(1); +} + + +extern void +lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount) +{ + do { + header_skip(true, 2); + + hash_2_calc(); + + const uint32_t cur_match = mf->hash[hash_value]; + mf->hash[hash_value] = pos; + + bt_skip(); + + } while (--amount != 0); +} +#endif + + +#ifdef HAVE_MF_BT3 +extern uint32_t +lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(true, 3); + + hash_3_calc(); + + const uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 2; + + if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { + for ( ; len_best != len_limit; ++len_best) + if (*(cur + len_best - delta2) != cur[len_best]) + break; + + matches[0].len = len_best; + matches[0].dist = delta2 - 1; + matches_count = 1; + + if (len_best == len_limit) { + bt_skip(); + return 1; // matches_count + } + } + + bt_find(len_best); +} + + +extern void 
+lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount) +{ + do { + header_skip(true, 3); + + hash_3_calc(); + + const uint32_t cur_match + = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + bt_skip(); + + } while (--amount != 0); +} +#endif + + +#ifdef HAVE_MF_BT4 +extern uint32_t +lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(true, 4); + + hash_4_calc(); + + uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t delta3 + = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; + const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 1; + + if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { + len_best = 2; + matches[0].len = 2; + matches[0].dist = delta2 - 1; + matches_count = 1; + } + + if (delta2 != delta3 && delta3 < mf->cyclic_size + && *(cur - delta3) == *cur) { + len_best = 3; + matches[matches_count++].dist = delta3 - 1; + delta2 = delta3; + } + + if (matches_count != 0) { + for ( ; len_best != len_limit; ++len_best) + if (*(cur + len_best - delta2) != cur[len_best]) + break; + + matches[matches_count - 1].len = len_best; + + if (len_best == len_limit) { + bt_skip(); + return matches_count; + } + } + + if (len_best < 3) + len_best = 3; + + bt_find(len_best); +} + + +extern void +lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount) +{ + do { + header_skip(true, 4); + + hash_4_calc(); + + const uint32_t cur_match + = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + bt_skip(); + + } while (--amount != 0); +} +#endif Index: contrib/xz/src/liblzma/lz/lz_encoder.h =================================================================== --- contrib/xz/src/liblzma/lz/lz_encoder.h 
(revision 0) +++ contrib/xz/src/liblzma/lz/lz_encoder.h (revision 0) @@ -0,0 +1,328 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.h +/// \brief LZ in window and match finder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_H +#define LZMA_LZ_ENCODER_H + +#include "common.h" + + +/// A table of these is used by the LZ-based encoder to hold +/// the length-distance pairs found by the match finder. +typedef struct { + uint32_t len; + uint32_t dist; +} lzma_match; + + +typedef struct lzma_mf_s lzma_mf; +struct lzma_mf_s { + /////////////// + // In Window // + /////////////// + + /// Pointer to buffer with data to be compressed + uint8_t *buffer; + + /// Total size of the allocated buffer (that is, including all + /// the extra space) + uint32_t size; + + /// Number of bytes that must be kept available in our input history. + /// That is, once keep_size_before bytes have been processed, + /// buffer[read_pos - keep_size_before] is the oldest byte that + /// must be available for reading. + uint32_t keep_size_before; + + /// Number of bytes that must be kept in buffer after read_pos. + /// That is, read_pos <= write_pos - keep_size_after as long as + /// action is LZMA_RUN; when action != LZMA_RUN, read_pos is allowed + /// to reach write_pos so that the last bytes get encoded too. + uint32_t keep_size_after; + + /// Match finders store locations of matches using 32-bit integers. + /// To avoid adjusting several megabytes of integers every time the + /// input window is moved with move_window, we only adjust the + /// offset of the buffer. Thus, buffer[value_in_hash_table - offset] + /// is the byte pointed by value_in_hash_table. 
+ uint32_t offset; + + /// buffer[read_pos] is the next byte to run through the match + /// finder. This is incremented in the match finder once the byte + /// has been processed. + uint32_t read_pos; + + /// Number of bytes that have been ran through the match finder, but + /// which haven't been encoded by the LZ-based encoder yet. + uint32_t read_ahead; + + /// As long as read_pos is less than read_limit, there is enough + /// input available in buffer for at least one encoding loop. + /// + /// Because of the stateful API, read_limit may and will get greater + /// than read_pos quite often. This is taken into account when + /// calculating the value for keep_size_after. + uint32_t read_limit; + + /// buffer[write_pos] is the first byte that doesn't contain valid + /// uncompressed data; that is, the next input byte will be copied + /// to buffer[write_pos]. + uint32_t write_pos; + + /// Number of bytes not hashed before read_pos. This is needed to + /// restart the match finder after LZMA_SYNC_FLUSH. + uint32_t pending; + + ////////////////// + // Match Finder // + ////////////////// + + /// Find matches. Returns the number of distance-length pairs written + /// to the matches array. This is called only via lzma_mf_find(). + uint32_t (*find)(lzma_mf *mf, lzma_match *matches); + + /// Skips num bytes. This is like find() but doesn't make the + /// distance-length pairs available, thus being a little faster. + /// This is called only via mf_skip(). + void (*skip)(lzma_mf *mf, uint32_t num); + + uint32_t *hash; + uint32_t *son; + uint32_t cyclic_pos; + uint32_t cyclic_size; // Must be dictionary size + 1. + uint32_t hash_mask; + + /// Maximum number of loops in the match finder + uint32_t depth; + + /// Maximum length of a match that the match finder will try to find. + uint32_t nice_len; + + /// Maximum length of a match supported by the LZ-based encoder. 
+ /// If the longest match found by the match finder is nice_len, + /// mf_find() tries to expand it up to match_len_max bytes. + uint32_t match_len_max; + + /// When running out of input, binary tree match finders need to know + /// if it is due to flushing or finishing. The action is used also + /// by the LZ-based encoders themselves. + lzma_action action; + + /// Number of elements in hash[] + uint32_t hash_size_sum; + + /// Number of elements in son[] + uint32_t sons_count; +}; + + +typedef struct { + /// Extra amount of data to keep available before the "actual" + /// dictionary. + size_t before_size; + + /// Size of the history buffer + size_t dict_size; + + /// Extra amount of data to keep available after the "actual" + /// dictionary. + size_t after_size; + + /// Maximum length of a match that the LZ-based encoder can accept. + /// This is used to extend matches of length nice_len to the + /// maximum possible length. + size_t match_len_max; + + /// Match finder will search matches up to this length. + /// This must be less than or equal to match_len_max. + size_t nice_len; + + /// Type of the match finder to use + lzma_match_finder match_finder; + + /// Maximum search depth + uint32_t depth; + + /// TODO: Comment + const uint8_t *preset_dict; + + uint32_t preset_dict_size; + +} lzma_lz_options; + + +// The total usable buffer space at any moment outside the match finder: +// before_size + dict_size + after_size + match_len_max +// +// In reality, there's some extra space allocated to prevent the number of +// memmove() calls reasonable. The bigger the dict_size is, the bigger +// this extra buffer will be since with bigger dictionaries memmove() would +// also take longer. +// +// A single encoder loop in the LZ-based encoder may call the match finder +// (mf_find() or mf_skip()) at most after_size times. In other words, +// a single encoder loop may increment lzma_mf.read_pos at most after_size +// times. 
Since matches are looked up to +// lzma_mf.buffer[lzma_mf.read_pos + match_len_max - 1], the total +// amount of extra buffer needed after dict_size becomes +// after_size + match_len_max. +// +// before_size has two uses. The first one is to keep literals available +// in cases when the LZ-based encoder has made some read ahead. +// TODO: Maybe this could be changed by making the LZ-based encoders to +// store the actual literals as they do with length-distance pairs. +// +// Algorithms such as LZMA2 first try to compress a chunk, and then check +// if the encoded result is smaller than the uncompressed one. If the chunk +// was uncompressible, it is better to store it in uncompressed form in +// the output stream. To do this, the whole uncompressed chunk has to be +// still available in the history buffer. before_size achieves that. + + +typedef struct { + /// Data specific to the LZ-based encoder + lzma_coder *coder; + + /// Function to encode from *dict to out[] + lzma_ret (*code)(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size); + + /// Free allocated resources + void (*end)(lzma_coder *coder, lzma_allocator *allocator); + + /// Update the options in the middle of the encoding. + lzma_ret (*options_update)(lzma_coder *coder, + const lzma_filter *filter); + +} lzma_lz_encoder; + + +// Basic steps: +// 1. Input gets copied into the dictionary. +// 2. Data in dictionary gets run through the match finder byte by byte. +// 3. The literals and matches are encoded using e.g. LZMA. +// +// The bytes that have been ran through the match finder, but not encoded yet, +// are called `read ahead'. + + +/// Get pointer to the first byte not ran through the match finder +static inline const uint8_t * +mf_ptr(const lzma_mf *mf) +{ + return mf->buffer + mf->read_pos; +} + + +/// Get the number of bytes that haven't been ran through the match finder yet. 
+static inline uint32_t +mf_avail(const lzma_mf *mf) +{ + return mf->write_pos - mf->read_pos; +} + + +/// Get the number of bytes that haven't been encoded yet (some of these +/// bytes may have been ran through the match finder though). +static inline uint32_t +mf_unencoded(const lzma_mf *mf) +{ + return mf->write_pos - mf->read_pos + mf->read_ahead; +} + + +/// Calculate the absolute offset from the beginning of the most recent +/// dictionary reset. Only the lowest four bits are important, so there's no +/// problem that we don't know the 64-bit size of the data encoded so far. +/// +/// NOTE: When moving the input window, we need to do it so that the lowest +/// bits of dict->read_pos are not modified to keep this macro working +/// as intended. +static inline uint32_t +mf_position(const lzma_mf *mf) +{ + return mf->read_pos - mf->read_ahead; +} + + +/// Since everything else begins with mf_, use it also for lzma_mf_find(). +#define mf_find lzma_mf_find + + +/// Skip the given number of bytes. This is used when a good match was found. +/// For example, if mf_find() finds a match of 200 bytes long, the first byte +/// of that match was already consumed by mf_find(), and the rest 199 bytes +/// have to be skipped with mf_skip(mf, 199). +static inline void +mf_skip(lzma_mf *mf, uint32_t amount) +{ + if (amount != 0) { + mf->skip(mf, amount); + mf->read_ahead += amount; + } +} + + +/// Copies at most *left number of bytes from the history buffer +/// to out[]. This is needed by LZMA2 to encode uncompressed chunks. 
+static inline void +mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size, + size_t *left) +{ + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = MIN(out_avail, *left); + + assert(mf->read_ahead == 0); + assert(mf->read_pos >= *left); + + memcpy(out + *out_pos, mf->buffer + mf->read_pos - *left, + copy_size); + + *out_pos += copy_size; + *left -= copy_size; + return; +} + + +extern lzma_ret lzma_lz_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_encoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)); + + +extern uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options); + + +// These are only for LZ encoder's internal use. +extern uint32_t lzma_mf_find( + lzma_mf *mf, uint32_t *count, lzma_match *matches); + +extern uint32_t lzma_mf_hc3_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_hc3_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_hc4_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_hc4_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt2_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt2_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt3_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt3_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt4_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt4_skip(lzma_mf *dict, uint32_t amount); + +#endif Index: contrib/xz/src/liblzma/lz/lz_decoder.c =================================================================== --- contrib/xz/src/liblzma/lz/lz_decoder.c (revision 0) +++ contrib/xz/src/liblzma/lz/lz_decoder.c (revision 0) @@ -0,0 +1,299 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.c +/// \brief LZ out window +/// +// Authors: Igor Pavlov +// Lasse 
Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +// liblzma supports multiple LZ77-based filters. The LZ part is shared +// between these filters. The LZ code takes care of dictionary handling +// and passing the data between filters in the chain. The filter-specific +// part decodes from the input buffer to the dictionary. + + +#include "lz_decoder.h" + + +struct lzma_coder_s { + /// Dictionary (history buffer) + lzma_dict dict; + + /// The actual LZ-based decoder e.g. LZMA + lzma_lz_decoder lz; + + /// Next filter in the chain, if any. Note that LZMA and LZMA2 are + /// only allowed as the last filter, but the long-range filter in + /// future can be in the middle of the chain. + lzma_next_coder next; + + /// True if the next filter in the chain has returned LZMA_STREAM_END. + bool next_finished; + + /// True if the LZ decoder (e.g. LZMA) has detected end of payload + /// marker. This may become true before next_finished becomes true. + bool this_finished; + + /// Temporary buffer needed when the LZ-based filter is not the last + /// filter in the chain. The output of the next filter is first + /// decoded into buffer[], which is then used as input for the actual + /// LZ-based decoder. + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; +}; + + +static void +lz_decoder_reset(lzma_coder *coder) +{ + coder->dict.pos = 0; + coder->dict.full = 0; + coder->dict.buf[coder->dict.size - 1] = '\0'; + coder->dict.need_reset = false; + return; +} + + +static lzma_ret +decode_buffer(lzma_coder *coder, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size) +{ + while (true) { + // Wrap the dictionary if needed. 
+ if (coder->dict.pos == coder->dict.size) + coder->dict.pos = 0; + + // Store the current dictionary position. It is needed to know + // where to start copying to the out[] buffer. + const size_t dict_start = coder->dict.pos; + + // Calculate how much we allow coder->lz.code() to decode. + // It must not decode past the end of the dictionary + // buffer, and we don't want it to decode more than is + // actually needed to fill the out[] buffer. + coder->dict.limit = coder->dict.pos + MIN(out_size - *out_pos, + coder->dict.size - coder->dict.pos); + + // Call the coder->lz.code() to do the actual decoding. + const lzma_ret ret = coder->lz.code( + coder->lz.coder, &coder->dict, + in, in_pos, in_size); + + // Copy the decoded data from the dictionary to the out[] + // buffer. + const size_t copy_size = coder->dict.pos - dict_start; + assert(copy_size <= out_size - *out_pos); + memcpy(out + *out_pos, coder->dict.buf + dict_start, + copy_size); + *out_pos += copy_size; + + // Reset the dictionary if so requested by coder->lz.code(). + if (coder->dict.need_reset) { + lz_decoder_reset(coder); + + // Since we reset dictionary, we don't check if + // dictionary became full. + if (ret != LZMA_OK || *out_pos == out_size) + return ret; + } else { + // Return if everything got decoded or an error + // occurred, or if there's no more data to decode. + // + // Note that detecting if there's something to decode + // is done by looking if dictionary become full + // instead of looking if *in_pos == in_size. This + // is because it is possible that all the input was + // consumed already but some data is pending to be + // written to the dictionary. 
+ if (ret != LZMA_OK || *out_pos == out_size + || coder->dict.pos < coder->dict.size) + return ret; + } + } +} + + +static lzma_ret +lz_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + if (coder->next.code == NULL) + return decode_buffer(coder, in, in_pos, in_size, + out, out_pos, out_size); + + // We aren't the last coder in the chain, we need to decode + // our input to a temporary buffer. + while (*out_pos < out_size) { + // Fill the temporary buffer if it is empty. + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + + const lzma_ret ret = coder->next.code( + coder->next.coder, + allocator, in, in_pos, in_size, + coder->temp.buffer, &coder->temp.size, + LZMA_BUFFER_SIZE, action); + + if (ret == LZMA_STREAM_END) + coder->next_finished = true; + else if (ret != LZMA_OK || coder->temp.size == 0) + return ret; + } + + if (coder->this_finished) { + if (coder->temp.size != 0) + return LZMA_DATA_ERROR; + + if (coder->next_finished) + return LZMA_STREAM_END; + + return LZMA_OK; + } + + const lzma_ret ret = decode_buffer(coder, coder->temp.buffer, + &coder->temp.pos, coder->temp.size, + out, out_pos, out_size); + + if (ret == LZMA_STREAM_END) + coder->this_finished = true; + else if (ret != LZMA_OK) + return ret; + else if (coder->next_finished && *out_pos < out_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} + + +static void +lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder->dict.buf, allocator); + + if (coder->lz.end != NULL) + coder->lz.end(coder->lz.coder, allocator); + else + lzma_free(coder->lz.coder, allocator); + + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_lz_decoder_init(lzma_next_coder 
*next, lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)) +{ + // Allocate the base structure if it isn't already allocated. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &lz_decode; + next->end = &lz_decoder_end; + + next->coder->dict.buf = NULL; + next->coder->dict.size = 0; + next->coder->lz = LZMA_LZ_DECODER_INIT; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Allocate and initialize the LZ-based decoder. It will also give + // us the dictionary size. + lzma_lz_options lz_options; + return_if_error(lz_init(&next->coder->lz, allocator, + filters[0].options, &lz_options)); + + // If the dictionary size is very small, increase it to 4096 bytes. + // This is to prevent constant wrapping of the dictionary, which + // would slow things down. The downside is that since we don't check + // separately for the real dictionary size, we may happily accept + // corrupt files. + if (lz_options.dict_size < 4096) + lz_options.dict_size = 4096; + + // Make dictionary size a multipe of 16. Some LZ-based decoders like + // LZMA use the lowest bits lzma_dict.pos to know the alignment of the + // data. Aligned buffer is also good when memcpying from the + // dictionary to the output buffer, since applications are + // recommended to give aligned buffers to liblzma. + // + // Avoid integer overflow. + if (lz_options.dict_size > SIZE_MAX - 15) + return LZMA_MEM_ERROR; + + lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); + + // Allocate and initialize the dictionary. 
+ if (next->coder->dict.size != lz_options.dict_size) { + lzma_free(next->coder->dict.buf, allocator); + next->coder->dict.buf + = lzma_alloc(lz_options.dict_size, allocator); + if (next->coder->dict.buf == NULL) + return LZMA_MEM_ERROR; + + next->coder->dict.size = lz_options.dict_size; + } + + lz_decoder_reset(next->coder); + + // Use the preset dictionary if it was given to us. + if (lz_options.preset_dict != NULL + && lz_options.preset_dict_size > 0) { + // If the preset dictionary is bigger than the actual + // dictionary, copy only the tail. + const size_t copy_size = MIN(lz_options.preset_dict_size, + lz_options.dict_size); + const size_t offset = lz_options.preset_dict_size - copy_size; + memcpy(next->coder->dict.buf, lz_options.preset_dict + offset, + copy_size); + next->coder->dict.pos = copy_size; + next->coder->dict.full = copy_size; + } + + // Miscellaneous initializations + next->coder->next_finished = false; + next->coder->this_finished = false; + next->coder->temp.pos = 0; + next->coder->temp.size = 0; + + // Initialize the next filter in the chain, if any. + return lzma_next_filter_init(&next->coder->next, allocator, + filters + 1); +} + + +extern uint64_t +lzma_lz_decoder_memusage(size_t dictionary_size) +{ + return sizeof(lzma_coder) + (uint64_t)(dictionary_size); +} + + +extern void +lzma_lz_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) +{ + coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size); +} Index: contrib/xz/src/liblzma/lz/lz_encoder_hash.h =================================================================== --- contrib/xz/src/liblzma/lz/lz_encoder_hash.h (revision 0) +++ contrib/xz/src/liblzma/lz/lz_encoder_hash.h (revision 0) @@ -0,0 +1,108 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_hash.h +/// \brief Hash macros for match finders +// +// Author: Igor Pavlov +// +// This file has been put into the public domain. 
+// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_HASH_H +#define LZMA_LZ_ENCODER_HASH_H + +#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL) + // This is to make liblzma produce the same output on big endian + // systems that it does on little endian systems. lz_encoder.c + // takes care of including the actual table. + extern const uint32_t lzma_lz_hash_table[256]; +# define hash_table lzma_lz_hash_table +#else +# include "check.h" +# define hash_table lzma_crc32_table[0] +#endif + +#define HASH_2_SIZE (UINT32_C(1) << 10) +#define HASH_3_SIZE (UINT32_C(1) << 16) +#define HASH_4_SIZE (UINT32_C(1) << 20) + +#define HASH_2_MASK (HASH_2_SIZE - 1) +#define HASH_3_MASK (HASH_3_SIZE - 1) +#define HASH_4_MASK (HASH_4_SIZE - 1) + +#define FIX_3_HASH_SIZE (HASH_2_SIZE) +#define FIX_4_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE) +#define FIX_5_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE + HASH_4_SIZE) + +// Endianness doesn't matter in hash_2_calc() (no effect on the output). +#ifdef TUKLIB_FAST_UNALIGNED_ACCESS +# define hash_2_calc() \ + const uint32_t hash_value = *(const uint16_t *)(cur); +#else +# define hash_2_calc() \ + const uint32_t hash_value \ + = (uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8) +#endif + +#define hash_3_calc() \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask + +#define hash_4_calc() \ + const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_3_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ + const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \ + ^ (hash_table[cur[3]] << 5)) & mf->hash_mask + + +// The following are not currently used. 
+
+// hash_5_calc() extends hash_4_calc() with a fifth input byte. Like the
+// other macros it expects cur (input bytes), hash_table and mf->hash_mask
+// to be in scope, and it declares temp, hash_2_value, hash_3_value,
+// hash_4_value and hash_value in the caller's scope.
+//
+// hash_4_value is first computed unmasked because the full value is mixed
+// into hash_value; only after that is it reduced with HASH_4_MASK.
+#define hash_5_calc() \
+	const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \
+	const uint32_t hash_2_value = temp & HASH_2_MASK; \
+	const uint32_t hash_3_value \
+			= (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \
+	uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
+			^ (hash_table[cur[3]] << 5)); \
+	const uint32_t hash_value \
+			= (hash_4_value ^ (hash_table[cur[4]] << 3)) \
+				& mf->hash_mask; \
+	hash_4_value &= HASH_4_MASK
+
+/*
+#define hash_zip_calc() \
+	const uint32_t hash_value \
+			= (((uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8)) \
+				^ hash_table[cur[2]]) & 0xFFFF
+*/
+
+#define hash_zip_calc() \
+	const uint32_t hash_value \
+			= (((uint32_t)(cur[2]) | ((uint32_t)(cur[0]) << 8)) \
+				^ hash_table[cur[1]]) & 0xFFFF
+
+#define mt_hash_2_calc() \
+	const uint32_t hash_2_value \
+			= (hash_table[cur[0]] ^ cur[1]) & HASH_2_MASK
+
+#define mt_hash_3_calc() \
+	const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \
+	const uint32_t hash_2_value = temp & HASH_2_MASK; \
+	const uint32_t hash_3_value \
+			= (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK
+
+#define mt_hash_4_calc() \
+	const uint32_t temp = hash_table[cur[0]] ^ cur[1]; \
+	const uint32_t hash_2_value = temp & HASH_2_MASK; \
+	const uint32_t hash_3_value \
+			= (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \
+	const uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) ^ \
+			(hash_table[cur[3]] << 5)) & HASH_4_MASK
+
+#endif
Index: contrib/xz/src/liblzma/common/hardware_physmem.c
===================================================================
--- contrib/xz/src/liblzma/common/hardware_physmem.c (revision 0)
+++ contrib/xz/src/liblzma/common/hardware_physmem.c (revision 0)
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file hardware_physmem.c
+/// \brief Get the total amount of physical memory (RAM)
+//
+// Author: Jonathan Nieder
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+#include "tuklib_physmem.h"
+
+
+extern LZMA_API(uint64_t)
+lzma_physmem(void)
+{
+	// It is simpler to make lzma_physmem() a wrapper for
+	// tuklib_physmem() than to hack appropriate symbol visibility
+	// support for the tuklib modules.
+	return tuklib_physmem();
+}
Index: contrib/xz/src/liblzma/common/stream_encoder.h
===================================================================
--- contrib/xz/src/liblzma/common/stream_encoder.h (revision 0)
+++ contrib/xz/src/liblzma/common/stream_encoder.h (revision 0)
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_encoder.h
+/// \brief Encodes .xz Streams
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_STREAM_ENCODER_H
+#define LZMA_STREAM_ENCODER_H
+
+#include "common.h"
+
+
+extern lzma_ret lzma_stream_encoder_init(
+		lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter *filters, lzma_check check);
+
+#endif
Index: contrib/xz/src/liblzma/common/filter_flags_encoder.c
===================================================================
--- contrib/xz/src/liblzma/common/filter_flags_encoder.c (revision 0)
+++ contrib/xz/src/liblzma/common/filter_flags_encoder.c (revision 0)
@@ -0,0 +1,56 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_flags_encoder.c
+/// \brief Encodes a Filter Flags field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_encoder.h" + + +extern LZMA_API(lzma_ret) +lzma_filter_flags_size(uint32_t *size, const lzma_filter *filter) +{ + if (filter->id >= LZMA_FILTER_RESERVED_START) + return LZMA_PROG_ERROR; + + return_if_error(lzma_properties_size(size, filter)); + + *size += lzma_vli_size(filter->id) + lzma_vli_size(*size); + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_filter_flags_encode(const lzma_filter *filter, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Filter ID + if (filter->id >= LZMA_FILTER_RESERVED_START) + return LZMA_PROG_ERROR; + + return_if_error(lzma_vli_encode(filter->id, NULL, + out, out_pos, out_size)); + + // Size of Properties + uint32_t props_size; + return_if_error(lzma_properties_size(&props_size, filter)); + return_if_error(lzma_vli_encode(props_size, NULL, + out, out_pos, out_size)); + + // Filter Properties + if (out_size - *out_pos < props_size) + return LZMA_PROG_ERROR; + + return_if_error(lzma_properties_encode(filter, out + *out_pos)); + + *out_pos += props_size; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/block_header_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/block_header_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/block_header_encoder.c (revision 0) @@ -0,0 +1,132 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header_encoder.c +/// \brief Encodes Block Header for .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "check.h" + + +extern LZMA_API(lzma_ret) +lzma_block_header_size(lzma_block *block) +{ + if (block->version != 0) + return LZMA_OPTIONS_ERROR; + + // Block Header Size + Block Flags + CRC32. + uint32_t size = 1 + 1 + 4; + + // Compressed Size + if (block->compressed_size != LZMA_VLI_UNKNOWN) { + const uint32_t add = lzma_vli_size(block->compressed_size); + if (add == 0 || block->compressed_size == 0) + return LZMA_PROG_ERROR; + + size += add; + } + + // Uncompressed Size + if (block->uncompressed_size != LZMA_VLI_UNKNOWN) { + const uint32_t add = lzma_vli_size(block->uncompressed_size); + if (add == 0) + return LZMA_PROG_ERROR; + + size += add; + } + + // List of Filter Flags + if (block->filters == NULL || block->filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_PROG_ERROR; + + for (size_t i = 0; block->filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + // Don't allow too many filters. + if (i == LZMA_FILTERS_MAX) + return LZMA_PROG_ERROR; + + uint32_t add; + return_if_error(lzma_filter_flags_size(&add, + block->filters + i)); + + size += add; + } + + // Pad to a multiple of four bytes. + block->header_size = (size + 3) & ~UINT32_C(3); + + // NOTE: We don't verify that the encoded size of the Block stays + // within limits. This is because it is possible that we are called + // with exaggerated Compressed Size (e.g. LZMA_VLI_MAX) to reserve + // space for Block Header, and later called again with lower, + // real values. + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_block_header_encode(const lzma_block *block, uint8_t *out) +{ + // Validate everything but filters. + if (lzma_block_unpadded_size(block) == 0 + || !lzma_vli_is_valid(block->uncompressed_size)) + return LZMA_PROG_ERROR; + + // Indicate the size of the buffer _excluding_ the CRC32 field. + const size_t out_size = block->header_size - 4; + + // Store the Block Header Size. 
+ out[0] = out_size / 4; + + // We write Block Flags in pieces. + out[1] = 0x00; + size_t out_pos = 2; + + // Compressed Size + if (block->compressed_size != LZMA_VLI_UNKNOWN) { + return_if_error(lzma_vli_encode(block->compressed_size, NULL, + out, &out_pos, out_size)); + + out[1] |= 0x40; + } + + // Uncompressed Size + if (block->uncompressed_size != LZMA_VLI_UNKNOWN) { + return_if_error(lzma_vli_encode(block->uncompressed_size, NULL, + out, &out_pos, out_size)); + + out[1] |= 0x80; + } + + // Filter Flags + if (block->filters == NULL || block->filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_PROG_ERROR; + + size_t filter_count = 0; + do { + // There can be a maximum of four filters. + if (filter_count == LZMA_FILTERS_MAX) + return LZMA_PROG_ERROR; + + return_if_error(lzma_filter_flags_encode( + block->filters + filter_count, + out, &out_pos, out_size)); + + } while (block->filters[++filter_count].id != LZMA_VLI_UNKNOWN); + + out[1] |= filter_count - 1; + + // Padding + memzero(out + out_pos, out_size - out_pos); + + // CRC32 + unaligned_write32le(out + out_size, lzma_crc32(out, out_size, 0)); + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/chunk_size.c =================================================================== --- contrib/xz/src/liblzma/common/chunk_size.c (revision 0) +++ contrib/xz/src/liblzma/common/chunk_size.c (revision 0) @@ -0,0 +1,67 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file chunk_size.c +/// \brief Finds out the minimal reasonable chunk size for a filter chain +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +/** + * \brief Finds out the minimal reasonable chunk size for a filter chain + * + * This function helps determining the Uncompressed Sizes of the Blocks when + * doing multi-threaded encoding. + * + * When compressing a large file on a system having multiple CPUs or CPU + * cores, the file can be split into smaller chunks, that are compressed + * independently into separate Blocks in the same .lzma Stream. + * + * \return Minimum reasonable Uncompressed Size of a Block. The + * recommended minimum Uncompressed Size is between this value + * and the value times two. + + Zero if the Uncompressed Sizes of Blocks don't matter + */ +extern LZMA_API(size_t) +lzma_chunk_size(const lzma_options_filter *filters) +{ + while (filters->id != LZMA_VLI_UNKNOWN) { + switch (filters->id) { + // TODO LZMA_FILTER_SPARSE + + case LZMA_FILTER_COPY: + case LZMA_FILTER_SUBBLOCK: + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + // These are very fast, thus there is no point in + // splitting the data into smaller blocks. + break; + + case LZMA_FILTER_LZMA1: + // The block sizes of the possible next filters in + // the chain are irrelevant after the LZMA filter. + return ((lzma_options_lzma *)(filters->options)) + ->dictionary_size; + + default: + // Unknown filters + return 0; + } + + ++filters; + } + + // Indicate that splitting would be useless. 
+ return SIZE_MAX; +} Index: contrib/xz/src/liblzma/common/filter_common.h =================================================================== --- contrib/xz/src/liblzma/common/filter_common.h (revision 0) +++ contrib/xz/src/liblzma/common/filter_common.h (revision 0) @@ -0,0 +1,48 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_common.c +/// \brief Filter-specific stuff common for both encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FILTER_COMMON_H +#define LZMA_FILTER_COMMON_H + +#include "common.h" + + +/// Both lzma_filter_encoder and lzma_filter_decoder begin with these members. +typedef struct { + /// Filter ID + lzma_vli id; + + /// Initializes the filter encoder and calls lzma_next_filter_init() + /// for filters + 1. + lzma_init_function init; + + /// Calculates memory usage of the encoder. If the options are + /// invalid, UINT64_MAX is returned. 
+ uint64_t (*memusage)(const void *options); + +} lzma_filter_coder; + + +typedef const lzma_filter_coder *(*lzma_filter_find)(lzma_vli id); + + +extern lzma_ret lzma_raw_coder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *filters, + lzma_filter_find coder_find, bool is_encoder); + + +extern uint64_t lzma_raw_coder_memusage(lzma_filter_find coder_find, + const lzma_filter *filters); + + +#endif Index: contrib/xz/src/liblzma/common/index.c =================================================================== --- contrib/xz/src/liblzma/common/index.c (revision 0) +++ contrib/xz/src/liblzma/common/index.c (revision 0) @@ -0,0 +1,1241 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.c +/// \brief Handling of .xz Indexes and some other Stream information +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "stream_flags_common.h" + + +/// \brief How many Records to allocate at once +/// +/// This should be big enough to avoid making lots of tiny allocations +/// but small enough to avoid too much unused memory at once. 
+#define INDEX_GROUP_SIZE 500
+
+
+/// \brief How many Records can be allocated at once at maximum
+#define PREALLOC_MAX ((SIZE_MAX - sizeof(index_group)) / sizeof(index_record))
+
+
+/// \brief Base structure for index_stream and index_group structures
+typedef struct index_tree_node_s index_tree_node;
+struct index_tree_node_s {
+	/// Uncompressed start offset of this Stream (relative to the
+	/// beginning of the file) or Block (relative to the beginning
+	/// of the Stream)
+	lzma_vli uncompressed_base;
+
+	/// Compressed start offset of this Stream or Block
+	lzma_vli compressed_base;
+
+	/// Tree links. parent is NULL for the root node; left and right
+	/// are NULL when there is no such child.
+	index_tree_node *parent;
+	index_tree_node *left;
+	index_tree_node *right;
+};
+
+
+/// \brief AVL tree to hold index_stream or index_group structures
+typedef struct {
+	/// Root node
+	index_tree_node *root;
+
+	/// Leftmost node. Since the tree will be filled sequentially,
+	/// this won't change after the first node has been added to
+	/// the tree.
+	index_tree_node *leftmost;
+
+	/// The rightmost node in the tree. Since the tree is filled
+	/// sequentially, this is always the node where to add the new data.
+	index_tree_node *rightmost;
+
+	/// Number of nodes in the tree
+	uint32_t count;
+
+} index_tree;
+
+
+/// One Record. Both members are cumulative sums; see the description
+/// of index_group.records[] for how they are accumulated.
+typedef struct {
+	lzma_vli uncompressed_sum;
+	lzma_vli unpadded_sum;
+} index_record;
+
+
+typedef struct {
+	/// Every Record group is part of index_stream.groups tree.
+	index_tree_node node;
+
+	/// Number of Blocks in this Stream before this group.
+	lzma_vli number_base;
+
+	/// Number of Records that can be put in records[].
+	size_t allocated;
+
+	/// Index of the last Record in use.
+	size_t last;
+
+	/// The sizes in this array are stored as cumulative sums relative
+	/// to the beginning of the Stream. This makes it possible to
+	/// use binary search in lzma_index_locate().
+	///
+	/// Note that the cumulative summing is done specially for
+	/// unpadded_sum: The previous value is rounded up to the next
+	/// multiple of four before adding the Unpadded Size of the new
+	/// Block. The total encoded size of the Blocks in the Stream
+	/// is records[last].unpadded_sum in the last Record group of
+	/// the Stream, rounded up to the next multiple of four.
+	///
+	/// For example, if the Unpadded Sizes are 39, 57, and 81, the
+	/// stored values are 39, 97 (40 + 57), and 181 (100 + 81).
+	/// The total encoded size of these Blocks is 184.
+	///
+	/// This is a flexible array, because it makes it easy to optimize
+	/// memory usage in case someone concatenates many Streams that
+	/// have only one or few Blocks.
+	index_record records[];
+
+} index_group;
+
+
+typedef struct {
+	/// Every index_stream is a node in the tree of Streams.
+	index_tree_node node;
+
+	/// Number of this Stream (first one is 1)
+	uint32_t number;
+
+	/// Total number of Blocks before this Stream
+	lzma_vli block_number_base;
+
+	/// Record groups of this Stream are stored in a tree.
+	/// It's a T-tree with AVL-tree balancing. There are
+	/// INDEX_GROUP_SIZE Records per node by default.
+	/// This keeps the number of memory allocations reasonable
+	/// and finding a Record is fast.
+	index_tree groups;
+
+	/// Number of Records in this Stream
+	lzma_vli record_count;
+
+	/// Size of the List of Records field in this Stream. This is used
+	/// together with record_count to calculate the size of the Index
+	/// field and thus the total size of the Stream.
+	lzma_vli index_list_size;
+
+	/// Stream Flags of this Stream. This is meaningful only if
+	/// the Stream Flags have been told us with lzma_index_stream_flags().
+	/// Initially stream_flags.version is set to UINT32_MAX to indicate
+	/// that the Stream Flags are unknown.
+	lzma_stream_flags stream_flags;
+
+	/// Amount of Stream Padding after this Stream. This defaults to
+	/// zero and can be set with lzma_index_stream_padding().
+	lzma_vli stream_padding;
+
+} index_stream;
+
+
+struct lzma_index_s {
+	/// AVL-tree containing the Stream(s). Often there is just one
+	/// Stream, but using a tree keeps lookups fast even when there
+	/// are many concatenated Streams.
+	index_tree streams;
+
+	/// Uncompressed size of all the Blocks in the Stream(s)
+	lzma_vli uncompressed_size;
+
+	/// Total size of all the Blocks in the Stream(s)
+	lzma_vli total_size;
+
+	/// Total number of Records in all Streams in this lzma_index
+	lzma_vli record_count;
+
+	/// Size of the List of Records field if all the Streams in this
+	/// lzma_index were packed into a single Stream (makes it simpler to
+	/// take many .xz files and combine them into a single Stream).
+	///
+	/// This value together with record_count is needed to calculate
+	/// Backward Size that is stored into Stream Footer.
+	lzma_vli index_list_size;
+
+	/// How many Records to allocate at once in lzma_index_append().
+	/// This defaults to INDEX_GROUP_SIZE but can be overridden with
+	/// lzma_index_prealloc().
+	size_t prealloc;
+
+	/// Bitmask indicating what integrity check types have been used
+	/// as set by lzma_index_stream_flags(). The bit of the last Stream
+	/// is not included here, since it is possible to change it by
+	/// calling lzma_index_stream_flags() again.
+	uint32_t checks;
+};
+
+
+/// Set a tree to the empty state: no nodes and a zero node count.
+static void
+index_tree_init(index_tree *tree)
+{
+	tree->root = NULL;
+	tree->leftmost = NULL;
+	tree->rightmost = NULL;
+	tree->count = 0;
+	return;
+}
+
+
+/// Helper for index_tree_end()
+static void
+index_tree_node_end(index_tree_node *node, lzma_allocator *allocator,
+		void (*free_func)(void *node, lzma_allocator *allocator))
+{
+	// The tree won't ever be very huge, so recursion should be fine.
+	// 20 levels in the tree is likely quite a lot already in practice.
+	if (node->left != NULL)
+		index_tree_node_end(node->left, allocator, free_func);
+
+	if (node->right != NULL)
+		index_tree_node_end(node->right, allocator, free_func);
+
+	// Children are released first; free_func only cleans up what the
+	// node owns, the node itself is freed here.
+	if (free_func != NULL)
+		free_func(node, allocator);
+
+	lzma_free(node, allocator);
+	return;
+}
+
+
+/// Free the memory allocated for a tree. If free_func is not NULL,
+/// it is called on each node before freeing the node. This is used
+/// to free the Record groups from each index_stream before freeing
+/// the index_stream itself.
+static void
+index_tree_end(index_tree *tree, lzma_allocator *allocator,
+		void (*free_func)(void *node, lzma_allocator *allocator))
+{
+	if (tree->root != NULL)
+		index_tree_node_end(tree->root, allocator, free_func);
+
+	return;
+}
+
+
+/// Add a new node to the tree. node->uncompressed_base and
+/// node->compressed_base must have been set by the caller already.
+static void
+index_tree_append(index_tree *tree, index_tree_node *node)
+{
+	node->parent = tree->rightmost;
+	node->left = NULL;
+	node->right = NULL;
+
+	++tree->count;
+
+	// Handle the special case of adding the first node.
+	if (tree->root == NULL) {
+		tree->root = node;
+		tree->leftmost = node;
+		tree->rightmost = node;
+		return;
+	}
+
+	// The tree is always filled sequentially.
+	assert(tree->rightmost->uncompressed_base <= node->uncompressed_base);
+	assert(tree->rightmost->compressed_base < node->compressed_base);
+
+	// Add the new node after the rightmost node. It's the correct
+	// place due to the reason above.
+	tree->rightmost->right = node;
+	tree->rightmost = node;
+
+	// Balance the AVL-tree if needed. We don't need to keep the balance
+	// factors in nodes, because we always fill the tree sequentially,
+	// and thus know the state of the tree just by looking at the node
+	// count. From the node count we can calculate how many steps to go
+	// up in the tree to find the rotation root.
+	//
+	// NOTE(review): presumably bsr32() returns the index of the highest
+	// set bit, so "up" is nonzero exactly when count is not a power of
+	// two - confirm against the bsr32()/ctz32() definitions.
+	uint32_t up = tree->count ^ (UINT32_C(1) << bsr32(tree->count));
+	if (up != 0) {
+		// Locate the root node for the rotation.
+		up = ctz32(tree->count) + 2;
+		do {
+			node = node->parent;
+		} while (--up > 0);
+
+		// Rotate left using node as the rotation root.
+		index_tree_node *pivot = node->right;
+
+		if (node->parent == NULL) {
+			tree->root = pivot;
+		} else {
+			assert(node->parent->right == node);
+			node->parent->right = pivot;
+		}
+
+		pivot->parent = node->parent;
+
+		node->right = pivot->left;
+		if (node->right != NULL)
+			node->right->parent = node;
+
+		pivot->left = node;
+		node->parent = pivot;
+	}
+
+	return;
+}
+
+
+/// Get the next node in the tree. Return NULL if there are no more nodes.
+static void *
+index_tree_next(const index_tree_node *node)
+{
+	// If there is a right subtree, the successor is its leftmost node.
+	if (node->right != NULL) {
+		node = node->right;
+		while (node->left != NULL)
+			node = node->left;
+
+		return (void *)(node);
+	}
+
+	// Otherwise climb until we arrive at a parent from its left side.
+	while (node->parent != NULL && node->parent->right == node)
+		node = node->parent;
+
+	return (void *)(node->parent);
+}
+
+
+/// Locate a node that contains the given uncompressed offset. It is
+/// caller's job to check that target is not bigger than the uncompressed
+/// size of the tree (the last node would be returned in that case still).
+static void *
+index_tree_locate(const index_tree *tree, lzma_vli target)
+{
+	const index_tree_node *result = NULL;
+	const index_tree_node *node = tree->root;
+
+	assert(tree->leftmost == NULL
+			|| tree->leftmost->uncompressed_base == 0);
+
+	// Consecutive nodes may have the same uncompressed_base.
+	// We must pick the rightmost one.
+	while (node != NULL) {
+		if (node->uncompressed_base > target) {
+			node = node->left;
+		} else {
+			result = node;
+			node = node->right;
+		}
+	}
+
+	return (void *)(result);
+}
+
+
+/// Allocate and initialize a new Stream using the given base offsets.
+static index_stream *
+index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base,
+		lzma_vli stream_number, lzma_vli block_number_base,
+		lzma_allocator *allocator)
+{
+	index_stream *s = lzma_alloc(sizeof(index_stream), allocator);
+	if (s == NULL)
+		return NULL;
+
+	s->node.uncompressed_base = uncompressed_base;
+	s->node.compressed_base = compressed_base;
+	s->node.parent = NULL;
+	s->node.left = NULL;
+	s->node.right = NULL;
+
+	s->number = stream_number;
+	s->block_number_base = block_number_base;
+
+	index_tree_init(&s->groups);
+
+	s->record_count = 0;
+	s->index_list_size = 0;
+	// UINT32_MAX marks the Stream Flags as not yet known.
+	s->stream_flags.version = UINT32_MAX;
+	s->stream_padding = 0;
+
+	return s;
+}
+
+
+/// Free the Record groups of a Stream. The index_stream structure itself
+/// is not freed here. node must not be NULL.
+static void
+index_stream_end(void *node, lzma_allocator *allocator)
+{
+	index_stream *s = node;
+	index_tree_end(&s->groups, allocator, NULL);
+	return;
+}
+
+
+/// Allocate a lzma_index with an empty tree of Streams and default
+/// settings. Returns NULL if the allocation fails.
+static lzma_index *
+index_init_plain(lzma_allocator *allocator)
+{
+	lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
+	if (i != NULL) {
+		index_tree_init(&i->streams);
+		i->uncompressed_size = 0;
+		i->total_size = 0;
+		i->record_count = 0;
+		i->index_list_size = 0;
+		i->prealloc = INDEX_GROUP_SIZE;
+		i->checks = 0;
+	}
+
+	return i;
+}
+
+
+/// Allocate a new lzma_index that contains one empty Stream (Stream
+/// number 1, all base offsets zero). Returns NULL if memory allocation
+/// fails; nothing is leaked in that case.
+extern LZMA_API(lzma_index *)
+lzma_index_init(lzma_allocator *allocator)
+{
+	lzma_index *i = index_init_plain(allocator);
+	if (i == NULL)
+		return NULL;
+
+	// Allocate the Stream only after the base structure exists, so a
+	// failure here has exactly one thing to release. index_stream_end()
+	// must not be called with NULL because it dereferences its argument.
+	index_stream *s = index_stream_init(0, 0, 1, 0, allocator);
+	if (s == NULL) {
+		lzma_free(i, allocator);
+		return NULL;
+	}
+
+	index_tree_append(&i->streams, &s->node);
+
+	return i;
+}
+
+
+extern LZMA_API(void)
+lzma_index_end(lzma_index *i, lzma_allocator *allocator)
+{
+	// NOTE: If you modify this function, check also the bottom
+	// of lzma_index_cat().
+	if (i != NULL) {
+		index_tree_end(&i->streams, allocator, &index_stream_end);
+		lzma_free(i, allocator);
+	}
+
+	return;
+}
+
+
+/// Set how many Records are allocated at once. Values above PREALLOC_MAX
+/// are clamped to PREALLOC_MAX.
+extern void
+lzma_index_prealloc(lzma_index *i, lzma_vli records)
+{
+	if (records > PREALLOC_MAX)
+		records = PREALLOC_MAX;
+
+	i->prealloc = (size_t)(records);
+	return;
+}
+
+
+extern LZMA_API(uint64_t)
+lzma_index_memusage(lzma_vli streams, lzma_vli blocks)
+{
+	// This calculates an upper bound that is only a little bit
+	// bigger than the exact maximum memory usage with the given
+	// parameters.
+
+	// Typical malloc() overhead is 2 * sizeof(void *) but we take
+	// a little bit extra just in case. Using LZMA_MEMUSAGE_BASE
+	// instead would give too inaccurate estimate.
+	const size_t alloc_overhead = 4 * sizeof(void *);
+
+	// Amount of memory needed for each Stream base structures.
+	// We assume that every Stream has at least one Block and
+	// thus at least one group.
+	const size_t stream_base = sizeof(index_stream)
+			+ sizeof(index_group) + 2 * alloc_overhead;
+
+	// Amount of memory needed per group.
+	const size_t group_base = sizeof(index_group)
+			+ INDEX_GROUP_SIZE * sizeof(index_record)
+			+ alloc_overhead;
+
+	// Number of groups. There may actually be more, but that overhead
+	// has been taken into account in stream_base already.
+	const lzma_vli groups
+			= (blocks + INDEX_GROUP_SIZE - 1) / INDEX_GROUP_SIZE;
+
+	// Memory used by index_stream and index_group structures.
+	const uint64_t streams_mem = streams * stream_base;
+	const uint64_t groups_mem = groups * group_base;
+
+	// Memory used by the base structure.
+	const uint64_t index_base = sizeof(lzma_index) + alloc_overhead;
+
+	// Validate the arguments and catch integer overflows.
+	// Maximum number of Streams is "only" UINT32_MAX, because
+	// that limit is used by the tree containing the Streams.
+	const uint64_t limit = UINT64_MAX - index_base;
+	if (streams == 0 || streams > UINT32_MAX || blocks > LZMA_VLI_MAX
+			|| streams > limit / stream_base
+			|| groups > limit / group_base
+			|| limit - streams_mem < groups_mem)
+		return UINT64_MAX;
+
+	return index_base + streams_mem + groups_mem;
+}
+
+
+extern LZMA_API(uint64_t)
+lzma_index_memused(const lzma_index *i)
+{
+	return lzma_index_memusage(i->streams.count, i->record_count);
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_index_block_count(const lzma_index *i)
+{
+	return i->record_count;
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_index_stream_count(const lzma_index *i)
+{
+	return i->streams.count;
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_index_size(const lzma_index *i)
+{
+	return index_size(i->record_count, i->index_list_size);
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_index_total_size(const lzma_index *i)
+{
+	return i->total_size;
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_index_stream_size(const lzma_index *i)
+{
+	// Stream Header + Blocks + Index + Stream Footer
+	return LZMA_STREAM_HEADER_SIZE + i->total_size
+			+ index_size(i->record_count, i->index_list_size)
+			+ LZMA_STREAM_HEADER_SIZE;
+}
+
+
+static lzma_vli
+index_file_size(lzma_vli compressed_base, lzma_vli unpadded_sum,
+		lzma_vli record_count, lzma_vli index_list_size,
+		lzma_vli stream_padding)
+{
+	// Earlier Streams and Stream Paddings + Stream Header
+	// + Blocks + Index + Stream Footer + Stream Padding
+	//
+	// This might go over LZMA_VLI_MAX due to too big unpadded_sum
+	// when this function is used in lzma_index_append().
+	lzma_vli file_size = compressed_base + 2 * LZMA_STREAM_HEADER_SIZE
+			+ stream_padding + vli_ceil4(unpadded_sum);
+	if (file_size > LZMA_VLI_MAX)
+		return LZMA_VLI_UNKNOWN;
+
+	// The same applies here.
+ file_size += index_size(record_count, index_list_size); + if (file_size > LZMA_VLI_MAX) + return LZMA_VLI_UNKNOWN; + + return file_size; +} + + +extern LZMA_API(lzma_vli) +lzma_index_file_size(const lzma_index *i) +{ + const index_stream *s = (const index_stream *)(i->streams.rightmost); + const index_group *g = (const index_group *)(s->groups.rightmost); + return index_file_size(s->node.compressed_base, + g == NULL ? 0 : g->records[g->last].unpadded_sum, + s->record_count, s->index_list_size, + s->stream_padding); +} + + +extern LZMA_API(lzma_vli) +lzma_index_uncompressed_size(const lzma_index *i) +{ + return i->uncompressed_size; +} + + +extern LZMA_API(uint32_t) +lzma_index_checks(const lzma_index *i) +{ + uint32_t checks = i->checks; + + // Get the type of the Check of the last Stream too. + const index_stream *s = (const index_stream *)(i->streams.rightmost); + if (s->stream_flags.version != UINT32_MAX) + checks |= UINT32_C(1) << s->stream_flags.check; + + return checks; +} + + +extern uint32_t +lzma_index_padding_size(const lzma_index *i) +{ + return (LZMA_VLI_C(4) - index_size_unpadded( + i->record_count, i->index_list_size)) & 3; +} + + +extern LZMA_API(lzma_ret) +lzma_index_stream_flags(lzma_index *i, const lzma_stream_flags *stream_flags) +{ + if (i == NULL || stream_flags == NULL) + return LZMA_PROG_ERROR; + + // Validate the Stream Flags. + return_if_error(lzma_stream_flags_compare( + stream_flags, stream_flags)); + + index_stream *s = (index_stream *)(i->streams.rightmost); + s->stream_flags = *stream_flags; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding) +{ + if (i == NULL || stream_padding > LZMA_VLI_MAX + || (stream_padding & 3) != 0) + return LZMA_PROG_ERROR; + + index_stream *s = (index_stream *)(i->streams.rightmost); + + // Check that the new value won't make the file grow too big. 
+ const lzma_vli old_stream_padding = s->stream_padding; + s->stream_padding = 0; + if (lzma_index_file_size(i) + stream_padding > LZMA_VLI_MAX) { + s->stream_padding = old_stream_padding; + return LZMA_DATA_ERROR; + } + + s->stream_padding = stream_padding; + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) +{ + // Validate. + if (i == NULL || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX + || uncompressed_size > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + index_stream *s = (index_stream *)(i->streams.rightmost); + index_group *g = (index_group *)(s->groups.rightmost); + + const lzma_vli compressed_base = g == NULL ? 0 + : vli_ceil4(g->records[g->last].unpadded_sum); + const lzma_vli uncompressed_base = g == NULL ? 0 + : g->records[g->last].uncompressed_sum; + const uint32_t index_list_size_add = lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); + + // Check that the file size will stay within limits. + if (index_file_size(s->node.compressed_base, + compressed_base + unpadded_size, s->record_count + 1, + s->index_list_size + index_list_size_add, + s->stream_padding) == LZMA_VLI_UNKNOWN) + return LZMA_DATA_ERROR; + + // The size of the Index field must not exceed the maximum value + // that can be stored in the Backward Size field. + if (index_size(i->record_count + 1, + i->index_list_size + index_list_size_add) + > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + + if (g != NULL && g->last + 1 < g->allocated) { + // There is space in the last group at least for one Record. + ++g->last; + } else { + // We need to allocate a new group. 
+ g = lzma_alloc(sizeof(index_group) + + i->prealloc * sizeof(index_record), + allocator); + if (g == NULL) + return LZMA_MEM_ERROR; + + g->last = 0; + g->allocated = i->prealloc; + + // Reset prealloc so that if the application happens to + // add new Records, the allocation size will be sane. + i->prealloc = INDEX_GROUP_SIZE; + + // Set the start offsets of this group. + g->node.uncompressed_base = uncompressed_base; + g->node.compressed_base = compressed_base; + g->number_base = s->record_count + 1; + + // Add the new group to the Stream. + index_tree_append(&s->groups, &g->node); + } + + // Add the new Record to the group. + g->records[g->last].uncompressed_sum + = uncompressed_base + uncompressed_size; + g->records[g->last].unpadded_sum + = compressed_base + unpadded_size; + + // Update the totals. + ++s->record_count; + s->index_list_size += index_list_size_add; + + i->total_size += vli_ceil4(unpadded_size); + i->uncompressed_size += uncompressed_size; + ++i->record_count; + i->index_list_size += index_list_size_add; + + return LZMA_OK; +} + + +/// Structure to pass info to index_cat_helper() +typedef struct { + /// Uncompressed size of the destination + lzma_vli uncompressed_size; + + /// Compressed file size of the destination + lzma_vli file_size; + + /// Same as above but for Block numbers + lzma_vli block_number_add; + + /// Number of Streams that were in the destination index before we + /// started appending new Streams from the source index. This is + /// used to fix the Stream numbering. + uint32_t stream_number_add; + + /// Destination index' Stream tree + index_tree *streams; + +} index_cat_info; + + +/// Add the Stream nodes from the source index to dest using recursion. +/// Simplest iterative traversal of the source tree wouldn't work, because +/// we update the pointers in nodes when moving them to the destination tree. 
+static void +index_cat_helper(const index_cat_info *info, index_stream *this) +{ + index_stream *left = (index_stream *)(this->node.left); + index_stream *right = (index_stream *)(this->node.right); + + if (left != NULL) + index_cat_helper(info, left); + + this->node.uncompressed_base += info->uncompressed_size; + this->node.compressed_base += info->file_size; + this->number += info->stream_number_add; + this->block_number_base += info->block_number_add; + index_tree_append(info->streams, &this->node); + + if (right != NULL) + index_cat_helper(info, right); + + return; +} + + +extern LZMA_API(lzma_ret) +lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, + lzma_allocator *allocator) +{ + const lzma_vli dest_file_size = lzma_index_file_size(dest); + + // Check that we don't exceed the file size limits. + if (dest_file_size + lzma_index_file_size(src) > LZMA_VLI_MAX + || dest->uncompressed_size + src->uncompressed_size + > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + + // Check that the encoded size of the combined lzma_indexes stays + // within limits. In theory, this should be done only if we know + // that the user plans to actually combine the Streams and thus + // construct a single Index (probably rare). However, exceeding + // this limit is quite theoretical, so we do this check always + // to simplify things elsewhere. + { + const lzma_vli dest_size = index_size_unpadded( + dest->record_count, dest->index_list_size); + const lzma_vli src_size = index_size_unpadded( + src->record_count, src->index_list_size); + if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + } + + // Optimize the last group to minimize memory usage. Allocation has + // to be done before modifying dest or src. 
+	{
+		index_stream *s = (index_stream *)(dest->streams.rightmost);
+		index_group *g = (index_group *)(s->groups.rightmost);
+		if (g != NULL && g->last + 1 < g->allocated) {
+			assert(g->node.left == NULL);
+			assert(g->node.right == NULL);
+
+			// Allocate a group that has room for exactly the
+			// Records that are in use.
+			index_group *newg = lzma_alloc(sizeof(index_group)
+					+ (g->last + 1)
+					* sizeof(index_record),
+					allocator);
+			if (newg == NULL)
+				return LZMA_MEM_ERROR;
+
+			newg->node = g->node;
+			newg->allocated = g->last + 1;
+			newg->last = g->last;
+			newg->number_base = g->number_base;
+
+			memcpy(newg->records, g->records, newg->allocated
+					* sizeof(index_record));
+
+			// Redirect every pointer that referred to the old
+			// group so that it refers to the new one.
+			if (g->node.parent != NULL) {
+				assert(g->node.parent->right == &g->node);
+				g->node.parent->right = &newg->node;
+			}
+
+			if (s->groups.leftmost == &g->node) {
+				assert(s->groups.root == &g->node);
+				s->groups.leftmost = &newg->node;
+				s->groups.root = &newg->node;
+			}
+
+			if (s->groups.rightmost == &g->node)
+				s->groups.rightmost = &newg->node;
+
+			lzma_free(g, allocator);
+		}
+	}
+
+	// dest->checks doesn't include the Check type of the last Stream
+	// of dest; lzma_index_checks() adds it by looking at the rightmost
+	// Stream. Fold that bit into dest->checks now. Once the Streams
+	// from src have been appended, the rightmost Stream of dest is
+	// a Stream from src and the bit would be lost.
+	dest->checks = lzma_index_checks(dest);
+
+	// Add all the Streams from src to dest. Update the base offsets
+	// of each Stream from src.
+	const index_cat_info info = {
+		.uncompressed_size = dest->uncompressed_size,
+		.file_size = dest_file_size,
+		.stream_number_add = dest->streams.count,
+		.block_number_add = dest->record_count,
+		.streams = &dest->streams,
+	};
+	index_cat_helper(&info, (index_stream *)(src->streams.root));
+
+	// Update info about all the combined Streams. src->checks covers
+	// all but the last Stream of src; the last one is now the last
+	// Stream of dest and is handled by lzma_index_checks().
+	dest->uncompressed_size += src->uncompressed_size;
+	dest->total_size += src->total_size;
+	dest->record_count += src->record_count;
+	dest->index_list_size += src->index_list_size;
+	dest->checks |= src->checks;
+
+	// There's nothing else left in src than the base structure.
+	lzma_free(src, allocator);
+
+	return LZMA_OK;
+}
+
+
+/// Duplicate an index_stream.
+static index_stream *
+index_dup_stream(const index_stream *src, lzma_allocator *allocator)
+{
+	// Catch a somewhat theoretical integer overflow.
+	if (src->record_count > PREALLOC_MAX)
+		return NULL;
+
+	// Allocate and initialize a new Stream.
+	index_stream *dest = index_stream_init(src->node.compressed_base,
+			src->node.uncompressed_base, src->number,
+			src->block_number_base, allocator);
+	if (dest == NULL)
+		return NULL;
+
+	// Copy the overall information. This has to be done also for
+	// Streams that have no Blocks, because Stream Flags and Stream
+	// Padding can be set on such Streams too.
+	dest->record_count = src->record_count;
+	dest->index_list_size = src->index_list_size;
+	dest->stream_flags = src->stream_flags;
+	dest->stream_padding = src->stream_padding;
+
+	// Return if there are no groups to duplicate.
+	if (src->groups.leftmost == NULL)
+		return dest;
+
+	// Allocate memory for the Records. We put all the Records into
+	// a single group. It's simplest and also tends to make
+	// lzma_index_locate() a little bit faster with very big Indexes.
+	index_group *destg = lzma_alloc(sizeof(index_group)
+			+ src->record_count * sizeof(index_record),
+			allocator);
+	if (destg == NULL) {
+		index_stream_end(dest, allocator);
+		return NULL;
+	}
+
+	// Initialize destg.
+	destg->node.uncompressed_base = 0;
+	destg->node.compressed_base = 0;
+	destg->number_base = 1;
+	destg->allocated = src->record_count;
+	destg->last = src->record_count - 1;
+
+	// Go through all the groups in src and copy the Records into destg.
+	const index_group *srcg = (const index_group *)(src->groups.leftmost);
+	size_t i = 0;
+	do {
+		memcpy(destg->records + i, srcg->records,
+				(srcg->last + 1) * sizeof(index_record));
+		i += srcg->last + 1;
+		srcg = index_tree_next(&srcg->node);
+	} while (srcg != NULL);
+
+	assert(i == destg->allocated);
+
+	// Add the group to the new Stream.
+	index_tree_append(&dest->groups, &destg->node);
+
+	return dest;
+}
+
+
+extern LZMA_API(lzma_index *)
+lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
+{
+	// Allocate the base structure (no initial Stream).
+	lzma_index *dest = index_init_plain(allocator);
+	if (dest == NULL)
+		return NULL;
+
+	// Copy the totals.
+ dest->uncompressed_size = src->uncompressed_size; + dest->total_size = src->total_size; + dest->record_count = src->record_count; + dest->index_list_size = src->index_list_size; + + // Copy the Streams and the groups in them. + const index_stream *srcstream + = (const index_stream *)(src->streams.leftmost); + do { + index_stream *deststream = index_dup_stream( + srcstream, allocator); + if (deststream == NULL) { + lzma_index_end(dest, allocator); + return NULL; + } + + index_tree_append(&dest->streams, &deststream->node); + + srcstream = index_tree_next(&srcstream->node); + } while (srcstream != NULL); + + return dest; +} + + +/// Indexing for lzma_index_iter.internal[] +enum { + ITER_INDEX, + ITER_STREAM, + ITER_GROUP, + ITER_RECORD, + ITER_METHOD, +}; + + +/// Values for lzma_index_iter.internal[ITER_METHOD].s +enum { + ITER_METHOD_NORMAL, + ITER_METHOD_NEXT, + ITER_METHOD_LEFTMOST, +}; + + +static void +iter_set_info(lzma_index_iter *iter) +{ + const lzma_index *i = iter->internal[ITER_INDEX].p; + const index_stream *stream = iter->internal[ITER_STREAM].p; + const index_group *group = iter->internal[ITER_GROUP].p; + const size_t record = iter->internal[ITER_RECORD].s; + + // lzma_index_iter.internal must not contain a pointer to the last + // group in the index, because that may be reallocated by + // lzma_index_cat(). + if (group == NULL) { + // There are no groups. + assert(stream->groups.root == NULL); + iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; + + } else if (i->streams.rightmost != &stream->node + || stream->groups.rightmost != &group->node) { + // The group is not not the last group in the index. + iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; + + } else if (stream->groups.leftmost != &group->node) { + // The group isn't the only group in the Stream, thus we + // know that it must have a parent group i.e. it's not + // the root node. 
+ assert(stream->groups.root != &group->node); + assert(group->node.parent->right == &group->node); + iter->internal[ITER_METHOD].s = ITER_METHOD_NEXT; + iter->internal[ITER_GROUP].p = group->node.parent; + + } else { + // The Stream has only one group. + assert(stream->groups.root == &group->node); + assert(group->node.parent == NULL); + iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; + iter->internal[ITER_GROUP].p = NULL; + } + + iter->stream.number = stream->number; + iter->stream.block_count = stream->record_count; + iter->stream.compressed_offset = stream->node.compressed_base; + iter->stream.uncompressed_offset = stream->node.uncompressed_base; + + // iter->stream.flags will be NULL if the Stream Flags haven't been + // set with lzma_index_stream_flags(). + iter->stream.flags = stream->stream_flags.version == UINT32_MAX + ? NULL : &stream->stream_flags; + iter->stream.padding = stream->stream_padding; + + if (stream->groups.rightmost == NULL) { + // Stream has no Blocks. + iter->stream.compressed_size = index_size(0, 0) + + 2 * LZMA_STREAM_HEADER_SIZE; + iter->stream.uncompressed_size = 0; + } else { + const index_group *g = (const index_group *)( + stream->groups.rightmost); + + // Stream Header + Stream Footer + Index + Blocks + iter->stream.compressed_size = 2 * LZMA_STREAM_HEADER_SIZE + + index_size(stream->record_count, + stream->index_list_size) + + vli_ceil4(g->records[g->last].unpadded_sum); + iter->stream.uncompressed_size + = g->records[g->last].uncompressed_sum; + } + + if (group != NULL) { + iter->block.number_in_stream = group->number_base + record; + iter->block.number_in_file = iter->block.number_in_stream + + stream->block_number_base; + + iter->block.compressed_stream_offset + = record == 0 ? group->node.compressed_base + : vli_ceil4(group->records[ + record - 1].unpadded_sum); + iter->block.uncompressed_stream_offset + = record == 0 ? 
group->node.uncompressed_base + : group->records[record - 1].uncompressed_sum; + + iter->block.uncompressed_size + = group->records[record].uncompressed_sum + - iter->block.uncompressed_stream_offset; + iter->block.unpadded_size + = group->records[record].unpadded_sum + - iter->block.compressed_stream_offset; + iter->block.total_size = vli_ceil4(iter->block.unpadded_size); + + iter->block.compressed_stream_offset + += LZMA_STREAM_HEADER_SIZE; + + iter->block.compressed_file_offset + = iter->block.compressed_stream_offset + + iter->stream.compressed_offset; + iter->block.uncompressed_file_offset + = iter->block.uncompressed_stream_offset + + iter->stream.uncompressed_offset; + } + + return; +} + + +extern LZMA_API(void) +lzma_index_iter_init(lzma_index_iter *iter, const lzma_index *i) +{ + iter->internal[ITER_INDEX].p = i; + lzma_index_iter_rewind(iter); + return; +} + + +extern LZMA_API(void) +lzma_index_iter_rewind(lzma_index_iter *iter) +{ + iter->internal[ITER_STREAM].p = NULL; + iter->internal[ITER_GROUP].p = NULL; + iter->internal[ITER_RECORD].s = 0; + iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; + return; +} + + +extern LZMA_API(lzma_bool) +lzma_index_iter_next(lzma_index_iter *iter, lzma_index_iter_mode mode) +{ + // Catch unsupported mode values. + if ((unsigned int)(mode) > LZMA_INDEX_ITER_NONEMPTY_BLOCK) + return true; + + const lzma_index *i = iter->internal[ITER_INDEX].p; + const index_stream *stream = iter->internal[ITER_STREAM].p; + const index_group *group = NULL; + size_t record = iter->internal[ITER_RECORD].s; + + // If we are being asked for the next Stream, leave group to NULL + // so that the rest of the this function thinks that this Stream + // has no groups and will thus go to the next Stream. + if (mode != LZMA_INDEX_ITER_STREAM) { + // Get the pointer to the current group. See iter_set_inf() + // for explanation. 
+ switch (iter->internal[ITER_METHOD].s) { + case ITER_METHOD_NORMAL: + group = iter->internal[ITER_GROUP].p; + break; + + case ITER_METHOD_NEXT: + group = index_tree_next(iter->internal[ITER_GROUP].p); + break; + + case ITER_METHOD_LEFTMOST: + group = (const index_group *)( + stream->groups.leftmost); + break; + } + } + +again: + if (stream == NULL) { + // We at the beginning of the lzma_index. + // Locate the first Stream. + stream = (const index_stream *)(i->streams.leftmost); + if (mode >= LZMA_INDEX_ITER_BLOCK) { + // Since we are being asked to return information + // about the first a Block, skip Streams that have + // no Blocks. + while (stream->groups.leftmost == NULL) { + stream = index_tree_next(&stream->node); + if (stream == NULL) + return true; + } + } + + // Start from the first Record in the Stream. + group = (const index_group *)(stream->groups.leftmost); + record = 0; + + } else if (group != NULL && record < group->last) { + // The next Record is in the same group. + ++record; + + } else { + // This group has no more Records or this Stream has + // no Blocks at all. + record = 0; + + // If group is not NULL, this Stream has at least one Block + // and thus at least one group. Find the next group. + if (group != NULL) + group = index_tree_next(&group->node); + + if (group == NULL) { + // This Stream has no more Records. Find the next + // Stream. If we are being asked to return information + // about a Block, we skip empty Streams. + do { + stream = index_tree_next(&stream->node); + if (stream == NULL) + return true; + } while (mode >= LZMA_INDEX_ITER_BLOCK + && stream->groups.leftmost == NULL); + + group = (const index_group *)( + stream->groups.leftmost); + } + } + + if (mode == LZMA_INDEX_ITER_NONEMPTY_BLOCK) { + // We need to look for the next Block again if this Block + // is empty. 
+ if (record == 0) { + if (group->node.uncompressed_base + == group->records[0].uncompressed_sum) + goto again; + } else if (group->records[record - 1].uncompressed_sum + == group->records[record].uncompressed_sum) { + goto again; + } + } + + iter->internal[ITER_STREAM].p = stream; + iter->internal[ITER_GROUP].p = group; + iter->internal[ITER_RECORD].s = record; + + iter_set_info(iter); + + return false; +} + + +extern LZMA_API(lzma_bool) +lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target) +{ + const lzma_index *i = iter->internal[ITER_INDEX].p; + + // If the target is past the end of the file, return immediately. + if (i->uncompressed_size <= target) + return true; + + // Locate the Stream containing the target offset. + const index_stream *stream = index_tree_locate(&i->streams, target); + assert(stream != NULL); + target -= stream->node.uncompressed_base; + + // Locate the group containing the target offset. + const index_group *group = index_tree_locate(&stream->groups, target); + assert(group != NULL); + + // Use binary search to locate the exact Record. It is the first + // Record whose uncompressed_sum is greater than target. + // This is because we want the rightmost Record that fullfills the + // search criterion. It is possible that there are empty Blocks; + // we don't want to return them. 
+ size_t left = 0; + size_t right = group->last; + + while (left < right) { + const size_t pos = left + (right - left) / 2; + if (group->records[pos].uncompressed_sum <= target) + left = pos + 1; + else + right = pos; + } + + iter->internal[ITER_STREAM].p = stream; + iter->internal[ITER_GROUP].p = group; + iter->internal[ITER_RECORD].s = left; + + iter_set_info(iter); + + return false; +} Index: contrib/xz/src/liblzma/common/block_buffer_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/block_buffer_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/block_buffer_decoder.c (revision 0) @@ -0,0 +1,80 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_decoder.c +/// \brief Single-call .xz Block decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_block_buffer_decode(lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + if (in_pos == NULL || (in == NULL && *in_pos != in_size) + || *in_pos > in_size || out_pos == NULL + || (out == NULL && *out_pos != out_size) + || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Initialize the Block decoder. + lzma_next_coder block_decoder = LZMA_NEXT_CODER_INIT; + lzma_ret ret = lzma_block_decoder_init( + &block_decoder, allocator, block); + + if (ret == LZMA_OK) { + // Save the positions so that we can restore them in case + // an error occurs. + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + // Do the actual decoding. 
+ ret = block_decoder.code(block_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + LZMA_FINISH); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + if (ret == LZMA_OK) { + // Either the input was truncated or the + // output buffer was too small. + assert(*in_pos == in_size + || *out_pos == out_size); + + // If all the input was consumed, then the + // input is truncated, even if the output + // buffer is also full. This is because + // processing the last byte of the Block + // never produces output. + // + // NOTE: This assumption may break when new + // filters are added, if the end marker of + // the filter doesn't consume at least one + // complete byte. + if (*in_pos == in_size) + ret = LZMA_DATA_ERROR; + else + ret = LZMA_BUF_ERROR; + } + + // Restore the positions. + *in_pos = in_start; + *out_pos = out_start; + } + } + + // Free the decoder memory. This needs to be done even if + // initialization fails, because the internal API doesn't + // require the initialization function to free its memory on error. + lzma_next_end(&block_decoder, allocator); + + return ret; +} Index: contrib/xz/src/liblzma/common/easy_decoder_memusage.c =================================================================== --- contrib/xz/src/liblzma/common/easy_decoder_memusage.c (revision 0) +++ contrib/xz/src/liblzma/common/easy_decoder_memusage.c (revision 0) @@ -0,0 +1,24 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_decoder_memusage.c +/// \brief Decoder memory usage calculation to match easy encoder presets +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" + + +extern LZMA_API(uint64_t) +lzma_easy_decoder_memusage(uint32_t preset) +{ + lzma_options_easy opt_easy; + if (lzma_easy_preset(&opt_easy, preset)) + return UINT32_MAX; + + return lzma_raw_decoder_memusage(opt_easy.filters); +} Index: contrib/xz/src/liblzma/common/stream_buffer_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/stream_buffer_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/stream_buffer_decoder.c (revision 0) @@ -0,0 +1,91 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_buffer_decoder.c +/// \brief Single-call .xz Stream decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_stream_buffer_decode(uint64_t *memlimit, uint32_t flags, + lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Sanity checks + if (in_pos == NULL || (in == NULL && *in_pos != in_size) + || *in_pos > in_size || out_pos == NULL + || (out == NULL && *out_pos != out_size) + || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Catch flags that are not allowed in buffer-to-buffer decoding. + if (flags & LZMA_TELL_ANY_CHECK) + return LZMA_PROG_ERROR; + + // Initialize the Stream decoder. + // TODO: We need something to tell the decoder that it can use the + // output buffer as workspace, and thus save significant amount of RAM. 
+	lzma_next_coder stream_decoder = LZMA_NEXT_CODER_INIT;
+	lzma_ret ret = lzma_stream_decoder_init(
+			&stream_decoder, allocator, *memlimit, flags);
+
+	if (ret == LZMA_OK) {
+		// Save the positions so that we can restore them in case
+		// an error occurs.
+		const size_t in_start = *in_pos;
+		const size_t out_start = *out_pos;
+
+		// Do the actual decoding.
+		ret = stream_decoder.code(stream_decoder.coder, allocator,
+				in, in_pos, in_size, out, out_pos, out_size,
+				LZMA_FINISH);
+
+		if (ret == LZMA_STREAM_END) {
+			ret = LZMA_OK;
+		} else {
+			if (ret == LZMA_OK) {
+				// Either the input was truncated or the
+				// output buffer was too small.
+				//
+				// NOTE: The positions must be examined
+				// before they are restored below, otherwise
+				// the comparisons would be done against the
+				// start positions.
+				assert(*in_pos == in_size
+						|| *out_pos == out_size);
+
+				// If all the input was consumed, then the
+				// input is truncated, even if the output
+				// buffer is also full. This is because
+				// processing the last byte of the Stream
+				// never produces output.
+				if (*in_pos == in_size)
+					ret = LZMA_DATA_ERROR;
+				else
+					ret = LZMA_BUF_ERROR;
+
+			} else if (ret == LZMA_MEMLIMIT_ERROR) {
+				// Let the caller know how much memory would
+				// have been needed.
+				uint64_t memusage;
+				(void)stream_decoder.memconfig(
+						stream_decoder.coder,
+						memlimit, &memusage, 0);
+			}
+
+			// Something went wrong, restore the positions.
+			*in_pos = in_start;
+			*out_pos = out_start;
+		}
+	}
+
+	// Free the decoder memory. This needs to be done even if
+	// initialization fails, because the internal API doesn't
+	// require the initialization function to free its memory on error.
+ lzma_next_end(&stream_decoder, allocator); + + return ret; +} Index: contrib/xz/src/liblzma/common/index.h =================================================================== --- contrib/xz/src/liblzma/common/index.h (revision 0) +++ contrib/xz/src/liblzma/common/index.h (revision 0) @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.h +/// \brief Handling of Index +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_H +#define LZMA_INDEX_H + +#include "common.h" + + +/// Minimum Unpadded Size +#define UNPADDED_SIZE_MIN LZMA_VLI_C(5) + +/// Maximum Unpadded Size +#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) + + +/// Get the size of the Index Padding field. This is needed by Index encoder +/// and decoder, but applications should have no use for this. +extern uint32_t lzma_index_padding_size(const lzma_index *i); + + +/// Set for how many Records to allocate memory the next time +/// lzma_index_append() needs to allocate space for a new Record. +/// This is used only by the Index decoder. +extern void lzma_index_prealloc(lzma_index *i, lzma_vli records); + + +/// Round the variable-length integer to the next multiple of four. 
+static inline lzma_vli +vli_ceil4(lzma_vli vli) +{ + assert(vli <= LZMA_VLI_MAX); + return (vli + 3) & ~LZMA_VLI_C(3); +} + + +/// Calculate the size of the Index field excluding Index Padding +static inline lzma_vli +index_size_unpadded(lzma_vli count, lzma_vli index_list_size) +{ + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + lzma_vli_size(count) + index_list_size + 4; +} + + +/// Calculate the size of the Index field including Index Padding +static inline lzma_vli +index_size(lzma_vli count, lzma_vli index_list_size) +{ + return vli_ceil4(index_size_unpadded(count, index_list_size)); +} + + +/// Calculate the total size of the Stream +static inline lzma_vli +index_stream_size(lzma_vli blocks_size, + lzma_vli count, lzma_vli index_list_size) +{ + return LZMA_STREAM_HEADER_SIZE + blocks_size + + index_size(count, index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + +#endif Index: contrib/xz/src/liblzma/common/filter_buffer_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/filter_buffer_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/filter_buffer_encoder.c (revision 0) @@ -0,0 +1,54 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_buffer_encoder.c +/// \brief Single-call raw encoding +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_encoder.h" + + +extern LZMA_API(lzma_ret) +lzma_raw_buffer_encode(const lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, uint8_t *out, + size_t *out_pos, size_t out_size) +{ + // Validate what isn't validated later in filter_common.c. 
+ if ((in == NULL && in_size != 0) || out == NULL + || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Initialize the encoder + lzma_next_coder next = LZMA_NEXT_CODER_INIT; + return_if_error(lzma_raw_encoder_init(&next, allocator, filters)); + + // Store the output position so that we can restore it if + // something goes wrong. + const size_t out_start = *out_pos; + + // Do the actual encoding and free coder's memory. + size_t in_pos = 0; + lzma_ret ret = next.code(next.coder, allocator, in, &in_pos, in_size, + out, out_pos, out_size, LZMA_FINISH); + lzma_next_end(&next, allocator); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + if (ret == LZMA_OK) { + // Output buffer was too small. + assert(*out_pos == out_size); + ret = LZMA_BUF_ERROR; + } + + // Restore the output position. + *out_pos = out_start; + } + + return ret; +} Index: contrib/xz/src/liblzma/common/stream_flags_common.c =================================================================== --- contrib/xz/src/liblzma/common/stream_flags_common.c (revision 0) +++ contrib/xz/src/liblzma/common/stream_flags_common.c (revision 0) @@ -0,0 +1,47 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_common.c +/// \brief Common stuff for Stream flags coders +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_flags_common.h" + + +const uint8_t lzma_header_magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; +const uint8_t lzma_footer_magic[2] = { 0x59, 0x5A }; + + +extern LZMA_API(lzma_ret) +lzma_stream_flags_compare( + const lzma_stream_flags *a, const lzma_stream_flags *b) +{ + // We can compare only version 0 structures. 
+ if (a->version != 0 || b->version != 0) + return LZMA_OPTIONS_ERROR; + + // Check type + if ((unsigned int)(a->check) > LZMA_CHECK_ID_MAX + || (unsigned int)(b->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + if (a->check != b->check) + return LZMA_DATA_ERROR; + + // Backward Sizes are compared only if they are known in both. + if (a->backward_size != LZMA_VLI_UNKNOWN + && b->backward_size != LZMA_VLI_UNKNOWN) { + if (!is_backward_size_valid(a) || !is_backward_size_valid(b)) + return LZMA_PROG_ERROR; + + if (a->backward_size != b->backward_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/vli_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/vli_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/vli_decoder.c (revision 0) @@ -0,0 +1,86 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file vli_decoder.c +/// \brief Decodes variable-length integers +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(lzma_ret) +lzma_vli_decode(lzma_vli *restrict vli, size_t *vli_pos, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) { + vli_pos = &vli_pos_internal; + *vli = 0; + + // If there's no input, use LZMA_DATA_ERROR. This way it is + // easy to decode VLIs from buffers that have known size, + // and get the correct error code in case the buffer is + // too short. + if (*in_pos >= in_size) + return LZMA_DATA_ERROR; + + } else { + // Initialize *vli when starting to decode a new integer. + if (*vli_pos == 0) + *vli = 0; + + // Validate the arguments. 
+ if (*vli_pos >= LZMA_VLI_BYTES_MAX + || (*vli >> (*vli_pos * 7)) != 0) + return LZMA_PROG_ERROR; + + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + } + + do { + // Read the next byte. Use a temporary variable so that we + // can update *in_pos immediately. + const uint8_t byte = in[*in_pos]; + ++*in_pos; + + // Add the newly read byte to *vli. + *vli += (lzma_vli)(byte & 0x7F) << (*vli_pos * 7); + ++*vli_pos; + + // Check if this is the last byte of a multibyte integer. + if ((byte & 0x80) == 0) { + // We don't allow using variable-length integers as + // padding i.e. the encoding must use the most + // compact form. + if (byte == 0x00 && *vli_pos > 1) + return LZMA_DATA_ERROR; + + return vli_pos == &vli_pos_internal + ? LZMA_OK : LZMA_STREAM_END; + } + + // There is at least one more byte coming. If we have already + // read maximum number of bytes, the integer is considered + // corrupt. + // + // If we need bigger integers in future, old versions of liblzma + // will confusingly indicate the file being corrupt instead of + // unsupported. I suppose it's still better this way, because + // in the foreseeable future (writing this in 2008) the only + // reason why files would appear having over 63-bit integers + // is that the files are simply corrupt. + if (*vli_pos == LZMA_VLI_BYTES_MAX) + return LZMA_DATA_ERROR; + + } while (*in_pos < in_size); + + return vli_pos == &vli_pos_internal ? LZMA_DATA_ERROR : LZMA_OK; +} Index: contrib/xz/src/liblzma/common/stream_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/stream_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/stream_decoder.c (revision 0) @@ -0,0 +1,451 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_decoder.c +/// \brief Decodes .xz Streams +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_decoder.h" +#include "block_decoder.h" + + +struct lzma_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, + SEQ_STREAM_PADDING, + } sequence; + + /// Block or Metadata decoder. This takes little memory and the same + /// data structure can be used to decode every Block Header, so it's + /// a good idea to have a separate lzma_next_coder structure for it. + lzma_next_coder block_decoder; + + /// Block options decoded by the Block Header decoder and used by + /// the Block decoder. + lzma_block block_options; + + /// Stream Flags from Stream Header + lzma_stream_flags stream_flags; + + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; + + /// Memory usage limit + uint64_t memlimit; + + /// Amount of memory actually needed (only an estimate) + uint64_t memusage; + + /// If true, LZMA_NO_CHECK is returned if the Stream has + /// no integrity check. + bool tell_no_check; + + /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has + /// an integrity check that isn't supported by this liblzma build. + bool tell_unsupported_check; + + /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. + bool tell_any_check; + + /// If true, we will decode concatenated Streams that possibly have + /// Stream Padding between or after them. LZMA_STREAM_END is returned + /// once the application isn't giving us any new input, and we aren't + /// in the middle of a Stream, and possible Stream Padding is a + /// multiple of four bytes. + bool concatenated; + + /// When decoding concatenated Streams, this is true as long as we + /// are decoding the first Stream. This is needed to avoid misleading + /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic + /// bytes. 
+ bool first_stream; + + /// Write position in buffer[] and position in Stream Padding + size_t pos; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +}; + + +static lzma_ret +stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator) +{ + // Initialize the Index hash used to verify the Index. + coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); + if (coder->index_hash == NULL) + return LZMA_MEM_ERROR; + + // Reset the rest of the variables. + coder->sequence = SEQ_STREAM_HEADER; + coder->pos = 0; + + return LZMA_OK; +} + + +static lzma_ret +stream_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // When decoding the actual Block, it may be able to produce more + // output even if we don't give it any new input. + while (true) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: { + // Copy the Stream Header to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Header yet. + if (coder->pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->pos = 0; + + // Decode the Stream Header. + const lzma_ret ret = lzma_stream_header_decode( + &coder->stream_flags, coder->buffer); + if (ret != LZMA_OK) + return ret == LZMA_FORMAT_ERROR && !coder->first_stream + ? LZMA_DATA_ERROR : ret; + + // If we are decoding concatenated Streams, and the later + // Streams have invalid Header Magic Bytes, we give + // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR. + coder->first_stream = false; + + // Copy the type of the Check so that Block Header and Block + // decoders see it. 
+ coder->block_options.check = coder->stream_flags.check; + + // Even if we return LZMA_*_CHECK below, we want + // to continue from Block Header decoding. + coder->sequence = SEQ_BLOCK_HEADER; + + // Detect if there's no integrity check or if it is + // unsupported if those were requested by the application. + if (coder->tell_no_check && coder->stream_flags.check + == LZMA_CHECK_NONE) + return LZMA_NO_CHECK; + + if (coder->tell_unsupported_check + && !lzma_check_is_supported( + coder->stream_flags.check)) + return LZMA_UNSUPPORTED_CHECK; + + if (coder->tell_any_check) + return LZMA_GET_CHECK; + } + + // Fall through + + case SEQ_BLOCK_HEADER: { + if (*in_pos >= in_size) + return LZMA_OK; + + if (coder->pos == 0) { + // Detect if it's Index. + if (in[*in_pos] == 0x00) { + coder->sequence = SEQ_INDEX; + break; + } + + // Calculate the size of the Block Header. Note that + // Block Header decoder wants to see this byte too + // so don't advance *in_pos. + coder->block_options.header_size + = lzma_block_header_size_decode( + in[*in_pos]); + } + + // Copy the Block Header to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + coder->block_options.header_size); + + // Return if we didn't get the whole Block Header yet. + if (coder->pos < coder->block_options.header_size) + return LZMA_OK; + + coder->pos = 0; + + // Version 0 is currently the only possible version. + coder->block_options.version = 0; + + // Set up a buffer to hold the filter chain. Block Header + // decoder will initialize all members of this array so + // we don't need to do it here. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + coder->block_options.filters = filters; + + // Decode the Block Header. + return_if_error(lzma_block_header_decode(&coder->block_options, + allocator, coder->buffer)); + + // Check the memory usage limit. 
+ const uint64_t memusage = lzma_raw_decoder_memusage(filters); + lzma_ret ret; + + if (memusage == UINT64_MAX) { + // One or more unknown Filter IDs. + ret = LZMA_OPTIONS_ERROR; + } else { + // Now we can set coder->memusage since we know that + // the filter chain is valid. We don't want + // lzma_memusage() to return UINT64_MAX in case of + // invalid filter chain. + coder->memusage = memusage; + + if (memusage > coder->memlimit) { + // The chain would need too much memory. + ret = LZMA_MEMLIMIT_ERROR; + } else { + // Memory usage is OK. + // Initialize the Block decoder. + ret = lzma_block_decoder_init( + &coder->block_decoder, + allocator, + &coder->block_options); + } + } + + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) + lzma_free(filters[i].options, allocator); + + coder->block_options.filters = NULL; + + // Check if memory usage calculation and Block decoder + // initialization succeeded. + if (ret != LZMA_OK) + return ret; + + coder->sequence = SEQ_BLOCK; + } + + // Fall through + + case SEQ_BLOCK: { + const lzma_ret ret = coder->block_decoder.code( + coder->block_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + if (ret != LZMA_STREAM_END) + return ret; + + // Block decoded successfully. Add the new size pair to + // the Index hash. + return_if_error(lzma_index_hash_append(coder->index_hash, + lzma_block_unpadded_size( + &coder->block_options), + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_INDEX: { + // If we don't have any input, don't call + // lzma_index_hash_decode() since it would return + // LZMA_BUF_ERROR, which we must not do here. + if (*in_pos >= in_size) + return LZMA_OK; + + // Decode the Index and compare it to the hash calculated + // from the sizes of the Blocks (if any).
+ const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + return ret; + + coder->sequence = SEQ_STREAM_FOOTER; + } + + // Fall through + + case SEQ_STREAM_FOOTER: { + // Copy the Stream Footer to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Footer yet. + if (coder->pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->pos = 0; + + // Decode the Stream Footer. The decoder gives + // LZMA_FORMAT_ERROR if the magic bytes don't match, + // so convert that return code to LZMA_DATA_ERROR. + lzma_stream_flags footer_flags; + const lzma_ret ret = lzma_stream_footer_decode( + &footer_flags, coder->buffer); + if (ret != LZMA_OK) + return ret == LZMA_FORMAT_ERROR + ? LZMA_DATA_ERROR : ret; + + // Check that Index Size stored in the Stream Footer matches + // the real size of the Index field. + if (lzma_index_hash_size(coder->index_hash) + != footer_flags.backward_size) + return LZMA_DATA_ERROR; + + // Compare that the Stream Flags fields are identical in + // both Stream Header and Stream Footer. + return_if_error(lzma_stream_flags_compare( + &coder->stream_flags, &footer_flags)); + + if (!coder->concatenated) + return LZMA_STREAM_END; + + coder->sequence = SEQ_STREAM_PADDING; + } + + // Fall through + + case SEQ_STREAM_PADDING: + assert(coder->concatenated); + + // Skip over possible Stream Padding. + while (true) { + if (*in_pos >= in_size) { + // Unless LZMA_FINISH was used, we cannot + // know if there's more input coming later. + if (action != LZMA_FINISH) + return LZMA_OK; + + // Stream Padding must be a multiple of + // four bytes. + return coder->pos == 0 + ? LZMA_STREAM_END + : LZMA_DATA_ERROR; + } + + // If the byte is not zero, it probably indicates + // beginning of a new Stream (or the file is corrupt). 
+ if (in[*in_pos] != 0x00) + break; + + ++*in_pos; + coder->pos = (coder->pos + 1) & 3; + } + + // Stream Padding must be a multiple of four bytes (empty + // Stream Padding is OK). + if (coder->pos != 0) { + ++*in_pos; + return LZMA_DATA_ERROR; + } + + // Prepare to decode the next Stream. + return_if_error(stream_decoder_reset(coder, allocator)); + break; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + // Never reached +} + + +static void +stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->block_decoder, allocator); + lzma_index_hash_end(coder->index_hash, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_check +stream_decoder_get_check(const lzma_coder *coder) +{ + return coder->stream_flags.check; +} + + +static lzma_ret +stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + *memusage = coder->memusage; + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < coder->memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +extern lzma_ret +lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags) +{ + lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator); + + if (memlimit == 0) + return LZMA_PROG_ERROR; + + if (flags & ~LZMA_SUPPORTED_FLAGS) + return LZMA_OPTIONS_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &stream_decode; + next->end = &stream_decoder_end; + next->get_check = &stream_decoder_get_check; + next->memconfig = &stream_decoder_memconfig; + + next->coder->block_decoder = LZMA_NEXT_CODER_INIT; + next->coder->index_hash = NULL; + } + + next->coder->memlimit = memlimit; + next->coder->memusage = LZMA_MEMUSAGE_BASE; + next->coder->tell_no_check = (flags & 
LZMA_TELL_NO_CHECK) != 0; + next->coder->tell_unsupported_check + = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; + next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; + next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0; + next->coder->first_stream = true; + + return stream_decoder_reset(next->coder, allocator); +} + + +extern LZMA_API(lzma_ret) +lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) +{ + lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/alone_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/alone_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/alone_decoder.c (revision 0) @@ -0,0 +1,232 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.c +/// \brief Decoder for LZMA_Alone files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "alone_decoder.h" +#include "lzma_decoder.h" +#include "lz_decoder.h" + + +struct lzma_coder_s { + lzma_next_coder next; + + enum { + SEQ_PROPERTIES, + SEQ_DICTIONARY_SIZE, + SEQ_UNCOMPRESSED_SIZE, + SEQ_CODER_INIT, + SEQ_CODE, + } sequence; + + /// Position in the header fields + size_t pos; + + /// Uncompressed size decoded from the header + lzma_vli uncompressed_size; + + /// Memory usage limit + uint64_t memlimit; + + /// Amount of memory actually needed (only an estimate) + uint64_t memusage; + + /// Options decoded from the header needed to initialize + /// the LZMA decoder + lzma_options_lzma options; +}; + + +static lzma_ret +alone_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + while (*out_pos < out_size + && (coder->sequence == SEQ_CODE || *in_pos < in_size)) + switch (coder->sequence) { + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[*in_pos])) + return LZMA_FORMAT_ERROR; + + coder->sequence = SEQ_DICTIONARY_SIZE; + ++*in_pos; + break; + + case SEQ_DICTIONARY_SIZE: + coder->options.dict_size + |= (size_t)(in[*in_pos]) << (coder->pos * 8); + + if (++coder->pos == 4) { + if (coder->options.dict_size != UINT32_MAX) { + // A hack to ditch tons of false positives: + // We allow only dictionary sizes that are + // 2^n or 2^n + 2^(n-1). LZMA_Alone created + // only files with 2^n, but accepts any + // dictionary size. If someone complains, this + // will be reconsidered. 
+ uint32_t d = coder->options.dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + if (d != coder->options.dict_size) + return LZMA_FORMAT_ERROR; + } + + coder->pos = 0; + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + } + + ++*in_pos; + break; + + case SEQ_UNCOMPRESSED_SIZE: + coder->uncompressed_size + |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); + ++*in_pos; + if (++coder->pos < 8) + break; + + // Another hack to ditch false positives: Assume that + // if the uncompressed size is known, it must be less + // than 256 GiB. Again, if someone complains, this + // will be reconsidered. + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN + && coder->uncompressed_size + >= (LZMA_VLI_C(1) << 38)) + return LZMA_FORMAT_ERROR; + + // Calculate the memory usage so that it is ready + // for SEQ_CODER_INIT. + coder->memusage = lzma_lzma_decoder_memusage(&coder->options) + + LZMA_MEMUSAGE_BASE; + + coder->pos = 0; + coder->sequence = SEQ_CODER_INIT; + + // Fall through + + case SEQ_CODER_INIT: { + if (coder->memusage > coder->memlimit) + return LZMA_MEMLIMIT_ERROR; + + lzma_filter_info filters[2] = { + { + .init = &lzma_lzma_decoder_init, + .options = &coder->options, + }, { + .init = NULL, + } + }; + + const lzma_ret ret = lzma_next_filter_init(&coder->next, + allocator, filters); + if (ret != LZMA_OK) + return ret; + + // Use a hack to set the uncompressed size. 
+ lzma_lz_decoder_uncompressed(coder->next.coder, + coder->uncompressed_size); + + coder->sequence = SEQ_CODE; + break; + } + + case SEQ_CODE: { + return coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + } + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +alone_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +alone_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + *memusage = coder->memusage; + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < coder->memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +extern lzma_ret +lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + uint64_t memlimit) +{ + lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator); + + if (memlimit == 0) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &alone_decode; + next->end = &alone_decoder_end; + next->memconfig = &alone_decoder_memconfig; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + next->coder->sequence = SEQ_PROPERTIES; + next->coder->pos = 0; + next->coder->options.dict_size = 0; + next->coder->options.preset_dict = NULL; + next->coder->options.preset_dict_size = 0; + next->coder->uncompressed_size = 0; + next->coder->memlimit = memlimit; + next->coder->memusage = LZMA_MEMUSAGE_BASE; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_alone_decoder(lzma_stream *strm, uint64_t memlimit) +{ + lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] 
= true; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/stream_flags_common.h =================================================================== --- contrib/xz/src/liblzma/common/stream_flags_common.h (revision 0) +++ contrib/xz/src/liblzma/common/stream_flags_common.h (revision 0) @@ -0,0 +1,33 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_common.h +/// \brief Common stuff for Stream flags coders +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_STREAM_FLAGS_COMMON_H +#define LZMA_STREAM_FLAGS_COMMON_H + +#include "common.h" + +/// Size of the Stream Flags field +#define LZMA_STREAM_FLAGS_SIZE 2 + +extern const uint8_t lzma_header_magic[6]; +extern const uint8_t lzma_footer_magic[2]; + + +static inline bool +is_backward_size_valid(const lzma_stream_flags *options) +{ + return options->backward_size >= LZMA_BACKWARD_SIZE_MIN + && options->backward_size <= LZMA_BACKWARD_SIZE_MAX + && (options->backward_size & 3) == 0; +} + +#endif Index: contrib/xz/src/liblzma/common/filter_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/filter_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/filter_encoder.c (revision 0) @@ -0,0 +1,298 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_encoder.c +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_encoder.h" +#include "filter_common.h" +#include "lzma_encoder.h" +#include "lzma2_encoder.h" +#include "subblock_encoder.h" +#include "simple_encoder.h" +#include "delta_encoder.h" + + +typedef struct { + /// Filter ID + lzma_vli id; + + /// Initializes the filter encoder and calls lzma_next_filter_init() + /// for filters + 1. + lzma_init_function init; + + /// Calculates memory usage of the encoder. If the options are + /// invalid, UINT64_MAX is returned. + uint64_t (*memusage)(const void *options); + + /// Calculates the minimum sane size for Blocks (or other types of + /// chunks) to which the input data can be split to make + /// multithreaded encoding possible. If this is NULL, it is assumed + /// that the encoder is fast enough with single thread. + lzma_vli (*chunk_size)(const void *options); + + /// Tells the size of the Filter Properties field. If options are + /// invalid, UINT32_MAX is returned. If this is NULL, props_size_fixed + /// is used. + lzma_ret (*props_size_get)(uint32_t *size, const void *options); + uint32_t props_size_fixed; + + /// Encodes Filter Properties. + /// + /// \return - LZMA_OK: Properties encoded successfully. 
+ /// - LZMA_OPTIONS_ERROR: Unsupported options + /// - LZMA_PROG_ERROR: Invalid options or not enough + /// output space + lzma_ret (*props_encode)(const void *options, uint8_t *out); + +} lzma_filter_encoder; + + +static const lzma_filter_encoder encoders[] = { +#ifdef HAVE_ENCODER_LZMA1 + { + .id = LZMA_FILTER_LZMA1, + .init = &lzma_lzma_encoder_init, + .memusage = &lzma_lzma_encoder_memusage, + .chunk_size = NULL, // FIXME + .props_size_get = NULL, + .props_size_fixed = 5, + .props_encode = &lzma_lzma_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_LZMA2 + { + .id = LZMA_FILTER_LZMA2, + .init = &lzma_lzma2_encoder_init, + .memusage = &lzma_lzma2_encoder_memusage, + .chunk_size = NULL, // FIXME + .props_size_get = NULL, + .props_size_fixed = 1, + .props_encode = &lzma_lzma2_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_SUBBLOCK + { + .id = LZMA_FILTER_SUBBLOCK, + .init = &lzma_subblock_encoder_init, +// .memusage = &lzma_subblock_encoder_memusage, + .chunk_size = NULL, + .props_size_get = NULL, + .props_size_fixed = 0, + .props_encode = NULL, + }, +#endif +#ifdef HAVE_ENCODER_X86 + { + .id = LZMA_FILTER_X86, + .init = &lzma_simple_x86_encoder_init, + .memusage = NULL, + .chunk_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_POWERPC + { + .id = LZMA_FILTER_POWERPC, + .init = &lzma_simple_powerpc_encoder_init, + .memusage = NULL, + .chunk_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_IA64 + { + .id = LZMA_FILTER_IA64, + .init = &lzma_simple_ia64_encoder_init, + .memusage = NULL, + .chunk_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_ARM + { + .id = LZMA_FILTER_ARM, + .init = &lzma_simple_arm_encoder_init, + .memusage = NULL, + .chunk_size = NULL, + .props_size_get = &lzma_simple_props_size, + 
.props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_ARMTHUMB + { + .id = LZMA_FILTER_ARMTHUMB, + .init = &lzma_simple_armthumb_encoder_init, + .memusage = NULL, + .chunk_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_SPARC + { + .id = LZMA_FILTER_SPARC, + .init = &lzma_simple_sparc_encoder_init, + .memusage = NULL, + .chunk_size = NULL, + .props_size_get = &lzma_simple_props_size, + .props_encode = &lzma_simple_props_encode, + }, +#endif +#ifdef HAVE_ENCODER_DELTA + { + .id = LZMA_FILTER_DELTA, + .init = &lzma_delta_encoder_init, + .memusage = &lzma_delta_coder_memusage, + .chunk_size = NULL, + .props_size_get = NULL, + .props_size_fixed = 1, + .props_encode = &lzma_delta_props_encode, + }, +#endif +}; + + +static const lzma_filter_encoder * +encoder_find(lzma_vli id) +{ + for (size_t i = 0; i < ARRAY_SIZE(encoders); ++i) + if (encoders[i].id == id) + return encoders + i; + + return NULL; +} + + +extern LZMA_API(lzma_bool) +lzma_filter_encoder_is_supported(lzma_vli id) +{ + return encoder_find(id) != NULL; +} + + +extern LZMA_API(lzma_ret) +lzma_filters_update(lzma_stream *strm, const lzma_filter *filters) +{ + if (strm->internal->next.update == NULL) + return LZMA_PROG_ERROR; + + // Validate the filter chain. + if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) + return LZMA_OPTIONS_ERROR; + + // The actual filter chain in the encoder is reversed. Some things + // still want the normal order chain, so we provide both. 
+ size_t count = 1; + while (filters[count].id != LZMA_VLI_UNKNOWN) + ++count; + + lzma_filter reversed_filters[LZMA_FILTERS_MAX + 1]; + for (size_t i = 0; i < count; ++i) + reversed_filters[count - i - 1] = filters[i]; + + reversed_filters[count].id = LZMA_VLI_UNKNOWN; + + return strm->internal->next.update(strm->internal->next.coder, + strm->allocator, filters, reversed_filters); +} + + +extern lzma_ret +lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *options) +{ + return lzma_raw_coder_init(next, allocator, + options, (lzma_filter_find)(&encoder_find), true); +} + + +extern LZMA_API(lzma_ret) +lzma_raw_encoder(lzma_stream *strm, const lzma_filter *options) +{ + lzma_next_strm_init(lzma_raw_coder_init, strm, options, + (lzma_filter_find)(&encoder_find), true); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API(uint64_t) +lzma_raw_encoder_memusage(const lzma_filter *filters) +{ + return lzma_raw_coder_memusage( + (lzma_filter_find)(&encoder_find), filters); +} + + +extern LZMA_API(lzma_vli) +lzma_chunk_size(const lzma_filter *filters) +{ + lzma_vli max = 0; + + for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + const lzma_filter_encoder *const fe + = encoder_find(filters[i].id); + if (fe == NULL) // Unsupported Filter ID: fail, don't dereference NULL. + return LZMA_VLI_UNKNOWN; + if (fe->chunk_size != NULL) { + const lzma_vli size = fe->chunk_size(filters[i].options); + if (size == LZMA_VLI_UNKNOWN) + return LZMA_VLI_UNKNOWN; + if (size > max) + max = size; + } + } + + return max; +} + + +extern LZMA_API(lzma_ret) +lzma_properties_size(uint32_t *size, const lzma_filter *filter) +{ + const lzma_filter_encoder *const fe = encoder_find(filter->id); + if (fe == NULL) { + // Unknown filter - if the Filter ID is a proper VLI, + // return LZMA_OPTIONS_ERROR instead of LZMA_PROG_ERROR, + // because it's possible that we just don't have support
// compiled in for the requested filter. + return filter->id <= LZMA_VLI_MAX + ? LZMA_OPTIONS_ERROR : LZMA_PROG_ERROR; + } + + if (fe->props_size_get == NULL) { + // No props_size_get() function, use props_size_fixed. + *size = fe->props_size_fixed; + return LZMA_OK; + } + + return fe->props_size_get(size, filter->options); +} + + +extern LZMA_API(lzma_ret) +lzma_properties_encode(const lzma_filter *filter, uint8_t *props) +{ + const lzma_filter_encoder *const fe = encoder_find(filter->id); + if (fe == NULL) + return LZMA_PROG_ERROR; + + if (fe->props_encode == NULL) + return LZMA_OK; + + return fe->props_encode(filter->options, props); +} Index: contrib/xz/src/liblzma/common/stream_decoder.h =================================================================== --- contrib/xz/src/liblzma/common/stream_decoder.h (revision 0) +++ contrib/xz/src/liblzma/common/stream_decoder.h (revision 0) @@ -0,0 +1,21 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_decoder.h +/// \brief Decodes .xz Streams +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_STREAM_DECODER_H +#define LZMA_STREAM_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_stream_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, uint64_t memlimit, uint32_t flags); + +#endif Index: contrib/xz/src/liblzma/common/filter_flags_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/filter_flags_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/filter_flags_decoder.c (revision 0) @@ -0,0 +1,46 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_flags_decoder.c +/// \brief Decodes a Filter Flags field +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_filter_flags_decode( + lzma_filter *filter, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) +{ + // Set the pointer to NULL so the caller can always safely free it. 
+ filter->options = NULL; + + // Filter ID + return_if_error(lzma_vli_decode(&filter->id, NULL, + in, in_pos, in_size)); + + if (filter->id >= LZMA_FILTER_RESERVED_START) + return LZMA_DATA_ERROR; + + // Size of Properties + lzma_vli props_size; + return_if_error(lzma_vli_decode(&props_size, NULL, + in, in_pos, in_size)); + + // Filter Properties + if (in_size - *in_pos < props_size) + return LZMA_DATA_ERROR; + + const lzma_ret ret = lzma_properties_decode( + filter, allocator, in + *in_pos, props_size); + + *in_pos += props_size; + + return ret; +} Index: contrib/xz/src/liblzma/common/block_header_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/block_header_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/block_header_decoder.c (revision 0) @@ -0,0 +1,116 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header_decoder.c +/// \brief Decodes Block Header from .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "check.h" + + +static void +free_properties(lzma_block *block, lzma_allocator *allocator) +{ + // Free allocated filter options. The last array member is not + // touched after the initialization in the beginning of + // lzma_block_header_decode(), so we don't need to touch that here. 
+ for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) { + lzma_free(block->filters[i].options, allocator); + block->filters[i].id = LZMA_VLI_UNKNOWN; + block->filters[i].options = NULL; + } + + return; +} + + +extern LZMA_API(lzma_ret) +lzma_block_header_decode(lzma_block *block, + lzma_allocator *allocator, const uint8_t *in) +{ + // NOTE: We consider the header to be corrupt not only when the + // CRC32 doesn't match, but also when variable-length integers + // are invalid or over 63 bits, or if the header is too small + // to contain the claimed information. + + // Initialize the filter options array. This way the caller can + // safely free() the options even if an error occurs in this function. + for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) { + block->filters[i].id = LZMA_VLI_UNKNOWN; + block->filters[i].options = NULL; + } + + // Always zero for now. + block->version = 0; + + // Validate Block Header Size and Check type. The caller must have + // already set these, so it is a programming error if this test fails. + if (lzma_block_header_size_decode(in[0]) != block->header_size + || (unsigned int)(block->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + // Exclude the CRC32 field. + const size_t in_size = block->header_size - 4; + + // Verify CRC32 + if (lzma_crc32(in, in_size, 0) != unaligned_read32le(in + in_size)) + return LZMA_DATA_ERROR; + + // Check for unsupported flags. + if (in[1] & 0x3C) + return LZMA_OPTIONS_ERROR; + + // Start after the Block Header Size and Block Flags fields. + size_t in_pos = 2; + + // Compressed Size + if (in[1] & 0x40) { + return_if_error(lzma_vli_decode(&block->compressed_size, + NULL, in, &in_pos, in_size)); + + // Validate Compressed Size. This checks that it isn't zero + // and that the total size of the Block is a valid VLI. 
+ if (lzma_block_unpadded_size(block) == 0) + return LZMA_DATA_ERROR; + } else { + block->compressed_size = LZMA_VLI_UNKNOWN; + } + + // Uncompressed Size + if (in[1] & 0x80) + return_if_error(lzma_vli_decode(&block->uncompressed_size, + NULL, in, &in_pos, in_size)); + else + block->uncompressed_size = LZMA_VLI_UNKNOWN; + + // Filter Flags + const size_t filter_count = (in[1] & 3) + 1; + for (size_t i = 0; i < filter_count; ++i) { + const lzma_ret ret = lzma_filter_flags_decode( + &block->filters[i], allocator, + in, &in_pos, in_size); + if (ret != LZMA_OK) { + free_properties(block, allocator); + return ret; + } + } + + // Padding + while (in_pos < in_size) { + if (in[in_pos++] != 0x00) { + free_properties(block, allocator); + + // Possibly some new field present so use + // LZMA_OPTIONS_ERROR instead of LZMA_DATA_ERROR. + return LZMA_OPTIONS_ERROR; + } + } + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/alone_decoder.h =================================================================== --- contrib/xz/src/liblzma/common/alone_decoder.h (revision 0) +++ contrib/xz/src/liblzma/common/alone_decoder.h (revision 0) @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.h +/// \brief Decoder for LZMA_Alone files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_ALONE_DECODER_H +#define LZMA_ALONE_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, uint64_t memlimit); + +#endif Index: contrib/xz/src/liblzma/common/block_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/block_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/block_encoder.c (revision 0) @@ -0,0 +1,212 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_encoder.c +/// \brief Encodes .xz Blocks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_encoder.h" +#include "filter_encoder.h" +#include "check.h" + + +struct lzma_coder_s { + /// The filters in the chain; initialized with lzma_raw_encoder_init(). + lzma_next_coder next; + + /// Encoding options; we also write Unpadded Size, Compressed Size, + /// and Uncompressed Size back to this structure when the encoding + /// has been finished. + lzma_block *block; + + enum { + SEQ_CODE, + SEQ_PADDING, + SEQ_CHECK, + } sequence; + + /// Compressed Size calculated while encoding + lzma_vli compressed_size; + + /// Uncompressed Size calculated while encoding + lzma_vli uncompressed_size; + + /// Position in the Check field + size_t pos; + + /// Check of the uncompressed data + lzma_check_state check; +}; + + +static lzma_ret +block_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Check that our amount of input stays in proper limits.
+ if (LZMA_VLI_MAX - coder->uncompressed_size < in_size - *in_pos) + return LZMA_DATA_ERROR; + + switch (coder->sequence) { + case SEQ_CODE: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + const size_t in_used = *in_pos - in_start; + const size_t out_used = *out_pos - out_start; + + if (COMPRESSED_SIZE_MAX - coder->compressed_size < out_used) + return LZMA_DATA_ERROR; + + coder->compressed_size += out_used; + + // No need to check for overflow because we have already + // checked it at the beginning of this function. + coder->uncompressed_size += in_used; + + lzma_check_update(&coder->check, coder->block->check, + in + in_start, in_used); + + if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) + return ret; + + assert(*in_pos == in_size); + assert(action == LZMA_FINISH); + + // Copy the values into coder->block. The caller + // may use this information to construct Index. + coder->block->compressed_size = coder->compressed_size; + coder->block->uncompressed_size = coder->uncompressed_size; + + coder->sequence = SEQ_PADDING; + } + + // Fall through + + case SEQ_PADDING: + // Pad Compressed Data to a multiple of four bytes. We can + // use coder->compressed_size for this since we don't need + // it for anything else anymore. 
+ while (coder->compressed_size & 3) { + if (*out_pos >= out_size) + return LZMA_OK; + + out[*out_pos] = 0x00; + ++*out_pos; + ++coder->compressed_size; + } + + if (coder->block->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; + + lzma_check_finish(&coder->check, coder->block->check); + + coder->sequence = SEQ_CHECK; + + // Fall through + + case SEQ_CHECK: { + const size_t check_size = lzma_check_size(coder->block->check); + lzma_bufcpy(coder->check.buffer.u8, &coder->pos, check_size, + out, out_pos, out_size); + if (coder->pos < check_size) + return LZMA_OK; + + memcpy(coder->block->raw_check, coder->check.buffer.u8, + check_size); + return LZMA_STREAM_END; + } + } + + return LZMA_PROG_ERROR; +} + + +static void +block_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +block_encoder_update(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters lzma_attribute((unused)), + const lzma_filter *reversed_filters) +{ + if (coder->sequence != SEQ_CODE) + return LZMA_PROG_ERROR; + + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters); +} + + +extern lzma_ret +lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_block *block) +{ + lzma_next_coder_init(&lzma_block_encoder_init, next, allocator); + + if (block->version != 0) + return LZMA_OPTIONS_ERROR; + + // If the Check ID is not supported, we cannot calculate the check and + // thus not create a proper Block. + if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + if (!lzma_check_is_supported(block->check)) + return LZMA_UNSUPPORTED_CHECK; + + // Allocate and initialize *next->coder if needed. 
+ if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &block_encode; + next->end = &block_encoder_end; + next->update = &block_encoder_update; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + next->coder->sequence = SEQ_CODE; + next->coder->block = block; + next->coder->compressed_size = 0; + next->coder->uncompressed_size = 0; + next->coder->pos = 0; + + // Initialize the check + lzma_check_init(&next->coder->check, block->check); + + // Initialize the requested filters. + return lzma_raw_encoder_init(&next->coder->next, allocator, + block->filters); +} + + +extern LZMA_API(lzma_ret) +lzma_block_encoder(lzma_stream *strm, lzma_block *block) +{ + lzma_next_strm_init(lzma_block_encoder_init, strm, block); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/filter_encoder.h =================================================================== --- contrib/xz/src/liblzma/common/filter_encoder.h (revision 0) +++ contrib/xz/src/liblzma/common/filter_encoder.h (revision 0) @@ -0,0 +1,27 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_encoder.h +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FILTER_ENCODER_H +#define LZMA_FILTER_ENCODER_H + +#include "common.h" + + +// FIXME !!!
Public API +extern lzma_vli lzma_chunk_size(const lzma_filter *filters); + + +extern lzma_ret lzma_raw_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *filters); + +#endif Index: contrib/xz/src/liblzma/common/easy_buffer_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/easy_buffer_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/easy_buffer_encoder.c (revision 0) @@ -0,0 +1,27 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_buffer_encoder.c +/// \brief Easy single-call .xz Stream encoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" + + +extern LZMA_API(lzma_ret) +lzma_easy_buffer_encode(uint32_t preset, lzma_check check, + lzma_allocator *allocator, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + lzma_options_easy opt_easy; + if (lzma_easy_preset(&opt_easy, preset)) + return LZMA_OPTIONS_ERROR; + + return lzma_stream_buffer_encode(opt_easy.filters, check, + allocator, in, in_size, out, out_pos, out_size); +} Index: contrib/xz/src/liblzma/common/auto_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/auto_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/auto_decoder.c (revision 0) @@ -0,0 +1,186 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file auto_decoder.c +/// \brief Autodetect between .xz Stream and .lzma (LZMA_Alone) formats +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_decoder.h" +#include "alone_decoder.h" + + +struct lzma_coder_s { + /// Stream decoder or LZMA_Alone decoder + lzma_next_coder next; + + uint64_t memlimit; + uint32_t flags; + + enum { + SEQ_INIT, + SEQ_CODE, + SEQ_FINISH, + } sequence; +}; + + +static lzma_ret +auto_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + switch (coder->sequence) { + case SEQ_INIT: + if (*in_pos >= in_size) + return LZMA_OK; + + // Update the sequence now, because we want to continue from + // SEQ_CODE even if we return some LZMA_*_CHECK. + coder->sequence = SEQ_CODE; + + // Detect the file format. For now this is simple, since if + // it doesn't start with 0xFD (the first magic byte of the + // new format), it has to be LZMA_Alone, or something that + // we don't support at all. + if (in[*in_pos] == 0xFD) { + return_if_error(lzma_stream_decoder_init( + &coder->next, allocator, + coder->memlimit, coder->flags)); + } else { + return_if_error(lzma_alone_decoder_init(&coder->next, + allocator, coder->memlimit)); + + // If the application wants to know about missing + // integrity check or about the check in general, we + // need to handle it here, because LZMA_Alone decoder + // doesn't accept any flags. 
+ if (coder->flags & LZMA_TELL_NO_CHECK) + return LZMA_NO_CHECK; + + if (coder->flags & LZMA_TELL_ANY_CHECK) + return LZMA_GET_CHECK; + } + + // Fall through + + case SEQ_CODE: { + const lzma_ret ret = coder->next.code( + coder->next.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + if (ret != LZMA_STREAM_END + || (coder->flags & LZMA_CONCATENATED) == 0) + return ret; + + coder->sequence = SEQ_FINISH; + } + + // Fall through + + case SEQ_FINISH: + // When LZMA_CONCATENATED was used and we were decoding an + // LZMA_Alone file, we need to check that there is no + // trailing garbage and wait for LZMA_FINISH. + if (*in_pos < in_size) + return LZMA_DATA_ERROR; + + return action == LZMA_FINISH ? LZMA_STREAM_END : LZMA_OK; + + default: + assert(0); + return LZMA_PROG_ERROR; + } +} + + +static void +auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_check +auto_decoder_get_check(const lzma_coder *coder) +{ + // It is LZMA_Alone if get_check is NULL. + return coder->next.get_check == NULL ? LZMA_CHECK_NONE + : coder->next.get_check(coder->next.coder); +} + + +static lzma_ret +auto_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + lzma_ret ret; + + if (coder->next.memconfig != NULL) { + ret = coder->next.memconfig(coder->next.coder, + memusage, old_memlimit, new_memlimit); + assert(*old_memlimit == coder->memlimit); + } else { + // No coder is configured yet. Use the base value as + // the current memory usage.
+ *memusage = LZMA_MEMUSAGE_BASE; + *old_memlimit = coder->memlimit; + ret = LZMA_OK; + } + + if (ret == LZMA_OK && new_memlimit != 0) + coder->memlimit = new_memlimit; + + return ret; +} + + +static lzma_ret +auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags) +{ + lzma_next_coder_init(&auto_decoder_init, next, allocator); + + if (memlimit == 0) + return LZMA_PROG_ERROR; + + if (flags & ~LZMA_SUPPORTED_FLAGS) + return LZMA_OPTIONS_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &auto_decode; + next->end = &auto_decoder_end; + next->get_check = &auto_decoder_get_check; + next->memconfig = &auto_decoder_memconfig; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + next->coder->memlimit = memlimit; + next->coder->flags = flags; + next->coder->sequence = SEQ_INIT; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_auto_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) +{ + lzma_next_strm_init(auto_decoder_init, strm, memlimit, flags); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/block_encoder.h =================================================================== --- contrib/xz/src/liblzma/common/block_encoder.h (revision 0) +++ contrib/xz/src/liblzma/common/block_encoder.h (revision 0) @@ -0,0 +1,47 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_encoder.h +/// \brief Encodes .xz Blocks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_ENCODER_H +#define LZMA_BLOCK_ENCODER_H + +#include "common.h" + + +/// \brief Biggest Compressed Size value that the Block encoder supports +/// +/// The maximum size of a single Block is limited by the maximum size of +/// a Stream, which in theory is 2^63 - 3 bytes (i.e. LZMA_VLI_MAX - 3). +/// While the size is really big and no one should hit it in practice, we +/// take it into account in some places anyway to catch some errors e.g. if +/// an application passes an insanely big value to some function. +/// +/// We could take into account the headers etc. to determine the exact +/// maximum size of the Compressed Data field, but the complexity would give +/// us nothing useful. Instead, limit the size of Compressed Data so that +/// even with the biggest possible Block Header and Check fields the total +/// encoded size of the Block stays as a valid VLI. This doesn't guarantee +/// that the size of the Stream doesn't grow too big, but that problem is +/// taken care of outside the Block handling code. +/// +/// ~LZMA_VLI_C(3) is to guarantee that if we need padding at the end of +/// the Compressed Data field, it will still stay in the proper limit. +/// +/// This constant is in this file because it is needed in both +/// block_encoder.c and block_buffer_encoder.c.
+#define COMPRESSED_SIZE_MAX ((LZMA_VLI_MAX - LZMA_BLOCK_HEADER_SIZE_MAX \ + - LZMA_CHECK_SIZE_MAX) & ~LZMA_VLI_C(3)) + + +extern lzma_ret lzma_block_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_block *block); + +#endif Index: contrib/xz/src/liblzma/common/easy_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/easy_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/easy_encoder.c (revision 0) @@ -0,0 +1,25 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_encoder.c +/// \brief Easy .xz Stream encoder initialization +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" +#include "stream_encoder.h" + + +extern LZMA_API(lzma_ret) +lzma_easy_encoder(lzma_stream *strm, uint32_t preset, lzma_check check) +{ + lzma_options_easy opt_easy; + if (lzma_easy_preset(&opt_easy, preset)) + return LZMA_OPTIONS_ERROR; + + return lzma_stream_encoder(strm, opt_easy.filters, check); +} Index: contrib/xz/src/liblzma/common/filter_buffer_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/filter_buffer_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/filter_buffer_decoder.c (revision 0) @@ -0,0 +1,87 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_buffer_decoder.c +/// \brief Single-call raw decoding +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_decoder.h" + + +/// Decodes raw filter-chain data from in to out with a single call. If +/// decoding does not finish with LZMA_STREAM_END, *in_pos and *out_pos are +/// restored to the values they had on entry. +extern LZMA_API(lzma_ret) +lzma_raw_buffer_decode(const lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Validate what isn't validated later in filter_common.c. + if (in == NULL || in_pos == NULL || *in_pos > in_size || out == NULL + || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Initialize the decoder. + lzma_next_coder next = LZMA_NEXT_CODER_INIT; + return_if_error(lzma_raw_decoder_init(&next, allocator, filters)); + + // Store the positions so that we can restore them if something + // goes wrong. + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + // Do the actual decoding and free decoder's memory. + lzma_ret ret = next.code(next.coder, allocator, in, in_pos, in_size, + out, out_pos, out_size, LZMA_FINISH); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + if (ret == LZMA_OK) { + // Either the input was truncated or the + // output buffer was too small. + assert(*in_pos == in_size || *out_pos == out_size); + + if (*in_pos != in_size) { + // Since input wasn't consumed completely, + // the output buffer became full and is + // too small. + ret = LZMA_BUF_ERROR; + + } else if (*out_pos != out_size) { + // Since output didn't become full, the input + // has to be truncated. + ret = LZMA_DATA_ERROR; + + } else { + // All the input was consumed and output + // buffer is full. Now we don't immediately + // know the reason for the error. Try + // decoding one more byte. If it succeeds, + // then the output buffer was too small. If + // we cannot get a new output byte, the input + // is truncated.
+ uint8_t tmp[1]; + size_t tmp_pos = 0; + (void)next.code(next.coder, allocator, + in, in_pos, in_size, + tmp, &tmp_pos, 1, LZMA_FINISH); + + if (tmp_pos == 1) + ret = LZMA_BUF_ERROR; + else + ret = LZMA_DATA_ERROR; + } + } + + // Restore the positions. + *in_pos = in_start; + *out_pos = out_start; + } + + lzma_next_end(&next, allocator); + + return ret; +} Index: contrib/xz/src/liblzma/common/stream_flags_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/stream_flags_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/stream_flags_encoder.c (revision 0) @@ -0,0 +1,86 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_encoder.c +/// \brief Encodes Stream Header and Stream Footer for .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_flags_common.h" + + +static bool +stream_flags_encode(const lzma_stream_flags *options, uint8_t *out) +{ + if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) + return true; + + out[0] = 0x00; + out[1] = options->check; + + return false; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_header_encode(const lzma_stream_flags *options, uint8_t *out) +{ + assert(sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE + + 4 == LZMA_STREAM_HEADER_SIZE); + + if (options->version != 0) + return LZMA_OPTIONS_ERROR; + + // Magic + memcpy(out, lzma_header_magic, sizeof(lzma_header_magic)); + + // Stream Flags + if (stream_flags_encode(options, out + sizeof(lzma_header_magic))) + return LZMA_PROG_ERROR; + + // CRC32 of the Stream Header + const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); + + unaligned_write32le(out + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE, crc); + + return 
LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out) +{ + assert(2 * 4 + LZMA_STREAM_FLAGS_SIZE + sizeof(lzma_footer_magic) + == LZMA_STREAM_HEADER_SIZE); + + if (options->version != 0) + return LZMA_OPTIONS_ERROR; + + // Backward Size + if (!is_backward_size_valid(options)) + return LZMA_PROG_ERROR; + + unaligned_write32le(out + 4, options->backward_size / 4 - 1); + + // Stream Flags + if (stream_flags_encode(options, out + 2 * 4)) + return LZMA_PROG_ERROR; + + // CRC32 + const uint32_t crc = lzma_crc32( + out + 4, 4 + LZMA_STREAM_FLAGS_SIZE, 0); + + unaligned_write32le(out, crc); + + // Magic + memcpy(out + 2 * 4 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)); + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/index_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/index_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/index_encoder.c (revision 0) @@ -0,0 +1,252 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.c +/// \brief Encodes the Index field +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "index_encoder.h" +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_NEXT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Index being encoded + const lzma_index *index; + + /// Iterator for the Index being encoded + lzma_index_iter iter; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in lzma_attribute((unused)), + size_t *restrict in_pos lzma_attribute((unused)), + size_t in_size lzma_attribute((unused)), + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action lzma_attribute((unused))) +{ + // Position where to start calculating CRC32. The idea is that we + // need to call lzma_crc32() only once per call to index_encode(). + const size_t out_start = *out_pos; + + // Return value to use if we return at the end of this function. + // We use "goto out" to jump out of the while-switch construct + // instead of returning directly, because that way we don't need + // to copypaste the lzma_crc32() call to many places. + lzma_ret ret = LZMA_OK; + + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + out[*out_pos] = 0x00; + ++*out_pos; + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + const lzma_vli count = lzma_index_block_count(coder->index); + ret = lzma_vli_encode(count, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = SEQ_NEXT; + break; + } + + case SEQ_NEXT: + if (lzma_index_iter_next( + &coder->iter, LZMA_INDEX_ITER_BLOCK)) { + // Get the size of the Index Padding field. 
+ coder->pos = lzma_index_padding_size(coder->index); + assert(coder->pos <= 3); + coder->sequence = SEQ_PADDING; + break; + } + + coder->sequence = SEQ_UNPADDED; + + // Fall through + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + const lzma_vli size = coder->sequence == SEQ_UNPADDED + ? coder->iter.block.unpadded_size + : coder->iter.block.uncompressed_size; + + ret = lzma_vli_encode(size, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + // Advance to SEQ_UNCOMPRESSED or SEQ_NEXT. + ++coder->sequence; + break; + } + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + out[(*out_pos)++] = 0x00; + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + // We don't use the main loop, because we don't want + // coder->crc32 to be touched anymore. + do { + if (*out_pos == out_size) + return LZMA_OK; + + out[*out_pos] = (coder->crc32 >> (coder->pos * 8)) + & 0xFF; + ++*out_pos; + + } while (++coder->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. 
+ coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + return ret; +} + + +static void +index_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +static void +index_encoder_reset(lzma_coder *coder, const lzma_index *i) +{ + lzma_index_iter_init(&coder->iter, i); + + coder->sequence = SEQ_INDICATOR; + coder->index = i; + coder->pos = 0; + coder->crc32 = 0; + + return; +} + + +extern lzma_ret +lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_index *i) +{ + lzma_next_coder_init(&lzma_index_encoder_init, next, allocator); + + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_encode; + next->end = &index_encoder_end; + } + + index_encoder_reset(next->coder, i); + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_encoder(lzma_stream *strm, const lzma_index *i) +{ + lzma_next_strm_init(lzma_index_encoder_init, strm, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_buffer_encode(const lzma_index *i, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Validate the arguments. + if (i == NULL || out == NULL || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Don't try to encode if there's not enough output space. + if (out_size - *out_pos < lzma_index_size(i)) + return LZMA_BUF_ERROR; + + // The Index encoder needs just one small data structure so we can + // allocate it on stack. + lzma_coder coder; + index_encoder_reset(&coder, i); + + // Do the actual encoding. This should never fail, but store + // the original *out_pos just in case. 
+ const size_t out_start = *out_pos; + lzma_ret ret = index_encode(&coder, NULL, NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + // We should never get here, but just in case, restore the + // output position and set the error accordingly if something + // goes wrong and debugging isn't enabled. + assert(0); + *out_pos = out_start; + ret = LZMA_PROG_ERROR; + } + + return ret; +} Index: contrib/xz/src/liblzma/common/filter_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/filter_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/filter_decoder.c (revision 0) @@ -0,0 +1,199 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_decoder.c +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_decoder.h" +#include "filter_common.h" +#include "lzma_decoder.h" +#include "lzma2_decoder.h" +#include "subblock_decoder.h" +#include "subblock_decoder_helper.h" +#include "simple_decoder.h" +#include "delta_decoder.h" + + +typedef struct { + /// Filter ID + lzma_vli id; + + /// Initializes the filter decoder and calls lzma_next_filter_init() + /// for filters + 1. + lzma_init_function init; + + /// Calculates memory usage of the decoder. If the options are + /// invalid, UINT64_MAX is returned. + uint64_t (*memusage)(const void *options); + + /// Decodes Filter Properties. + /// + /// \return - LZMA_OK: Properties decoded successfully. + /// - LZMA_OPTIONS_ERROR: Unsupported properties + /// - LZMA_MEM_ERROR: Memory allocation failed.
+ lzma_ret (*props_decode)(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +} lzma_filter_decoder; + + +static const lzma_filter_decoder decoders[] = { +#ifdef HAVE_DECODER_LZMA1 + { + .id = LZMA_FILTER_LZMA1, + .init = &lzma_lzma_decoder_init, + .memusage = &lzma_lzma_decoder_memusage, + .props_decode = &lzma_lzma_props_decode, + }, +#endif +#ifdef HAVE_DECODER_LZMA2 + { + .id = LZMA_FILTER_LZMA2, + .init = &lzma_lzma2_decoder_init, + .memusage = &lzma_lzma2_decoder_memusage, + .props_decode = &lzma_lzma2_props_decode, + }, +#endif +#ifdef HAVE_DECODER_SUBBLOCK + { + .id = LZMA_FILTER_SUBBLOCK, + .init = &lzma_subblock_decoder_init, +// .memusage = &lzma_subblock_decoder_memusage, + .props_decode = NULL, + }, + { + .id = LZMA_FILTER_SUBBLOCK_HELPER, + .init = &lzma_subblock_decoder_helper_init, + .memusage = NULL, + .props_decode = NULL, + }, +#endif +#ifdef HAVE_DECODER_X86 + { + .id = LZMA_FILTER_X86, + .init = &lzma_simple_x86_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_POWERPC + { + .id = LZMA_FILTER_POWERPC, + .init = &lzma_simple_powerpc_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_IA64 + { + .id = LZMA_FILTER_IA64, + .init = &lzma_simple_ia64_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_ARM + { + .id = LZMA_FILTER_ARM, + .init = &lzma_simple_arm_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_ARMTHUMB + { + .id = LZMA_FILTER_ARMTHUMB, + .init = &lzma_simple_armthumb_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_SPARC + { + .id = LZMA_FILTER_SPARC, + .init = &lzma_simple_sparc_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef 
HAVE_DECODER_DELTA + { + .id = LZMA_FILTER_DELTA, + .init = &lzma_delta_decoder_init, + .memusage = &lzma_delta_coder_memusage, + .props_decode = &lzma_delta_props_decode, + }, +#endif +}; + + +static const lzma_filter_decoder * +decoder_find(lzma_vli id) +{ + for (size_t i = 0; i < ARRAY_SIZE(decoders); ++i) + if (decoders[i].id == id) + return decoders + i; + + return NULL; +} + + +extern LZMA_API(lzma_bool) +lzma_filter_decoder_is_supported(lzma_vli id) +{ + return decoder_find(id) != NULL; +} + + +extern lzma_ret +lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *options) +{ + return lzma_raw_coder_init(next, allocator, + options, (lzma_filter_find)(&decoder_find), false); +} + + +extern LZMA_API(lzma_ret) +lzma_raw_decoder(lzma_stream *strm, const lzma_filter *options) +{ + lzma_next_strm_init(lzma_raw_decoder_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API(uint64_t) +lzma_raw_decoder_memusage(const lzma_filter *filters) +{ + return lzma_raw_coder_memusage( + (lzma_filter_find)(&decoder_find), filters); +} + + +extern LZMA_API(lzma_ret) +lzma_properties_decode(lzma_filter *filter, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + // Make it always NULL so that the caller can always safely free() it. + filter->options = NULL; + + const lzma_filter_decoder *const fd = decoder_find(filter->id); + if (fd == NULL) + return LZMA_OPTIONS_ERROR; + + if (fd->props_decode == NULL) + return props_size == 0 ? 
LZMA_OK : LZMA_OPTIONS_ERROR; + + return fd->props_decode( + &filter->options, allocator, props, props_size); +} Index: contrib/xz/src/liblzma/common/index_hash.c =================================================================== --- contrib/xz/src/liblzma/common/index_hash.c (revision 0) +++ contrib/xz/src/liblzma/common/index_hash.c (revision 0) @@ -0,0 +1,332 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_hash.c +/// \brief Validates Index by using a hash function +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" +#include "check.h" + + +typedef struct { + /// Sum of the Block sizes (including Block Padding) + lzma_vli blocks_size; + + /// Sum of the Uncompressed Size fields + lzma_vli uncompressed_size; + + /// Number of Records + lzma_vli count; + + /// Size of the List of Index Records as bytes + lzma_vli index_list_size; + + /// Check calculated from Unpadded Sizes and Uncompressed Sizes. + lzma_check_state check; + +} lzma_index_hash_info; + + +struct lzma_index_hash_s { + enum { + SEQ_BLOCK, + SEQ_COUNT, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Information collected while decoding the actual Blocks. + lzma_index_hash_info blocks; + + /// Information collected from the Index field. + lzma_index_hash_info records; + + /// Number of Records not fully decoded + lzma_vli remaining; + + /// Unpadded Size currently being read from an Index Record. + lzma_vli unpadded_size; + + /// Uncompressed Size currently being read from an Index Record. + lzma_vli uncompressed_size; + + /// Position in variable-length integers when decoding them from + /// the List of Records. 
+ size_t pos; + + /// CRC32 of the Index + uint32_t crc32; +}; + + +extern LZMA_API(lzma_index_hash *) +lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + if (index_hash == NULL) { + index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); + if (index_hash == NULL) + return NULL; + } + + index_hash->sequence = SEQ_BLOCK; + index_hash->blocks.blocks_size = 0; + index_hash->blocks.uncompressed_size = 0; + index_hash->blocks.count = 0; + index_hash->blocks.index_list_size = 0; + index_hash->records.blocks_size = 0; + index_hash->records.uncompressed_size = 0; + index_hash->records.count = 0; + index_hash->records.index_list_size = 0; + index_hash->unpadded_size = 0; + index_hash->uncompressed_size = 0; + index_hash->pos = 0; + index_hash->crc32 = 0; + + // These cannot fail because LZMA_CHECK_BEST is known to be supported. + (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); + (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); + + return index_hash; +} + + +extern LZMA_API(void) +lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + lzma_free(index_hash, allocator); + return; +} + + +extern LZMA_API(lzma_vli) +lzma_index_hash_size(const lzma_index_hash *index_hash) +{ + // Get the size of the Index from ->blocks instead of ->records for + // cases where application wants to know the Index Size before + // decoding the Index. + return index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size); +} + + +/// Updates the sizes and the hash without any validation. 
+static lzma_ret +hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, + lzma_vli uncompressed_size) +{ + info->blocks_size += vli_ceil4(unpadded_size); + info->uncompressed_size += uncompressed_size; + info->index_list_size += lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); + ++info->count; + + const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; + lzma_check_update(&info->check, LZMA_CHECK_BEST, + (const uint8_t *)(sizes), sizeof(sizes)); + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, + lzma_vli uncompressed_size) +{ + // Validate the arguments. + if (index_hash->sequence != SEQ_BLOCK + || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX + || uncompressed_size > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + // Update the hash. + return_if_error(hash_append(&index_hash->blocks, + unpadded_size, uncompressed_size)); + + // Validate the properties of *info are still in allowed limits. + if (index_hash->blocks.blocks_size > LZMA_VLI_MAX + || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX + || index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_BACKWARD_SIZE_MAX + || index_stream_size(index_hash->blocks.blocks_size, + index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + // Catch zero input buffer here, because in contrast to Index encoder + // and decoder functions, applications call this function directly + // instead of via lzma_code(), which does the buffer checking. + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + + // NOTE: This function has many similarities to index_encode() and + // index_decode() functions found from index_encoder.c and + // index_decoder.c. 
See the comments especially in index_encoder.c. + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (index_hash->sequence) { + case SEQ_BLOCK: + // Check the Index Indicator is present. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&index_hash->remaining, + &index_hash->pos, in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + // The count must match the count of the Blocks decoded. + if (index_hash->remaining != index_hash->blocks.count) + return LZMA_DATA_ERROR; + + ret = LZMA_OK; + index_hash->pos = 0; + + // Handle the special case when there are no Blocks. + index_hash->sequence = index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + break; + } + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = index_hash->sequence == SEQ_UNPADDED + ? &index_hash->unpadded_size + : &index_hash->uncompressed_size; + + ret = lzma_vli_decode(size, &index_hash->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + index_hash->pos = 0; + + if (index_hash->sequence == SEQ_UNPADDED) { + if (index_hash->unpadded_size < UNPADDED_SIZE_MIN + || index_hash->unpadded_size + > UNPADDED_SIZE_MAX) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_UNCOMPRESSED; + } else { + // Update the hash. + return_if_error(hash_append(&index_hash->records, + index_hash->unpadded_size, + index_hash->uncompressed_size)); + + // Verify that we don't go over the known sizes. Note + // that this validation is simpler than the one used + // in lzma_index_hash_append(), because here we know + // that values in index_hash->blocks are already + // validated and we are fine as long as we don't + // exceed them in index_hash->records. 
+ if (index_hash->blocks.blocks_size + < index_hash->records.blocks_size + || index_hash->blocks.uncompressed_size + < index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + < index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Check if this was the last Record. + index_hash->sequence = --index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + } + + break; + } + + case SEQ_PADDING_INIT: + index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( + index_hash->records.count, + index_hash->records.index_list_size)) & 3; + index_hash->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (index_hash->pos > 0) { + --index_hash->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Compare the sizes. + if (index_hash->blocks.blocks_size + != index_hash->records.blocks_size + || index_hash->blocks.uncompressed_size + != index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + != index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Finish the hashes and compare them. + lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); + lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); + if (memcmp(index_hash->blocks.check.buffer.u8, + index_hash->records.check.buffer.u8, + lzma_check_size(LZMA_CHECK_BEST)) != 0) + return LZMA_DATA_ERROR; + + // Finish the CRC32 calculation. 
+ index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + index_hash->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((index_hash->crc32 >> (index_hash->pos * 8)) + & 0xFF) != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++index_hash->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + return ret; +} Index: contrib/xz/src/liblzma/common/index_encoder.h =================================================================== --- contrib/xz/src/liblzma/common/index_encoder.h (revision 0) +++ contrib/xz/src/liblzma/common/index_encoder.h (revision 0) @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.h +/// \brief Encodes the Index field +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_ENCODER_H +#define LZMA_INDEX_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_index *i); + + +#endif Index: contrib/xz/src/liblzma/common/filter_decoder.h =================================================================== --- contrib/xz/src/liblzma/common/filter_decoder.h (revision 0) +++ contrib/xz/src/liblzma/common/filter_decoder.h (revision 0) @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_decoder.h +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain.
+// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FILTER_DECODER_H +#define LZMA_FILTER_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_raw_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *options); + +#endif Index: contrib/xz/src/liblzma/common/block_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/block_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/block_decoder.c (revision 0) @@ -0,0 +1,242 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_decoder.c +/// \brief Decodes .xz Blocks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_decoder.h" +#include "filter_decoder.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_CODE, + SEQ_PADDING, + SEQ_CHECK, + } sequence; + + /// The filters in the chain; initialized with lzma_raw_decoder_init(). + lzma_next_coder next; + + /// Decoding options; we also write Compressed Size and Uncompressed + /// Size back to this structure when the decoding has been finished. + lzma_block *block; + + /// Compressed Size calculated while decoding + lzma_vli compressed_size; + + /// Uncompressed Size calculated while decoding + lzma_vli uncompressed_size; + + /// Maximum allowed Compressed Size; this takes into account the + /// size of the Block Header and Check fields when Compressed Size + /// is unknown. 
+ lzma_vli compressed_limit; + + /// Position when reading the Check field + size_t check_pos; + + /// Check of the uncompressed data + lzma_check_state check; +}; + + +static inline bool +update_size(lzma_vli *size, lzma_vli add, lzma_vli limit) +{ + if (limit > LZMA_VLI_MAX) + limit = LZMA_VLI_MAX; + + if (limit < *size || limit - *size < add) + return true; + + *size += add; + + return false; +} + + +static inline bool +is_size_valid(lzma_vli size, lzma_vli reference) +{ + return reference == LZMA_VLI_UNKNOWN || reference == size; +} + + +static lzma_ret +block_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + switch (coder->sequence) { + case SEQ_CODE: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + const size_t in_used = *in_pos - in_start; + const size_t out_used = *out_pos - out_start; + + // NOTE: We compare to compressed_limit here, which prevents + // the total size of the Block growing past LZMA_VLI_MAX. + if (update_size(&coder->compressed_size, in_used, + coder->compressed_limit) + || update_size(&coder->uncompressed_size, + out_used, + coder->block->uncompressed_size)) + return LZMA_DATA_ERROR; + + lzma_check_update(&coder->check, coder->block->check, + out + out_start, out_used); + + if (ret != LZMA_STREAM_END) + return ret; + + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->block->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->block->uncompressed_size)) + return LZMA_DATA_ERROR; + + // Copy the values into coder->block. 
The caller + // may use this information to construct Index. + coder->block->compressed_size = coder->compressed_size; + coder->block->uncompressed_size = coder->uncompressed_size; + + coder->sequence = SEQ_PADDING; + } + + // Fall through + + case SEQ_PADDING: + // Compressed Data is padded to a multiple of four bytes. + while (coder->compressed_size & 3) { + if (*in_pos >= in_size) + return LZMA_OK; + + // We use compressed_size here just to get the Padding + // right. The actual Compressed Size was stored to + // coder->block already, and won't be modified by + // us anymore. + ++coder->compressed_size; + + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + } + + if (coder->block->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; + + lzma_check_finish(&coder->check, coder->block->check); + coder->sequence = SEQ_CHECK; + + // Fall through + + case SEQ_CHECK: { + const size_t check_size = lzma_check_size(coder->block->check); + lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check, + &coder->check_pos, check_size); + if (coder->check_pos < check_size) + return LZMA_OK; + + // Validate the Check only if we support it. + // coder->check.buffer may be uninitialized + // when the Check ID is not supported. + if (lzma_check_is_supported(coder->block->check) + && memcmp(coder->block->raw_check, + coder->check.buffer.u8, + check_size) != 0) + return LZMA_DATA_ERROR; + + return LZMA_STREAM_END; + } + } + + return LZMA_PROG_ERROR; +} + + +static void +block_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_block *block) +{ + lzma_next_coder_init(&lzma_block_decoder_init, next, allocator); + + // Validate the options. lzma_block_unpadded_size() does that for us + // except for Uncompressed Size and filters. Filters are validated + // by the raw decoder.
+ if (lzma_block_unpadded_size(block) == 0 + || !lzma_vli_is_valid(block->uncompressed_size)) + return LZMA_PROG_ERROR; + + // Allocate and initialize *next->coder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &block_decode; + next->end = &block_decoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + next->coder->sequence = SEQ_CODE; + next->coder->block = block; + next->coder->compressed_size = 0; + next->coder->uncompressed_size = 0; + + // If Compressed Size is not known, we calculate the maximum allowed + // value so that encoded size of the Block (including Block Padding) + // is still a valid VLI and a multiple of four. + next->coder->compressed_limit + = block->compressed_size == LZMA_VLI_UNKNOWN + ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) + - block->header_size + - lzma_check_size(block->check) + : block->compressed_size; + + // Initialize the check. It's caller's problem if the Check ID is not + // supported, and the Block decoder cannot verify the Check field. + // Caller can test lzma_check_is_supported(block->check). + next->coder->check_pos = 0; + lzma_check_init(&next->coder->check, block->check); + + // Initialize the filter chain. 
+ return lzma_raw_decoder_init(&next->coder->next, allocator, + block->filters); +} + + +extern LZMA_API(lzma_ret) +lzma_block_decoder(lzma_stream *strm, lzma_block *block) +{ + lzma_next_strm_init(lzma_block_decoder_init, strm, block); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/block_buffer_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/block_buffer_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/block_buffer_encoder.c (revision 0) @@ -0,0 +1,299 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_encoder.c +/// \brief Single-call .xz Block encoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_encoder.h" +#include "filter_encoder.h" +#include "lzma2_encoder.h" +#include "check.h" + + +/// Estimate the maximum size of the Block Header and Check fields for +/// a Block that uses LZMA2 uncompressed chunks. We could use +/// lzma_block_header_size() but this is simpler. +/// +/// Block Header Size + Block Flags + Compressed Size +/// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check +/// and round up to the next multiple of four to take Header Padding +/// into account. +#define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \ + + LZMA_CHECK_SIZE_MAX + 3) & ~3) + + +static lzma_vli +lzma2_bound(lzma_vli uncompressed_size) +{ + // Prevent integer overflow in overhead calculation. 
+ if (uncompressed_size > COMPRESSED_SIZE_MAX) + return 0; + + // Calculate the exact overhead of the LZMA2 headers: Round + // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX, + // multiply by the size of per-chunk header, and add one byte for + // the end marker. + const lzma_vli overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) + / LZMA2_CHUNK_MAX) + * LZMA2_HEADER_UNCOMPRESSED + 1; + + // Catch the possible integer overflow. + if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size) + return 0; + + return uncompressed_size + overhead; +} + + +extern LZMA_API(size_t) +lzma_block_buffer_bound(size_t uncompressed_size) +{ + // For now, if the data doesn't compress, we always use uncompressed + // chunks of LZMA2. In future we may use Subblock filter too, + // but for simplicity we probably will still use the same bound + // calculation even though Subblock filter would have slightly less + // overhead. + lzma_vli lzma2_size = lzma2_bound(uncompressed_size); + if (lzma2_size == 0) + return 0; + + // Take Block Padding into account. + lzma2_size = (lzma2_size + 3) & ~LZMA_VLI_C(3); + +#if SIZE_MAX < LZMA_VLI_MAX + // Catch the possible integer overflow on 32-bit systems. There's no + // overflow on 64-bit systems, because lzma2_bound() already takes + // into account the size of the headers in the Block. + if (SIZE_MAX - HEADERS_BOUND < lzma2_size) + return 0; +#endif + + return HEADERS_BOUND + lzma2_size; +} + + +static lzma_ret +block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // TODO: Figure out if the last filter is LZMA2 or Subblock and use + // that filter to encode the uncompressed chunks. + + // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at + // all, but LZMA2 always requires a dictionary, so use the minimum + // value to minimize memory usage of the decoder.
+ lzma_options_lzma lzma2 = { + .dict_size = LZMA_DICT_SIZE_MIN, + }; + + lzma_filter filters[2]; + filters[0].id = LZMA_FILTER_LZMA2; + filters[0].options = &lzma2; + filters[1].id = LZMA_VLI_UNKNOWN; + + // Set the above filter options to *block temporarily so that we can + // encode the Block Header. + lzma_filter *filters_orig = block->filters; + block->filters = filters; + + if (lzma_block_header_size(block) != LZMA_OK) { + block->filters = filters_orig; + return LZMA_PROG_ERROR; + } + + // Check that there's enough output space. The caller has already + // set block->compressed_size to what lzma2_bound() has returned, + // so we can reuse that value. We know that compressed_size is a + // known valid VLI and header_size is a small value so their sum + // will never overflow. + assert(block->compressed_size == lzma2_bound(in_size)); + if (out_size - *out_pos + < block->header_size + block->compressed_size) { + block->filters = filters_orig; + return LZMA_BUF_ERROR; + } + + if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) { + block->filters = filters_orig; + return LZMA_PROG_ERROR; + } + + block->filters = filters_orig; + *out_pos += block->header_size; + + // Encode the data using LZMA2 uncompressed chunks. + size_t in_pos = 0; + uint8_t control = 0x01; // Dictionary reset + + while (in_pos < in_size) { + // Control byte: Indicate uncompressed chunk, of which + // the first resets the dictionary. 
+ out[(*out_pos)++] = control; + control = 0x02; // No dictionary reset + + // Size of the uncompressed chunk + const size_t copy_size + = MIN(in_size - in_pos, LZMA2_CHUNK_MAX); + out[(*out_pos)++] = (copy_size - 1) >> 8; + out[(*out_pos)++] = (copy_size - 1) & 0xFF; + + // The actual data + assert(*out_pos + copy_size <= out_size); + memcpy(out + *out_pos, in + in_pos, copy_size); + + in_pos += copy_size; + *out_pos += copy_size; + } + + // End marker + out[(*out_pos)++] = 0x00; + assert(*out_pos <= out_size); + + return LZMA_OK; +} + + +static lzma_ret +block_encode_normal(lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Find out the size of the Block Header. + block->compressed_size = lzma2_bound(in_size); + if (block->compressed_size == 0) + return LZMA_DATA_ERROR; + + block->uncompressed_size = in_size; + return_if_error(lzma_block_header_size(block)); + + // Reserve space for the Block Header and skip it for now. + if (out_size - *out_pos <= block->header_size) + return LZMA_BUF_ERROR; + + const size_t out_start = *out_pos; + *out_pos += block->header_size; + + // Limit out_size so that we stop encoding if the output would grow + // bigger than what uncompressed Block would be. + if (out_size - *out_pos > block->compressed_size) + out_size = *out_pos + block->compressed_size; + + // TODO: In many common cases this could be optimized to use + // significantly less memory. + lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT; + lzma_ret ret = lzma_raw_encoder_init( + &raw_encoder, allocator, block->filters); + + if (ret == LZMA_OK) { + size_t in_pos = 0; + ret = raw_encoder.code(raw_encoder.coder, allocator, + in, &in_pos, in_size, out, out_pos, out_size, + LZMA_FINISH); + } + + // NOTE: This needs to be run even if lzma_raw_encoder_init() failed. + lzma_next_end(&raw_encoder, allocator); + + if (ret == LZMA_STREAM_END) { + // Compression was successful. 
Write the Block Header. + block->compressed_size + = *out_pos - (out_start + block->header_size); + ret = lzma_block_header_encode(block, out + out_start); + if (ret != LZMA_OK) + ret = LZMA_PROG_ERROR; + + } else if (ret == LZMA_OK) { + // Output buffer became full. + ret = LZMA_BUF_ERROR; + } + + // Reset *out_pos if something went wrong. + if (ret != LZMA_OK) + *out_pos = out_start; + + return ret; +} + + +extern LZMA_API(lzma_ret) +lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Sanity checks + if (block == NULL || block->filters == NULL + || (in == NULL && in_size != 0) || out == NULL + || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Check the version field. + if (block->version != 0) + return LZMA_OPTIONS_ERROR; + + // Size of a Block has to be a multiple of four, so limit the size + // here already. This way we don't need to check it again when adding + // Block Padding. + out_size -= (out_size - *out_pos) & 3; + + // Get the size of the Check field. + const size_t check_size = lzma_check_size(block->check); + if (check_size == UINT32_MAX) + return LZMA_PROG_ERROR; + + // Reserve space for the Check field. + if (out_size - *out_pos <= check_size) + return LZMA_BUF_ERROR; + + out_size -= check_size; + + // Do the actual compression. + const lzma_ret ret = block_encode_normal(block, allocator, + in, in_size, out, out_pos, out_size); + if (ret != LZMA_OK) { + // If the error was something else than output buffer + // becoming full, return the error now. + if (ret != LZMA_BUF_ERROR) + return ret; + + // The data was uncompressible (at least with the options + // given to us) or the output buffer was too small. Use the + // uncompressed chunks of LZMA2 to wrap the data into a valid + // Block. If we haven't been given enough output space, even + // this may fail. 
+ return_if_error(block_encode_uncompressed(block, in, in_size, + out, out_pos, out_size)); + } + + assert(*out_pos <= out_size); + + // Block Padding. No buffer overflow here, because we already adjusted + // out_size so that (out_size - out_start) is a multiple of four. + // Thus, if the buffer is full, the loop body can never run. + for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) { + assert(*out_pos < out_size); + out[(*out_pos)++] = 0x00; + } + + // If there's no Check field, we are done now. + if (check_size > 0) { + // Calculate the integrity check. We reserved space for + // the Check field earlier so we don't need to check for + // available output space here. + lzma_check_state check; + lzma_check_init(&check, block->check); + lzma_check_update(&check, block->check, in, in_size); + lzma_check_finish(&check, block->check); + + memcpy(block->raw_check, check.buffer.u8, check_size); + memcpy(out + *out_pos, check.buffer.u8, check_size); + *out_pos += check_size; + } + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/easy_encoder_memusage.c =================================================================== --- contrib/xz/src/liblzma/common/easy_encoder_memusage.c (revision 0) +++ contrib/xz/src/liblzma/common/easy_encoder_memusage.c (revision 0) @@ -0,0 +1,24 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_encoder_memusage.c +/// \brief Easy .xz Stream encoder memory usage calculation +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+extern LZMA_API(uint64_t)
+lzma_easy_encoder_memusage(uint32_t preset)
+{
+	lzma_options_easy opt_easy;
+	if (lzma_easy_preset(&opt_easy, preset))
+		return UINT64_MAX; // Unsupported preset; UINT32_MAX could pass for a real size
+
+	return lzma_raw_encoder_memusage(opt_easy.filters);
+}
Index: contrib/xz/src/liblzma/common/stream_buffer_encoder.c
===================================================================
--- contrib/xz/src/liblzma/common/stream_buffer_encoder.c	(revision 0)
+++ contrib/xz/src/liblzma/common/stream_buffer_encoder.c	(revision 0)
@@ -0,0 +1,131 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_buffer_encoder.c
+/// \brief Single-call .xz Stream encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "index.h"
+
+
+/// Maximum size of Index that has exactly one Record.
+/// Index Indicator + Number of Records + Record + CRC32 rounded up to
+/// the next multiple of four.
+#define INDEX_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 4 + 3) & ~3)
+
+/// Stream Header, Stream Footer, and Index
+#define HEADERS_BOUND (2 * LZMA_STREAM_HEADER_SIZE + INDEX_BOUND)
+
+
+extern LZMA_API(size_t)
+lzma_stream_buffer_bound(size_t uncompressed_size)
+{
+	// Get the maximum possible size of a Block.
+	const size_t block_bound = lzma_block_buffer_bound(uncompressed_size);
+	if (block_bound == 0)
+		return 0;
+
+	// Catch the possible integer overflow and also prevent the size of
+	// the Stream exceeding LZMA_VLI_MAX (theoretically possible on
+	// 64-bit systems).
+	if (MIN(SIZE_MAX, LZMA_VLI_MAX) - block_bound < HEADERS_BOUND)
+		return 0;
+
+	return block_bound + HEADERS_BOUND;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_buffer_encode(lzma_filter *filters, lzma_check check,
+		lzma_allocator *allocator, const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos_ptr, size_t out_size)
+{
+	// Sanity checks
+	if (filters == NULL || (unsigned int)(check) > LZMA_CHECK_ID_MAX
+			|| (in == NULL && in_size != 0) || out == NULL
+			|| out_pos_ptr == NULL || *out_pos_ptr > out_size)
+		return LZMA_PROG_ERROR;
+
+	// Note for the paranoids: Index encoder prevents the Stream from
+	// getting too big and still being accepted with LZMA_OK, and Block
+	// encoder catches if the input is too big. So we don't need to
+	// separately check if the buffers are too big.
+
+	// Use a local copy. We update *out_pos_ptr only if everything
+	// succeeds.
+	size_t out_pos = *out_pos_ptr;
+
+	// Check that there's enough space for both Stream Header and
+	// Stream Footer.
+	if (out_size - out_pos <= 2 * LZMA_STREAM_HEADER_SIZE)
+		return LZMA_BUF_ERROR;
+
+	// Reserve space for Stream Footer so we don't need to check for
+	// available space again before encoding Stream Footer.
+	out_size -= LZMA_STREAM_HEADER_SIZE;
+
+	// Encode the Stream Header.
+	lzma_stream_flags stream_flags = {
+		.version = 0,
+		.check = check,
+	};
+
+	if (lzma_stream_header_encode(&stream_flags, out + out_pos)
+			!= LZMA_OK)
+		return LZMA_PROG_ERROR;
+
+	out_pos += LZMA_STREAM_HEADER_SIZE;
+
+	// Block
+	lzma_block block = {
+		.version = 0,
+		.check = check,
+		.filters = filters,
+	};
+
+	return_if_error(lzma_block_buffer_encode(&block, allocator,
+			in, in_size, out, &out_pos, out_size));
+
+	// Index
+	{
+		// Create an Index with one Record.
+		lzma_index *i = lzma_index_init(allocator);
+		if (i == NULL)
+			return LZMA_MEM_ERROR;
+
+		lzma_ret ret = lzma_index_append(i, allocator,
+				lzma_block_unpadded_size(&block),
+				block.uncompressed_size);
+
+		// If adding the Record was successful, encode the Index
+		// and get its size which will be stored into Stream Footer.
+		if (ret == LZMA_OK) {
+			ret = lzma_index_buffer_encode(
+					i, out, &out_pos, out_size);
+
+			stream_flags.backward_size = lzma_index_size(i);
+		}
+
+		lzma_index_end(i, allocator);
+
+		if (ret != LZMA_OK)
+			return ret;
+	}
+
+	// Stream Footer. We have already reserved space for this.
+	if (lzma_stream_footer_encode(&stream_flags, out + out_pos)
+			!= LZMA_OK)
+		return LZMA_PROG_ERROR;
+
+	out_pos += LZMA_STREAM_HEADER_SIZE;
+
+	// Everything went fine, make the new output position available
+	// to the application.
+	*out_pos_ptr = out_pos;
+	return LZMA_OK;
+}
Index: contrib/xz/src/liblzma/common/easy_preset.c
===================================================================
--- contrib/xz/src/liblzma/common/easy_preset.c	(revision 0)
+++ contrib/xz/src/liblzma/common/easy_preset.c	(revision 0)
@@ -0,0 +1,27 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file easy_preset.c
+/// \brief Preset handling for easy encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+extern bool
+lzma_easy_preset(lzma_options_easy *opt_easy, uint32_t preset)
+{
+	if (lzma_lzma_preset(&opt_easy->opt_lzma, preset))
+		return true;
+
+	opt_easy->filters[0].id = LZMA_FILTER_LZMA2;
+	opt_easy->filters[0].options = &opt_easy->opt_lzma;
+	opt_easy->filters[1].id = LZMA_VLI_UNKNOWN;
+
+	return false;
+}
Index: contrib/xz/src/liblzma/common/vli_size.c
===================================================================
--- contrib/xz/src/liblzma/common/vli_size.c	(revision 0)
+++ contrib/xz/src/liblzma/common/vli_size.c	(revision 0)
@@ -0,0 +1,30 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file vli_size.c
+/// \brief Calculates the encoded size of a variable-length integer
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+extern LZMA_API(uint32_t)
+lzma_vli_size(lzma_vli vli)
+{
+	if (vli > LZMA_VLI_MAX)
+		return 0;
+
+	uint32_t i = 0;
+	do {
+		vli >>= 7;
+		++i;
+	} while (vli != 0);
+
+	assert(i <= LZMA_VLI_BYTES_MAX);
+	return i;
+}
Index: contrib/xz/src/liblzma/common/block_decoder.h
===================================================================
--- contrib/xz/src/liblzma/common/block_decoder.h	(revision 0)
+++ contrib/xz/src/liblzma/common/block_decoder.h	(revision 0)
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_decoder.h
+/// \brief Decodes .xz Blocks
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_BLOCK_DECODER_H
+#define LZMA_BLOCK_DECODER_H
+
+#include "common.h"
+
+
+extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next,
+		lzma_allocator *allocator, lzma_block *block);
+
+#endif
Index: contrib/xz/src/liblzma/common/block_util.c
===================================================================
--- contrib/xz/src/liblzma/common/block_util.c	(revision 0)
+++ contrib/xz/src/liblzma/common/block_util.c	(revision 0)
@@ -0,0 +1,99 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_util.c
+/// \brief Utility functions to handle lzma_block
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "index.h"
+
+
+/// Derive Compressed Size from the size of a whole Block and store it
+/// into block->compressed_size.
+///
+/// total_size is treated as Block Header + Compressed Data + Check.
+/// Returns LZMA_PROG_ERROR if *block fails lzma_block_unpadded_size()
+/// validation, and LZMA_DATA_ERROR if total_size leaves no room for
+/// Compressed Data or contradicts an already known Compressed Size.
+extern LZMA_API(lzma_ret)
+lzma_block_compressed_size(lzma_block *block, lzma_vli total_size)
+{
+	// Validate everything but Uncompressed Size and filters.
+	if (lzma_block_unpadded_size(block) == 0)
+		return LZMA_PROG_ERROR;
+
+	const uint32_t container_size = block->header_size
+			+ lzma_check_size(block->check);
+
+	// Validate that Compressed Size will be greater than zero, that is,
+	// total_size must be bigger than Block Header and Check together.
+	// This also keeps the subtraction below from wrapping around.
+	if (total_size <= container_size)
+		return LZMA_DATA_ERROR;
+
+	// Calculate what Compressed Size is supposed to be.
+	// If Compressed Size was present in Block Header,
+	// compare that the new value matches it.
+	const lzma_vli compressed_size = total_size - container_size;
+	if (block->compressed_size != LZMA_VLI_UNKNOWN
+			&& block->compressed_size != compressed_size)
+		return LZMA_DATA_ERROR;
+
+	block->compressed_size = compressed_size;
+
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_block_unpadded_size(const lzma_block *block)
+{
+	// Validate the values that we are interested in i.e. all but
+	// Uncompressed Size and the filters.
+	//
+	// NOTE: This function is used for validation too, so it is
+	// essential that these checks are always done even if
+	// Compressed Size is unknown.
+	if (block == NULL || block->version != 0
+			|| block->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
+			|| block->header_size > LZMA_BLOCK_HEADER_SIZE_MAX
+			|| (block->header_size & 3)
+			|| !lzma_vli_is_valid(block->compressed_size)
+			|| block->compressed_size == 0
+			|| (unsigned int)(block->check) > LZMA_CHECK_ID_MAX)
+		return 0;
+
+	// If Compressed Size is unknown, return that we cannot know
+	// size of the Block either.
+	if (block->compressed_size == LZMA_VLI_UNKNOWN)
+		return LZMA_VLI_UNKNOWN;
+
+	// Calculate Unpadded Size and validate it.
+	const lzma_vli unpadded_size = block->compressed_size
+			+ block->header_size
+			+ lzma_check_size(block->check);
+
+	assert(unpadded_size >= UNPADDED_SIZE_MIN);
+	if (unpadded_size > UNPADDED_SIZE_MAX)
+		return 0;
+
+	return unpadded_size;
+}
+
+
+extern LZMA_API(lzma_vli)
+lzma_block_total_size(const lzma_block *block)
+{
+	lzma_vli unpadded_size = lzma_block_unpadded_size(block);
+
+	if (unpadded_size != LZMA_VLI_UNKNOWN)
+		unpadded_size = vli_ceil4(unpadded_size);
+
+	return unpadded_size;
+}
Index: contrib/xz/src/liblzma/common/easy_preset.h
===================================================================
--- contrib/xz/src/liblzma/common/easy_preset.h	(revision 0)
+++ contrib/xz/src/liblzma/common/easy_preset.h	(revision 0)
@@ -0,0 +1,32 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file easy_preset.h
+/// \brief Preset handling for easy encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +typedef struct { + /// We need to keep the filters array available in case + /// LZMA_FULL_FLUSH is used. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + /// Options for LZMA2 + lzma_options_lzma opt_lzma; + + // Options for more filters can be added later, so this struct + // is not ready to be put into the public API. + +} lzma_options_easy; + + +/// Set *easy to the settings given by the preset. Returns true on error, +/// false on success. +extern bool lzma_easy_preset(lzma_options_easy *easy, uint32_t preset); Index: contrib/xz/src/liblzma/common/common.c =================================================================== --- contrib/xz/src/liblzma/common/common.c (revision 0) +++ contrib/xz/src/liblzma/common/common.c (revision 0) @@ -0,0 +1,374 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file common.c +/// \brief Common functions needed in many places in liblzma +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +///////////// +// Version // +///////////// + +extern LZMA_API(uint32_t) +lzma_version_number(void) +{ + return LZMA_VERSION; +} + + +extern LZMA_API(const char *) +lzma_version_string(void) +{ + return LZMA_VERSION_STRING; +} + + +/////////////////////// +// Memory allocation // +/////////////////////// + +extern void * lzma_attribute((malloc)) +lzma_alloc(size_t size, lzma_allocator *allocator) +{ + // Some malloc() variants return NULL if called with size == 0.
+ if (size == 0) + size = 1; + + void *ptr; + + if (allocator != NULL && allocator->alloc != NULL) + ptr = allocator->alloc(allocator->opaque, 1, size); + else + ptr = malloc(size); + + return ptr; +} + + +extern void +lzma_free(void *ptr, lzma_allocator *allocator) +{ + if (allocator != NULL && allocator->free != NULL) + allocator->free(allocator->opaque, ptr); + else + free(ptr); + + return; +} + + +////////// +// Misc // +////////// + +extern size_t +lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size) +{ + const size_t in_avail = in_size - *in_pos; + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = MIN(in_avail, out_avail); + + memcpy(out + *out_pos, in + *in_pos, copy_size); + + *in_pos += copy_size; + *out_pos += copy_size; + + return copy_size; +} + + +extern lzma_ret +lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + lzma_next_coder_init(filters[0].init, next, allocator); + next->id = filters[0].id; + return filters[0].init == NULL + ? LZMA_OK : filters[0].init(next, allocator, filters); +} + + +extern lzma_ret +lzma_next_filter_update(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *reversed_filters) +{ + // Check that the application isn't trying to change the Filter ID. + // End of filters is indicated with LZMA_VLI_UNKNOWN in both + // reversed_filters[0].id and next->id. 
+ if (reversed_filters[0].id != next->id) + return LZMA_PROG_ERROR; + + if (reversed_filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_OK; + + assert(next->update != NULL); + return next->update(next->coder, allocator, NULL, reversed_filters); +} + + +extern void +lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator) +{ + if (next->init != (uintptr_t)(NULL)) { + // To avoid tiny end functions that simply call + // lzma_free(coder, allocator), we allow leaving next->end + // NULL and call lzma_free() here. + if (next->end != NULL) + next->end(next->coder, allocator); + else + lzma_free(next->coder, allocator); + + // Reset the variables so that we don't accidentally think + // that it is an already initialized coder. + *next = LZMA_NEXT_CODER_INIT; + } + + return; +} + + +////////////////////////////////////// +// External to internal API wrapper // +////////////////////////////////////// + +extern lzma_ret +lzma_strm_init(lzma_stream *strm) +{ + if (strm == NULL) + return LZMA_PROG_ERROR; + + if (strm->internal == NULL) { + strm->internal = lzma_alloc(sizeof(lzma_internal), + strm->allocator); + if (strm->internal == NULL) + return LZMA_MEM_ERROR; + + strm->internal->next = LZMA_NEXT_CODER_INIT; + } + + strm->internal->supported_actions[LZMA_RUN] = false; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = false; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = false; + strm->internal->supported_actions[LZMA_FINISH] = false; + strm->internal->sequence = ISEQ_RUN; + strm->internal->allow_buf_error = false; + + strm->total_in = 0; + strm->total_out = 0; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_code(lzma_stream *strm, lzma_action action) +{ + // Sanity checks + if ((strm->next_in == NULL && strm->avail_in != 0) + || (strm->next_out == NULL && strm->avail_out != 0) + || strm->internal == NULL + || strm->internal->next.code == NULL + || (unsigned int)(action) > LZMA_FINISH + || !strm->internal->supported_actions[action]) + return
LZMA_PROG_ERROR; + + switch (strm->internal->sequence) { + case ISEQ_RUN: + switch (action) { + case LZMA_RUN: + break; + + case LZMA_SYNC_FLUSH: + strm->internal->sequence = ISEQ_SYNC_FLUSH; + break; + + case LZMA_FULL_FLUSH: + strm->internal->sequence = ISEQ_FULL_FLUSH; + break; + + case LZMA_FINISH: + strm->internal->sequence = ISEQ_FINISH; + break; + } + + break; + + case ISEQ_SYNC_FLUSH: + // The same action must be used until we return + // LZMA_STREAM_END, and the amount of input must not change. + if (action != LZMA_SYNC_FLUSH + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_FULL_FLUSH: + if (action != LZMA_FULL_FLUSH + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_FINISH: + if (action != LZMA_FINISH + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_END: + return LZMA_STREAM_END; + + case ISEQ_ERROR: + default: + return LZMA_PROG_ERROR; + } + + size_t in_pos = 0; + size_t out_pos = 0; + lzma_ret ret = strm->internal->next.code( + strm->internal->next.coder, strm->allocator, + strm->next_in, &in_pos, strm->avail_in, + strm->next_out, &out_pos, strm->avail_out, action); + + strm->next_in += in_pos; + strm->avail_in -= in_pos; + strm->total_in += in_pos; + + strm->next_out += out_pos; + strm->avail_out -= out_pos; + strm->total_out += out_pos; + + strm->internal->avail_in = strm->avail_in; + + switch (ret) { + case LZMA_OK: + // Don't return LZMA_BUF_ERROR when it happens the first time. + // This is to avoid returning LZMA_BUF_ERROR when avail_out + // was zero but still there was no more data left to written + // to next_out. 
+ if (out_pos == 0 && in_pos == 0) { + if (strm->internal->allow_buf_error) + ret = LZMA_BUF_ERROR; + else + strm->internal->allow_buf_error = true; + } else { + strm->internal->allow_buf_error = false; + } + break; + + case LZMA_STREAM_END: + if (strm->internal->sequence == ISEQ_SYNC_FLUSH + || strm->internal->sequence == ISEQ_FULL_FLUSH) + strm->internal->sequence = ISEQ_RUN; + else + strm->internal->sequence = ISEQ_END; + + // Fall through + + case LZMA_NO_CHECK: + case LZMA_UNSUPPORTED_CHECK: + case LZMA_GET_CHECK: + case LZMA_MEMLIMIT_ERROR: + // Something else than LZMA_OK, but not a fatal error, + // that is, coding may be continued (except if ISEQ_END). + strm->internal->allow_buf_error = false; + break; + + default: + // All the other errors are fatal; coding cannot be continued. + assert(ret != LZMA_BUF_ERROR); + strm->internal->sequence = ISEQ_ERROR; + break; + } + + return ret; +} + + +extern LZMA_API(void) +lzma_end(lzma_stream *strm) +{ + if (strm != NULL && strm->internal != NULL) { + lzma_next_end(&strm->internal->next, strm->allocator); + lzma_free(strm->internal, strm->allocator); + strm->internal = NULL; + } + + return; +} + + +extern LZMA_API(lzma_check) +lzma_get_check(const lzma_stream *strm) +{ + // Return LZMA_CHECK_NONE if we cannot know the check type. + // It's a bug in the application if this happens. 
+ if (strm->internal->next.get_check == NULL) + return LZMA_CHECK_NONE; + + return strm->internal->next.get_check(strm->internal->next.coder); +} + + +extern LZMA_API(uint64_t) +lzma_memusage(const lzma_stream *strm) +{ + uint64_t memusage; + uint64_t old_memlimit; + + if (strm == NULL || strm->internal == NULL + || strm->internal->next.memconfig == NULL + || strm->internal->next.memconfig( + strm->internal->next.coder, + &memusage, &old_memlimit, 0) != LZMA_OK) + return 0; + + return memusage; +} + + +extern LZMA_API(uint64_t) +lzma_memlimit_get(const lzma_stream *strm) +{ + uint64_t old_memlimit; + uint64_t memusage; + + if (strm == NULL || strm->internal == NULL + || strm->internal->next.memconfig == NULL + || strm->internal->next.memconfig( + strm->internal->next.coder, + &memusage, &old_memlimit, 0) != LZMA_OK) + return 0; + + return old_memlimit; +} + + +extern LZMA_API(lzma_ret) +lzma_memlimit_set(lzma_stream *strm, uint64_t new_memlimit) +{ + // Dummy variables to simplify memconfig functions + uint64_t old_memlimit; + uint64_t memusage; + + if (strm == NULL || strm->internal == NULL + || strm->internal->next.memconfig == NULL) + return LZMA_PROG_ERROR; + + if (new_memlimit != 0 && new_memlimit < LZMA_MEMUSAGE_BASE) + return LZMA_MEMLIMIT_ERROR; + + return strm->internal->next.memconfig(strm->internal->next.coder, + &memusage, &old_memlimit, new_memlimit); +} Index: contrib/xz/src/liblzma/common/vli_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/vli_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/vli_encoder.c (revision 0) @@ -0,0 +1,69 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file vli_encoder.c +/// \brief Encodes variable-length integers +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(lzma_ret) +lzma_vli_encode(lzma_vli vli, size_t *vli_pos, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) { + vli_pos = &vli_pos_internal; + + // In single-call mode, we expect that the caller has + // reserved enough output space. + if (*out_pos >= out_size) + return LZMA_PROG_ERROR; + } else { + // This never happens when we are called by liblzma, but + // may happen if called directly from an application. + if (*out_pos >= out_size) + return LZMA_BUF_ERROR; + } + + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || vli > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + // Shift vli so that the next bits to encode are the lowest. In + // single-call mode this never changes vli since *vli_pos is zero. + vli >>= *vli_pos * 7; + + // Write the non-last bytes in a loop. + while (vli >= 0x80) { + // We don't need *vli_pos during this function call anymore, + // but update it here so that it is ready if we need to + // return before the whole integer has been encoded. + ++*vli_pos; + assert(*vli_pos < LZMA_VLI_BYTES_MAX); + + // Write the next byte. + out[*out_pos] = (uint8_t)(vli) | 0x80; + vli >>= 7; + + if (++*out_pos == out_size) + return vli_pos == &vli_pos_internal + ? LZMA_PROG_ERROR : LZMA_OK; + } + + // Write the last byte. + out[*out_pos] = (uint8_t)(vli); + ++*out_pos; + ++*vli_pos; + + return vli_pos == &vli_pos_internal ?
LZMA_OK : LZMA_STREAM_END; + +} Index: contrib/xz/src/liblzma/common/stream_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/stream_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/stream_encoder.c (revision 0) @@ -0,0 +1,331 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder.c +/// \brief Encodes .xz Streams +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_encoder.h" +#include "block_encoder.h" +#include "index_encoder.h" + + +struct lzma_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_INIT, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_ENCODE, + SEQ_INDEX_ENCODE, + SEQ_STREAM_FOOTER, + } sequence; + + /// True if Block encoder has been initialized by + /// lzma_stream_encoder_init() or stream_encoder_update() + /// and thus doesn't need to be initialized in stream_encode(). + bool block_encoder_is_initialized; + + /// Block + lzma_next_coder block_encoder; + + /// Options for the Block encoder + lzma_block block_options; + + /// The filter chain currently in use + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + /// Index encoder. This is separate from Block encoder, because this + /// doesn't take much memory, and when encoding multiple Streams + /// with the same encoding options we avoid reallocating memory. + lzma_next_coder index_encoder; + + /// Index to hold sizes of the Blocks + lzma_index *index; + + /// Read position in buffer[] + size_t buffer_pos; + + /// Total number of bytes in buffer[] + size_t buffer_size; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. 
+ uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +}; + + +static lzma_ret +block_encoder_init(lzma_coder *coder, lzma_allocator *allocator) +{ + // Prepare the Block options. Even though Block encoder doesn't need + // compressed_size, uncompressed_size, and header_size to be + // initialized, it is a good idea to do it here, because this way + // we catch if someone gave us Filter ID that cannot be used in + // Blocks/Streams. + coder->block_options.compressed_size = LZMA_VLI_UNKNOWN; + coder->block_options.uncompressed_size = LZMA_VLI_UNKNOWN; + + return_if_error(lzma_block_header_size(&coder->block_options)); + + // Initialize the actual Block encoder. + return lzma_block_encoder_init(&coder->block_encoder, allocator, + &coder->block_options); +} + + +static lzma_ret +stream_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: + case SEQ_BLOCK_HEADER: + case SEQ_STREAM_FOOTER: + lzma_bufcpy(coder->buffer, &coder->buffer_pos, + coder->buffer_size, out, out_pos, out_size); + if (coder->buffer_pos < coder->buffer_size) + return LZMA_OK; + + if (coder->sequence == SEQ_STREAM_FOOTER) + return LZMA_STREAM_END; + + coder->buffer_pos = 0; + ++coder->sequence; + break; + + case SEQ_BLOCK_INIT: { + if (*in_pos == in_size) { + // If we are requested to flush or finish the current + // Block, return LZMA_STREAM_END immediately since + // there's nothing to do. + if (action != LZMA_FINISH) + return action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + + // The application had used LZMA_FULL_FLUSH to finish + // the previous Block, but now wants to finish without + // encoding new data, or it is simply creating an + // empty Stream with no Blocks. 
+ // + // Initialize the Index encoder, and continue to + // actually encoding the Index. + return_if_error(lzma_index_encoder_init( + &coder->index_encoder, allocator, + coder->index)); + coder->sequence = SEQ_INDEX_ENCODE; + break; + } + + // Initialize the Block encoder unless it was already + // initialized by lzma_stream_encoder_init() or + // stream_encoder_update(). + if (!coder->block_encoder_is_initialized) + return_if_error(block_encoder_init(coder, allocator)); + + // Make it false so that we don't skip the initialization + // with the next Block. + coder->block_encoder_is_initialized = false; + + // Encode the Block Header. This shouldn't fail since we have + // already initialized the Block encoder. + if (lzma_block_header_encode(&coder->block_options, + coder->buffer) != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = coder->block_options.header_size; + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_BLOCK_ENCODE: { + static const lzma_action convert[4] = { + LZMA_RUN, + LZMA_SYNC_FLUSH, + LZMA_FINISH, + LZMA_FINISH, + }; + + const lzma_ret ret = coder->block_encoder.code( + coder->block_encoder.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, convert[action]); + if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) + return ret; + + // Add a new Index Record. + const lzma_vli unpadded_size = lzma_block_unpadded_size( + &coder->block_options); + assert(unpadded_size != 0); + return_if_error(lzma_index_append(coder->index, allocator, + unpadded_size, + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_INIT; + break; + } + + case SEQ_INDEX_ENCODE: { + // Call the Index encoder. It doesn't take any input, so + // those pointers can be NULL. + const lzma_ret ret = coder->index_encoder.code( + coder->index_encoder.coder, allocator, + NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret; + + // Encode the Stream Footer into coder->buffer. 
+ const lzma_stream_flags stream_flags = { + .version = 0, + .backward_size = lzma_index_size(coder->index), + .check = coder->block_options.check, + }; + + if (lzma_stream_footer_encode(&stream_flags, coder->buffer) + != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + coder->sequence = SEQ_STREAM_FOOTER; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->block_encoder, allocator); + lzma_next_end(&coder->index_encoder, allocator); + lzma_index_end(coder->index, allocator); + + for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i) + lzma_free(coder->filters[i].options, allocator); + + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +stream_encoder_update(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters, + const lzma_filter *reversed_filters) +{ + if (coder->sequence <= SEQ_BLOCK_INIT) { + // There is no incomplete Block waiting to be finished, + // thus we can change the whole filter chain. Start by + // trying to initialize the Block encoder with the new + // chain. This way we detect if the chain is valid. + coder->block_encoder_is_initialized = false; + coder->block_options.filters = (lzma_filter *)(filters); + const lzma_ret ret = block_encoder_init(coder, allocator); + coder->block_options.filters = coder->filters; + if (ret != LZMA_OK) + return ret; + + coder->block_encoder_is_initialized = true; + + } else if (coder->sequence <= SEQ_BLOCK_ENCODE) { + // We are in the middle of a Block. Try to update only + // the filter-specific options. + return_if_error(coder->block_encoder.update( + coder->block_encoder.coder, allocator, + filters, reversed_filters)); + } else { + // Trying to update the filter chain when we are already + // encoding Index or Stream Footer. 
+ return LZMA_PROG_ERROR; + } + + // Free the copy of the old chain and make a copy of the new chain. + for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i) + lzma_free(coder->filters[i].options, allocator); + + return lzma_filters_copy(filters, coder->filters, allocator); +} + + +extern lzma_ret +lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *filters, lzma_check check) +{ + lzma_next_coder_init(&lzma_stream_encoder_init, next, allocator); + + if (filters == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &stream_encode; + next->end = &stream_encoder_end; + next->update = &stream_encoder_update; + + next->coder->block_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index = NULL; + } + + // Basic initializations + next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.version = 0; + next->coder->block_options.check = check; + next->coder->filters[0].id = LZMA_VLI_UNKNOWN; + + // Initialize the Index + lzma_index_end(next->coder->index, allocator); + next->coder->index = lzma_index_init(allocator); + if (next->coder->index == NULL) + return LZMA_MEM_ERROR; + + // Encode the Stream Header + lzma_stream_flags stream_flags = { + .version = 0, + .check = check, + }; + return_if_error(lzma_stream_header_encode( + &stream_flags, next->coder->buffer)); + + next->coder->buffer_pos = 0; + next->coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + + // Initialize the Block encoder. This way we detect unsupported + // filter chains when initializing the Stream encoder instead of + // giving an error after Stream Header has already written out. 
+ return stream_encoder_update( + next->coder, allocator, filters, NULL); +} + + +extern LZMA_API(lzma_ret) +lzma_stream_encoder(lzma_stream *strm, + const lzma_filter *filters, lzma_check check) +{ + lzma_next_strm_init(lzma_stream_encoder_init, strm, filters, check); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/index_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/index_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/index_decoder.c (revision 0) @@ -0,0 +1,343 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.c +/// \brief Decodes the Index field +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_MEMUSAGE, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Memory usage limit + uint64_t memlimit; + + /// Target Index + lzma_index *index; + + /// Pointer given by the application, which is set after + /// successful decoding. + lzma_index **index_ptr; + + /// Number of Records left to decode. 
+ lzma_vli count; + + /// The most recent Unpadded Size field + lzma_vli unpadded_size; + + /// The most recent Uncompressed Size field + lzma_vli uncompressed_size; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + // Similar optimization as in index_encoder.c + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or + // LZMA_FORMAT_ERROR, because a typical usage case for Index + // decoder is when parsing the Stream backwards. If seeking + // backward from the Stream Footer gives us something that + // doesn't begin with Index Indicator, the file is considered + // corrupt, not "programming error" or "unrecognized file + // format". One could argue that the application should + // verify the Index Indicator before trying to decode the + // Index, but well, I suppose it is simpler this way. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: + ret = lzma_vli_decode(&coder->count, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + coder->pos = 0; + coder->sequence = SEQ_MEMUSAGE; + + // Fall through + + case SEQ_MEMUSAGE: + if (lzma_index_memusage(1, coder->count) > coder->memlimit) { + ret = LZMA_MEMLIMIT_ERROR; + goto out; + } + + // Tell the Index handling code how many Records this + // Index has to allow it to allocate memory more efficiently. 
+ lzma_index_prealloc(coder->index, coder->count); + + ret = LZMA_OK; + coder->sequence = coder->count == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + break; + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = coder->sequence == SEQ_UNPADDED + ? &coder->unpadded_size + : &coder->uncompressed_size; + + ret = lzma_vli_decode(size, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + if (coder->sequence == SEQ_UNPADDED) { + // Validate that encoded Unpadded Size isn't too small + // or too big. + if (coder->unpadded_size < UNPADDED_SIZE_MIN + || coder->unpadded_size + > UNPADDED_SIZE_MAX) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_UNCOMPRESSED; + } else { + // Add the decoded Record to the Index. + return_if_error(lzma_index_append( + coder->index, allocator, + coder->unpadded_size, + coder->uncompressed_size)); + + // Check if this was the last Record. + coder->sequence = --coder->count == 0 + ? SEQ_PADDING_INIT + : SEQ_UNPADDED; + } + + break; + } + + case SEQ_PADDING_INIT: + coder->pos = lzma_index_padding_size(coder->index); + coder->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) + != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++coder->pos < 4); + + // Decoding was successful, now we can let the application + // see the decoded Index. + *coder->index_ptr = coder->index; + + // Make index NULL so we don't free it unintentionally. 
+ coder->index = NULL; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + return ret; +} + + +static void +index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + *memusage = lzma_index_memusage(1, coder->count); + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < *memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +static lzma_ret +index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, + lzma_index **i, uint64_t memlimit) +{ + // Remember the pointer given by the application. We will set it + // to point to the decoded Index only if decoding is successful. + // Before that, keep it NULL so that applications can always safely + // pass it to lzma_index_end() no matter did decoding succeed or not. + coder->index_ptr = i; + *i = NULL; + + // We always allocate a new lzma_index. + coder->index = lzma_index_init(allocator); + if (coder->index == NULL) + return LZMA_MEM_ERROR; + + // Initialize the rest. + coder->sequence = SEQ_INDICATOR; + coder->memlimit = memlimit; + coder->count = 0; // Needs to be initialized due to _memconfig(). 
+ coder->pos = 0; + coder->crc32 = 0; + + return LZMA_OK; +} + + +static lzma_ret +index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i, uint64_t memlimit) +{ + lzma_next_coder_init(&index_decoder_init, next, allocator); + + if (i == NULL || memlimit == 0) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_decode; + next->end = &index_decoder_end; + next->memconfig = &index_decoder_memconfig; + next->coder->index = NULL; + } else { + lzma_index_end(next->coder->index, allocator); + } + + return index_decoder_reset(next->coder, allocator, i, memlimit); +} + + +extern LZMA_API(lzma_ret) +lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) +{ + lzma_next_strm_init(index_decoder_init, strm, i, memlimit); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_buffer_decode( + lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) +{ + // Sanity checks + if (i == NULL || memlimit == NULL + || in == NULL || in_pos == NULL || *in_pos > in_size) + return LZMA_PROG_ERROR; + + // Initialize the decoder. + lzma_coder coder; + return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); + + // Store the input start position so that we can restore it in case + // of an error. + const size_t in_start = *in_pos; + + // Do the actual decoding. + lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, + NULL, NULL, 0, LZMA_RUN); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + // Something went wrong, free the Index structure and restore + // the input position. + lzma_index_end(coder.index, allocator); + *in_pos = in_start; + + if (ret == LZMA_OK) { + // The input is truncated or otherwise corrupt. 
+ // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR + // like lzma_vli_decode() does in single-call mode. + ret = LZMA_DATA_ERROR; + + } else if (ret == LZMA_MEMLIMIT_ERROR) { + // Tell the caller how much memory would have + // been needed. + *memlimit = lzma_index_memusage(1, coder.count); + } + } + + return ret; +} Index: contrib/xz/src/liblzma/common/stream_flags_decoder.c =================================================================== --- contrib/xz/src/liblzma/common/stream_flags_decoder.c (revision 0) +++ contrib/xz/src/liblzma/common/stream_flags_decoder.c (revision 0) @@ -0,0 +1,82 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_decoder.c +/// \brief Decodes Stream Header and Stream Footer from .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_flags_common.h" + + +static bool +stream_flags_decode(lzma_stream_flags *options, const uint8_t *in) +{ + // Reserved bits must be unset. + if (in[0] != 0x00 || (in[1] & 0xF0)) + return true; + + options->version = 0; + options->check = in[1] & 0x0F; + + return false; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in) +{ + // Magic + if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // Verify the CRC32 so we can distinguish between corrupt + // and unsupported files. + const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != unaligned_read32le(in + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(lzma_header_magic))) + return LZMA_OPTIONS_ERROR; + + // Set Backward Size to indicate unknown value. 
That way + // lzma_stream_flags_compare() can be used to compare Stream Header + // and Stream Footer while keeping it useful also for comparing + // two Stream Footers. + options->backward_size = LZMA_VLI_UNKNOWN; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in) +{ + // Magic + if (memcmp(in + sizeof(uint32_t) * 2 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // CRC32 + const uint32_t crc = lzma_crc32(in + sizeof(uint32_t), + sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != unaligned_read32le(in)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(uint32_t) * 2)) + return LZMA_OPTIONS_ERROR; + + // Backward Size + options->backward_size = unaligned_read32le(in + sizeof(uint32_t)); + options->backward_size = (options->backward_size + 1) * 4; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/common.h =================================================================== --- contrib/xz/src/liblzma/common/common.h (revision 0) +++ contrib/xz/src/liblzma/common/common.h (revision 0) @@ -0,0 +1,290 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file common.h +/// \brief Definitions common to the whole liblzma library +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_COMMON_H +#define LZMA_COMMON_H + +#include "sysdefs.h" +#include "mythread.h" +#include "tuklib_integer.h" + +#if defined(_WIN32) || defined(__CYGWIN__) +# ifdef DLL_EXPORT +# define LZMA_API_EXPORT __declspec(dllexport) +# else +# define LZMA_API_EXPORT +# endif +// Don't use ifdef or defined() below. 
+#elif HAVE_VISIBILITY +# define LZMA_API_EXPORT __attribute__((__visibility__("default"))) +#else +# define LZMA_API_EXPORT +#endif + +#define LZMA_API(type) LZMA_API_EXPORT type LZMA_API_CALL + +#include "lzma.h" + +// These allow helping the compiler in some often-executed branches, whose +// result is almost always the same. +#ifdef __GNUC__ +# define likely(expr) __builtin_expect(expr, true) +# define unlikely(expr) __builtin_expect(expr, false) +#else +# define likely(expr) (expr) +# define unlikely(expr) (expr) +#endif + + +/// Size of temporary buffers needed in some filters +#define LZMA_BUFFER_SIZE 4096 + + +/// Starting value for memory usage estimates. Instead of calculating size +/// of _every_ structure and taking into account malloc() overhead etc., we +/// add a base size to all memory usage estimates. It's not very accurate +/// but should be easily good enough. +#define LZMA_MEMUSAGE_BASE (UINT64_C(1) << 15) + +/// Start of internal Filter ID space. These IDs must never be used +/// in Streams. +#define LZMA_FILTER_RESERVED_START (LZMA_VLI_C(1) << 62) + + +/// Internal helper filter used by Subblock decoder. It is mapped to an +/// otherwise invalid Filter ID, which is impossible to get from any input +/// file (even if malicious file). +#define LZMA_FILTER_SUBBLOCK_HELPER LZMA_VLI_C(0x7000000000000001) + + +/// Supported flags that can be passed to lzma_stream_decoder() +/// or lzma_auto_decoder(). +#define LZMA_SUPPORTED_FLAGS \ + ( LZMA_TELL_NO_CHECK \ + | LZMA_TELL_UNSUPPORTED_CHECK \ + | LZMA_TELL_ANY_CHECK \ + | LZMA_CONCATENATED ) + + +/// Type of encoder/decoder specific data; the actual structure is defined +/// differently in different coders. 
+typedef struct lzma_coder_s lzma_coder; + +typedef struct lzma_next_coder_s lzma_next_coder; + +typedef struct lzma_filter_info_s lzma_filter_info; + + +/// Type of a function used to initialize a filter encoder or decoder +typedef lzma_ret (*lzma_init_function)( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters); + +/// Type of a function to do some kind of coding work (filters, Stream, +/// Block encoders/decoders etc.). Some special coders don't use both +/// input and output buffers, but for simplicity they still use this same +/// function prototype. +typedef lzma_ret (*lzma_code_function)( + lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action); + +/// Type of a function to free the memory allocated for the coder +typedef void (*lzma_end_function)( + lzma_coder *coder, lzma_allocator *allocator); + + +/// Raw coder validates and converts an array of lzma_filter structures to +/// an array of lzma_filter_info structures. This array is used with +/// lzma_next_filter_init to initialize the filter chain. +struct lzma_filter_info_s { + /// Filter ID. This is used only by the encoder + /// with lzma_filters_update(). + lzma_vli id; + + /// Pointer to function used to initialize the filter. + /// This is NULL to indicate end of array. + lzma_init_function init; + + /// Pointer to filter's options structure + void *options; +}; + + +/// Hold data and function pointers of the next filter in the chain. +struct lzma_next_coder_s { + /// Pointer to coder-specific data + lzma_coder *coder; + + /// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't + /// point to a filter coder. + lzma_vli id; + + /// "Pointer" to init function. This is never called here. + /// We need only to detect if we are initializing a coder + /// that was allocated earlier. 
See lzma_next_coder_init and + /// lzma_next_strm_init macros in this file. + uintptr_t init; + + /// Pointer to function to do the actual coding + lzma_code_function code; + + /// Pointer to function to free lzma_next_coder.coder. This can + /// be NULL; in that case, lzma_free is called to free + /// lzma_next_coder.coder. + lzma_end_function end; + + /// Pointer to function to return the type of the integrity check. + /// Most coders won't support this. + lzma_check (*get_check)(const lzma_coder *coder); + + /// Pointer to function to get and/or change the memory usage limit. + /// If new_memlimit == 0, the limit is not changed. + lzma_ret (*memconfig)(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit); + + /// Update the filter-specific options or the whole filter chain + /// in the encoder. + lzma_ret (*update)(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters, + const lzma_filter *reversed_filters); +}; + + +/// Macro to initialize lzma_next_coder structure +#define LZMA_NEXT_CODER_INIT \ + (lzma_next_coder){ \ + .coder = NULL, \ + .init = (uintptr_t)(NULL), \ + .id = LZMA_VLI_UNKNOWN, \ + .code = NULL, \ + .end = NULL, \ + .get_check = NULL, \ + .memconfig = NULL, \ + .update = NULL, \ + } + + +/// Internal data for lzma_strm_init, lzma_code, and lzma_end. A pointer to +/// this is stored in lzma_stream. +struct lzma_internal_s { + /// The actual coder that should do something useful + lzma_next_coder next; + + /// Track the state of the coder. This is used to validate arguments + /// so that the actual coders can rely on e.g. that LZMA_SYNC_FLUSH + /// is used on every call to lzma_code until next.code has returned + /// LZMA_STREAM_END. + enum { + ISEQ_RUN, + ISEQ_SYNC_FLUSH, + ISEQ_FULL_FLUSH, + ISEQ_FINISH, + ISEQ_END, + ISEQ_ERROR, + } sequence; + + /// A copy of lzma_stream avail_in. This is used to verify that the + /// amount of input doesn't change once e.g. 
LZMA_FINISH has been + /// used. + size_t avail_in; + + /// Indicates which lzma_action values are allowed by next.code. + bool supported_actions[4]; + + /// If true, lzma_code will return LZMA_BUF_ERROR if no progress was + /// made (no input consumed and no output produced by next.code). + bool allow_buf_error; +}; + + +/// Allocates memory +extern void *lzma_alloc(size_t size, lzma_allocator *allocator) + lzma_attribute((malloc)); + +/// Frees memory +extern void lzma_free(void *ptr, lzma_allocator *allocator); + + +/// Allocates strm->internal if it is NULL, and initializes *strm and +/// strm->internal. This function is only called via lzma_next_strm_init macro. +extern lzma_ret lzma_strm_init(lzma_stream *strm); + +/// Initializes the next filter in the chain, if any. This takes care of +/// freeing the memory of previously initialized filter if it is different +/// than the filter being initialized now. This way the actual filter +/// initialization functions don't need to use lzma_next_coder_init macro. +extern lzma_ret lzma_next_filter_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +/// Update the next filter in the chain, if any. This checks that +/// the application is not trying to change the Filter IDs. +extern lzma_ret lzma_next_filter_update( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *reversed_filters); + +/// Frees the memory allocated for next->coder either using next->end or, +/// if next->end is NULL, using lzma_free. +extern void lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator); + + +/// Copy as much data as possible from in[] to out[] and update *in_pos +/// and *out_pos accordingly. Returns the number of bytes copied. 
+extern size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size); + + +/// \brief Return if expression doesn't evaluate to LZMA_OK +/// +/// There are several situations where we want to return immediately +/// with the value of expr if it isn't LZMA_OK. This macro shortens +/// the code a little. +#define return_if_error(expr) \ +do { \ + const lzma_ret ret_ = (expr); \ + if (ret_ != LZMA_OK) \ + return ret_; \ +} while (0) + + +/// If next isn't already initialized, free the previous coder. Then mark +/// that next is _possibly_ initialized for the coder using this macro. +/// "Possibly" means that if e.g. allocation of next->coder fails, the +/// structure isn't actually initialized for this coder, but leaving +/// next->init to func is still OK. +#define lzma_next_coder_init(func, next, allocator) \ +do { \ + if ((uintptr_t)(func) != (next)->init) \ + lzma_next_end(next, allocator); \ + (next)->init = (uintptr_t)(func); \ +} while (0) + + +/// Initializes lzma_strm and calls func() to initialize strm->internal->next. +/// (The function being called will use lzma_next_coder_init()). If +/// initialization fails, memory that wasn't freed by func() is freed +/// along strm->internal. +#define lzma_next_strm_init(func, strm, ...) 
\ +do { \ + return_if_error(lzma_strm_init(strm)); \ + const lzma_ret ret_ = func(&(strm)->internal->next, \ + (strm)->allocator, __VA_ARGS__); \ + if (ret_ != LZMA_OK) { \ + lzma_end(strm); \ + return ret_; \ + } \ +} while (0) + +#endif Index: contrib/xz/src/liblzma/common/alone_encoder.c =================================================================== --- contrib/xz/src/liblzma/common/alone_encoder.c (revision 0) +++ contrib/xz/src/liblzma/common/alone_encoder.c (revision 0) @@ -0,0 +1,157 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_encoder.c +/// \brief Encoder for LZMA_Alone files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "lzma_encoder.h" + + +#define ALONE_HEADER_SIZE (1 + 4 + 8) + + +struct lzma_coder_s { + lzma_next_coder next; + + enum { + SEQ_HEADER, + SEQ_CODE, + } sequence; + + size_t header_pos; + uint8_t header[ALONE_HEADER_SIZE]; +}; + + +static lzma_ret +alone_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_HEADER: + lzma_bufcpy(coder->header, &coder->header_pos, + ALONE_HEADER_SIZE, + out, out_pos, out_size); + if (coder->header_pos < ALONE_HEADER_SIZE) + return LZMA_OK; + + coder->sequence = SEQ_CODE; + break; + + case SEQ_CODE: + return coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + 
lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +// At least for now, this is not used by any internal function. +static lzma_ret +alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_lzma *options) +{ + lzma_next_coder_init(&alone_encoder_init, next, allocator); + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &alone_encode; + next->end = &alone_encoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + next->coder->sequence = SEQ_HEADER; + next->coder->header_pos = 0; + + // Encode the header: + // - Properties (1 byte) + if (lzma_lzma_lclppb_encode(options, next->coder->header)) + return LZMA_OPTIONS_ERROR; + + // - Dictionary size (4 bytes) + if (options->dict_size < LZMA_DICT_SIZE_MIN) + return LZMA_OPTIONS_ERROR; + + // Round up to the next 2^n or 2^n + 2^(n - 1) depending on which + // one is the next unless it is UINT32_MAX. While the header would + // allow any 32-bit integer, we do this to keep the decoder of liblzma + // accepting the resulting files. + uint32_t d = options->dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + if (d != UINT32_MAX) + ++d; + + unaligned_write32le(next->coder->header + 1, d); + + // - Uncompressed size (always unknown and using EOPM) + memset(next->coder->header + 1 + 4, 0xFF, 8); + + // Initialize the LZMA encoder. 
+ const lzma_filter_info filters[2] = { + { + .init = &lzma_lzma_encoder_init, + .options = (void *)(options), + }, { + .init = NULL, + } + }; + + return lzma_next_filter_init(&next->coder->next, allocator, filters); +} + + +/* +extern lzma_ret +lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_alone *options) +{ + lzma_next_coder_init(&alone_encoder_init, next, allocator, options); +} +*/ + + +extern LZMA_API(lzma_ret) +lzma_alone_encoder(lzma_stream *strm, const lzma_options_lzma *options) +{ + lzma_next_strm_init(alone_encoder_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/common/filter_common.c =================================================================== --- contrib/xz/src/liblzma/common/filter_common.c (revision 0) +++ contrib/xz/src/liblzma/common/filter_common.c (revision 0) @@ -0,0 +1,346 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_common.c +/// \brief Filter-specific stuff common for both encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_common.h" + + +static const struct { + /// Filter ID + lzma_vli id; + + /// Size of the filter-specific options structure + size_t options_size; + + /// True if it is OK to use this filter as non-last filter in + /// the chain. + bool non_last_ok; + + /// True if it is OK to use this filter as the last filter in + /// the chain. + bool last_ok; + + /// True if the filter may change the size of the data (that is, the + /// amount of encoded output can be different than the amount of + /// uncompressed input). 
+ bool changes_size; + +} features[] = { +#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) + { + .id = LZMA_FILTER_LZMA1, + .options_size = sizeof(lzma_options_lzma), + .non_last_ok = false, + .last_ok = true, + .changes_size = true, + }, +#endif +#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + { + .id = LZMA_FILTER_LZMA2, + .options_size = sizeof(lzma_options_lzma), + .non_last_ok = false, + .last_ok = true, + .changes_size = true, + }, +#endif +#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK) + { + .id = LZMA_FILTER_SUBBLOCK, + .options_size = sizeof(lzma_options_subblock), + .non_last_ok = true, + .last_ok = true, + .changes_size = true, + }, +#endif +#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86) + { + .id = LZMA_FILTER_X86, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC) + { + .id = LZMA_FILTER_POWERPC, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64) + { + .id = LZMA_FILTER_IA64, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM) + { + .id = LZMA_FILTER_ARM, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB) + { + .id = LZMA_FILTER_ARMTHUMB, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC) + { + .id = LZMA_FILTER_SPARC, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + 
.changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + { + .id 
= LZMA_FILTER_DELTA, + .options_size = sizeof(lzma_options_delta), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif + { + .id = LZMA_VLI_UNKNOWN + } +}; + + +extern LZMA_API(lzma_ret) +lzma_filters_copy(const lzma_filter *src, lzma_filter *dest, + lzma_allocator *allocator) +{ + if (src == NULL || dest == NULL) + return LZMA_PROG_ERROR; + + lzma_ret ret; + size_t i; + for (i = 0; src[i].id != LZMA_VLI_UNKNOWN; ++i) { + // There must be a maximum of four filters plus + // the array terminator. + if (i == LZMA_FILTERS_MAX) { + ret = LZMA_OPTIONS_ERROR; + goto error; + } + + dest[i].id = src[i].id; + + if (src[i].options == NULL) { + dest[i].options = NULL; + } else { + // See if the filter is supported only when the + // options is not NULL. This might be convenient + // sometimes if the app is actually copying only + // a partial filter chain with a place holder ID. + // + // When options is not NULL, the Filter ID must be + // supported by us, because otherwise we don't know + // how big the options are. + size_t j; + for (j = 0; src[i].id != features[j].id; ++j) { + if (features[j].id == LZMA_VLI_UNKNOWN) { + ret = LZMA_OPTIONS_ERROR; + goto error; + } + } + + // Allocate and copy the options. + dest[i].options = lzma_alloc(features[j].options_size, + allocator); + if (dest[i].options == NULL) { + ret = LZMA_MEM_ERROR; + goto error; + } + + memcpy(dest[i].options, src[i].options, + features[j].options_size); + } + } + + // Terminate the filter array. + assert(i <= LZMA_FILTERS_MAX + 1); + dest[i].id = LZMA_VLI_UNKNOWN; + dest[i].options = NULL; + + return LZMA_OK; + +error: + // Free the options which we have already allocated. + while (i-- > 0) { + lzma_free(dest[i].options, allocator); + dest[i].options = NULL; + } + + return ret; +} + + +static lzma_ret +validate_chain(const lzma_filter *filters, size_t *count) +{ + // There must be at least one filter. 
+ if (filters == NULL || filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_PROG_ERROR; + + // Number of filters (the last one included) that may change the size + // of the data significantly (that is, more than 1-2 % or so). + size_t changes_size_count = 0; + + // True if it is OK to add a new filter after the current filter. + bool non_last_ok = true; + + // True if the last filter in the given chain is actually usable as + // the last filter. Only filters that support embedding End of Payload + // Marker can be used as the last filter in the chain. + bool last_ok = false; + + size_t i = 0; + do { + size_t j; + for (j = 0; filters[i].id != features[j].id; ++j) + if (features[j].id == LZMA_VLI_UNKNOWN) + return LZMA_OPTIONS_ERROR; + + // If the previous filter in the chain cannot be a non-last + // filter, the chain is invalid. + if (!non_last_ok) + return LZMA_OPTIONS_ERROR; + + non_last_ok = features[j].non_last_ok; + last_ok = features[j].last_ok; + changes_size_count += features[j].changes_size; // bool adds 0 or 1 + + } while (filters[++i].id != LZMA_VLI_UNKNOWN); + + // There must be 1-4 filters. The last filter must be usable as + // the last filter in the chain. A maximum of three filters are + // allowed to change the size of the data. + if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3) + return LZMA_OPTIONS_ERROR; + + *count = i; // number of filters, terminator excluded + return LZMA_OK; +} + + +extern lzma_ret +lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *options, + lzma_filter_find coder_find, bool is_encoder) +{ + // Do some basic validation and get the number of filters. + size_t count; + return_if_error(validate_chain(options, &count)); + + // Set the filter functions and copy the options pointer. + lzma_filter_info filters[LZMA_FILTERS_MAX + 1]; + if (is_encoder) { + for (size_t i = 0; i < count; ++i) { + // The order of the filters is reversed in the + // encoder. It allows more efficient handling + // of the uncompressed data.
+ const size_t j = count - i - 1; // mirror index: options[0] lands in filters[count - 1] + + const lzma_filter_coder *const fc + = coder_find(options[i].id); + if (fc == NULL || fc->init == NULL) + return LZMA_OPTIONS_ERROR; + + filters[j].id = options[i].id; + filters[j].init = fc->init; + filters[j].options = options[i].options; + } + } else { + for (size_t i = 0; i < count; ++i) { + const lzma_filter_coder *const fc + = coder_find(options[i].id); + if (fc == NULL || fc->init == NULL) + return LZMA_OPTIONS_ERROR; + + filters[i].id = options[i].id; + filters[i].init = fc->init; + filters[i].options = options[i].options; + } + } + + // Terminate the array. + filters[count].id = LZMA_VLI_UNKNOWN; + filters[count].init = NULL; + + // Initialize the filters. + const lzma_ret ret = lzma_next_filter_init(next, allocator, filters); + if (ret != LZMA_OK) + lzma_next_end(next, allocator); + + return ret; +} + + +extern uint64_t +lzma_raw_coder_memusage(lzma_filter_find coder_find, + const lzma_filter *filters) +{ + // The chain has to pass validate_chain(); the count is not needed. + { + size_t tmp; + if (validate_chain(filters, &tmp) != LZMA_OK) + return UINT64_MAX; + } + + uint64_t total = 0; + size_t i = 0; + + do { + const lzma_filter_coder *const fc + = coder_find(filters[i].id); + if (fc == NULL) + return UINT64_MAX; // Unsupported Filter ID + + if (fc->memusage == NULL) { + // This filter doesn't have a function to calculate + // the memory usage and validate the options. Such + // filters need only little memory, so we use 1 KiB + // as a good estimate. They also accept all possible + // options, so there's no need to worry about lack + // of validation. + total += 1024; + } else { + // Call the filter-specific memory usage calculation + // function. + const uint64_t usage + = fc->memusage(filters[i].options); + if (usage == UINT64_MAX) + return UINT64_MAX; // Invalid options + + total += usage; + } + } while (filters[++i].id != LZMA_VLI_UNKNOWN); + + // Add some fixed amount of extra.
It's to compensate memory usage + // of Stream, Block etc. coders, malloc() overhead, stack etc. + return total + LZMA_MEMUSAGE_BASE; +} Index: contrib/xz/src/liblzma/simple/simple_decoder.c =================================================================== --- contrib/xz/src/liblzma/simple/simple_decoder.c (revision 0) +++ contrib/xz/src/liblzma/simple/simple_decoder.c (revision 0) @@ -0,0 +1,40 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_decoder.c +/// \brief Properties decoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_decoder.h" + + +extern lzma_ret +lzma_simple_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size == 0) + return LZMA_OK; // no properties: *options is not written + + if (props_size != 4) + return LZMA_OPTIONS_ERROR; + + lzma_options_bcj *opt = lzma_alloc( + sizeof(lzma_options_bcj), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + opt->start_offset = unaligned_read32le(props); + + // Don't leave an options structure allocated if start_offset is zero. + if (opt->start_offset == 0) + lzma_free(opt, allocator); // *options is not written on this path either + else + *options = opt; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/simple/arm.c =================================================================== --- contrib/xz/src/liblzma/simple/arm.c (revision 0) +++ contrib/xz/src/liblzma/simple/arm.c (revision 0) @@ -0,0 +1,69 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file arm.c +/// \brief Filter for ARM binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +arm_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { // whole 4-byte words only + if (buffer[i + 3] == 0xEB) { + uint32_t src = (buffer[i + 2] << 16) + | (buffer[i + 1] << 8) + | (buffer[i + 0]); + src <<= 2; // scale the 24-bit field to a byte count + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + 8 + src; // encoder adds the word's position (+8) + else + dest = src - (now_pos + (uint32_t)(i) + 8); // decoder subtracts it again + + dest >>= 2; + buffer[i + 2] = (dest >> 16); + buffer[i + 1] = (dest >> 8); + buffer[i + 0] = dest; + } + } + + return i; // bytes processed; a tail shorter than 4 bytes is untouched +} + + +static lzma_ret +arm_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &arm_code, 0, 4, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_arm_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_arm_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm_coder_init(next, allocator, filters, false); +} Index: contrib/xz/src/liblzma/simple/powerpc.c =================================================================== --- contrib/xz/src/liblzma/simple/powerpc.c (revision 0) +++ contrib/xz/src/liblzma/simple/powerpc.c (revision 0) @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file powerpc.c +/// \brief Filter for PowerPC (big endian) binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +powerpc_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { // whole 4-byte words only + // PowerPC branch 6(48) 24(Offset) 1(Abs) 1(Link) + if ((buffer[i] >> 2) == 0x12 + && ((buffer[i + 3] & 3) == 1)) { + + const uint32_t src = ((buffer[i + 0] & 3) << 24) + | (buffer[i + 1] << 16) + | (buffer[i + 2] << 8) + | (buffer[i + 3] & (~3)); + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; // encoder adds the word's position + else + dest = src - (now_pos + (uint32_t)(i)); // decoder subtracts it again + + buffer[i + 0] = 0x48 | ((dest >> 24) & 0x03); + buffer[i + 1] = (dest >> 16); + buffer[i + 2] = (dest >> 8); + buffer[i + 3] &= 0x03; // keep the low two (Abs/Link) bits + buffer[i + 3] |= dest; + } + } + + return i; // bytes processed; a tail shorter than 4 bytes is untouched +} + + +static lzma_ret +powerpc_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &powerpc_code, 0, 4, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_powerpc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_powerpc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, false); +} Index: contrib/xz/src/liblzma/simple/armthumb.c =================================================================== --- contrib/xz/src/liblzma/simple/armthumb.c (revision 0) +++ contrib/xz/src/liblzma/simple/armthumb.c (revision 0) @@ -0,0 +1,74 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file armthumb.c +/// \brief Filter for ARM-Thumb binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +//
This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +armthumb_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 2) { // 4-byte window, advanced in 2-byte steps + if ((buffer[i + 1] & 0xF8) == 0xF0 + && (buffer[i + 3] & 0xF8) == 0xF8) { + uint32_t src = ((buffer[i + 1] & 0x7) << 19) + | (buffer[i + 0] << 11) + | ((buffer[i + 3] & 0x7) << 8) + | (buffer[i + 2]); + + src <<= 1; // scale the 22-bit field to a byte count + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + 4 + src; // encoder adds the position (+4) + else + dest = src - (now_pos + (uint32_t)(i) + 4); // decoder subtracts it again + + dest >>= 1; + buffer[i + 1] = 0xF0 | ((dest >> 19) & 0x7); + buffer[i + 0] = (dest >> 11); + buffer[i + 3] = 0xF8 | ((dest >> 8) & 0x7); + buffer[i + 2] = (dest); + i += 2; // with the loop's own += 2 this skips all 4 converted bytes + } + } + + return i; // bytes processed; the unprocessed tail is untouched +} + + +static lzma_ret +armthumb_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &armthumb_code, 0, 4, 2, is_encoder); +} + + +extern lzma_ret +lzma_simple_armthumb_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return armthumb_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_armthumb_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return armthumb_coder_init(next, allocator, filters, false); +} Index: contrib/xz/src/liblzma/simple/simple_decoder.h =================================================================== --- contrib/xz/src/liblzma/simple/simple_decoder.h (revision 0) +++ contrib/xz/src/liblzma/simple/simple_decoder.h (revision 0) @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file
simple_decoder.h +/// \brief Properties decoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_DECODER_H +#define LZMA_SIMPLE_DECODER_H + +#include "simple_coder.h" + +extern lzma_ret lzma_simple_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif Index: contrib/xz/src/liblzma/simple/ia64.c =================================================================== --- contrib/xz/src/liblzma/simple/ia64.c (revision 0) +++ contrib/xz/src/liblzma/simple/ia64.c (revision 0) @@ -0,0 +1,110 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file ia64.c +/// \brief Filter for IA64 (Itanium) binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +ia64_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + static const uint32_t BRANCH_TABLE[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 + }; + + size_t i; + for (i = 0; i + 16 <= size; i += 16) { // whole 16-byte units only + const uint32_t instr_template = buffer[i] & 0x1F; + const uint32_t mask = BRANCH_TABLE[instr_template]; // bit n set: slot n is examined + uint32_t bit_pos = 5; // slot 0 starts right after the 5 template bits + + for (size_t slot = 0; slot < 3; ++slot, bit_pos += 41) { + if (((mask >> slot) & 1) == 0) + continue; + + const size_t byte_pos = (bit_pos >> 3); + const uint32_t bit_res = bit_pos & 0x7; + uint64_t instruction = 0; + + for (size_t j = 0; j < 6; ++j) // little-endian load of 6 bytes + instruction += (uint64_t)( + buffer[i + j + byte_pos]) + << (8 * j); + + uint64_t inst_norm = instruction >> bit_res; + + if (((inst_norm >> 37) & 0xF) == 0x5 + && ((inst_norm >> 9) & 0x7) == 0 + /* && (inst_norm & 0x3F)== 0 */ + ) { + uint32_t src = (uint32_t)( + (inst_norm >> 13) & 0xFFFFF); + src |= ((inst_norm >> 36) & 1) << 20; + + src <<= 4; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + dest >>= 4; + + inst_norm &= ~((uint64_t)(0x8FFFFF) << 13); + inst_norm |= (uint64_t)(dest & 0xFFFFF) << 13; + inst_norm |= (uint64_t)(dest & 0x100000) + << (36 - 20); + + instruction &= (1 << bit_res) - 1; // keep only the bits below this slot + instruction |= (inst_norm << bit_res); + + for (size_t j = 0; j < 6; j++) + buffer[i + j + byte_pos] = (uint8_t)( + instruction + >> (8 * j)); + } + } + } + + return i; // bytes processed; a tail shorter than 16 bytes is untouched +} + + +static lzma_ret +ia64_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &ia64_code, 0, 16, 16, is_encoder); +} + + +extern lzma_ret
+lzma_simple_ia64_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_ia64_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, false); +} Index: contrib/xz/src/liblzma/simple/x86.c =================================================================== --- contrib/xz/src/liblzma/simple/x86.c (revision 0) +++ contrib/xz/src/liblzma/simple/x86.c (revision 0) @@ -0,0 +1,154 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file x86.c +/// \brief Filter for x86 binaries (BCJ filter) +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) + + +struct lzma_simple_s { + uint32_t prev_mask; + uint32_t prev_pos; +}; + + +static size_t +x86_code(lzma_simple *simple, uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + static const bool MASK_TO_ALLOWED_STATUS[8] + = { true, true, true, false, true, false, false, false }; + + static const uint32_t MASK_TO_BIT_NUMBER[8] + = { 0, 1, 2, 2, 3, 3, 3, 3 }; + + uint32_t prev_mask = simple->prev_mask; + uint32_t prev_pos = simple->prev_pos; + + if (size < 5) + return 0; // a candidate needs 5 bytes: buffer[pos] .. buffer[pos + 4] + + if (now_pos - prev_pos > 5) + prev_pos = now_pos - 5; + + const size_t limit = size - 5; // last index at which 5 bytes are still available + size_t buffer_pos = 0; + + while (buffer_pos <= limit) { + uint8_t b = buffer[buffer_pos]; + if (b != 0xE8 && b != 0xE9) { + ++buffer_pos; + continue; + } + + const uint32_t offset = now_pos + (uint32_t)(buffer_pos) + - prev_pos; + prev_pos = now_pos + (uint32_t)(buffer_pos); + + if (offset > 5) { + prev_mask = 0; + }
else { + for (uint32_t i = 0; i < offset; ++i) { + prev_mask &= 0x77; + prev_mask <<= 1; + } + } + + b = buffer[buffer_pos + 4]; + + if (Test86MSByte(b) + && MASK_TO_ALLOWED_STATUS[(prev_mask >> 1) & 0x7] + && (prev_mask >> 1) < 0x10) { + + uint32_t src = ((uint32_t)(b) << 24) + | ((uint32_t)(buffer[buffer_pos + 3]) << 16) + | ((uint32_t)(buffer[buffer_pos + 2]) << 8) + | (buffer[buffer_pos + 1]); // little-endian 32-bit operand + + uint32_t dest; + while (true) { + if (is_encoder) + dest = src + (now_pos + (uint32_t)( + buffer_pos) + 5); + else + dest = src - (now_pos + (uint32_t)( + buffer_pos) + 5); + + if (prev_mask == 0) + break; + + const uint32_t i = MASK_TO_BIT_NUMBER[ + prev_mask >> 1]; + + b = (uint8_t)(dest >> (24 - i * 8)); + + if (!Test86MSByte(b)) + break; + + src = dest ^ ((1 << (32 - i * 8)) - 1); + } + + buffer[buffer_pos + 4] + = (uint8_t)(~(((dest >> 24) & 1) - 1)); // 0x00 or 0xFF, chosen by bit 24 of dest + buffer[buffer_pos + 3] = (uint8_t)(dest >> 16); + buffer[buffer_pos + 2] = (uint8_t)(dest >> 8); + buffer[buffer_pos + 1] = (uint8_t)(dest); + buffer_pos += 5; // skip the converted 5-byte sequence + prev_mask = 0; + + } else { + ++buffer_pos; + prev_mask |= 1; + if (Test86MSByte(b)) + prev_mask |= 0x10; + } + } + + simple->prev_mask = prev_mask; + simple->prev_pos = prev_pos; + + return buffer_pos; // bytes processed; up to 4 trailing bytes are left unprocessed +} + + +static lzma_ret +x86_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters, + &x86_code, sizeof(lzma_simple), 5, 1, is_encoder); + + if (ret == LZMA_OK) { + next->coder->simple->prev_mask = 0; + next->coder->simple->prev_pos = (uint32_t)(-5); + } + + return ret; +} + + +extern lzma_ret +lzma_simple_x86_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return x86_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_x86_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return
x86_coder_init(next, allocator, filters, false); +} Index: contrib/xz/src/liblzma/simple/simple_coder.c =================================================================== --- contrib/xz/src/liblzma/simple/simple_coder.c (revision 0) +++ contrib/xz/src/liblzma/simple/simple_coder.c (revision 0) @@ -0,0 +1,280 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_coder.c +/// \brief Wrapper for simple filters +/// +/// Simple filters don't change the size of the data i.e. number of bytes +/// in equals the number of bytes out. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +/// Copies or encodes/decodes more data to out[]. +static lzma_ret +copy_or_code(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + assert(!coder->end_was_reached); + + if (coder->next.code == NULL) { + lzma_bufcpy(in, in_pos, in_size, out, out_pos, out_size); + + // Check if end of stream was reached. + if (coder->is_encoder && action == LZMA_FINISH + && *in_pos == in_size) + coder->end_was_reached = true; + + } else { + // Call the next coder in the chain to provide us some data. + // We don't care about uncompressed_size here, because + // the next filter in the chain will do it for us (since + // we don't change the size of the data).
+ const lzma_ret ret = coder->next.code( + coder->next.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + + if (ret == LZMA_STREAM_END) { + assert(!coder->is_encoder + || action == LZMA_FINISH); + coder->end_was_reached = true; + + } else if (ret != LZMA_OK) { + return ret; + } + } + + return LZMA_OK; +} + + +static size_t +call_filter(lzma_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t filtered = coder->filter(coder->simple, + coder->now_pos, coder->is_encoder, + buffer, size); + coder->now_pos += filtered; // the position advances only past bytes the filter processed + return filtered; +} + + +static lzma_ret +simple_code(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // TODO: Add partial support for LZMA_SYNC_FLUSH. We can support it + // in cases when the filter is able to filter everything. With most + // simple filters it can be done at an offset that is a multiple of 2, + // 4, or 16. With x86 filter, it needs good luck, and thus cannot + // be made to work predictably. + if (action == LZMA_SYNC_FLUSH) + return LZMA_OPTIONS_ERROR; + + // Flush already filtered data from coder->buffer[] to out[]. + if (coder->pos < coder->filtered) { + lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered, + out, out_pos, out_size); + + // If we couldn't flush all the filtered data, return to + // application immediately. + if (coder->pos < coder->filtered) + return LZMA_OK; + + if (coder->end_was_reached) { + assert(coder->filtered == coder->size); + return LZMA_STREAM_END; + } + } + + // If we get here, there is no filtered data left in the buffer. + coder->filtered = 0; + + assert(!coder->end_was_reached); + + // If there is more output space left than there is unfiltered data + // in coder->buffer[], flush coder->buffer[] to out[], and copy/code + // more data to out[] hopefully filling it completely.
Then filter + // the data in out[]. This step is where most of the data gets + // filtered if the buffer sizes used by the application are reasonable. + const size_t out_avail = out_size - *out_pos; + const size_t buf_avail = coder->size - coder->pos; + if (out_avail > buf_avail) { + // Store the old position so that we know from which byte + // to start filtering. + const size_t out_start = *out_pos; + + // Flush data from coder->buffer[] to out[], but don't reset + // coder->pos and coder->size yet. This way the coder can be + // restarted if the next filter in the chain returns e.g. + // LZMA_MEM_ERROR. + memcpy(out + *out_pos, coder->buffer + coder->pos, buf_avail); + *out_pos += buf_avail; + + // Copy/Encode/Decode more data to out[]. + { + const lzma_ret ret = copy_or_code(coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + assert(ret != LZMA_STREAM_END); + if (ret != LZMA_OK) + return ret; + } + + // Filter out[]. + const size_t size = *out_pos - out_start; + const size_t filtered = call_filter( + coder, out + out_start, size); + + const size_t unfiltered = size - filtered; + assert(unfiltered <= coder->allocated / 2); + + // Now we can update coder->pos and coder->size, because + // the next coder in the chain (if any) was successful. + coder->pos = 0; + coder->size = unfiltered; + + if (coder->end_was_reached) { + // The last bytes have been copied to out[] already. + // They are left as is. + coder->size = 0; + + } else if (unfiltered > 0) { + // There is unfiltered data left in out[]. Copy it to + // coder->buffer[] and rewind *out_pos appropriately. + *out_pos -= unfiltered; + memcpy(coder->buffer, out + *out_pos, unfiltered); + } + } else if (coder->pos > 0) { + memmove(coder->buffer, coder->buffer + coder->pos, buf_avail); // regions may overlap, hence memmove + coder->size -= coder->pos; + coder->pos = 0; + } + + assert(coder->pos == 0); + + // If coder->buffer[] isn't empty, try to fill it by copying/decoding + // more data.
Then filter coder->buffer[] and copy the successfully + // filtered data to out[]. It is probable that some filtered and + // unfiltered data will be left in coder->buffer[]. + if (coder->size > 0) { + { + const lzma_ret ret = copy_or_code(coder, allocator, + in, in_pos, in_size, + coder->buffer, &coder->size, + coder->allocated, action); + assert(ret != LZMA_STREAM_END); + if (ret != LZMA_OK) + return ret; + } + + coder->filtered = call_filter( + coder, coder->buffer, coder->size); + + // Everything is considered to be filtered if coder->buffer[] + // contains the last bytes of the data. + if (coder->end_was_reached) + coder->filtered = coder->size; + + // Flush as much as possible. + lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered, + out, out_pos, out_size); + } + + // Check if we got everything done. + if (coder->end_was_reached && coder->pos == coder->size) + return LZMA_STREAM_END; + + return LZMA_OK; +} + + +static void +simple_coder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder->simple, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +simple_coder_update(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters_null lzma_attribute((unused)), + const lzma_filter *reversed_filters) +{ + // No update support, just call the next filter in the chain. + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters + 1); +} + + +extern lzma_ret +lzma_simple_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + size_t (*filter)(lzma_simple *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size), + size_t simple_size, size_t unfiltered_max, + uint32_t alignment, bool is_encoder) +{ + // Allocate memory for the lzma_coder structure if needed. + if (next->coder == NULL) { + // Here we allocate space also for the temporary buffer.
We + // need twice the size of unfiltered_max, because then it + // is always possible to filter at least unfiltered_max bytes + // more data in coder->buffer[] if it can be filled completely. + next->coder = lzma_alloc(sizeof(lzma_coder) + + 2 * unfiltered_max, allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &simple_code; + next->end = &simple_coder_end; + next->update = &simple_coder_update; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->filter = filter; + next->coder->allocated = 2 * unfiltered_max; + + // Allocate memory for filter-specific data structure. + if (simple_size > 0) { + next->coder->simple = lzma_alloc( + simple_size, allocator); + if (next->coder->simple == NULL) + return LZMA_MEM_ERROR; // next->coder and next->end are already set at this point + } else { + next->coder->simple = NULL; + } + } + + if (filters[0].options != NULL) { + const lzma_options_bcj *simple = filters[0].options; + next->coder->now_pos = simple->start_offset; + if (next->coder->now_pos & (alignment - 1)) // mask test: only meaningful for a power-of-two alignment + return LZMA_OPTIONS_ERROR; + } else { + next->coder->now_pos = 0; + } + + // Reset variables. + next->coder->is_encoder = is_encoder; + next->coder->end_was_reached = false; + next->coder->pos = 0; + next->coder->filtered = 0; + next->coder->size = 0; + + return lzma_next_filter_init( + &next->coder->next, allocator, filters + 1); +} Index: contrib/xz/src/liblzma/simple/simple_private.h =================================================================== --- contrib/xz/src/liblzma/simple/simple_private.h (revision 0) +++ contrib/xz/src/liblzma/simple/simple_private.h (revision 0) @@ -0,0 +1,76 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_private.h +/// \brief Private definitions for so called simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file.
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_PRIVATE_H +#define LZMA_SIMPLE_PRIVATE_H + +#include "simple_coder.h" + + +typedef struct lzma_simple_s lzma_simple; + +struct lzma_coder_s { + /// Next filter in the chain + lzma_next_coder next; + + /// True if the next coder in the chain has returned LZMA_STREAM_END + /// or, with no next coder, all input was consumed on LZMA_FINISH. + bool end_was_reached; + + /// True if filter() should encode the data; false to decode. + /// Currently all simple filters use the same function for encoding + /// and decoding, because the difference between encoders and decoders + /// is very small. + bool is_encoder; + + /// Pointer to filter-specific function, which does + /// the actual filtering. + size_t (*filter)(lzma_simple *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size); + + /// Pointer to filter-specific data, or NULL if filter doesn't need + /// any extra data. + lzma_simple *simple; + + /// The lowest 32 bits of the current position in the data. Most + /// filters need this to do conversions between absolute and relative + /// addresses. + uint32_t now_pos; + + /// Size of the memory allocated for the buffer. + size_t allocated; + + /// Flushing position in the temporary buffer. buffer[pos] is the + /// next byte to be copied to out[]. + size_t pos; + + /// buffer[filtered] is the first unfiltered byte. When pos is smaller + /// than filtered, there is unflushed filtered data in the buffer. + size_t filtered; + + /// Total number of bytes (both filtered and unfiltered) currently + /// in the temporary buffer.
+ size_t size; + + /// Temporary buffer + uint8_t buffer[]; // flexible array member, sized by the allocation that creates the struct +}; + + +extern lzma_ret lzma_simple_coder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters, + size_t (*filter)(lzma_simple *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size), + size_t simple_size, size_t unfiltered_max, + uint32_t alignment, bool is_encoder); + +#endif Index: contrib/xz/src/liblzma/simple/sparc.c =================================================================== --- contrib/xz/src/liblzma/simple/sparc.c (revision 0) +++ contrib/xz/src/liblzma/simple/sparc.c (revision 0) @@ -0,0 +1,81 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sparc.c +/// \brief Filter for SPARC binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +sparc_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { // whole 4-byte words only + + if ((buffer[i] == 0x40 && (buffer[i + 1] & 0xC0) == 0x00) + || (buffer[i] == 0x7F + && (buffer[i + 1] & 0xC0) == 0xC0)) { + + uint32_t src = ((uint32_t)buffer[i + 0] << 24) + | ((uint32_t)buffer[i + 1] << 16) + | ((uint32_t)buffer[i + 2] << 8) + | ((uint32_t)buffer[i + 3]); // big-endian 32-bit load + + src <<= 2; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; // encoder adds the word's position + else + dest = src - (now_pos + (uint32_t)(i)); // decoder subtracts it again + + dest >>= 2; + + dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) + | (dest & 0x3FFFFF) + | 0x40000000; // copy bit 22 into bits 22..29, then set bit 30 + + buffer[i + 0] = (uint8_t)(dest >> 24); + buffer[i + 1] = (uint8_t)(dest >> 16); + buffer[i + 2] = (uint8_t)(dest >> 8); + buffer[i + 3] = (uint8_t)(dest); + } + } + + return i; // bytes processed; a tail shorter than 4 bytes is untouched +} + + +static lzma_ret
+sparc_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &sparc_code, 0, 4, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_sparc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_sparc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, false); +} Index: contrib/xz/src/liblzma/simple/simple_encoder.c =================================================================== --- contrib/xz/src/liblzma/simple/simple_encoder.c (revision 0) +++ contrib/xz/src/liblzma/simple/simple_encoder.c (revision 0) @@ -0,0 +1,38 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_encoder.c +/// \brief Properties encoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_encoder.h" + + +extern lzma_ret +lzma_simple_props_size(uint32_t *size, const void *options) +{ + const lzma_options_bcj *const opt = options; + *size = (opt == NULL || opt->start_offset == 0) ? 0 : 4; // 4 = the 32-bit start_offset; 0 = nothing stored + return LZMA_OK; +} + + +extern lzma_ret +lzma_simple_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_bcj *const opt = options; + + // The default start offset is zero, so we don't need to store any + // options unless the start offset is non-zero.
+ if (opt == NULL || opt->start_offset == 0) + return LZMA_OK; + + unaligned_write32le(out, opt->start_offset); + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/simple/simple_coder.h =================================================================== --- contrib/xz/src/liblzma/simple/simple_coder.h (revision 0) +++ contrib/xz/src/liblzma/simple/simple_coder.h (revision 0) @@ -0,0 +1,60 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_coder.h +/// \brief Wrapper for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_CODER_H +#define LZMA_SIMPLE_CODER_H + +#include "common.h" + + +extern lzma_ret lzma_simple_x86_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_x86_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_powerpc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_powerpc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_ia64_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_ia64_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_arm_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_arm_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret 
lzma_simple_armthumb_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_armthumb_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_sparc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_sparc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif Index: contrib/xz/src/liblzma/simple/simple_encoder.h =================================================================== --- contrib/xz/src/liblzma/simple/simple_encoder.h (revision 0) +++ contrib/xz/src/liblzma/simple/simple_encoder.h (revision 0) @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_encoder.h +/// \brief Properties encoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_ENCODER_H +#define LZMA_SIMPLE_ENCODER_H + +#include "simple_coder.h" + + +extern lzma_ret lzma_simple_props_size(uint32_t *size, const void *options); + +extern lzma_ret lzma_simple_props_encode(const void *options, uint8_t *out); + +#endif Index: contrib/xz/src/liblzma/rangecoder/range_encoder.h =================================================================== --- contrib/xz/src/liblzma/rangecoder/range_encoder.h (revision 0) +++ contrib/xz/src/liblzma/rangecoder/range_encoder.h (revision 0) @@ -0,0 +1,231 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_encoder.h +/// \brief Range Encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_RANGE_ENCODER_H
+#define LZMA_RANGE_ENCODER_H
+
+#include "range_common.h"
+#include "price.h"
+
+
+/// Maximum number of symbols that can be put pending into lzma_range_encoder
+/// structure between calls to rc_encode(). For LZMA, 52+5 is enough
+/// (match with big distance and length followed by range encoder flush).
+#define RC_SYMBOLS_MAX 58
+
+
+typedef struct {
+	uint64_t low; ///< Low end of the range; bits above bit 31 are a carry
+	uint64_t cache_size; ///< Bytes held back: cache plus the 0xFF bytes after it
+	uint32_t range; ///< Current range width
+	uint8_t cache; ///< First held-back byte, written by rc_shift_low()
+
+	/// Number of symbols in the tables
+	size_t count;
+
+	/// rc_encode()'s position in the tables
+	size_t pos;
+
+	/// Symbols to encode
+	enum {
+		RC_BIT_0,
+		RC_BIT_1,
+		RC_DIRECT_0,
+		RC_DIRECT_1,
+		RC_FLUSH,
+	} symbols[RC_SYMBOLS_MAX];
+
+	/// Probabilities associated with RC_BIT_0 or RC_BIT_1
+	probability *probs[RC_SYMBOLS_MAX];
+
+} lzma_range_encoder;
+
+/// Puts the encoder into its initial state with no symbols queued.
+static inline void
+rc_reset(lzma_range_encoder *rc)
+{
+	rc->low = 0;
+	rc->cache_size = 1;
+	rc->range = UINT32_MAX;
+	rc->cache = 0;
+	rc->count = 0;
+	rc->pos = 0;
+}
+
+/// Queues one bit to be coded with *prob; output happens later in rc_encode().
+static inline void
+rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
+{
+	rc->symbols[rc->count] = bit; // 0 and 1 are RC_BIT_0 and RC_BIT_1
+	rc->probs[rc->count] = prob;
+	++rc->count;
+}
+
+/// Queues the bit_count lowest bits of symbol, most significant bit first.
+static inline void
+rc_bittree(lzma_range_encoder *rc, probability *probs,
+		uint32_t bit_count, uint32_t symbol)
+{
+	uint32_t model_index = 1;
+
+	do {
+		const uint32_t bit = (symbol >> --bit_count) & 1;
+		rc_bit(rc, &probs[model_index], bit);
+		model_index = (model_index << 1) + bit;
+	} while (bit_count != 0);
+}
+
+/// Like rc_bittree(), but takes the bits of symbol least significant first.
+static inline void
+rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
+		uint32_t bit_count, uint32_t symbol)
+{
+	uint32_t model_index = 1;
+
+	do {
+		const uint32_t bit = symbol & 1;
+		symbol >>= 1;
+		rc_bit(rc, &probs[model_index], bit);
+		model_index = (model_index << 1) + bit;
+	} while (--bit_count != 0);
+}
+
+/// Queues bit_count bits of value (MSB first) as RC_DIRECT_0/RC_DIRECT_1.
+static inline void
+rc_direct(lzma_range_encoder *rc,
+		uint32_t value, uint32_t bit_count)
+{
+	do {
+		rc->symbols[rc->count++]
+				= RC_DIRECT_0 + ((value >> --bit_count) & 1);
+	} while (bit_count != 0);
+}
+
+/// Queues five RC_FLUSH symbols; rc_encode() shifts out one byte for each.
+static inline void
+rc_flush(lzma_range_encoder *rc)
+{
+	for (size_t i = 0; i < 5; ++i)
+		rc->symbols[rc->count++] = RC_FLUSH;
+}
+
+/// Shifts low left by one byte; returns true if out filled up before that.
+static inline bool
+rc_shift_low(lzma_range_encoder *rc,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
+			|| (uint32_t)(rc->low >> 32) != 0) { // top byte != 0xFF, or carry set
+		do {
+			if (*out_pos == out_size)
+				return true;
+
+			out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
+			++*out_pos;
+			rc->cache = 0xFF; // any further held-back bytes are 0xFF
+
+		} while (--rc->cache_size != 0);
+
+		rc->cache = (rc->low >> 24) & 0xFF;
+	}
+
+	++rc->cache_size;
+	rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS;
+
+	return false;
+}
+
+/// Codes the queued symbols into out; returns true if out filled up first.
+static inline bool
+rc_encode(lzma_range_encoder *rc,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	assert(rc->count <= RC_SYMBOLS_MAX);
+
+	while (rc->pos < rc->count) {
+		// Normalize
+		if (rc->range < RC_TOP_VALUE) {
+			if (rc_shift_low(rc, out, out_pos, out_size))
+				return true;
+
+			rc->range <<= RC_SHIFT_BITS;
+		}
+
+		// Encode a bit
+		switch (rc->symbols[rc->pos]) {
+		case RC_BIT_0: {
+			probability prob = *rc->probs[rc->pos];
+			rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
+					* prob;
+			prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS;
+			*rc->probs[rc->pos] = prob;
+			break;
+		}
+
+		case RC_BIT_1: {
+			probability prob = *rc->probs[rc->pos];
+			const uint32_t bound = prob * (rc->range
+					>> RC_BIT_MODEL_TOTAL_BITS);
+			rc->low += bound;
+			rc->range -= bound;
+			prob -= prob >> RC_MOVE_BITS;
+			*rc->probs[rc->pos] = prob;
+			break;
+		}
+
+		case RC_DIRECT_0:
+			rc->range >>= 1;
+			break;
+
+		case RC_DIRECT_1:
+			rc->range >>= 1;
+			rc->low += rc->range;
+			break;
+
+		case RC_FLUSH:
+			// Prevent further normalizations.
+			rc->range = UINT32_MAX;
+
+			// Flush the last five bytes (see rc_flush()).
+			do {
+				if (rc_shift_low(rc, out, out_pos, out_size))
+					return true;
+			} while (++rc->pos < rc->count);
+
+			// Reset the range encoder so we are ready to continue
+			// encoding if we weren't finishing the stream.
+			rc_reset(rc);
+			return false;
+
+		default:
+			assert(0);
+			break;
+		}
+
+		++rc->pos;
+	}
+
+	rc->count = 0;
+	rc->pos = 0;
+
+	return false;
+}
+
+/// Returns cache_size + 4. NOTE(review): presumably the bytes a flush would still write; confirm.
+static inline uint64_t
+rc_pending(const lzma_range_encoder *rc)
+{
+	return rc->cache_size + 5 - 1;
+}
+
+#endif
Index: contrib/xz/src/liblzma/rangecoder/price_tablegen.c
===================================================================
--- contrib/xz/src/liblzma/rangecoder/price_tablegen.c (revision 0)
+++ contrib/xz/src/liblzma/rangecoder/price_tablegen.c (revision 0)
@@ -0,0 +1,87 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file price_tablegen.c
+/// \brief Probability price table generator
+///
+/// Compiling: gcc -std=c99 -o price_tablegen price_tablegen.c
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include <inttypes.h> // PRIu32; also brings in the <stdint.h> types and UINT32_C
+#include <stdio.h> // printf
+#include "range_common.h"
+#include "price.h"
+
+
+static uint32_t rc_prices[RC_PRICE_TABLE_SIZE];
+
+/// Fills rc_prices[] with one entry per 2^RC_MOVE_REDUCING_BITS probabilities.
+static void
+init_price_table(void)
+{
+	for (uint32_t i = (UINT32_C(1) << RC_MOVE_REDUCING_BITS) / 2;
+			i < RC_BIT_MODEL_TOTAL;
+			i += (UINT32_C(1) << RC_MOVE_REDUCING_BITS)) {
+		const uint32_t cycles_bits = RC_BIT_PRICE_SHIFT_BITS;
+		uint32_t w = i;
+		uint32_t bit_count = 0;
+
+		for (uint32_t j = 0; j < cycles_bits; ++j) {
+			w *= w;
+			bit_count <<= 1;
+
+			while (w >= (UINT32_C(1) << 16)) { // keep w below 2^16 so w * w fits
+				w >>= 1;
+				++bit_count;
+			}
+		}
+
+		rc_prices[i >> RC_MOVE_REDUCING_BITS]
+				= (RC_BIT_MODEL_TOTAL_BITS << cycles_bits)
+				- 15 - bit_count;
+	}
+
+	return;
+}
+
+/// Prints rc_prices[] to stdout as the C source of price_table.c, 8 values per row.
+static void
+print_price_table(void)
+{
+	printf("/* This file has been automatically generated by "
+			"price_tablegen.c. */\n\n"
+			"#include \"range_encoder.h\"\n\n"
+			"const uint8_t lzma_rc_prices["
+			"RC_PRICE_TABLE_SIZE] = {");
+
+	const size_t array_size = sizeof(rc_prices) // size of the array that is indexed below
+			/ sizeof(rc_prices[0]);
+	for (size_t i = 0; i < array_size; ++i) {
+		if (i % 8 == 0)
+			printf("\n\t");
+
+		printf("%4" PRIu32, rc_prices[i]);
+
+		if (i != array_size - 1)
+			printf(",");
+	}
+
+	printf("\n};\n");
+
+	return;
+}
+
+/// Generates the table and writes it to stdout; always returns 0.
+int
+main(void)
+{
+	init_price_table();
+	print_price_table();
+	return 0;
+}
Index: contrib/xz/src/liblzma/rangecoder/range_decoder.h
===================================================================
--- contrib/xz/src/liblzma/rangecoder/range_decoder.h (revision 0)
+++ contrib/xz/src/liblzma/rangecoder/range_decoder.h (revision 0)
@@ -0,0 +1,179 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file range_decoder.h
+/// \brief Range Decoder
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_RANGE_DECODER_H
+#define LZMA_RANGE_DECODER_H
+
+#include "range_common.h"
+
+
+typedef struct {
+	uint32_t range; ///< Current range width
+	uint32_t code; ///< Value built from the input bytes read so far
+	uint32_t init_bytes_left; ///< Input bytes rc_read_init() still has to read
+} lzma_range_decoder;
+
+/// Returns false if input ran out first (call again with more), else true.
+/// Reads the first five bytes to initialize the range decoder.
+static inline bool
+rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in,
+		size_t *restrict in_pos, size_t in_size)
+{
+	while (rc->init_bytes_left > 0) {
+		if (*in_pos == in_size)
+			return false;
+
+		rc->code = (rc->code << 8) | in[*in_pos];
+		++*in_pos;
+		--rc->init_bytes_left;
+	}
+
+	return true;
+}
+
+
+/// Makes local copies of range decoder and *in_pos variables. Doing this
+/// improves speed significantly. The range decoder macros expect also
+/// variables `in' and `in_size' to be defined.
+#define rc_to_local(range_decoder, in_pos) \
+	lzma_range_decoder rc = range_decoder; \
+	size_t rc_in_pos = (in_pos); \
+	uint32_t rc_bound
+
+
+/// Stores the local copies back to the range decoder structure.
+#define rc_from_local(range_decoder, in_pos) \
+do { \
+	range_decoder = rc; \
+	in_pos = rc_in_pos; \
+} while (0)
+
+
+/// Resets the range decoder structure.
+#define rc_reset(range_decoder) \
+do { \
+	(range_decoder).range = UINT32_MAX; \
+	(range_decoder).code = 0; \
+	(range_decoder).init_bytes_left = 5; \
+} while (0)
+
+
+/// When decoding has been properly finished, rc.code is always zero unless
+/// the input stream is corrupt. So checking this can catch some corrupt
+/// files especially if they don't have any other integrity check.
+#define rc_is_finished(range_decoder) \
+	((range_decoder).code == 0)
+
+
+/// Read the next input byte if needed. If more input is needed but there is
+/// no more input available, "goto out" is used to jump out of the main
+/// decoder loop.
+#define rc_normalize(seq) \
+do { \
+	if (rc.range < RC_TOP_VALUE) { \
+		if (unlikely(rc_in_pos == in_size)) { \
+			coder->sequence = seq; /* record seq before bailing out */ \
+			goto out; \
+		} \
+		rc.range <<= RC_SHIFT_BITS; \
+		rc.code = (rc.code << RC_SHIFT_BITS) | in[rc_in_pos++]; \
+	} \
+} while (0)
+
+
+/// Start decoding a bit. This must be used together with rc_update_0()
+/// and rc_update_1():
+///
+///     rc_if_0(prob, seq) {
+///         rc_update_0(prob);
+///         // Do something
+///     } else {
+///         rc_update_1(prob);
+///         // Do something else
+///     }
+/// NOTE: expands to several statements plus a bare "if"; never use it as an unbraced body.
+#define rc_if_0(prob, seq) \
+	rc_normalize(seq); \
+	rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \
+	if (rc.code < rc_bound)
+
+
+/// Update the range decoder state and the used probability variable to
+/// match a decoded bit of 0.
+#define rc_update_0(prob) \
+do { \
+	rc.range = rc_bound; \
+	prob += (RC_BIT_MODEL_TOTAL - (prob)) >> RC_MOVE_BITS; \
+} while (0)
+
+
+/// Update the range decoder state and the used probability variable to
+/// match a decoded bit of 1.
+#define rc_update_1(prob) \
+do { \
+	rc.range -= rc_bound; \
+	rc.code -= rc_bound; \
+	prob -= (prob) >> RC_MOVE_BITS; \
+} while (0)
+
+
+/// Decodes one bit and runs action0 or action1 depending on the decoded bit.
+/// This macro is used as the last step in bittree reverse decoders since
+/// those don't use "symbol" for anything else than indexing the probability
+/// arrays.
+#define rc_bit_last(prob, action0, action1, seq) \
+do { \
+	rc_if_0(prob, seq) { \
+		rc_update_0(prob); \
+		action0; \
+	} else { \
+		rc_update_1(prob); \
+		action1; \
+	} \
+} while (0)
+
+
+/// Decodes one bit, updates "symbol", and runs action0 or action1 depending
+/// on the decoded bit. The expansion already ends with a semicolon.
+#define rc_bit(prob, action0, action1, seq) \
+	rc_bit_last(prob, \
+		symbol <<= 1; action0, \
+		symbol = (symbol << 1) + 1; action1, \
+		seq);
+
+
+/// Like rc_bit() but add "case seq:" as a prefix.
This makes the unrolled
+/// loops more readable because the code isn't littered with "case"
+/// statements. On the other hand this also makes it less readable, since
+/// spotting the places where the decoder loop may be restarted is less
+/// obvious.
+#define rc_bit_case(prob, action0, action1, seq) \
+	case seq: rc_bit(prob, action0, action1, seq)
+
+
+/// Decode a bit without using a probability.
+#define rc_direct(dest, seq) \
+do { \
+	rc_normalize(seq); \
+	rc.range >>= 1; \
+	rc.code -= rc.range; /* may wrap below zero */ \
+	rc_bound = UINT32_C(0) - (rc.code >> 31); /* 0xFFFFFFFF if bit 31 is set, else 0 */ \
+	rc.code += rc.range & rc_bound; /* add range back when the mask is set */ \
+	dest = (dest << 1) + (rc_bound + 1); /* appends 0 when the mask is set, else 1 */ \
+} while (0)
+
+
+// NOTE: No macros are provided for bittree decoding. It seems to be simpler
+// to just write them open in the code.
+
+#endif
Index: contrib/xz/src/liblzma/rangecoder/price.h
===================================================================
--- contrib/xz/src/liblzma/rangecoder/price.h (revision 0)
+++ contrib/xz/src/liblzma/rangecoder/price.h (revision 0)
@@ -0,0 +1,92 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file price.h
+/// \brief Probability price calculation
+//
+// Author: Igor Pavlov
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_PRICE_H
+#define LZMA_PRICE_H
+
+
+#define RC_MOVE_REDUCING_BITS 4 // low probability bits dropped for the table index
+#define RC_BIT_PRICE_SHIFT_BITS 4 // rc_direct_price(n) is n << 4
+#define RC_PRICE_TABLE_SIZE (RC_BIT_MODEL_TOTAL >> RC_MOVE_REDUCING_BITS)
+
+#define RC_INFINITY_PRICE (UINT32_C(1) << 30)
+
+
+/// Lookup table for the inline functions defined in this file.
+extern const uint8_t lzma_rc_prices[RC_PRICE_TABLE_SIZE];
+
+/// Price of coding bit (0 or 1) with prob; for 1 the low 11 bits of prob are inverted first.
+static inline uint32_t
+rc_bit_price(const probability prob, const uint32_t bit)
+{
+	return lzma_rc_prices[(prob ^ ((UINT32_C(0) - bit)
+			& (RC_BIT_MODEL_TOTAL - 1))) >> RC_MOVE_REDUCING_BITS];
+}
+
+/// Same as rc_bit_price(prob, 0).
+static inline uint32_t
+rc_bit_0_price(const probability prob)
+{
+	return lzma_rc_prices[prob >> RC_MOVE_REDUCING_BITS];
+}
+
+/// Same as rc_bit_price(prob, 1).
+static inline uint32_t
+rc_bit_1_price(const probability prob)
+{
+	return lzma_rc_prices[(prob ^ (RC_BIT_MODEL_TOTAL - 1))
+			>> RC_MOVE_REDUCING_BITS];
+}
+
+/// Sums the bit prices of a bit_levels-bit symbol, walking the tree from leaf to root.
+static inline uint32_t
+rc_bittree_price(const probability *const probs,
+		const uint32_t bit_levels, uint32_t symbol)
+{
+	uint32_t price = 0;
+	symbol += UINT32_C(1) << bit_levels; // marker bit: the loop ends when only it is left
+
+	do {
+		const uint32_t bit = symbol & 1;
+		symbol >>= 1;
+		price += rc_bit_price(probs[symbol], bit);
+	} while (symbol != 1);
+
+	return price;
+}
+
+/// Sums the bit prices of symbol taken least significant bit first, from the root down.
+static inline uint32_t
+rc_bittree_reverse_price(const probability *const probs,
+		uint32_t bit_levels, uint32_t symbol)
+{
+	uint32_t price = 0;
+	uint32_t model_index = 1;
+
+	do {
+		const uint32_t bit = symbol & 1;
+		symbol >>= 1;
+		price += rc_bit_price(probs[model_index], bit);
+		model_index = (model_index << 1) + bit;
+	} while (--bit_levels != 0);
+
+	return price;
+}
+
+/// Price of bits direct bits: bits << RC_BIT_PRICE_SHIFT_BITS.
+static inline uint32_t
+rc_direct_price(const uint32_t bits)
+{
+	 return bits << RC_BIT_PRICE_SHIFT_BITS;
+}
+
+#endif
Index: contrib/xz/src/liblzma/rangecoder/price_table.c
===================================================================
--- contrib/xz/src/liblzma/rangecoder/price_table.c (revision 0)
+++ contrib/xz/src/liblzma/rangecoder/price_table.c (revision 0)
@@ -0,0 +1,22 @@
+/* This file has been automatically generated by price_tablegen.c.
*/
+
+#include "range_encoder.h"
+
+const uint8_t lzma_rc_prices[RC_PRICE_TABLE_SIZE] = {
+	 128, 103,  91,  84,  78,  73,  69,  66,
+	  63,  61,  58,  56,  54,  52,  51,  49,
+	  48,  46,  45,  44,  43,  42,  41,  40,
+	  39,  38,  37,  36,  35,  34,  34,  33,
+	  32,  31,  31,  30,  29,  29,  28,  28,
+	  27,  26,  26,  25,  25,  24,  24,  23,
+	  23,  22,  22,  22,  21,  21,  20,  20,
+	  19,  19,  19,  18,  18,  17,  17,  17,
+	  16,  16,  16,  15,  15,  15,  14,  14,
+	  14,  13,  13,  13,  12,  12,  12,  11,
+	  11,  11,  11,  10,  10,  10,  10,   9,
+	   9,   9,   9,   8,   8,   8,   8,   7,
+	   7,   7,   7,   6,   6,   6,   6,   5,
+	   5,   5,   5,   5,   4,   4,   4,   4,
+	   3,   3,   3,   3,   3,   2,   2,   2,
+	   2,   2,   2,   1,   1,   1,   1,   1
+};
Index: contrib/xz/src/liblzma/rangecoder/range_common.h
===================================================================
--- contrib/xz/src/liblzma/rangecoder/range_common.h (revision 0)
+++ contrib/xz/src/liblzma/rangecoder/range_common.h (revision 0)
@@ -0,0 +1,73 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file range_common.h
+/// \brief Common things for range encoder and decoder
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_RANGE_COMMON_H
+#define LZMA_RANGE_COMMON_H
+
+#ifdef HAVE_CONFIG_H
+# include "common.h"
+#endif
+
+
+///////////////
+// Constants //
+///////////////
+
+#define RC_SHIFT_BITS 8 // bits shifted per normalization step
+#define RC_TOP_BITS 24
+#define RC_TOP_VALUE (UINT32_C(1) << RC_TOP_BITS) // a range below this gets normalized
+#define RC_BIT_MODEL_TOTAL_BITS 11
+#define RC_BIT_MODEL_TOTAL (UINT32_C(1) << RC_BIT_MODEL_TOTAL_BITS) // probability scale
+#define RC_MOVE_BITS 5 // each coded bit moves prob by 1/32 of the remaining distance
+
+
+////////////
+// Macros //
+////////////
+
+// Resets the probability so that both 0 and 1 have probability of 50 %
+#define bit_reset(prob) \
+	prob = RC_BIT_MODEL_TOTAL >> 1
+
+// This does the same for a complete bit tree.
+// (A tree represented as an array.)
+#define bittree_reset(probs, bit_levels) \
+	for (uint32_t bt_i = 0; bt_i < (UINT32_C(1) << (bit_levels)); ++bt_i) \
+		bit_reset((probs)[bt_i])
+
+
+//////////////////////
+// Type definitions //
+//////////////////////
+
+/// \brief Type of probabilities used with range coder
+///
+/// This needs to be at least 12-bit integer, so uint16_t is a logical choice.
+/// However, on some architecture and compiler combinations, a bigger type
+/// may give better speed, because the probability variables are accessed
+/// a lot. On the other hand, bigger probability type increases cache
+/// footprint, since there are 2 to 14 thousand probability variables in
+/// LZMA (assuming the limit of lc + lp <= 4; with lc + lp <= 12 there
+/// would be about 1.5 million variables).
+///
+/// With malicious files, the initialization speed of the LZMA decoder can
+/// become important. In that case, smaller probability variables mean that
+/// there are fewer bytes to write to RAM, which makes initialization faster.
+/// With big probability type, the initialization can become so slow that it
+/// can be a problem e.g. for email servers doing virus scanning.
+///
+/// I will be sticking to uint16_t unless some specific architectures
+/// are *much* faster (20-50 %) with uint32_t.
+typedef uint16_t probability;
+
+#endif
Index: contrib/xz/src/liblzma/api/lzma/delta.h
===================================================================
--- contrib/xz/src/liblzma/api/lzma/delta.h (revision 0)
+++ contrib/xz/src/liblzma/api/lzma/delta.h (revision 0)
@@ -0,0 +1,77 @@
+/**
+ * \file lzma/delta.h
+ * \brief Delta filter
+ */
+
+/*
+ * Author: Lasse Collin
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ *
+ * See ../lzma.h for information about liblzma as a whole.
+ */
+
+#ifndef LZMA_H_INTERNAL
+# error Never include this file directly. Use <lzma.h> instead.
+#endif
+
+
+/**
+ * \brief Filter ID
+ *
+ * Filter ID of the Delta filter.
This is used as lzma_filter.id. + */ +#define LZMA_FILTER_DELTA LZMA_VLI_C(0x03) + + +/** + * \brief Type of the delta calculation + * + * Currently only byte-wise delta is supported. Other possible types could + * be, for example, delta of 16/32/64-bit little/big endian integers, but + * these are not currently planned since byte-wise delta is almost as good. + */ +typedef enum { + LZMA_DELTA_TYPE_BYTE +} lzma_delta_type; + + +/** + * \brief Options for the Delta filter + * + * These options are needed by both encoder and decoder. + */ +typedef struct { + /** For now, this must always be LZMA_DELTA_TYPE_BYTE. */ + lzma_delta_type type; + + /** + * \brief Delta distance + * + * With the only currently supported type, LZMA_DELTA_TYPE_BYTE, + * the distance is as bytes. + * + * Examples: + * - 16-bit stereo audio: distance = 4 bytes + * - 24-bit RGB image data: distance = 3 bytes + */ + uint32_t dist; +# define LZMA_DELTA_DIST_MIN 1 +# define LZMA_DELTA_DIST_MAX 256 + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * when type is LZMA_DELTA_TYPE_BYTE, so it is safe to leave these + * uninitialized. + */ + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + void *reserved_ptr1; + void *reserved_ptr2; + +} lzma_options_delta; Index: contrib/xz/src/liblzma/api/lzma/vli.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/vli.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/vli.h (revision 0) @@ -0,0 +1,168 @@ +/** + * \file lzma/vli.h + * \brief Variable-length integer handling + * + * In the .xz format, most integers are encoded in a variable-length + * representation, which is sometimes called little endian base-128 encoding. + * This saves space when smaller values are more likely than bigger values. 
+ * + * The encoding scheme encodes seven bits to every byte, using minimum + * number of bytes required to represent the given value. Encodings that use + * non-minimum number of bytes are invalid, thus every integer has exactly + * one encoded representation. The maximum number of bits in a VLI is 63, + * thus the vli argument must be less than or equal to UINT64_MAX / 2. You + * should use LZMA_VLI_MAX for clarity. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Maximum supported value of variable-length integer + */ +#define LZMA_VLI_MAX (UINT64_MAX / 2) + +/** + * \brief VLI value to denote that the value is unknown + */ +#define LZMA_VLI_UNKNOWN UINT64_MAX + +/** + * \brief Maximum supported length of variable length integers + */ +#define LZMA_VLI_BYTES_MAX 9 + + +/** + * \brief VLI constant suffix + */ +#define LZMA_VLI_C(n) UINT64_C(n) + + +/** + * \brief Variable-length integer type + * + * This will always be unsigned integer. Valid VLI values are in the range + * [0, LZMA_VLI_MAX]. Unknown value is indicated with LZMA_VLI_UNKNOWN, + * which is the maximum value of the underlying integer type. + * + * In future, even if lzma_vli is defined to be something other than uint64_t, + * it is guaranteed that 2 * LZMA_VLI_MAX will not overflow lzma_vli. + * This simplifies integer overflow detection. + */ +typedef uint64_t lzma_vli; + + +/** + * \brief Simple macro to validate variable-length integer + * + * This is useful to test that application has given acceptable values + * for example in the uncompressed_size and compressed_size variables. + * + * \return True if the integer is representable as VLI or if it + * indicates unknown value.
+ */ +#define lzma_vli_is_valid(vli) \ + ((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN) + + +/** + * \brief Encode a variable-length integer + * + * This function has two modes: single-call and multi-call. Single-call mode + * encodes the whole integer at once; it is an error if the output buffer is + * too small. Multi-call mode saves the position in *vli_pos, and thus it is + * possible to continue encoding if the buffer becomes full before the whole + * integer has been encoded. + * + * \param vli Integer to be encoded + * \param vli_pos How many VLI-encoded bytes have already been written + * out. When starting to encode a new integer, *vli_pos + * must be set to zero. To use single-call encoding, + * set vli_pos to NULL. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully encoded. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to too little output space; single-call mode doesn't use + * LZMA_BUF_ERROR, since the application should have checked + * the encoded size with lzma_vli_size(). + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely written out yet. + * - LZMA_STREAM_END: Integer successfully encoded. + * - LZMA_BUF_ERROR: No output space was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_encode(lzma_vli vli, + size_t *vli_pos, uint8_t *lzma_restrict out, + size_t *lzma_restrict out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Decode a variable-length integer + * + * Like lzma_vli_encode(), this function has single-call and multi-call modes. 
+ * + * \param vli Pointer to decoded integer. The decoder will + * initialize it to zero when *vli_pos == 0, so + * application isn't required to initialize *vli. + * \param vli_pos How many bytes have already been decoded. When + * starting to decode a new integer, *vli_pos must + * be initialized to zero. To use single-call decoding, + * set this to NULL. + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. This includes hitting + * the end of the input buffer before the whole integer was + * decoded; providing no input at all will use LZMA_DATA_ERROR. + * - LZMA_PROG_ERROR: Arguments are not sane. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely decoded yet. + * - LZMA_STREAM_END: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_BUF_ERROR: No input was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *lzma_restrict vli, + size_t *vli_pos, const uint8_t *lzma_restrict in, + size_t *lzma_restrict in_pos, size_t in_size) lzma_nothrow; + + +/** + * \brief Get the number of bytes required to encode a VLI + * + * \return Number of bytes on success (1-9). If vli isn't valid, + * zero is returned. 
+ */ +extern LZMA_API(uint32_t) lzma_vli_size(lzma_vli vli) + lzma_nothrow lzma_attr_pure; Index: contrib/xz/src/liblzma/api/lzma/block.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/block.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/block.h (revision 0) @@ -0,0 +1,529 @@ +/** + * \file lzma/block.h + * \brief .xz Block handling + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Options for the Block and Block Header encoders and decoders + * + * Different Block handling functions use different parts of this structure. + * Some read some members, other functions write, and some do both. Only the + * members listed for reading need to be initialized when the specified + * functions are called. The members marked for writing will be assigned + * new values at some point either by calling the given function or by + * later calls to lzma_code(). + */ +typedef struct { + /** + * \brief Block format version + * + * To prevent API and ABI breakages if new features are needed in + * the Block field, a version number is used to indicate which + * fields in this structure are in use. For now, version must always + * be zero. With non-zero version, most Block related functions will + * return LZMA_OPTIONS_ERROR. + * + * Read by: + * - All functions that take pointer to lzma_block as argument, + * including lzma_block_header_decode(). + * + * Written by: + * - lzma_block_header_decode() + */ + uint32_t version; + + /** + * \brief Size of the Block Header field + * + * This is always a multiple of four.
+ * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_size() + * - lzma_block_buffer_encode() + */ + uint32_t header_size; +# define LZMA_BLOCK_HEADER_SIZE_MIN 8 +# define LZMA_BLOCK_HEADER_SIZE_MAX 1024 + + /** + * \brief Type of integrity Check + * + * The Check ID is not stored into the Block Header, thus its value + * must be provided also when decoding. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_check check; + + /** + * \brief Size of the Compressed Data in bytes + * + * Encoding: If this is not LZMA_VLI_UNKNOWN, Block Header encoder + * will store this value to the Block Header. Block encoder doesn't + * care about this value, but will set it once the encoding has been + * finished. + * + * Decoding: If this is not LZMA_VLI_UNKNOWN, Block decoder will + * verify that the size of the Compressed Data field matches + * compressed_size. + * + * Usually you don't know this value when encoding in streamed mode, + * and thus cannot write this field into the Block Header. + * + * In non-streamed mode you can reserve space for this field before + * encoding the actual Block. After encoding the data, finish the + * Block by encoding the Block Header. Steps in detail: + * + * - Set compressed_size to some big enough value. If you don't know + * better, use LZMA_VLI_MAX, but remember that bigger values take + * more space in Block Header. + * + * - Call lzma_block_header_size() to see how much space you need to + * reserve for the Block Header. 
+ * + * - Encode the Block using lzma_block_encoder() and lzma_code(). + * It sets compressed_size to the correct value. + * + * - Use lzma_block_header_encode() to encode the Block Header. + * Because space was reserved in the first step, you don't need + * to call lzma_block_header_size() anymore, because due to + * reserving, header_size has to be big enough. If it is "too big", + * lzma_block_header_encode() will add enough Header Padding to + * make Block Header to match the size specified by header_size. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed Size in bytes + * + * This is handled very similarly to compressed_size above. + * + * uncompressed_size is needed by fewer functions than + * compressed_size. This is because uncompressed_size isn't + * needed to validate that Block stays within proper limits. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli uncompressed_size; + + /** + * \brief Array of filters + * + * There can be 1-4 filters. The end of the array is marked with + * .id = LZMA_VLI_UNKNOWN. 
+ * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode(): Note that this does NOT free() + * the old filter options structures. All unused filters[] will + * have .id == LZMA_VLI_UNKNOWN and .options == NULL. If + * decoding fails, all filters[] are guaranteed to be + * LZMA_VLI_UNKNOWN and NULL. + * + * \note Because the array is terminated with + * .id = LZMA_VLI_UNKNOWN, the actual array must + * have LZMA_FILTERS_MAX + 1 members or the Block + * Header decoder will overflow the buffer. + */ + lzma_filter *filters; + + /** + * \brief Raw value stored in the Check field + * + * After successful coding, the first lzma_check_size(check) bytes + * of this array contain the raw value stored in the Check field. + * + * Note that CRC32 and CRC64 are stored in little endian byte order. + * Take it into account if you display the Check values to the user. + * + * Written by: + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + uint8_t raw_check[LZMA_CHECK_SIZE_MAX]; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. 
+ */ + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + uint32_t reserved_int1; + uint32_t reserved_int2; + lzma_vli reserved_int3; + lzma_vli reserved_int4; + lzma_vli reserved_int5; + lzma_vli reserved_int6; + lzma_vli reserved_int7; + lzma_vli reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + lzma_bool reserved_bool1; + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + +} lzma_block; + + +/** + * \brief Decode the Block Header Size field + * + * To decode Block Header using lzma_block_header_decode(), the size of the + * Block Header has to be known and stored into lzma_block.header_size. + * The size can be calculated from the first byte of a Block using this macro. + * Note that if the first byte is 0x00, it indicates beginning of Index; use + * this macro only when the byte is not 0x00. + * + * There is no encoding macro, because Block Header encoder is enough for that. + */ +#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) + + +/** + * \brief Calculate Block Header Size + * + * Calculate the minimum size needed for the Block Header field using the + * settings specified in the lzma_block structure. Note that it is OK to + * increase the calculated header_size value as long as it is a multiple of + * four and doesn't exceed LZMA_BLOCK_HEADER_SIZE_MAX. Increasing header_size + * just means that lzma_block_header_encode() will add Header Padding. + * + * \return - LZMA_OK: Size calculated successfully and stored to + * block->header_size. + * - LZMA_OPTIONS_ERROR: Unsupported version, filters or + * filter options. + * - LZMA_PROG_ERROR: Invalid values like compressed_size == 0. + * + * \note This doesn't check that all the options are valid i.e. 
this + * may return LZMA_OK even if lzma_block_header_encode() or + * lzma_block_encoder() would fail. If you want to validate the + * filter chain, consider using lzma_memlimit_encoder() which as + * a side-effect validates the filter chain. + */ +extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Block Header + * + * The caller must have calculated the size of the Block Header already with + * lzma_block_header_size(). If a value larger than the one calculated by + * lzma_block_header_size() is used, the Block Header will be padded to the + * specified size. + * + * \param out Beginning of the output buffer. This must be + * at least block->header_size bytes. + * \param block Block options to be encoded. + * + * \return - LZMA_OK: Encoding was successful. block->header_size + * bytes were written to output buffer. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_encode( + const lzma_block *block, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Block Header + * + * block->version should be set to the highest value supported by the + * application; currently the only possible version is zero. This function + * will set version to the lowest value that still supports all the features + * required by the Block Header. + * + * The size of the Block Header must have already been decoded with + * lzma_block_header_size_decode() macro and stored to block->header_size. + * + * block->filters must have been allocated, but not necessarily initialized. + * Possible existing filter options are _not_ freed. + * + * \param block Destination for Block options. + * \param allocator lzma_allocator for custom allocator functions. 
+ * Set to NULL to use malloc() (and also free() + * if an error occurs). + * \param in Beginning of the input buffer. This must be + * at least block->header_size bytes. + * + * \return - LZMA_OK: Decoding was successful. block->header_size + * bytes were read from the input buffer. + * - LZMA_OPTIONS_ERROR: The Block Header specifies some + * unsupported options such as unsupported filters. This can + * happen also if block->version was set to a too low value + * compared to what would be required to properly represent + * the information stored in the Block Header. + * - LZMA_DATA_ERROR: Block Header is corrupt, for example, + * the CRC32 doesn't match. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block, + lzma_allocator *allocator, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Validate and set Compressed Size according to Unpadded Size + * + * Block Header stores Compressed Size, but Index has Unpadded Size. If the + * application has already parsed the Index and is now decoding Blocks, + * it can calculate Compressed Size from Unpadded Size. This function does + * exactly that with error checking: + * + * - Compressed Size calculated from Unpadded Size must be positive integer, + * that is, Unpadded Size must be big enough that after Block Header and + * Check fields there's still at least one byte for Compressed Size. + * + * - If Compressed Size was present in Block Header, the new value + * calculated from Unpadded Size is compared against the value + * from Block Header. + * + * \note This function must be called _after_ decoding the Block Header + * field so that it can properly validate Compressed Size if it + * was present in Block Header. + * + * \return - LZMA_OK: block->compressed_size was set successfully. 
+ * - LZMA_DATA_ERROR: unpadded_size is too small compared to + * block->header_size and lzma_check_size(block->check). + * - LZMA_PROG_ERROR: Some values are invalid. For example, + * block->header_size must be a multiple of four and + * between 8 and 1024 inclusive. + */ +extern LZMA_API(lzma_ret) lzma_block_compressed_size( + lzma_block *block, lzma_vli unpadded_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate Unpadded Size + * + * The Index field stores Unpadded Size and Uncompressed Size. The latter + * can be taken directly from the lzma_block structure after coding a Block, + * but Unpadded Size needs to be calculated from Block Header Size, + * Compressed Size, and size of the Check field. This is where this function + * is needed. + * + * \return Unpadded Size on success, or zero on error. + */ +extern LZMA_API(lzma_vli) lzma_block_unpadded_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate the total encoded size of a Block + * + * This is equivalent to lzma_block_unpadded_size() except that the returned + * value includes the size of the Block Padding field. + * + * \return On success, total encoded size of the Block. On error, + * zero is returned. + */ +extern LZMA_API(lzma_vli) lzma_block_total_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Block encoder + * + * Valid actions for lzma_code() are LZMA_RUN, LZMA_SYNC_FLUSH (only if the + * filter chain supports it), and LZMA_FINISH. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_UNSUPPORTED_CHECK: block->check specifies a Check ID + * that is not supported by this build of liblzma. Initializing + * the encoder failed. 
+ * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_encoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Block decoder + * + * Valid actions for lzma_code() are LZMA_RUN and LZMA_FINISH. Using + * LZMA_FINISH is not required. It is supported only for convenience. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but + * the given Check ID is not supported, thus Check will be + * ignored. + * - LZMA_PROG_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_decoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate maximum output size for single-call Block encoding + * + * This is equivalent to lzma_stream_buffer_bound() but for .xz Blocks. + * See the documentation of lzma_stream_buffer_bound(). + */ +extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Block encoder + * + * In contrast to the multi-call encoder initialized with + * lzma_block_encoder(), this function encodes also the Block Header. This + * is required to make it possible to write appropriate Block Header also + * in case the data isn't compressible, and different filter chain has to be + * used to encode the data in uncompressed form using uncompressed chunks + * of the LZMA2 filter. + * + * When the data isn't compressible, header_size, compressed_size, and + * uncompressed_size are set just like when the data was compressible, but + * it is possible that header_size is too small to hold the filter chain + * specified in block->filters, because that isn't necessarily the filter + * chain that was actually used to encode the data. lzma_block_unpadded_size() + * still works normally, because it doesn't read the filters array. 
+ * + * \param block Block options: block->version, block->check, + * and block->filters must have been initialized. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_encode( + lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Block decoder + * + * This is single-call equivalent of lzma_block_decoder(), and requires that + * the caller has already decoded Block Header and checked its memory usage. + * + * \param block Block options just like with lzma_block_decoder(). + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. 
+ * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_MEM_ERROR + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_decode( + lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow; Index: contrib/xz/src/liblzma/api/lzma/hardware.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/hardware.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/hardware.h (revision 0) @@ -0,0 +1,51 @@ +/** + * \file lzma/hardware.h + * \brief Hardware information + * + * Since liblzma can consume a lot of system resources, it also provides + * ways to limit the resource usage. Applications linking against liblzma + * need to make the actual decisions about how many resources to let liblzma use. + * To ease making these decisions, liblzma provides functions to find out + * the relevant capabilities of the underlying hardware. Currently there + * is only a function to find out the amount of RAM, but in the future there + * will be also a function to detect how many concurrent threads the system + * can run. + * + * \note On some operating systems, these functions may temporarily + * load a shared library or open file descriptor(s) to find out + * the requested hardware information. Unless the application + * assumes that specific file descriptors are not touched by + * other threads, this should have no effect on thread safety. + * Possible operations involving file descriptors will restart + * the syscalls if they return EINTR. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. 
Use instead. +#endif + + +/** + * \brief Get the total amount of physical memory (RAM) in bytes + * + * This function may be useful when determining a reasonable memory + * usage limit for decompressing or how much memory it is OK to use + * for compressing. For example, the default limit used by the xz + * command line tool is 40 % of RAM. + * + * \return On success, the total amount of physical memory in bytes + * is returned. If the amount of RAM cannot be determined, + * zero is returned. This can happen if an error occurs + * or if there is no code in liblzma to detect the amount + * of RAM on the specific operating system. + */ +extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow; Index: contrib/xz/src/liblzma/api/lzma/bcj.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/bcj.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/bcj.h (revision 0) @@ -0,0 +1,90 @@ +/** + * \file lzma/bcj.h + * \brief Branch/Call/Jump conversion filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* Filter IDs for lzma_filter.id */ + +#define LZMA_FILTER_X86 LZMA_VLI_C(0x04) + /**< + * Filter for x86 binaries + */ + +#define LZMA_FILTER_POWERPC LZMA_VLI_C(0x05) + /**< + * Filter for Big endian PowerPC binaries + */ + +#define LZMA_FILTER_IA64 LZMA_VLI_C(0x06) + /**< + * Filter for IA64 (Itanium) binaries. + */ + +#define LZMA_FILTER_ARM LZMA_VLI_C(0x07) + /**< + * Filter for ARM binaries. + */ + +#define LZMA_FILTER_ARMTHUMB LZMA_VLI_C(0x08) + /**< + * Filter for ARMThumb binaries. + */ + +#define LZMA_FILTER_SPARC LZMA_VLI_C(0x09) + /**< + * Filter for SPARC binaries. 
+ */ + + +/** + * \brief Options for BCJ filters + * + * The BCJ filters never change the size of the data. Specifying options + * for them is optional: if pointer to options is NULL, default value is + * used. You probably never need to specify options to BCJ filters, so just + * set the options pointer to NULL and be happy. + * + * If options with non-default values have been specified when encoding, + * the same options must also be specified when decoding. + * + * \note At the moment, none of the BCJ filters support + * LZMA_SYNC_FLUSH. If LZMA_SYNC_FLUSH is specified, + * LZMA_OPTIONS_ERROR will be returned. If there is need, + * partial support for LZMA_SYNC_FLUSH can be added in future. + * Partial means that flushing would be possible only at + * offsets that are multiple of 2, 4, or 16 depending on + * the filter, except x86 which cannot be made to support + * LZMA_SYNC_FLUSH predictably. + */ +typedef struct { + /** + * \brief Start offset for conversions + * + * This setting is useful only when the same filter is used + * _separately_ for multiple sections of the same executable file, + * and the sections contain cross-section branch/call/jump + * instructions. In that case it is beneficial to set the start + * offset of the non-first sections so that the relative addresses + * of the cross-section branch/call/jump instructions will use the + * same absolute addresses as in the first section. + * + * When the pointer to options is NULL, the default value (zero) + * is used. + */ + uint32_t start_offset; + +} lzma_options_bcj; Index: contrib/xz/src/liblzma/api/lzma/lzma.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/lzma.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/lzma.h (revision 0) @@ -0,0 +1,397 @@ +/** + * \file lzma/lzma.h + * \brief LZMA1 and LZMA2 filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. 
+ * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief LZMA1 Filter ID + * + * LZMA1 is the very same thing as what was called just LZMA in LZMA Utils, + * 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from + * accidentally using LZMA when they actually want LZMA2. + * + * LZMA1 shouldn't be used for new applications unless you _really_ know + * what you are doing. LZMA2 is almost always a better choice. + */ +#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001) + +/** + * \brief LZMA2 Filter ID + * + * Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds + * support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion + * when trying to compress uncompressible data), possibility to change + * lc/lp/pb in the middle of encoding, and some other internal improvements. + */ +#define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21) + + +/** + * \brief Match finders + * + * Match finder has major effect on both speed and compression ratio. + * Usually hash chains are faster than binary trees. + * + * The memory usage formulas are only rough estimates, which are closest to + * reality when dict_size is a power of two. The formulas are more complex + * in reality, and can also change a little between liblzma versions. Use + * lzma_memusage_encoder() to get more accurate estimate of memory usage. 
+ */ +typedef enum { + LZMA_MF_HC3 = 0x03, + /**< + * \brief Hash Chain with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 7.5 + * - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB + */ + + LZMA_MF_HC4 = 0x04, + /**< + * \brief Hash Chain with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: dict_size * 7.5 + */ + + LZMA_MF_BT2 = 0x12, + /**< + * \brief Binary Tree with 2-byte hashing + * + * Minimum nice_len: 2 + * + * Memory usage: dict_size * 9.5 + */ + + LZMA_MF_BT3 = 0x13, + /**< + * \brief Binary Tree with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 11.5 + * - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB + */ + + LZMA_MF_BT4 = 0x14 + /**< + * \brief Binary Tree with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: dict_size * 11.5 + */ +} lzma_match_finder; + + +/** + * \brief Test if given match finder is supported + * + * Return true if the given match finder is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * isn't listed in lzma_match_finder enumeration; the return value will be + * false. + * + * There is no way to list which match finders are available in this + * particular liblzma version and build. It would be useless, because + * a new match finder, which the application developer wasn't aware, + * could require giving additional options to the encoder that the older + * match finders don't need. + */ +extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Compression modes + * + * This selects the function used to analyze the data produced by the match + * finder. + */ +typedef enum { + LZMA_MODE_FAST = 1, + /**< + * \brief Fast compression + * + * Fast mode is usually at its best when combined with + * a hash chain match finder. 
+ */ + + LZMA_MODE_NORMAL = 2 + /**< + * \brief Normal compression + * + * This is usually notably slower than fast mode. Use this + * together with binary tree match finders to expose the + * full potential of the LZMA1 or LZMA2 encoder. + */ +} lzma_mode; + + +/** + * \brief Test if given compression mode is supported + * + * Return true if the given compression mode is supported by this liblzma + * build. Otherwise false is returned. It is safe to call this with a value + * that isn't listed in lzma_mode enumeration; the return value will be false. + * + * There is no way to list which modes are available in this particular + * liblzma version and build. It would be useless, because a new compression + * mode, which the application developer wasn't aware, could require giving + * additional options to the encoder that the older modes don't need. + */ +extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Options specific to the LZMA1 and LZMA2 filters + * + * Since LZMA1 and LZMA2 share most of the code, it's simplest to share + * the options structure too. For encoding, all but the reserved variables + * need to be initialized unless specifically mentioned otherwise. + * + * For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and + * preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb. + */ +typedef struct { + /** + * \brief Dictionary size in bytes + * + * Dictionary size indicates how many bytes of the recently processed + * uncompressed data is kept in memory. One method to reduce size of + * the uncompressed data is to store distance-length pairs, which + * indicate what data to repeat from the dictionary buffer. Thus, + * the bigger the dictionary, the better the compression ratio + * usually is. 
+ * + * Maximum size of the dictionary depends on multiple things: + * - Memory usage limit + * - Available address space (not a problem on 64-bit systems) + * - Selected match finder (encoder only) + * + * Currently the maximum dictionary size for encoding is 1.5 GiB + * (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit + * systems for certain match finder implementation reasons. In the + * future, there may be match finders that support bigger + * dictionaries. + * + * Decoder already supports dictionaries up to 4 GiB - 1 B (i.e. + * UINT32_MAX), so increasing the maximum dictionary size of the + * encoder won't cause problems for old decoders. + * + * Because extremely small dictionary sizes would have unneeded + * overhead in the decoder, the minimum dictionary size is 4096 bytes. + * + * \note When decoding, a too big dictionary does no other harm + * than wasting memory. + */ + uint32_t dict_size; +# define LZMA_DICT_SIZE_MIN UINT32_C(4096) +# define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23) + + /** + * \brief Pointer to an initial dictionary + * + * It is possible to initialize the LZ77 history window using + * a preset dictionary. It is useful when compressing many + * similar, relatively small chunks of data independently from + * each other. The preset dictionary should contain typical + * strings that occur in the files being compressed. The most + * probable strings should be near the end of the preset dictionary. + * + * This feature should be used only in special situations. For + * now, it works correctly only with raw encoding and decoding. + * Currently none of the container formats supported by + * liblzma allow preset dictionary when decoding, thus if + * you create a .xz or .lzma file with preset dictionary, it + * cannot be decoded with the regular decoder functions. In the + * future, the .xz format will likely get support for preset + * dictionary though. 
+ */ + const uint8_t *preset_dict; + + /** + * \brief Size of the preset dictionary + * + * Specifies the size of the preset dictionary. If the size is + * bigger than dict_size, only the last dict_size bytes are + * processed. + * + * This variable is read only when preset_dict is not NULL. + * If preset_dict is not NULL but preset_dict_size is zero, + * no preset dictionary is used (identical to only setting + * preset_dict to NULL). + */ + uint32_t preset_dict_size; + + /** + * \brief Number of literal context bits + * + * How many of the highest bits of the previous uncompressed + * eight-bit byte (also known as `literal') are taken into + * account when predicting the bits of the next literal. + * + * \todo Example + * + * There is a limit that applies to literal context bits and literal + * position bits together: lc + lp <= 4. Without this limit the + * decoding could become very slow, which could have security related + * results in some cases like email servers doing virus scanning. + * This limit also simplifies the internal implementation in liblzma. + * + * There may be LZMA1 streams that have lc + lp > 4 (maximum possible + * lc would be 8). It is not possible to decode such streams with + * liblzma. + */ + uint32_t lc; +# define LZMA_LCLP_MIN 0 +# define LZMA_LCLP_MAX 4 +# define LZMA_LC_DEFAULT 3 + + /** + * \brief Number of literal position bits + * + * How many of the lowest bits of the current position (number + * of bytes from the beginning of the uncompressed data) in the + * uncompressed data is taken into account when predicting the + * bits of the next literal (a single eight-bit byte). + * + * \todo Example + */ + uint32_t lp; +# define LZMA_LP_DEFAULT 0 + + /** + * \brief Number of position bits + * + * How many of the lowest bits of the current position in the + * uncompressed data is taken into account when estimating + * probabilities of matches. 
A match is a sequence of bytes for + * which a matching sequence is found from the dictionary and + * thus can be stored as distance-length pair. + * + * Example: If most of the matches occur at byte positions of + * 8 * n + 3, that is, 3, 11, 19, ... set pb to 3, because 2**3 == 8. + */ + uint32_t pb; +# define LZMA_PB_MIN 0 +# define LZMA_PB_MAX 4 +# define LZMA_PB_DEFAULT 2 + + /** Compression mode */ + lzma_mode mode; + + /** + * \brief Nice length of a match + * + * This determines how many bytes the encoder compares from the match + * candidates when looking for the best match. Once a match of at + * least nice_len bytes long is found, the encoder stops looking for + * better candidates and encodes the match. (Naturally, if the found + * match is actually longer than nice_len, the actual length is + * encoded; it's not truncated to nice_len.) + * + * Bigger values usually increase the compression ratio and + * compression time. For most files, 32 to 128 is a good value, + * which gives very good compression ratio at good speed. + * + * The exact minimum value depends on the match finder. The maximum + * is 273, which is the maximum length of a match that LZMA1 and + * LZMA2 can encode. + */ + uint32_t nice_len; + + /** Match finder ID */ + lzma_match_finder mf; + + /** + * \brief Maximum search depth in the match finder + * + * For every input byte, match finder searches through the hash chain + * or binary tree in a loop, each iteration going one step deeper in + * the chain or tree. The searching stops if + * - a match of at least nice_len bytes long is found; + * - all match candidates from the hash chain or binary tree have + * been checked; or + * - maximum search depth is reached. + * + * Maximum search depth is needed to prevent the match finder from + * wasting too much time in case there are lots of short match + * candidates. On the other hand, stopping the search before all + * candidates have been checked can reduce compression ratio. 
+ * + * Setting depth to zero tells liblzma to use an automatic default + * value, that depends on the selected match finder and nice_len. + * The default is in the range [10, 200] or so (it may vary between + * liblzma versions). + * + * Using a bigger depth value than the default can increase + * compression ratio in some cases. There is no strict maximum value, + * but high values (thousands or millions) should be used with care: + * the encoder could remain fast enough with typical input, but + * malicious input could cause the match finder to slow down + * dramatically, possibly creating a denial of service attack. + */ + uint32_t depth; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + void *reserved_ptr1; + void *reserved_ptr2; + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + uint32_t reserved_int5; + uint32_t reserved_int6; + uint32_t reserved_int7; + uint32_t reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + +} lzma_options_lzma; + + +/** + * \brief Set a compression preset to lzma_options_lzma structure + * + * 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9 + * of the xz command line tool. In addition, it is possible to bitwise-or + * flags to the preset. Currently only LZMA_PRESET_EXTREME is supported. + * The flags are defined in container.h, because the flags are used also + * with lzma_easy_encoder(). + * + * The preset values are subject to changes between liblzma versions. + * + * This function is available only if LZMA1 or LZMA2 encoder has been enabled + * when building liblzma. 
+ */ +extern LZMA_API(lzma_bool) lzma_lzma_preset( + lzma_options_lzma *options, uint32_t preset) lzma_nothrow; Index: contrib/xz/src/liblzma/api/lzma/subblock.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/subblock.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/subblock.h (revision 0) @@ -0,0 +1,200 @@ +/** + * \file lzma/subblock.h + * \brief Subblock filter + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Subblock filter. This is used as lzma_filter.id. + */ +#define LZMA_FILTER_SUBBLOCK LZMA_VLI_C(0x01) + + +/** + * \brief Subfilter mode + * + * See lzma_options_subblock.subfilter_mode for details. + */ +typedef enum { + LZMA_SUBFILTER_NONE, + /**< + * No Subfilter is in use. + */ + + LZMA_SUBFILTER_SET, + /**< + * New Subfilter has been requested to be initialized. + */ + + LZMA_SUBFILTER_RUN, + /**< + * Subfilter is active. + */ + + LZMA_SUBFILTER_FINISH + /**< + * Subfilter has been requested to be finished. + */ +} lzma_subfilter_mode; + + +/** + * \brief Options for the Subblock filter + * + * Specifying options for the Subblock filter is optional: if the pointer + * options is NULL, no subfilters are allowed and the default value is used + * for subblock_data_size. + */ +typedef struct { + /* Options for encoder and decoder */ + + /** + * \brief Allowing subfilters + * + * If this is true, subfilters are allowed. + * + * In the encoder, if this is set to false, subfilter_mode and + * subfilter_options are completely ignored. + */ + lzma_bool allow_subfilters; + + /* Options for encoder only */ + + /** + * \brief Alignment + * + * The Subblock filter encapsulates the input data into Subblocks. 
+ * Each Subblock has a header which takes a few bytes of space. + * When the output of the Subblock encoder is fed to another filter + * that takes advantage of the alignment of the input data (e.g. LZMA), + * the Subblock filter can add padding to keep the actual data parts + * in the Subblocks aligned correctly. + * + * The alignment should be a positive integer. Subblock filter will + * add enough padding between Subblocks so that this is true for + * every payload byte: + * input_offset % alignment == output_offset % alignment + * + * The Subblock filter assumes that the first output byte will be + * written to a position in the output stream that is properly + * aligned. This requirement is automatically met when the start + * offset of the Stream or Block is correctly told to Block or + * Stream encoder. + */ + uint32_t alignment; +# define LZMA_SUBBLOCK_ALIGNMENT_MIN 1 +# define LZMA_SUBBLOCK_ALIGNMENT_MAX 32 +# define LZMA_SUBBLOCK_ALIGNMENT_DEFAULT 4 + + /** + * \brief Size of the Subblock Data part of each Subblock + * + * This value is re-read every time a new Subblock is started. + * + * Bigger values + * - save a few bytes of space; + * - increase latency in the encoder (but no effect for decoding); + * - decrease memory locality (increased cache pollution) in the + * encoder (no effect in decoding). + */ + uint32_t subblock_data_size; +# define LZMA_SUBBLOCK_DATA_SIZE_MIN 1 +# define LZMA_SUBBLOCK_DATA_SIZE_MAX (UINT32_C(1) << 28) +# define LZMA_SUBBLOCK_DATA_SIZE_DEFAULT 4096 + + /** + * \brief Run-length encoder remote control + * + * The Subblock filter has an internal run-length encoder (RLE). It + * can be useful when the data includes byte sequences that repeat + * very many times. The RLE can be used also when a Subfilter is + * in use; the RLE will be applied to the output of the Subfilter. + * + * Note that in contrast to traditional RLE, this RLE is intended to + * be used only when there's a lot of data to be repeated. 
If the + * input data has e.g. 500 bytes of NULs now and then, this RLE + * is probably useless, because plain LZMA should provide better + * results. + * + * Due to the above reasons, it was decided to keep the implementation + * of the RLE very simple. When the rle variable is non-zero, + * subblock_data_size must be a multiple of rle. Once the Subblock + * encoder has got subblock_data_size bytes of input, it will check + * if the whole buffer of the last subblock_data_size can be + * represented with repeats of chunks having size of rle bytes. + * + * If there are consecutive identical buffers of subblock_data_size + * bytes, they will be encoded using a single repeat entry if + * possible. + * + * If need arises, more advanced RLE can be implemented later + * without breaking API or ABI. + */ + uint32_t rle; +# define LZMA_SUBBLOCK_RLE_OFF 0 +# define LZMA_SUBBLOCK_RLE_MIN 1 +# define LZMA_SUBBLOCK_RLE_MAX 256 + + /** + * \brief Subfilter remote control + * + * When the Subblock filter is initialized, this variable must be + * LZMA_SUBFILTER_NONE or LZMA_SUBFILTER_SET. + * + * When subfilter_mode is LZMA_SUBFILTER_NONE, the application may + * put Subfilter options to subfilter_options structure, and then + * set subfilter_mode to LZMA_SUBFILTER_SET. No new input data will + * be read until the Subfilter has been enabled. Once the Subfilter + * has been enabled, liblzma will set subfilter_mode to + * LZMA_SUBFILTER_RUN. + * + * When subfilter_mode is LZMA_SUBFILTER_RUN, the application may + * set subfilter_mode to LZMA_SUBFILTER_FINISH. All the input + * currently available will be encoded before unsetting the + * Subfilter. Application must not change the amount of available + * input until the Subfilter has finished. Once the Subfilter has + * finished, liblzma will set subfilter_mode to LZMA_SUBFILTER_NONE. 
+ * + * If the intent is to have Subfilter enabled to the very end of + * the data, it is not needed to separately disable Subfilter with + * LZMA_SUBFILTER_FINISH. Using LZMA_FINISH as the second argument + * of lzma_code() will make the Subblock encoder disable the + * Subfilter once all the data has been run through the Subfilter. + * + * After the first call with LZMA_SYNC_FLUSH or LZMA_FINISH, the + * application must not change subfilter_mode until LZMA_STREAM_END. + * Setting LZMA_SUBFILTER_SET/LZMA_SUBFILTER_FINISH and + * LZMA_SYNC_FLUSH/LZMA_FINISH _at the same time_ is fine. + * + * \note This variable is ignored if allow_subfilters is false. + */ + lzma_subfilter_mode subfilter_mode; + + /** + * \brief Subfilter and its options + * + * When no Subfilter is used, the data is copied as is into Subblocks. + * Setting a Subfilter allows encoding some parts of the data with + * an additional filter. It is possible to use many different Subfilters + * in the same Block, although only one can be used at once. + * + * \note This variable is ignored if allow_subfilters is false. + */ + lzma_filter subfilter_options; + +} lzma_options_subblock; Index: contrib/xz/src/liblzma/api/lzma/stream_flags.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/stream_flags.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/stream_flags.h (revision 0) @@ -0,0 +1,227 @@ +/** + * \file lzma/stream_flags.h + * \brief .xz Stream Header and Stream Footer encoder and decoder + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. 
+#endif + + +/** + * \brief Size of Stream Header and Stream Footer + * + * Stream Header and Stream Footer have the same size and they are not + * going to change even if a newer version of the .xz file format is + * developed in future. + */ +#define LZMA_STREAM_HEADER_SIZE 12 + + +/** + * \brief Options for encoding/decoding Stream Header and Stream Footer + */ +typedef struct { + /** + * \brief Stream Flags format version + * + * To prevent API and ABI breakages if new features are needed in + * Stream Header or Stream Footer, a version number is used to + * indicate which fields in this structure are in use. For now, + * version must always be zero. With non-zero version, the + * lzma_stream_header_encode() and lzma_stream_footer_encode() + * will return LZMA_OPTIONS_ERROR. + * + * lzma_stream_header_decode() and lzma_stream_footer_decode() + * will always set this to the lowest value that supports all the + * features indicated by the Stream Flags field. The application + * must check that the version number set by the decoding functions + * is supported by the application. Otherwise it is possible that + * the application will decode the Stream incorrectly. + */ + uint32_t version; + + /** + * \brief Backward Size + * + * Backward Size must be a multiple of four bytes. In this Stream + * format version, Backward Size is the size of the Index field. + * + * Backward Size isn't actually part of the Stream Flags field, but + * it is convenient to include in this structure anyway. Backward + * Size is present only in the Stream Footer. There is no need to + * initialize backward_size when encoding Stream Header. + * + * lzma_stream_header_decode() always sets backward_size to + * LZMA_VLI_UNKNOWN so that it is convenient to use + * lzma_stream_flags_compare() when both Stream Header and Stream + * Footer have been decoded. 
+ */ + lzma_vli backward_size; +# define LZMA_BACKWARD_SIZE_MIN 4 +# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34) + + /** + * \brief Check ID + * + * This indicates the type of the integrity check calculated from + * uncompressed data. + */ + lzma_check check; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the + * names of these variables may change. + * + * (We will never be able to use all of these since Stream Flags + * is just two bytes plus Backward Size of four bytes. But it's + * nice to have the proper types when they are needed.) + */ + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + lzma_reserved_enum reserved_enum5; + lzma_reserved_enum reserved_enum6; + lzma_bool reserved_bool1; + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + +} lzma_stream_flags; + + +/** + * \brief Encode Stream Header + * + * \param options Stream Header options to be encoded. + * options->backward_size is ignored and doesn't + * need to be initialized. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Stream Footer + * + * \param options Stream Footer options to be encoded. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. 
+ * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Header + * + * \param options Target for the decoded Stream Header options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * options->backward_size is always set to LZMA_VLI_UNKNOWN. This is to + * help comparing Stream Flags from Stream Header and Stream Footer with + * lzma_stream_flags_compare(). + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Header. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in the header. + * + * \note When decoding .xz files that contain multiple Streams, it may + * make sense to print "file format not recognized" only if + * decoding of the Stream Header of the _first_ Stream gives + * LZMA_FORMAT_ERROR. If non-first Stream Header gives + * LZMA_FORMAT_ERROR, the message used for LZMA_DATA_ERROR is + * probably more appropriate. + * + * For example, Stream decoder in liblzma uses LZMA_DATA_ERROR if + * LZMA_FORMAT_ERROR is returned by lzma_stream_header_decode() + * when decoding non-first Stream. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Footer + * + * \param options Target for the decoded Stream Footer options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Decoding was successful. 
+ * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Footer. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the Stream Footer + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in Stream Footer. + * + * \note If Stream Header was already decoded successfully, but + * decoding Stream Footer returns LZMA_FORMAT_ERROR, the + * application should probably report some other error message + * than "file format not recognized", since the file more likely + * is corrupt (possibly truncated). Stream decoder in liblzma + * uses LZMA_DATA_ERROR in this situation. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Compare two lzma_stream_flags structures + * + * backward_size values are compared only if both are not + * LZMA_VLI_UNKNOWN. + * + * \return - LZMA_OK: Both are equal. If either had backward_size set + * to LZMA_VLI_UNKNOWN, backward_size values were not + * compared or validated. + * - LZMA_DATA_ERROR: The structures differ. + * - LZMA_OPTIONS_ERROR: version in either structure is greater + * than the maximum supported version (currently zero). + * - LZMA_PROG_ERROR: Invalid value, e.g. invalid check or + * backward_size. + */ +extern LZMA_API(lzma_ret) lzma_stream_flags_compare( + const lzma_stream_flags *a, const lzma_stream_flags *b) + lzma_nothrow lzma_attr_pure; Index: contrib/xz/src/liblzma/api/lzma/index.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/index.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/index.h (revision 0) @@ -0,0 +1,677 @@ +/** + * \file lzma/index.h + * \brief Handling of .xz Index and related information + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Opaque data type to hold the Index(es) and other information + * + * lzma_index often holds just one .xz Index and possibly the Stream Flags + * of the same Stream and size of the Stream Padding field. However, + * multiple lzma_indexes can be concatenated with lzma_index_cat() and then + * there may be information about multiple Streams in the same lzma_index. + * + * Notes about thread safety: Only one thread may modify lzma_index at + * a time. All functions that take non-const pointer to lzma_index + * modify it. As long as no thread is modifying the lzma_index, getting + * information from the same lzma_index can be done from multiple threads + * at the same time with functions that take a const pointer to + * lzma_index or use lzma_index_iter. The same iterator must be used + * only by one thread at a time, of course, but there can be as many + * iterators for the same lzma_index as needed. + */ +typedef struct lzma_index_s lzma_index; + + +/** + * \brief Iterator to get information about Blocks and Streams + */ +typedef struct { + struct { + /** + * \brief Pointer to Stream Flags + * + * This is NULL if Stream Flags have not been set for + * this Stream with lzma_index_stream_flags(). + */ + const lzma_stream_flags *flags; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + + /** + * \brief Stream number in the lzma_index + * + * The first Stream is 1. + */ + lzma_vli number; + + /** + * \brief Number of Blocks in the Stream + * + * If this is zero, the block structure below has + * undefined values. + */ + lzma_vli block_count; + + /** + * \brief Compressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). 
+ */ + lzma_vli compressed_offset; + + /** + * \brief Uncompressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli uncompressed_offset; + + /** + * \brief Compressed size of this Stream + * + * This includes all headers except the possible + * Stream Padding after this Stream. + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed size of this Stream + */ + lzma_vli uncompressed_size; + + /** + * \brief Size of Stream Padding after this Stream + * + * If it hasn't been set with lzma_index_stream_padding(), + * this defaults to zero. Stream Padding is always + * a multiple of four bytes. + */ + lzma_vli padding; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + } stream; + + struct { + /** + * \brief Block number in the file + * + * The first Block is 1. + */ + lzma_vli number_in_file; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the + * lzma_index (i.e. usually the beginning of the .xz file). + * Normally this is where you should seek in the .xz file + * to start decompressing this Block. + */ + lzma_vli compressed_file_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli uncompressed_file_offset; + + /** + * \brief Block number in this Stream + * + * The first Block is 1. + */ + lzma_vli number_in_stream; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli compressed_stream_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. 
+ */ + lzma_vli uncompressed_stream_offset; + + /** + * \brief Uncompressed size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. + * + * When doing random-access reading, it is possible that + * the target offset is not exactly at Block boundary. One + * will need to compare the target offset against + * uncompressed_file_offset or uncompressed_stream_offset, + * and possibly decode and throw away some amount of data + * before reaching the target offset. + */ + lzma_vli uncompressed_size; + + /** + * \brief Unpadded size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. + */ + lzma_vli unpadded_size; + + /** + * \brief Total compressed size + * + * This includes all headers and padding in this Block. + * This is useful if you need to know how many bytes + * the Block decoder will actually read. + */ + lzma_vli total_size; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + const void *reserved_ptr4; + } block; + + /* + * Internal data which is used to store the state of the iterator. + * The exact format may vary between liblzma versions, so don't + * touch these in any way. + */ + union { + const void *p; + size_t s; + lzma_vli v; + } internal[6]; +} lzma_index_iter; + + +/** + * \brief Operation mode for lzma_index_iter_next() + */ +typedef enum { + LZMA_INDEX_ITER_ANY = 0, + /**< + * \brief Get the next Block or Stream + * + * Go to the next Block if the current Stream has at least + * one Block left. Otherwise go to the next Stream even if + * it has no Blocks. If the Stream has no Blocks + * (lzma_index_iter.stream.block_count == 0), + * lzma_index_iter.block will have undefined values. 
+ */ + + LZMA_INDEX_ITER_STREAM = 1, + /**< + * \brief Get the next Stream + * + * Go to the next Stream even if the current Stream has + * unread Blocks left. If the next Stream has at least one + * Block, the iterator will point to the first Block. + * If there are no Blocks, lzma_index_iter.block will have + * undefined values. + */ + + LZMA_INDEX_ITER_BLOCK = 2, + /**< + * \brief Get the next Block + * + * Go to the next Block if the current Stream has at least + * one Block left. If the current Stream has no Blocks left, + * the next Stream with at least one Block is located and + * the iterator will be made to point to the first Block of + * that Stream. + */ + + LZMA_INDEX_ITER_NONEMPTY_BLOCK = 3 + /**< + * \brief Get the next non-empty Block + * + * This is like LZMA_INDEX_ITER_BLOCK except that it will + * skip Blocks whose Uncompressed Size is zero. + */ + +} lzma_index_iter_mode; + + +/** + * \brief Calculate memory usage of lzma_index + * + * On disk, the size of the Index field depends on both the number of Records + * stored and how big values the Records store (due to variable-length integer + * encoding). When the Index is kept in lzma_index structure, the memory usage + * depends only on the number of Records/Blocks stored in the Index(es), and + * in case of concatenated lzma_indexes, the number of Streams. The size in + * RAM is almost always significantly bigger than in the encoded form on disk. + * + * This function calculates an approximate amount of memory needed to hold + * the given number of Streams and Blocks in lzma_index structure. This + * value may vary between CPU architectures and also between liblzma versions + * if the internal implementation is modified. 
+ */ +extern LZMA_API(uint64_t) lzma_index_memusage( + lzma_vli streams, lzma_vli blocks) lzma_nothrow; + + +/** + * \brief Calculate the memory usage of an existing lzma_index + * + * This is a shorthand for lzma_index_memusage(lzma_index_stream_count(i), + * lzma_index_block_count(i)). + */ +extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i) + lzma_nothrow; + + +/** + * \brief Allocate and initialize a new lzma_index structure + * + * \return On success, a pointer to an empty initialized lzma_index is + * returned. If allocation fails, NULL is returned. + */ +extern LZMA_API(lzma_index *) lzma_index_init(lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Deallocate lzma_index + * + * If i is NULL, this does nothing. + */ +extern LZMA_API(void) lzma_index_end(lzma_index *i, lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Add a new Block to lzma_index + * + * \param i Pointer to a lzma_index structure + * \param allocator Pointer to lzma_allocator, or NULL to + * use malloc() + * \param unpadded_size Unpadded Size of a Block. This can be + * calculated with lzma_block_unpadded_size() + * after encoding or decoding the Block. + * \param uncompressed_size Uncompressed Size of a Block. This can be + * taken directly from lzma_block structure + * after encoding or decoding the Block. + * + * Appending a new Block does not invalidate iterators. For example, + * if an iterator was pointing to the end of the lzma_index, after + * lzma_index_append() it is possible to read the next Block with + * an existing iterator. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. 
+ * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_append( + lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Set the Stream Flags + * + * Set the Stream Flags of the last (and typically the only) Stream + * in lzma_index. This can be useful when reading information from the + * lzma_index, because to decode Blocks, knowing the integrity check type + * is needed. + * + * The given Stream Flags are copied into internal preallocated structure + * in the lzma_index, thus the caller doesn't need to keep the *stream_flags + * available after calling this function. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR: Unsupported stream_flags->version. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_flags( + lzma_index *i, const lzma_stream_flags *stream_flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the types of integrity Checks + * + * If lzma_index_stream_flags() is used to set the Stream Flags for + * every Stream, lzma_index_checks() can be used to get a bitmask to + * indicate which Check types have been used. It can be useful e.g. if + * showing the Check types to the user. + * + * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10. + */ +extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the amount of Stream Padding + * + * Set the amount of Stream Padding of the last (and typically the only) + * Stream in the lzma_index. This is needed when planning to do random-access + * reading within multiple concatenated Streams. + * + * By default, the amount of Stream Padding is assumed to be zero bytes. + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: The file size would grow too big. 
+ * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_padding( + lzma_index *i, lzma_vli stream_padding) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the number of Streams + */ +extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the number of Blocks + * + * This returns the total number of Blocks in lzma_index. To get number + * of Blocks in individual Streams, use lzma_index_iter. + */ +extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + */ +extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Stream + * + * If multiple lzma_indexes have been combined, this works as if the Blocks + * were in a single Stream. This is useful if you are going to combine + * Blocks from multiple Streams into a single new Stream. + */ +extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Blocks + * + * This doesn't include the Stream Header, Stream Footer, Stream Padding, + * or Index fields. + */ +extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the file + * + * When no lzma_indexes have been combined with lzma_index_cat() and there is + * no Stream Padding, this function is identical to lzma_index_stream_size(). + * If multiple lzma_indexes have been combined, this includes also the headers + * of each separate Stream and the possible Stream Padding fields. 
+ */ +extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the uncompressed size of the file + */ +extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize an iterator + * + * \param iter Pointer to a lzma_index_iter structure + * \param i lzma_index to which the iterator will be associated + * + * This function associates the iterator with the given lzma_index, and calls + * lzma_index_iter_rewind() on the iterator. + * + * This function doesn't allocate any memory, thus there is no + * lzma_index_iter_end(). The iterator is valid as long as the + * associated lzma_index is valid, that is, until lzma_index_end() or + * using it as source in lzma_index_cat(). Specifically, lzma_index doesn't + * become invalid if new Blocks are added to it with lzma_index_append() or + * if it is used as the destination in lzma_index_cat(). + * + * It is safe to make copies of an initialized lzma_index_iter, for example, + * to easily restart reading at some particular position. + */ +extern LZMA_API(void) lzma_index_iter_init( + lzma_index_iter *iter, const lzma_index *i) lzma_nothrow; + + +/** + * \brief Rewind the iterator + * + * Rewind the iterator so that next call to lzma_index_iter_next() will + * return the first Block or Stream. + */ +extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter) + lzma_nothrow; + + +/** + * \brief Get the next Block or Stream + * + * \param iter Iterator initialized with lzma_index_iter_init() + * \param mode Specify what kind of information the caller wants + * to get. See lzma_index_iter_mode for details. + * + * \return If next Block or Stream matching the mode was found, *iter + * is updated and this function returns false. If no Block or + * Stream matching the mode is found, *iter is not modified + * and this function returns true. 
If mode is set to an unknown + * value, *iter is not modified and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_next( + lzma_index_iter *iter, lzma_index_iter_mode mode) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Locate a Block + * + * If it is possible to seek in the .xz file, it is possible to parse + * the Index field(s) and use lzma_index_iter_locate() to do random-access + * reading with granularity of Block size. + * + * \param iter Iterator that was earlier initialized with + * lzma_index_iter_init(). + * \param target Uncompressed target offset which the caller would + * like to locate from the Stream + * + * If the target is smaller than the uncompressed size of the Stream (can be + * checked with lzma_index_uncompressed_size()): + * - Information about the Stream and Block containing the requested + * uncompressed offset is stored into *iter. + * - Internal state of the iterator is adjusted so that + * lzma_index_iter_next() can be used to read subsequent Blocks or Streams. + * - This function returns false. + * + * If the target is greater than or equal to the uncompressed size of the + * Stream, *iter is not modified, and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_locate( + lzma_index_iter *iter, lzma_vli target) lzma_nothrow; + + +/** + * \brief Concatenate lzma_indexes + * + * Concatenating lzma_indexes is useful when doing random-access reading in + * multi-Stream .xz file, or when combining multiple Streams into single + * Stream. + * + * \param dest lzma_index after which src is appended + * \param src lzma_index to be appended after dest. If this + * function succeeds, the memory allocated for src + * is freed or moved to be part of dest, and all + * iterators pointing to src will become invalid. + * \param allocator Custom memory allocator; can be NULL to use + * malloc() and free(). + * + * \return - LZMA_OK: lzma_indexes were concatenated successfully. 
+ * src is now a dangling pointer. + * - LZMA_DATA_ERROR: *dest would grow too big. + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *lzma_restrict dest, + lzma_index *lzma_restrict src, + lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Duplicate lzma_index + * + * \return A copy of the lzma_index, or NULL if memory allocation failed. + */ +extern LZMA_API(lzma_index *) lzma_index_dup( + const lzma_index *i, lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index encoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i Pointer to lzma_index which should be encoded. + * + * The only valid action value for lzma_code() is LZMA_RUN. + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_encoder( + lzma_stream *strm, const lzma_index *i) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i The decoded Index will be made available via + * this pointer. Initially this function will + * set *i to NULL (the old value is ignored). If + * decoding succeeds (lzma_code() returns + * LZMA_STREAM_END), *i will be set to point + * to a new lzma_index, which the application + * has to later free with lzma_index_end(). + * \param memlimit How much memory the resulting lzma_index is + * allowed to require. + * + * The only valid action value for lzma_code() is LZMA_RUN. + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). 
+ * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_decoder( + lzma_stream *strm, lzma_index **i, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Index encoder + * + * \param i lzma_index to be encoded + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Output buffer is too small. Use + * lzma_index_size() to find out how much output + * space is needed. + * - LZMA_PROG_ERROR + * + * \note This function doesn't take allocator argument since all + * the internal data is allocated on stack. + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call .xz Index decoder + * + * \param i If decoding succeeds, *i will point to a new + * lzma_index, which the application has to + * later free with lzma_index_end(). If an error + * occurs, *i will be NULL. The old value of *i + * is always ignored and thus doesn't need to be + * initialized by the caller. + * \param memlimit Pointer to how much memory the resulting + * lzma_index is allowed to require. The value + * pointed by this pointer is modified if and only + * if LZMA_MEMLIMIT_ERROR is returned. + * \param allocator Pointer to lzma_allocator, or NULL to use malloc() + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return - LZMA_OK: Decoding was successful. 
+ * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i, + uint64_t *memlimit, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; Index: contrib/xz/src/liblzma/api/lzma/index_hash.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/index_hash.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/index_hash.h (revision 0) @@ -0,0 +1,107 @@ +/** + * \file lzma/index_hash.h + * \brief Validates Index by using a hash function + * + * Hashing makes it possible to use constant amount of memory to validate + * Index of arbitrary size. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + +/** + * \brief Opaque data type to hold the Index hash + */ +typedef struct lzma_index_hash_s lzma_index_hash; + + +/** + * \brief Allocate and initialize a new lzma_index_hash structure + * + * If index_hash is NULL, a new lzma_index_hash structure is allocated, + * initialized, and a pointer to it returned. If allocation fails, NULL + * is returned. + * + * If index_hash is non-NULL, it is reinitialized and the same pointer + * returned. In this case, return value cannot be NULL or a different + * pointer than the index_hash that was given as an argument.
+ */ +extern LZMA_API(lzma_index_hash *) lzma_index_hash_init( + lzma_index_hash *index_hash, lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Deallocate lzma_index_hash structure + */ +extern LZMA_API(void) lzma_index_hash_end( + lzma_index_hash *index_hash, lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Add a new Record to an Index hash + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param unpadded_size Unpadded Size of a Block + * \param uncompressed_size Uncompressed Size of a Block + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR: Invalid arguments or this function is being + * used when lzma_index_hash_decode() has already been used. + */ +extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode and validate the Index field + * + * After telling the sizes of all Blocks with lzma_index_hash_append(), + * the actual Index field is decoded with this function. Specifically, + * once decoding of the Index field has been started, no more Records + * can be added using lzma_index_hash_append(). + * + * This function doesn't use lzma_stream structure to pass the input data. + * Instead, the input buffer is specified using three arguments. This is + * because it matches better the internal APIs of liblzma. + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param in Pointer to the beginning of the input buffer + * \param in_pos in[*in_pos] is the next byte to process + * \param in_size in[in_size] is the first byte not to process + * + * \return - LZMA_OK: So far good, but more input is needed. + * - LZMA_STREAM_END: Index decoded successfully and it matches + * the Records given with lzma_index_hash_append().
+ * - LZMA_DATA_ERROR: Index is corrupt or doesn't match the + * information given with lzma_index_hash_append(). + * - LZMA_BUF_ERROR: Cannot progress because *in_pos >= in_size. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + */ +extern LZMA_API(lzma_vli) lzma_index_hash_size( + const lzma_index_hash *index_hash) + lzma_nothrow lzma_attr_pure; Index: contrib/xz/src/liblzma/api/lzma/base.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/base.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/base.h (revision 0) @@ -0,0 +1,596 @@ +/** + * \file lzma/base.h + * \brief Data types and functions used in many places in liblzma API + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Boolean + * + * This is here because C89 doesn't have stdbool.h. To set a value for + * variables having type lzma_bool, you can use + * - C99's `true' and `false' from stdbool.h; + * - C++'s internal `true' and `false'; or + * - integers one (true) and zero (false). + */ +typedef unsigned char lzma_bool; + + +/** + * \brief Type of reserved enumeration variable in structures + * + * To avoid breaking library ABI when new features are added, several + * structures contain extra variables that may be used in future.
Since + * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may + * even vary depending on the range of enumeration constants, we specify + * a separate type to be used for reserved enumeration variables. All + * enumeration constants in liblzma API will be non-negative and less + * than 128, which should guarantee that the ABI won't break even when + * new constants are added to existing enumerations. + */ +typedef enum { + LZMA_RESERVED_ENUM = 0 +} lzma_reserved_enum; + + +/** + * \brief Return values used by several functions in liblzma + * + * Check the descriptions of specific functions to find out which return + * values they can return. With some functions the return values may have + * more specific meanings than described here; those differences are + * described per-function basis. + */ +typedef enum { + LZMA_OK = 0, + /**< + * \brief Operation completed successfully + */ + + LZMA_STREAM_END = 1, + /**< + * \brief End of stream was reached + * + * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or + * LZMA_FINISH was finished. In decoder, this indicates + * that all the data was successfully decoded. + * + * In all cases, when LZMA_STREAM_END is returned, the last + * output bytes should be picked from strm->next_out. + */ + + LZMA_NO_CHECK = 2, + /**< + * \brief Input stream has no integrity check + * + * This return value can be returned only if the + * LZMA_TELL_NO_CHECK flag was used when initializing + * the decoder. LZMA_NO_CHECK is just a warning, and + * the decoding can be continued normally. + * + * It is possible to call lzma_get_check() immediately after + * lzma_code has returned LZMA_NO_CHECK. The result will + * naturally be LZMA_CHECK_NONE, but the possibility to call + * lzma_get_check() may be convenient in some applications. + */ + + LZMA_UNSUPPORTED_CHECK = 3, + /**< + * \brief Cannot calculate the integrity check + * + * The usage of this return value is different in encoders + * and decoders. 
+ * + * Encoders can return this value only from the initialization + * function. If initialization fails with this value, the + * encoding cannot be done, because there's no way to produce + * output with the correct integrity check. + * + * Decoders can return this value only from lzma_code() and + * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when + * initializing the decoder. The decoding can still be + * continued normally even if the check type is unsupported, + * but naturally the check will not be validated, and possible + * errors may go undetected. + * + * With decoder, it is possible to call lzma_get_check() + * immediately after lzma_code() has returned + * LZMA_UNSUPPORTED_CHECK. This way it is possible to find + * out what the unsupported Check ID was. + */ + + LZMA_GET_CHECK = 4, + /**< + * \brief Integrity check type is now available + * + * This value can be returned only by the lzma_code() function + * and only if the decoder was initialized with the + * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the + * application that it may now call lzma_get_check() to find + * out the Check ID. This can be used, for example, to + * implement a decoder that accepts only files that have + * strong enough integrity check. + */ + + LZMA_MEM_ERROR = 5, + /**< + * \brief Cannot allocate memory + * + * Memory allocation failed, or the size of the allocation + * would be greater than SIZE_MAX. + * + * Due to internal implementation reasons, the coding cannot + * be continued even if more memory were made available after + * LZMA_MEM_ERROR. + */ + + LZMA_MEMLIMIT_ERROR = 6, + /**< + * \brief Memory usage limit was reached + * + * Decoder would need more memory than allowed by the + * specified memory usage limit. To continue decoding, + * the memory usage limit has to be increased with + * lzma_memlimit_set().
+ */ + + LZMA_FORMAT_ERROR = 7, + /**< + * \brief File format not recognized + * + * The decoder did not recognize the input as supported file + * format. This error can occur, for example, when trying to + * decode .lzma format file with lzma_stream_decoder, + * because lzma_stream_decoder accepts only the .xz format. + */ + + LZMA_OPTIONS_ERROR = 8, + /**< + * \brief Invalid or unsupported options + * + * Invalid or unsupported options, for example + * - unsupported filter(s) or filter options; or + * - reserved bits set in headers (decoder only). + * + * Rebuilding liblzma with more features enabled, or + * upgrading to a newer version of liblzma may help. + */ + + LZMA_DATA_ERROR = 9, + /**< + * \brief Data is corrupt + * + * The usage of this return value is different in encoders + * and decoders. In both encoder and decoder, the coding + * cannot continue after this error. + * + * Encoders return this if size limits of the target file + * format would be exceeded. These limits are huge, thus + * getting this error from an encoder is mostly theoretical. + * For example, the maximum compressed and uncompressed + * size of a .xz Stream is roughly 8 EiB (2^63 bytes). + * + * Decoders return this error if the input data is corrupt. + * This can mean, for example, invalid CRC32 in headers + * or invalid check of uncompressed data. + */ + + LZMA_BUF_ERROR = 10, + /**< + * \brief No progress is possible + * + * This error code is returned when the coder cannot consume + * any new input and produce any new output. The most common + * reason for this error is that the input stream being + * decoded is truncated or corrupt. + * + * This error is not fatal. Coding can be continued normally + * by providing more input and/or more output space, if + * possible. + * + * Typically the first call to lzma_code() that can do no + * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only + * the second consecutive call doing no progress will return + * LZMA_BUF_ERROR. 
This is intentional. + * + * With zlib, Z_BUF_ERROR may be returned even if the + * application is doing nothing wrong, so apps will need + * to handle Z_BUF_ERROR specially. The above hack + * guarantees that liblzma never returns LZMA_BUF_ERROR + * to properly written applications unless the input file + * is truncated or corrupt. This should simplify the + * applications a little. + */ + + LZMA_PROG_ERROR = 11, + /**< + * \brief Programming error + * + * This indicates that the arguments given to the function are + * invalid or the internal state of the decoder is corrupt. + * - Function arguments are invalid or the structures + * pointed by the argument pointers are invalid + * e.g. if strm->next_out has been set to NULL and + * strm->avail_out > 0 when calling lzma_code(). + * - lzma_* functions have been called in wrong order + * e.g. lzma_code() was called right after lzma_end(). + * - If errors occur randomly, the reason might be flaky + * hardware. + * + * If you think that your code is correct, this error code + * can be a sign of a bug in liblzma. See the documentation + * how to report bugs. + */ +} lzma_ret; + + +/** + * \brief The `action' argument for lzma_code() + * + * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH, + * the same `action' must be used until lzma_code() returns LZMA_STREAM_END. + * Also, the amount of input (that is, strm->avail_in) must not be modified + * by the application until lzma_code() returns LZMA_STREAM_END. Changing the + * `action' or modifying the amount of input will make lzma_code() return + * LZMA_PROG_ERROR. + */ +typedef enum { + LZMA_RUN = 0, + /**< + * \brief Continue coding + * + * Encoder: Encode as much input as possible. Some internal + * buffering will probably be done (depends on the filter + * chain in use), which causes latency: the input used won't + * usually be decodable from the output of the same + * lzma_code() call.
+ * + * Decoder: Decode as much input as possible and produce as + * much output as possible. + */ + + LZMA_SYNC_FLUSH = 1, + /**< + * \brief Make all the input available at output + * + * Normally the encoder introduces some latency. + * LZMA_SYNC_FLUSH forces all the buffered data to be + * available at output without resetting the internal + * state of the encoder. This way it is possible to use + * compressed stream for example for communication over + * network. + * + * Only some filters support LZMA_SYNC_FLUSH. Trying to use + * LZMA_SYNC_FLUSH with filters that don't support it will + * make lzma_code() return LZMA_OPTIONS_ERROR. For example, + * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does. + * + * Using LZMA_SYNC_FLUSH very often can dramatically reduce + * the compression ratio. With some filters (for example, + * LZMA2), fine-tuning the compression options may help + * mitigate this problem significantly. + * + * Decoders don't support LZMA_SYNC_FLUSH. + */ + + LZMA_FULL_FLUSH = 2, + /**< + * \brief Make all the input available at output + * + * Finish encoding of the current Block. All the input + * data going to the current Block must have been given + * to the encoder (the last bytes can still be pending in + * next_in). Call lzma_code() with LZMA_FULL_FLUSH until + * it returns LZMA_STREAM_END. Then continue normally with + * LZMA_RUN or finish the Stream with LZMA_FINISH. + * + * This action is currently supported only by Stream encoder + * and easy encoder (which uses Stream encoder). If there is + * no unfinished Block, no empty Block is created. + */ + + LZMA_FINISH = 3 + /**< + * \brief Finish the coding operation + * + * Finishes the coding operation. All the input data must + * have been given to the encoder (the last bytes can still + * be pending in next_in). Call lzma_code() with LZMA_FINISH + * until it returns LZMA_STREAM_END. Once LZMA_FINISH has + * been used, the amount of input must no longer be changed + * by the application. 
+ * + * When decoding, using LZMA_FINISH is optional unless the + * LZMA_CONCATENATED flag was used when the decoder was + * initialized. When LZMA_CONCATENATED was not used, the only + * effect of LZMA_FINISH is that the amount of input must not + * be changed just like in the encoder. + */ +} lzma_action; + + +/** + * \brief Custom functions for memory handling + * + * A pointer to lzma_allocator may be passed via lzma_stream structure + * to liblzma, and some advanced functions take a pointer to lzma_allocator + * as a separate function argument. The library will use the functions + * specified in lzma_allocator for memory handling instead of the default + * malloc() and free(). C++ users should note that the custom memory + * handling functions must not throw exceptions. + * + * liblzma doesn't make an internal copy of lzma_allocator. Thus, it is + * OK to change these function pointers in the middle of the coding + * process, but obviously it must be done carefully to make sure that the + * replacement `free' can deallocate memory allocated by the earlier + * `alloc' function(s). + */ +typedef struct { + /** + * \brief Pointer to a custom memory allocation function + * + * If you don't want a custom allocator, but still want + * custom free(), set this to NULL and liblzma will use + * the standard malloc(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param nmemb Number of elements like in calloc(). liblzma + * will always set nmemb to 1, so it is safe to + * ignore nmemb in a custom allocator if you like. + * The nmemb argument exists only for + * compatibility with zlib and libbzip2. + * \param size Size of an element in bytes. + * liblzma never sets this to zero. + * + * \return Pointer to the beginning of a memory block of + * `size' bytes, or NULL if allocation fails + * for some reason. When allocation fails, functions + * of liblzma return LZMA_MEM_ERROR. + * + * The allocator should not waste time zeroing the allocated buffers. 
+ * This is not only about speed, but also memory usage, since the + * operating system kernel doesn't necessarily allocate the requested + * memory in physical memory until it is actually used. With small + * input files, liblzma may actually need only a fraction of the + * memory that it requested for allocation. + * + * \note LZMA_MEM_ERROR is also used when the size of the + * allocation would be greater than SIZE_MAX. Thus, + * don't assume that the custom allocator must have + * returned NULL if some function from liblzma + * returns LZMA_MEM_ERROR. + */ + void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size); + + /** + * \brief Pointer to a custom memory freeing function + * + * If you don't want a custom freeing function, but still + * want a custom allocator, set this to NULL and liblzma + * will use the standard free(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param ptr Pointer returned by lzma_allocator.alloc(), + * or when it is set to NULL, a pointer returned + * by the standard malloc(). + */ + void (LZMA_API_CALL *free)(void *opaque, void *ptr); + + /** + * \brief Pointer passed to .alloc() and .free() + * + * opaque is passed as the first argument to lzma_allocator.alloc() + * and lzma_allocator.free(). This is intended to ease implementing + * custom memory allocation functions for use with liblzma. + * + * If you don't need this, you should set this to NULL. + */ + void *opaque; + +} lzma_allocator; + + +/** + * \brief Internal data structure + * + * The contents of this structure are not visible outside the library. + */ +typedef struct lzma_internal_s lzma_internal; + + +/** + * \brief Passing data to and from liblzma + * + * The lzma_stream structure is used for + * - passing pointers to input and output buffers to liblzma; + * - defining custom memory handler functions; and + * - holding a pointer to coder-specific internal data structures.
+ * + * Typical usage: + * + * - After allocating lzma_stream (on stack or with malloc()), it must be + * initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details). + * + * - Initialize a coder to the lzma_stream, for example by using + * lzma_easy_encoder() or lzma_auto_decoder(). Some notes: + * - In contrast to zlib, strm->next_in and strm->next_out are + * ignored by all initialization functions, thus it is safe + * to not initialize them yet. + * - The initialization functions always set strm->total_in and + * strm->total_out to zero. + * - If the initialization function fails, no memory is left allocated + * that would require freeing with lzma_end() even if some memory was + * associated with the lzma_stream structure when the initialization + * function was called. + * + * - Use lzma_code() to do the actual work. + * + * - Once the coding has been finished, the existing lzma_stream can be + * reused. It is OK to reuse lzma_stream with different initialization + * function without calling lzma_end() first. Old allocations are + * automatically freed. + * + * - Finally, use lzma_end() to free the allocated memory. lzma_end() never + * frees the lzma_stream structure itself. + * + * Application may modify the values of total_in and total_out as it wants. + * They are updated by liblzma to match the amount of data read and + * written, but aren't used for anything else. + */ +typedef struct { + const uint8_t *next_in; /**< Pointer to the next input byte. */ + size_t avail_in; /**< Number of available input bytes in next_in. */ + uint64_t total_in; /**< Total number of bytes read by liblzma. */ + + uint8_t *next_out; /**< Pointer to the next output position. */ + size_t avail_out; /**< Amount of free space in next_out. */ + uint64_t total_out; /**< Total number of bytes written by liblzma. */ + + /** + * \brief Custom memory allocation functions + * + * In most cases this is NULL which makes liblzma use + * the standard malloc() and free(). 
+ */ + lzma_allocator *allocator; + + /** Internal state is not visible to applications. */ + lzma_internal *internal; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. Excluding the initialization of this structure, + * you should not touch these, because the names of these variables + * may change. + */ + void *reserved_ptr1; + void *reserved_ptr2; + uint64_t reserved_int1; + uint64_t reserved_int2; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + +} lzma_stream; + + +/** + * \brief Initialization for lzma_stream + * + * When you declare an instance of lzma_stream, you can immediately + * initialize it so that initialization functions know that no memory + * has been allocated yet: + * + * lzma_stream strm = LZMA_STREAM_INIT; + * + * If you need to initialize a dynamically allocated lzma_stream, you can use + * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this + * violates the C standard since NULL may have different internal + * representation than zero, but it should be portable enough in practice. + * Anyway, for maximum portability, you can use something like this: + * + * lzma_stream tmp = LZMA_STREAM_INIT; + * *strm = tmp; + */ +#define LZMA_STREAM_INIT \ + { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ + NULL, NULL, 0, 0, LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } + + +/** + * \brief Encode or decode data + * + * Once the lzma_stream has been successfully initialized (e.g. with + * lzma_stream_encoder()), the actual encoding or decoding is done + * using this function. The application has to update strm->next_in, + * strm->avail_in, strm->next_out, and strm->avail_out to pass input + * to and get output from liblzma. + * + * See the description of the coder-specific initialization function to find + * out what `action' values are supported by the coder. 
+ */ +extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Free memory allocated for the coder data structures + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * + * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other + * members of the lzma_stream structure are touched. + * + * \note zlib indicates an error if application end()s unfinished + * stream structure. liblzma doesn't do this, and assumes that + * application knows what it is doing. + */ +extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow; + + +/** + * \brief Get the memory usage of decoder filter chain + * + * This function is currently supported only when *strm has been initialized + * with a function that takes a memlimit argument. With other functions, you + * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage() + * to estimate the memory requirements. + * + * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big + * the memory usage limit should have been to decode the input. Note that + * this may give misleading information if decoding .xz Streams that have + * multiple Blocks, because each Block can have different memory requirements. + * + * \return Rough estimate of how much memory is currently allocated + * for the filter decoders. If no filter chain is currently + * allocated, some non-zero value is still returned, which is + * less than or equal to what any filter chain would indicate + * as its memory requirement. + * + * If this function isn't supported by *strm or some other error + * occurs, zero is returned. 
+ */ +extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the current memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \return On success, the current memory usage limit is returned + * (always non-zero). On error, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \return - LZMA_OK: New memory usage limit successfully set. + * - LZMA_MEMLIMIT_ERROR: The new limit is too small. + * The limit was not changed. + * - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't + * support memory usage limit or memlimit was zero. + */ +extern LZMA_API(lzma_ret) lzma_memlimit_set( + lzma_stream *strm, uint64_t memlimit) lzma_nothrow; Index: contrib/xz/src/liblzma/api/lzma/check.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/check.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/check.h (revision 0) @@ -0,0 +1,150 @@ +/** + * \file lzma/check.h + * \brief Integrity checks + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/** + * \brief Type of the integrity check (Check ID) + * + * The .xz format supports multiple types of checks that are calculated + * from the uncompressed data. They vary in both speed and ability to + * detect errors. + */ +typedef enum { + LZMA_CHECK_NONE = 0, + /**< + * No Check is calculated.
+ * + * Size of the Check field: 0 bytes + */ + + LZMA_CHECK_CRC32 = 1, + /**< + * CRC32 using the polynomial from the IEEE 802.3 standard + * + * Size of the Check field: 4 bytes + */ + + LZMA_CHECK_CRC64 = 4, + /**< + * CRC64 using the polynomial from the ECMA-182 standard + * + * Size of the Check field: 8 bytes + */ + + LZMA_CHECK_SHA256 = 10 + /**< + * SHA-256 + * + * Size of the Check field: 32 bytes + */ +} lzma_check; + + +/** + * \brief Maximum valid Check ID + * + * The .xz file format specification specifies 16 Check IDs (0-15). Some + * of them are only reserved, that is, no actual Check algorithm has been + * assigned. When decoding, liblzma still accepts unknown Check IDs for + * future compatibility. If a valid but unsupported Check ID is detected, + * liblzma can indicate a warning; see the flags LZMA_TELL_NO_CHECK, + * LZMA_TELL_UNSUPPORTED_CHECK, and LZMA_TELL_ANY_CHECK in container.h. + */ +#define LZMA_CHECK_ID_MAX 15 + + +/** + * \brief Test if the given Check ID is supported + * + * Return true if the given Check ID is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * is not in the range [0, 15]; in that case the return value is always false. + * + * You can assume that LZMA_CHECK_NONE and LZMA_CHECK_CRC32 are always + * supported (even if liblzma is built with limited features). + */ +extern LZMA_API(lzma_bool) lzma_check_is_supported(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Get the size of the Check field with the given Check ID + * + * Although not all Check IDs have a check algorithm associated, the size of + * every Check is already frozen. This function returns the size (in bytes) of + * the Check field with the specified Check ID. The values are: + * { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 } + * + * If the argument is not in the range [0, 15], UINT32_MAX is returned. 
+ */ +extern LZMA_API(uint32_t) lzma_check_size(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Maximum size of a Check field + */ +#define LZMA_CHECK_SIZE_MAX 64 + + +/** + * \brief Calculate CRC32 + * + * Calculate CRC32 using the polynomial from the IEEE 802.3 standard. + * + * \param buf Pointer to the input buffer + * \param size Size of the input buffer + * \param crc Previously returned CRC value. This is used to + * calculate the CRC of a big buffer in smaller chunks. + * Set to zero when starting a new calculation. + * + * \return Updated CRC value, which can be passed to this function + * again to continue CRC calculation. + */ +extern LZMA_API(uint32_t) lzma_crc32( + const uint8_t *buf, size_t size, uint32_t crc) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate CRC64 + * + * Calculate CRC64 using the polynomial from the ECMA-182 standard. + * + * This function is used similarly to lzma_crc32(). See its documentation. + */ +extern LZMA_API(uint64_t) lzma_crc64( + const uint8_t *buf, size_t size, uint64_t crc) + lzma_nothrow lzma_attr_pure; + + +/* + * SHA-256 functions are currently not exported to public API. + * Contact Lasse Collin if you think it should be. + */ + + +/** + * \brief Get the type of the integrity check + * + * This function can be called only immediately after lzma_code() has + * returned LZMA_NO_CHECK, LZMA_UNSUPPORTED_CHECK, or LZMA_GET_CHECK. + * Calling this function in any other situation has undefined behavior. + */ +extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm) + lzma_nothrow; Index: contrib/xz/src/liblzma/api/lzma/container.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/container.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/container.h (revision 0) @@ -0,0 +1,404 @@ +/** + * \file lzma/container.h + * \brief File formats + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. 
+ * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/************ + * Encoding * + ************/ + +/** + * \brief Default compression preset + * + * It's not straightforward to recommend a default preset, because in some + * cases keeping the resource usage relatively low is more important than + * getting the maximum compression ratio. + */ +#define LZMA_PRESET_DEFAULT UINT32_C(6) + + +/** + * \brief Mask for preset level + * + * This is useful only if you need to extract the level from the preset + * variable. That should be rare. + */ +#define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F) + + +/* + * Preset flags + * + * Currently only one flag is defined. + */ + +/** + * \brief Extreme compression preset + * + * This flag modifies the preset to make the encoding significantly slower + * while improving the compression ratio only marginally. This is useful + * when you don't mind wasting time to get as small a result as possible. + * + * This flag doesn't affect the memory usage requirements of the decoder (at + * least not significantly). The memory usage of the encoder may be increased + * a little but only at the lowest preset levels (0-2). + */ +#define LZMA_PRESET_EXTREME (UINT32_C(1) << 31) + + +/** + * \brief Calculate rough memory usage of easy encoder + * + * This function is a wrapper for lzma_raw_encoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + */ +extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate rough decoder memory usage of a preset + * + * This function is a wrapper for lzma_raw_decoder_memusage().
+ * + * \param preset Compression preset (level and possible flags) + */ +extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Stream encoder using a preset number + * + * This function is intended for those who just want to use the basic features + * of liblzma (that is, most developers out there). + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param preset Compression preset to use. A preset consists of a level + * number and zero or more flags. Usually flags aren't + * used, so preset is simply a number [0, 9] which matches + * the options -0 .. -9 of the xz command line tool. + * Additional flags can be set using bitwise-or with + * the preset level number, e.g. 6 | LZMA_PRESET_EXTREME. + * \param check Integrity check type to use. See check.h for available + * checks. If you are unsure, use LZMA_CHECK_CRC32. + * + * \return - LZMA_OK: Initialization succeeded. Use lzma_code() to + * encode your data. + * - LZMA_MEM_ERROR: Memory allocation failed. + * - LZMA_OPTIONS_ERROR: The given compression level is not + * supported by this build of liblzma. + * - LZMA_UNSUPPORTED_CHECK: The given check type is not + * supported by this liblzma build. + * - LZMA_PROG_ERROR: One or more of the parameters have values + * that will never be valid. For example, strm == NULL. + * + * If initialization fails (return value is not LZMA_OK), all the memory + * allocated for *strm by liblzma is always freed. Thus, there is no need + * to call lzma_end() after failed initialization. + * + * If initialization succeeds, use lzma_code() to do the actual encoding. + * Valid values for `action' (the second argument of lzma_code()) are + * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, + * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH.
+ */ +extern LZMA_API(lzma_ret) lzma_easy_encoder( + lzma_stream *strm, uint32_t preset, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream encoding using a preset number + * + * The maximum required output buffer size can be calculated with + * lzma_stream_buffer_bound(). + * + * \param preset Compression preset to use. See the description + * in lzma_easy_encoder(). + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_easy_buffer_encode( + uint32_t preset, lzma_check check, + lzma_allocator *allocator, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Initialize .xz Stream encoder using a custom filter chain + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h for + * more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * + * \return - LZMA_OK: Initialization was successful. 
+ * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, + const lzma_filter *filters, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma encoder (legacy file format) + * + * The .lzma format is sometimes called the LZMA_Alone format, which is the + * reason for the name of this function. The .lzma format supports only the + * LZMA1 filter. There is no support for integrity checks like CRC32. + * + * Use this function if and only if you need to create files readable by + * legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format + * is strongly recommended. + * + * The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * No kind of flushing is supported, because the file format doesn't make + * it possible. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_encoder( + lzma_stream *strm, const lzma_options_lzma *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate output buffer size for single-call Stream encoder + * + * When trying to compress uncompressible data, the encoded size will be + * slightly bigger than the input data. This function calculates how much + * output buffer space is required to be sure that lzma_stream_buffer_encode() + * doesn't return LZMA_BUF_ERROR. + * + * The calculated value is not exact, but it is guaranteed to be big enough. + * The actual maximum output space required may be slightly smaller (up to + * about 100 bytes). This should not be a problem in practice. + * + * If the calculated maximum size doesn't fit into size_t or would make the + * Stream grow past LZMA_VLI_MAX (which should never happen in practice), + * zero is returned to indicate the error. + * + * \note The limit calculated by this function applies only to + * single-call encoding. 
Multi-call encoding may (and probably + * will) have larger maximum expansion when encoding + * uncompressible data. Currently there is no function to + * calculate the maximum expansion of multi-call encoding. + */ +extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Stream encoder + * + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h + * for more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_encode( + lzma_filter *filters, lzma_check check, + lzma_allocator *allocator, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/************ + * Decoding * + ************/ + +/** + * This flag makes lzma_code() return LZMA_NO_CHECK if the input stream + * being decoded has no integrity check. Note that when used with + * lzma_auto_decoder(), all .lzma files will trigger LZMA_NO_CHECK + * if LZMA_TELL_NO_CHECK is used. 
+ */ +#define LZMA_TELL_NO_CHECK UINT32_C(0x01) + + +/** + * This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input + * stream has an integrity check, but the type of the integrity check is not + * supported by this liblzma version or build. Such files can still be + * decoded, but the integrity check cannot be verified. + */ +#define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02) + + +/** + * This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type + * of the integrity check is known. The type can then be got with + * lzma_get_check(). + */ +#define LZMA_TELL_ANY_CHECK UINT32_C(0x04) + + +/** + * This flag enables decoding of concatenated files with file formats that + * allow concatenating compressed files as is. From the formats currently + * supported by liblzma, only the .xz format allows concatenated files. + * Concatenated files are not allowed with the legacy .lzma format. + * + * This flag also affects the usage of the `action' argument for lzma_code(). + * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END + * unless LZMA_FINISH is used as `action'. Thus, the application has to set + * LZMA_FINISH in the same way as it does when encoding. + * + * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH + * as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required. + */ +#define LZMA_CONCATENATED UINT32_C(0x08) + + +/** + * \brief Initialize .xz Stream decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Rough memory usage limit as bytes + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. 
+ * - LZMA_OPTIONS_ERROR: Unsupported flags + */ +extern LZMA_API(lzma_ret) lzma_stream_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode .xz Streams and .lzma files with autodetection + * + * This decoder autodetects between the .xz and .lzma file formats, and + * calls lzma_stream_decoder() or lzma_alone_decoder() once the type + * of the input file has been detected. + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Rough memory usage limit as bytes + * \param flags Bitwise-or of flags, or zero for no flags. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + */ +extern LZMA_API(lzma_ret) lzma_auto_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma decoder (legacy file format) + * + * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * There is no need to use LZMA_FINISH, but allowing it may simplify + * certain types of applications. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_decoder( + lzma_stream *strm, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream decoder + * + * \param memlimit Pointer to how much memory the decoder is allowed + * to allocate. The value pointed by this pointer is + * modified if and only if LZMA_MEMLIMIT_ERROR is + * returned. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK + * is not allowed and will return LZMA_PROG_ERROR. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). 
+ * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_NO_CHECK: This can be returned only if using + * the LZMA_TELL_NO_CHECK flag. + * - LZMA_UNSUPPORTED_CHECK: This can be returned only if using + * the LZMA_TELL_UNSUPPORTED_CHECK flag. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( + uint64_t *memlimit, uint32_t flags, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; Index: contrib/xz/src/liblzma/api/lzma/filter.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/filter.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/filter.h (revision 0) @@ -0,0 +1,421 @@ +/** + * \file lzma/filter.h + * \brief Common filter related types + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead.
+#endif + + +/** + * \brief Maximum number of filters in a chain + * + * A filter chain can have 1-4 filters, of which three are allowed to change + * the size of the data. Usually only one or two filters are needed. + */ +#define LZMA_FILTERS_MAX 4 + + +/** + * \brief Filter options + * + * This structure is used to pass Filter ID and a pointer to the filter's + * options to liblzma. A few functions work with a single lzma_filter + * structure, while most functions expect a filter chain. + * + * A filter chain is indicated with an array of lzma_filter structures. + * The array is terminated with .id = LZMA_VLI_UNKNOWN. Thus, the filter + * array must have LZMA_FILTERS_MAX + 1 elements (that is, five) to + * be able to hold any arbitrary filter chain. This is important when + * using lzma_block_header_decode() from block.h, because a too small + * array would make liblzma write past the end of the filters array. + */ +typedef struct { + /** + * \brief Filter ID + * + * Use constants whose names begin with `LZMA_FILTER_' to specify + * different filters. In an array of lzma_filter structures, use + * LZMA_VLI_UNKNOWN to indicate end of filters. + * + * \note This is not an enum, because on some systems enums + * cannot be 64-bit. + */ + lzma_vli id; + + /** + * \brief Pointer to filter-specific options structure + * + * If the filter doesn't need options, set this to NULL. If id is + * set to LZMA_VLI_UNKNOWN, options is ignored, and thus + * doesn't need to be initialized. + * + * Some filters support changing the options in the middle of + * the encoding process. These filters store the pointer of the + * options structure and communicate with the application via + * modifications of the options structure. + */ + void *options; + +} lzma_filter; + + +/** + * \brief Test if the given Filter ID is supported for encoding + * + * Return true if the given Filter ID is supported for encoding by this + * liblzma build. Otherwise false is returned.
+ * + * There is no way to list which filters are available in this particular + * liblzma version and build. It would be useless, because the application + * couldn't know what kind of options the filter would need. + */ +extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Test if the given Filter ID is supported for decoding + * + * Return true if the given Filter ID is supported for decoding by this + * liblzma build. Otherwise false is returned. + */ +extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Copy the filters array + * + * Copy the Filter IDs and filter-specific options from src to dest. + * Up to LZMA_FILTERS_MAX filters are copied, plus the terminating + * .id == LZMA_VLI_UNKNOWN. Thus, dest should have at least + * LZMA_FILTERS_MAX + 1 elements space unless the caller knows that + * src is smaller than that. + * + * Unless the filter-specific options is NULL, the Filter ID has to be + * supported by liblzma, because liblzma needs to know the size of every + * filter-specific options structure. The filter-specific options are not + * validated. If options is NULL, any unsupported Filter IDs are copied + * without returning an error. + * + * Old filter-specific options in dest are not freed, so dest doesn't + * need to be initialized by the caller in any way. + * + * If an error occurs, memory possibly already allocated by this function + * is always freed. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR: Unsupported Filter ID and its options + * is not NULL. + * - LZMA_PROG_ERROR: src or dest is NULL.
+ */ +extern LZMA_API(lzma_ret) lzma_filters_copy(const lzma_filter *src, + lzma_filter *dest, lzma_allocator *allocator) lzma_nothrow; + + +/** + * \brief Calculate rough memory requirements for raw encoder + * + * Because the calculation is rough, this function can be used to calculate + * the memory requirements for Block and Stream encoders too. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Rough number of bytes of memory required for the given + * filter chain when encoding. + */ +extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate rough memory requirements for raw decoder + * + * Because the calculation is rough, this function can be used to calculate + * the memory requirements for Block and Stream decoders too. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Rough number of bytes of memory required for the given + * filter chain when decoding. + */ +extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize raw encoder + * + * This function may be useful when implementing custom file formats. + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * + * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the + * filter chain supports it), or LZMA_FINISH. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_encoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize raw decoder + * + * The initialization of raw decoder goes similarly to raw encoder. 
+ * + * The `action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using + * LZMA_FINISH is not required, it is supported just for convenience. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_decoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Update the filter chain in the encoder + * + * This function is for advanced users only. This function has two slightly + * different purposes: + * + * - After LZMA_FULL_FLUSH when using Stream encoder: Set a new filter + * chain, which will be used starting from the next Block. + * + * - After LZMA_SYNC_FLUSH using Raw, Block, or Stream encoder: Change + * the filter-specific options in the middle of encoding. The actual + * filters in the chain (Filter IDs) cannot be changed. In the future, + * it might become possible to change the filter options without + * using LZMA_SYNC_FLUSH. + * + * While rarely useful, this function may be called also when no data has + * been compressed yet. In that case, this function will behave as if + * LZMA_FULL_FLUSH (Stream encoder) or LZMA_SYNC_FLUSH (Raw or Block + * encoder) had been used right before calling this function. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filters_update( + lzma_stream *strm, const lzma_filter *filters) lzma_nothrow; + + +/** + * \brief Single-call raw encoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. 
+ * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + * + * \note There is no function to calculate how big an output buffer + * would surely be big enough. (lzma_stream_buffer_bound() + * works only for lzma_stream_buffer_encode().) + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_encode( + const lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, uint8_t *out, + size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call raw decoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_decode( + const lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Get the size of the Filter Properties field + * + * This function may be useful when implementing custom file formats + * using the raw encoder and decoder.
+ * + * \param size Pointer to uint32_t to hold the size of the properties + * \param filter Filter ID and options (the size of the properties may + * vary depending on the options) + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note This function validates the Filter ID, but does not + * necessarily validate the options. Thus, it is possible + * that this returns LZMA_OK while the following call to + * lzma_properties_encode() returns LZMA_OPTIONS_ERROR. + */ +extern LZMA_API(lzma_ret) lzma_properties_size( + uint32_t *size, const lzma_filter *filter) lzma_nothrow; + + +/** + * \brief Encode the Filter Properties field + * + * \param filter Filter ID and options + * \param props Buffer to hold the encoded options. The size of + * buffer must have been already determined with + * lzma_properties_size(). + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note Even this function won't validate more options than actually + * necessary. Thus, it is possible that encoding the properties + * succeeds but using the same options to initialize the encoder + * will fail. + * + * \note It is OK to skip calling this function if + * lzma_properties_size() indicated that the size + * of the Filter Properties field is zero. + */ +extern LZMA_API(lzma_ret) lzma_properties_encode( + const lzma_filter *filter, uint8_t *props) lzma_nothrow; + + +/** + * \brief Decode the Filter Properties field + * + * \param filter filter->id must have been set to the correct + * Filter ID. filter->options doesn't need to be + * initialized (it's not freed by this function). The + * decoded options will be stored to filter->options. + * filter->options is set to NULL if there are no + * properties or if an error occurs. + * \param allocator Custom memory allocator used to allocate the + * options. Set to NULL to use the default malloc(), + * and in case of an error, also free(). + * \param props Input buffer containing the properties. 
+ * \param props_size Size of the properties. This must be the exact + * size; giving too much or too little input will + * return LZMA_OPTIONS_ERROR. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_properties_decode( + lzma_filter *filter, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) lzma_nothrow; + + +/** + * \brief Calculate encoded size of a Filter Flags field + * + * Knowing the size of Filter Flags is useful to know when allocating + * memory to hold the encoded Filter Flags. + * + * \param size Pointer to integer to hold the calculated size + * \param filters Filter ID and associated options whose encoded + * size is to be calculated + * + * \return - LZMA_OK: *size set successfully. Note that this doesn't + * guarantee that filters->options is valid, thus + * lzma_filter_flags_encode() may still fail. + * - LZMA_OPTIONS_ERROR: Unknown Filter ID or unsupported options. + * - LZMA_PROG_ERROR: Invalid options + * + * \note If you need to calculate size of List of Filter Flags, + * you need to loop over every lzma_filter entry. + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_size( + uint32_t *size, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Filter Flags into given buffer + * + * In contrast to some functions, this doesn't allocate the needed buffer. + * This is due to how this function is used internally by liblzma. + * + * \param filters Filter ID and options to be encoded + * \param out Beginning of the output buffer + * \param out_pos out[*out_pos] is the next write position. This + * is updated by the encoder. + * \param out_size out[out_size] is the first byte to not write. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. 
+ * - LZMA_PROG_ERROR: Invalid options or not enough output + * buffer space (you should have checked it with + * lzma_filter_flags_size()). + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filters, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Filter Flags from given buffer + * + * The decoded result is stored into *filters. filters->options is + * initialized but the old value is NOT free()d. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_decode( + lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; Index: contrib/xz/src/liblzma/api/lzma/version.h =================================================================== --- contrib/xz/src/liblzma/api/lzma/version.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma/version.h (revision 0) @@ -0,0 +1,121 @@ +/** + * \file lzma/version.h + * \brief Version number + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. +#endif + + +/* + * Version number split into components + */ +#define LZMA_VERSION_MAJOR 4 +#define LZMA_VERSION_MINOR 999 +#define LZMA_VERSION_PATCH 9 +#define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_BETA + +#ifndef LZMA_VERSION_COMMIT +# define LZMA_VERSION_COMMIT "" +#endif + + +/* + * Map symbolic stability levels to integers.
+ */ +#define LZMA_VERSION_STABILITY_ALPHA 0 +#define LZMA_VERSION_STABILITY_BETA 1 +#define LZMA_VERSION_STABILITY_STABLE 2 + + +/** + * \brief Compile-time version number + * + * The version number is of format xyyyzzzs where + * - x = major + * - yyy = minor + * - zzz = revision + * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable + * + * The same xyyyzzz triplet is never reused with different stability levels. + * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta + * or 5.1.0 stable. + * + * \note The version number of liblzma has nothing to do with + * the version number of Igor Pavlov's LZMA SDK. + */ +#define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + + LZMA_VERSION_MINOR * UINT32_C(10000) \ + + LZMA_VERSION_PATCH * UINT32_C(10) \ + + LZMA_VERSION_STABILITY) + + +/* + * Macros to construct the compile-time version string + */ +#if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA +# define LZMA_VERSION_STABILITY_STRING "alpha" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA +# define LZMA_VERSION_STABILITY_STRING "beta" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE +# define LZMA_VERSION_STABILITY_STRING "" +#else +# error Incorrect LZMA_VERSION_STABILITY +#endif + +#define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ + #major "." #minor "." #patch stability commit + +#define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ + LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) + + +/** + * \brief Compile-time version as a string + * + * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable + * versions don't have any "stable" suffix). In future, a snapshot built + * from source code repository may include an additional suffix, for example + * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form + * in LZMA_VERSION macro.
+ */ +#define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ + LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ + LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ + LZMA_VERSION_COMMIT) + + +/* #ifndef is needed for use with windres (MinGW or Cygwin). */ +#ifndef LZMA_H_INTERNAL_RC + +/** + * \brief Run-time version number as an integer + * + * Return the value of LZMA_VERSION macro at the compile time of liblzma. + * This allows the application to compare if it was built against the same, + * older, or newer version of liblzma that is currently running. + */ +extern LZMA_API(uint32_t) lzma_version_number(void) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Run-time version as a string + * + * This function may be useful if you want to display which version of + * liblzma your application is currently using. + */ +extern LZMA_API(const char *) lzma_version_string(void) + lzma_nothrow lzma_attr_const; + +#endif Index: contrib/xz/src/liblzma/api/lzma.h =================================================================== --- contrib/xz/src/liblzma/api/lzma.h (revision 0) +++ contrib/xz/src/liblzma/api/lzma.h (revision 0) @@ -0,0 +1,326 @@ +/** + * \file api/lzma.h + * \brief The public API of liblzma data compression library + * + * liblzma is a public domain general-purpose data compression library with + * a zlib-like API. The native file format is .xz, but also the old .lzma + * format and raw (no headers) streams are supported. Multiple compression + * algorithms (filters) are supported. Currently LZMA2 is the primary filter. + * + * liblzma is part of XZ Utils <http://tukaani.org/xz/>. XZ Utils includes + * a gzip-like command line tool named xz and some other tools. XZ Utils + * is developed and maintained by Lasse Collin. + * + * Major parts of liblzma are based on Igor Pavlov's public domain LZMA SDK + * <http://7-zip.org/sdk.html>. + * + * The SHA-256 implementation is based on the public domain code found from + * 7-Zip <http://7-zip.org/>, which has a modified version of the public + * domain SHA-256 code found from Crypto++ <http://www.cryptopp.com/>.
+ * The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef LZMA_H +#define LZMA_H + +/***************************** + * Required standard headers * + *****************************/ + +/* + * liblzma API headers need some standard types and macros. To allow + * including lzma.h without requiring the application to include other + * headers first, lzma.h includes the required standard headers unless + * they already seem to be included or if LZMA_MANUAL_HEADERS + * has been defined. + * + * Here's what types and macros are needed and from which headers: + * - stddef.h: size_t, NULL + * - stdint.h: uint8_t, uint32_t, uint64_t, UINT32_C(n), UINT64_C(n), + * UINT32_MAX, UINT64_MAX + * + * However, inttypes.h is a little more portable than stdint.h, although + * inttypes.h declares some unneeded things compared to plain stdint.h. + * + * The hacks below aren't perfect, specifically they assume that inttypes.h + * exists and that it typedefs at least uint8_t, uint32_t, and uint64_t, + * and that, in case of incomplete inttypes.h, unsigned int is 32-bit. + * If the application already takes care of setting up all the types and + * macros properly (for example by using gnulib's stdint.h or inttypes.h), + * we try to detect that the macros are already defined and don't include + * inttypes.h here again. However, you may define LZMA_MANUAL_HEADERS to + * force this file to never include any system headers. + * + * Some could argue that liblzma API should provide all the required types, + * for example lzma_uint64, LZMA_UINT64_C(n), and LZMA_UINT64_MAX. This was + * seen as an unnecessary mess, since most systems already provide all the + * necessary types and macros in the standard headers.
+ * + * Note that liblzma API still has lzma_bool, because using stdbool.h would + * break C89 and C++ programs on many systems. sizeof(bool) in C99 isn't + * necessarily the same as sizeof(bool) in C++. + */ + +#ifndef LZMA_MANUAL_HEADERS + /* + * I suppose this works portably also in C++. Note that in C++, + * we need to get size_t into the global namespace. + */ +# include <stddef.h> + + /* + * Skip inttypes.h if we already have all the required macros. If we + * have the macros, we assume that we have the matching typedefs too. + */ +# if !defined(UINT32_C) || !defined(UINT64_C) \ + || !defined(UINT32_MAX) || !defined(UINT64_MAX) + /* + * MSVC has no C99 support, and thus it cannot be used to + * compile liblzma. The liblzma API has to still be usable + * from MSVC, so we need to define the required standard + * integer types here. + */ +# if defined(_WIN32) && defined(_MSC_VER) + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else + /* Use the standard inttypes.h. */ +# ifdef __cplusplus + /* + * C99 sections 7.18.2 and 7.18.4 specify + * that C++ implementations define the limit + * and constant macros only if specifically + * requested. Note that if you want the + * format macros (PRIu64 etc.) too, you need + * to define __STDC_FORMAT_MACROS before + * including lzma.h, since re-including + * inttypes.h with __STDC_FORMAT_MACROS + * defined doesn't necessarily work. + */ +# ifndef __STDC_LIMIT_MACROS +# define __STDC_LIMIT_MACROS 1 +# endif +# ifndef __STDC_CONSTANT_MACROS +# define __STDC_CONSTANT_MACROS 1 +# endif +# endif + +# include <inttypes.h> +# endif + + /* + * Some old systems have only the typedefs in inttypes.h, and + * lack all the macros. For those systems, we need a few more + * hacks. We assume that unsigned int is 32-bit and unsigned + * long is either 32-bit or 64-bit. If these hacks aren't + * enough, the application has to set up the types manually + * before including lzma.h.
+ */ +# ifndef UINT32_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT32_C(n) n ## UI32 +# else +# define UINT32_C(n) n ## U +# endif +# endif + +# ifndef UINT64_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT64_C(n) n ## UI64 +# else + /* Get ULONG_MAX. */ +# include <limits.h> +# if ULONG_MAX == 4294967295UL +# define UINT64_C(n) n ## ULL +# else +# define UINT64_C(n) n ## UL +# endif +# endif +# endif + +# ifndef UINT32_MAX +# define UINT32_MAX (UINT32_C(4294967295)) +# endif + +# ifndef UINT64_MAX +# define UINT64_MAX (UINT64_C(18446744073709551615)) +# endif +# endif +#endif /* ifndef LZMA_MANUAL_HEADERS */ + + +/****************** + * LZMA_API macro * + ******************/ + +/* + * Some systems require that the functions and function pointers are + * declared specially in the headers. LZMA_API_IMPORT is for importing + * symbols and LZMA_API_CALL is to specify the calling convention. + * + * By default it is assumed that the application will link dynamically + * against liblzma. #define LZMA_API_STATIC in your application if you + * want to link against static liblzma. If you don't care about portability + * to operating systems like Windows, or at least don't care about linking + * against static liblzma on them, don't worry about LZMA_API_STATIC. That + * is, most developers will never need to use LZMA_API_STATIC. + * + * The GCC variants are a special case on Windows (Cygwin and MinGW). + * We rely on GCC doing the right thing with its auto-import feature, + * and thus don't use __declspec(dllimport). This way developers don't + * need to worry about LZMA_API_STATIC. Also the calling convention is + * omitted on Cygwin but not on MinGW.
+ */ +#ifndef LZMA_API_IMPORT +# if !defined(LZMA_API_STATIC) && defined(_WIN32) && !defined(__GNUC__) +# define LZMA_API_IMPORT __declspec(dllimport) +# else +# define LZMA_API_IMPORT +# endif +#endif + +#ifndef LZMA_API_CALL +# if defined(_WIN32) && !defined(__CYGWIN__) +# define LZMA_API_CALL __cdecl +# else +# define LZMA_API_CALL +# endif +#endif + +#ifndef LZMA_API +# define LZMA_API(type) LZMA_API_IMPORT type LZMA_API_CALL +#endif + + +/*********** + * nothrow * + ***********/ + +/* + * None of the functions in liblzma may throw an exception. Even + * the functions that use callback functions won't throw exceptions, + * because liblzma would break if a callback function threw an exception. + */ +#ifndef lzma_nothrow +# if defined(__cplusplus) +# define lzma_nothrow throw() +# elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) +# define lzma_nothrow __attribute__((__nothrow__)) +# else +# define lzma_nothrow +# endif +#endif + + +/******************** + * GNU C extensions * + ********************/ + +/* + * GNU C extensions are used conditionally in the public API. It doesn't + * break anything if these are sometimes enabled and sometimes not, only + * affects warnings and optimizations. + */ +#if __GNUC__ >= 3 +# ifndef lzma_attribute +# define lzma_attribute(attr) __attribute__(attr) +# endif + +# ifndef lzma_restrict +# define lzma_restrict __restrict__ +# endif + + /* warn_unused_result was added in GCC 3.4. 
*/ +# ifndef lzma_attr_warn_unused_result +# if __GNUC__ == 3 && __GNUC_MINOR__ < 4 +# define lzma_attr_warn_unused_result +# endif +# endif + +#else +# ifndef lzma_attribute +# define lzma_attribute(attr) +# endif + +# ifndef lzma_restrict +# if __STDC_VERSION__ >= 199901L +# define lzma_restrict restrict +# else +# define lzma_restrict +# endif +# endif +#endif + + +#ifndef lzma_attr_pure +# define lzma_attr_pure lzma_attribute((__pure__)) +#endif + +#ifndef lzma_attr_const +# define lzma_attr_const lzma_attribute((__const__)) +#endif + +#ifndef lzma_attr_warn_unused_result +# define lzma_attr_warn_unused_result \ + lzma_attribute((__warn_unused_result__)) +#endif + + +/************** + * Subheaders * + **************/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Subheaders check that this is defined. It is to prevent including + * them directly from applications. + */ +#define LZMA_H_INTERNAL 1 + +/* Basic features */ +#include "lzma/version.h" +#include "lzma/base.h" +#include "lzma/vli.h" +#include "lzma/check.h" + +/* Filters */ +#include "lzma/filter.h" +#include "lzma/subblock.h" +#include "lzma/bcj.h" +#include "lzma/delta.h" +#include "lzma/lzma.h" + +/* Container formats */ +#include "lzma/container.h" + +/* Advanced features */ +#include "lzma/stream_flags.h" +#include "lzma/block.h" +#include "lzma/index.h" +#include "lzma/index_hash.h" + +/* Hardware information */ +#include "lzma/hardware.h" + +/* + * All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications + * re-including the subheaders. 
+ */ +#undef LZMA_H_INTERNAL + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef LZMA_H */ Index: contrib/xz/src/liblzma/delta/delta_decoder.c =================================================================== --- contrib/xz/src/liblzma/delta/delta_decoder.c (revision 0) +++ contrib/xz/src/liblzma/delta/delta_decoder.c (revision 0) @@ -0,0 +1,76 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_decoder.c +/// \brief Delta filter decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "delta_decoder.h" +#include "delta_private.h" + + +static void +decode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t distance = coder->distance; + + for (size_t i = 0; i < size; ++i) { + buffer[i] += coder->history[(distance + coder->pos) & 0xFF]; + coder->history[coder->pos-- & 0xFF] = buffer[i]; + } +} + + +static lzma_ret +delta_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + assert(coder->next.code != NULL); + + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + decode_buffer(coder, out + out_start, *out_pos - out_start); + + return ret; +} + + +extern lzma_ret +lzma_delta_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + next->code = &delta_decode; + return lzma_delta_coder_init(next, allocator, filters); +} + + +extern lzma_ret +lzma_delta_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return 
LZMA_OPTIONS_ERROR; + + lzma_options_delta *opt + = lzma_alloc(sizeof(lzma_options_delta), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + opt->type = LZMA_DELTA_TYPE_BYTE; + opt->dist = props[0] + 1; + + *options = opt; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/delta/delta_common.c =================================================================== --- contrib/xz/src/liblzma/delta/delta_common.c (revision 0) +++ contrib/xz/src/liblzma/delta/delta_common.c (revision 0) @@ -0,0 +1,70 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_common.c +/// \brief Common stuff for Delta encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "delta_common.h" +#include "delta_private.h" + + +static void +delta_coder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_delta_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // Allocate memory for the decoder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + // End function is the same for encoder and decoder. + next->end = &delta_coder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Validate the options. + if (lzma_delta_coder_memusage(filters[0].options) == UINT64_MAX) + return LZMA_OPTIONS_ERROR; + + // Set the delta distance. + const lzma_options_delta *opt = filters[0].options; + next->coder->distance = opt->dist; + + // Initialize the rest of the variables. 
+ next->coder->pos = 0; + memzero(next->coder->history, LZMA_DELTA_DIST_MAX); + + // Initialize the next decoder in the chain, if any. + return lzma_next_filter_init(&next->coder->next, + allocator, filters + 1); +} + + +extern uint64_t +lzma_delta_coder_memusage(const void *options) +{ + const lzma_options_delta *opt = options; + + if (opt == NULL || opt->type != LZMA_DELTA_TYPE_BYTE + || opt->dist < LZMA_DELTA_DIST_MIN + || opt->dist > LZMA_DELTA_DIST_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder); +} Index: contrib/xz/src/liblzma/delta/delta_decoder.h =================================================================== --- contrib/xz/src/liblzma/delta/delta_decoder.h (revision 0) +++ contrib/xz/src/liblzma/delta/delta_decoder.h (revision 0) @@ -0,0 +1,25 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_decoder.h +/// \brief Delta filter decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_DECODER_H +#define LZMA_DELTA_DECODER_H + +#include "delta_common.h" + +extern lzma_ret lzma_delta_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_delta_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif Index: contrib/xz/src/liblzma/delta/delta_common.h =================================================================== --- contrib/xz/src/liblzma/delta/delta_common.h (revision 0) +++ contrib/xz/src/liblzma/delta/delta_common.h (revision 0) @@ -0,0 +1,20 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_common.h +/// \brief Common stuff for Delta encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_COMMON_H +#define LZMA_DELTA_COMMON_H + +#include "common.h" + +extern uint64_t lzma_delta_coder_memusage(const void *options); + +#endif Index: contrib/xz/src/liblzma/delta/delta_encoder.c =================================================================== --- contrib/xz/src/liblzma/delta/delta_encoder.c (revision 0) +++ contrib/xz/src/liblzma/delta/delta_encoder.c (revision 0) @@ -0,0 +1,121 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_encoder.c +/// \brief Delta filter encoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "delta_encoder.h" +#include "delta_private.h" + + +/// Copies and encodes the data at the same time. 
This is used when Delta +/// is the first filter in the chain (and thus the last filter in the +/// encoder's filter stack). +static void +copy_and_encode(lzma_coder *coder, + const uint8_t *restrict in, uint8_t *restrict out, size_t size) +{ + const size_t distance = coder->distance; + + for (size_t i = 0; i < size; ++i) { + const uint8_t tmp = coder->history[ + (distance + coder->pos) & 0xFF]; + coder->history[coder->pos-- & 0xFF] = in[i]; + out[i] = in[i] - tmp; + } +} + + +/// Encodes the data in place. This is used when we are the last filter +/// in the chain (and thus non-last filter in the encoder's filter stack). +static void +encode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t distance = coder->distance; + + for (size_t i = 0; i < size; ++i) { + const uint8_t tmp = coder->history[ + (distance + coder->pos) & 0xFF]; + coder->history[coder->pos-- & 0xFF] = buffer[i]; + buffer[i] -= tmp; + } +} + + +static lzma_ret +delta_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_ret ret; + + if (coder->next.code == NULL) { + const size_t in_avail = in_size - *in_pos; + const size_t out_avail = out_size - *out_pos; + const size_t size = MIN(in_avail, out_avail); + + copy_and_encode(coder, in + *in_pos, out + *out_pos, size); + + *in_pos += size; + *out_pos += size; + + ret = action != LZMA_RUN && *in_pos == in_size + ? 
LZMA_STREAM_END : LZMA_OK; + + } else { + const size_t out_start = *out_pos; + + ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + encode_in_place(coder, out + out_start, *out_pos - out_start); + } + + return ret; +} + + +static lzma_ret +delta_encoder_update(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters_null lzma_attribute((unused)), + const lzma_filter *reversed_filters) +{ + // Delta doesn't and will never support changing the options in + // the middle of encoding. If the app tries to change them, we + // simply ignore them. + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters + 1); +} + + +extern lzma_ret +lzma_delta_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + next->code = &delta_encode; + next->update = &delta_encoder_update; + return lzma_delta_coder_init(next, allocator, filters); +} + + +extern lzma_ret +lzma_delta_props_encode(const void *options, uint8_t *out) +{ + // The caller must have already validated the options, so it's + // LZMA_PROG_ERROR if they are invalid. + if (lzma_delta_coder_memusage(options) == UINT64_MAX) + return LZMA_PROG_ERROR; + + const lzma_options_delta *opt = options; + out[0] = opt->dist - LZMA_DELTA_DIST_MIN; + + return LZMA_OK; +} Index: contrib/xz/src/liblzma/delta/delta_private.h =================================================================== --- contrib/xz/src/liblzma/delta/delta_private.h (revision 0) +++ contrib/xz/src/liblzma/delta/delta_private.h (revision 0) @@ -0,0 +1,37 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_private.h +/// \brief Private common stuff for Delta encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_PRIVATE_H +#define LZMA_DELTA_PRIVATE_H + +#include "delta_common.h" + +struct lzma_coder_s { + /// Next coder in the chain + lzma_next_coder next; + + /// Delta distance + size_t distance; + + /// Position in history[] + uint8_t pos; + + /// Buffer to hold history of the original data + uint8_t history[LZMA_DELTA_DIST_MAX]; +}; + + +extern lzma_ret lzma_delta_coder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters); + +#endif Index: contrib/xz/src/liblzma/delta/delta_encoder.h =================================================================== --- contrib/xz/src/liblzma/delta/delta_encoder.h (revision 0) +++ contrib/xz/src/liblzma/delta/delta_encoder.h (revision 0) @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_encoder.h +/// \brief Delta filter encoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_ENCODER_H +#define LZMA_DELTA_ENCODER_H + +#include "delta_common.h" + +extern lzma_ret lzma_delta_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_delta_props_encode(const void *options, uint8_t *out); + +#endif Index: contrib/xz/src/liblzma/check/crc64_fast.c =================================================================== --- contrib/xz/src/liblzma/check/crc64_fast.c (revision 0) +++ contrib/xz/src/liblzma/check/crc64_fast.c (revision 0) @@ -0,0 +1,72 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc64.c +/// \brief CRC64 calculation +/// +/// Calculate the CRC64 using the slice-by-four algorithm. 
This is the same +/// idea that is used in crc32_fast.c, but for CRC64 we use only four tables +/// instead of eight to avoid increasing CPU cache usage. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" +#include "crc_macros.h" + + +#ifdef WORDS_BIGENDIAN +# define A1(x) ((x) >> 56) +#else +# define A1 A +#endif + + +// See the comments in crc32_fast.c. They aren't duplicated here. +extern LZMA_API(uint64_t) +lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc) +{ + crc = ~crc; + +#ifdef WORDS_BIGENDIAN + crc = bswap64(crc); +#endif + + if (size > 4) { + while ((uintptr_t)(buf) & 3) { + crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); + --size; + } + + const uint8_t *const limit = buf + (size & ~(size_t)(3)); + size &= (size_t)(3); + + while (buf < limit) { +#ifdef WORDS_BIGENDIAN + const uint32_t tmp = (crc >> 32) + ^ *(const uint32_t *)(buf); +#else + const uint32_t tmp = crc ^ *(const uint32_t *)(buf); +#endif + buf += 4; + + crc = lzma_crc64_table[3][A(tmp)] + ^ lzma_crc64_table[2][B(tmp)] + ^ S32(crc) + ^ lzma_crc64_table[1][C(tmp)] + ^ lzma_crc64_table[0][D(tmp)]; + } + } + + while (size-- != 0) + crc = lzma_crc64_table[0][*buf++ ^ A1(crc)] ^ S8(crc); + +#ifdef WORDS_BIGENDIAN + crc = bswap64(crc); +#endif + + return ~crc; +} Index: contrib/xz/src/liblzma/check/crc32_table_be.h =================================================================== --- contrib/xz/src/liblzma/check/crc32_table_be.h (revision 0) +++ contrib/xz/src/liblzma/check/crc32_table_be.h (revision 0) @@ -0,0 +1,525 @@ +/* This file has been automatically generated by crc32_tablegen.c. 
*/ + +const uint32_t lzma_crc32_table[8][256] = { + { + 0x00000000, 0x96300777, 0x2C610EEE, 0xBA510999, + 0x19C46D07, 0x8FF46A70, 0x35A563E9, 0xA395649E, + 0x3288DB0E, 0xA4B8DC79, 0x1EE9D5E0, 0x88D9D297, + 0x2B4CB609, 0xBD7CB17E, 0x072DB8E7, 0x911DBF90, + 0x6410B71D, 0xF220B06A, 0x4871B9F3, 0xDE41BE84, + 0x7DD4DA1A, 0xEBE4DD6D, 0x51B5D4F4, 0xC785D383, + 0x56986C13, 0xC0A86B64, 0x7AF962FD, 0xECC9658A, + 0x4F5C0114, 0xD96C0663, 0x633D0FFA, 0xF50D088D, + 0xC8206E3B, 0x5E10694C, 0xE44160D5, 0x727167A2, + 0xD1E4033C, 0x47D4044B, 0xFD850DD2, 0x6BB50AA5, + 0xFAA8B535, 0x6C98B242, 0xD6C9BBDB, 0x40F9BCAC, + 0xE36CD832, 0x755CDF45, 0xCF0DD6DC, 0x593DD1AB, + 0xAC30D926, 0x3A00DE51, 0x8051D7C8, 0x1661D0BF, + 0xB5F4B421, 0x23C4B356, 0x9995BACF, 0x0FA5BDB8, + 0x9EB80228, 0x0888055F, 0xB2D90CC6, 0x24E90BB1, + 0x877C6F2F, 0x114C6858, 0xAB1D61C1, 0x3D2D66B6, + 0x9041DC76, 0x0671DB01, 0xBC20D298, 0x2A10D5EF, + 0x8985B171, 0x1FB5B606, 0xA5E4BF9F, 0x33D4B8E8, + 0xA2C90778, 0x34F9000F, 0x8EA80996, 0x18980EE1, + 0xBB0D6A7F, 0x2D3D6D08, 0x976C6491, 0x015C63E6, + 0xF4516B6B, 0x62616C1C, 0xD8306585, 0x4E0062F2, + 0xED95066C, 0x7BA5011B, 0xC1F40882, 0x57C40FF5, + 0xC6D9B065, 0x50E9B712, 0xEAB8BE8B, 0x7C88B9FC, + 0xDF1DDD62, 0x492DDA15, 0xF37CD38C, 0x654CD4FB, + 0x5861B24D, 0xCE51B53A, 0x7400BCA3, 0xE230BBD4, + 0x41A5DF4A, 0xD795D83D, 0x6DC4D1A4, 0xFBF4D6D3, + 0x6AE96943, 0xFCD96E34, 0x468867AD, 0xD0B860DA, + 0x732D0444, 0xE51D0333, 0x5F4C0AAA, 0xC97C0DDD, + 0x3C710550, 0xAA410227, 0x10100BBE, 0x86200CC9, + 0x25B56857, 0xB3856F20, 0x09D466B9, 0x9FE461CE, + 0x0EF9DE5E, 0x98C9D929, 0x2298D0B0, 0xB4A8D7C7, + 0x173DB359, 0x810DB42E, 0x3B5CBDB7, 0xAD6CBAC0, + 0x2083B8ED, 0xB6B3BF9A, 0x0CE2B603, 0x9AD2B174, + 0x3947D5EA, 0xAF77D29D, 0x1526DB04, 0x8316DC73, + 0x120B63E3, 0x843B6494, 0x3E6A6D0D, 0xA85A6A7A, + 0x0BCF0EE4, 0x9DFF0993, 0x27AE000A, 0xB19E077D, + 0x44930FF0, 0xD2A30887, 0x68F2011E, 0xFEC20669, + 0x5D5762F7, 0xCB676580, 0x71366C19, 0xE7066B6E, + 0x761BD4FE, 0xE02BD389, 0x5A7ADA10, 
0xCC4ADD67, + 0x6FDFB9F9, 0xF9EFBE8E, 0x43BEB717, 0xD58EB060, + 0xE8A3D6D6, 0x7E93D1A1, 0xC4C2D838, 0x52F2DF4F, + 0xF167BBD1, 0x6757BCA6, 0xDD06B53F, 0x4B36B248, + 0xDA2B0DD8, 0x4C1B0AAF, 0xF64A0336, 0x607A0441, + 0xC3EF60DF, 0x55DF67A8, 0xEF8E6E31, 0x79BE6946, + 0x8CB361CB, 0x1A8366BC, 0xA0D26F25, 0x36E26852, + 0x95770CCC, 0x03470BBB, 0xB9160222, 0x2F260555, + 0xBE3BBAC5, 0x280BBDB2, 0x925AB42B, 0x046AB35C, + 0xA7FFD7C2, 0x31CFD0B5, 0x8B9ED92C, 0x1DAEDE5B, + 0xB0C2649B, 0x26F263EC, 0x9CA36A75, 0x0A936D02, + 0xA906099C, 0x3F360EEB, 0x85670772, 0x13570005, + 0x824ABF95, 0x147AB8E2, 0xAE2BB17B, 0x381BB60C, + 0x9B8ED292, 0x0DBED5E5, 0xB7EFDC7C, 0x21DFDB0B, + 0xD4D2D386, 0x42E2D4F1, 0xF8B3DD68, 0x6E83DA1F, + 0xCD16BE81, 0x5B26B9F6, 0xE177B06F, 0x7747B718, + 0xE65A0888, 0x706A0FFF, 0xCA3B0666, 0x5C0B0111, + 0xFF9E658F, 0x69AE62F8, 0xD3FF6B61, 0x45CF6C16, + 0x78E20AA0, 0xEED20DD7, 0x5483044E, 0xC2B30339, + 0x612667A7, 0xF71660D0, 0x4D476949, 0xDB776E3E, + 0x4A6AD1AE, 0xDC5AD6D9, 0x660BDF40, 0xF03BD837, + 0x53AEBCA9, 0xC59EBBDE, 0x7FCFB247, 0xE9FFB530, + 0x1CF2BDBD, 0x8AC2BACA, 0x3093B353, 0xA6A3B424, + 0x0536D0BA, 0x9306D7CD, 0x2957DE54, 0xBF67D923, + 0x2E7A66B3, 0xB84A61C4, 0x021B685D, 0x942B6F2A, + 0x37BE0BB4, 0xA18E0CC3, 0x1BDF055A, 0x8DEF022D + }, { + 0x00000000, 0x41311B19, 0x82623632, 0xC3532D2B, + 0x04C56C64, 0x45F4777D, 0x86A75A56, 0xC796414F, + 0x088AD9C8, 0x49BBC2D1, 0x8AE8EFFA, 0xCBD9F4E3, + 0x0C4FB5AC, 0x4D7EAEB5, 0x8E2D839E, 0xCF1C9887, + 0x5112C24A, 0x1023D953, 0xD370F478, 0x9241EF61, + 0x55D7AE2E, 0x14E6B537, 0xD7B5981C, 0x96848305, + 0x59981B82, 0x18A9009B, 0xDBFA2DB0, 0x9ACB36A9, + 0x5D5D77E6, 0x1C6C6CFF, 0xDF3F41D4, 0x9E0E5ACD, + 0xA2248495, 0xE3159F8C, 0x2046B2A7, 0x6177A9BE, + 0xA6E1E8F1, 0xE7D0F3E8, 0x2483DEC3, 0x65B2C5DA, + 0xAAAE5D5D, 0xEB9F4644, 0x28CC6B6F, 0x69FD7076, + 0xAE6B3139, 0xEF5A2A20, 0x2C09070B, 0x6D381C12, + 0xF33646DF, 0xB2075DC6, 0x715470ED, 0x30656BF4, + 0xF7F32ABB, 0xB6C231A2, 0x75911C89, 0x34A00790, + 0xFBBC9F17, 0xBA8D840E, 
0x79DEA925, 0x38EFB23C, + 0xFF79F373, 0xBE48E86A, 0x7D1BC541, 0x3C2ADE58, + 0x054F79F0, 0x447E62E9, 0x872D4FC2, 0xC61C54DB, + 0x018A1594, 0x40BB0E8D, 0x83E823A6, 0xC2D938BF, + 0x0DC5A038, 0x4CF4BB21, 0x8FA7960A, 0xCE968D13, + 0x0900CC5C, 0x4831D745, 0x8B62FA6E, 0xCA53E177, + 0x545DBBBA, 0x156CA0A3, 0xD63F8D88, 0x970E9691, + 0x5098D7DE, 0x11A9CCC7, 0xD2FAE1EC, 0x93CBFAF5, + 0x5CD76272, 0x1DE6796B, 0xDEB55440, 0x9F844F59, + 0x58120E16, 0x1923150F, 0xDA703824, 0x9B41233D, + 0xA76BFD65, 0xE65AE67C, 0x2509CB57, 0x6438D04E, + 0xA3AE9101, 0xE29F8A18, 0x21CCA733, 0x60FDBC2A, + 0xAFE124AD, 0xEED03FB4, 0x2D83129F, 0x6CB20986, + 0xAB2448C9, 0xEA1553D0, 0x29467EFB, 0x687765E2, + 0xF6793F2F, 0xB7482436, 0x741B091D, 0x352A1204, + 0xF2BC534B, 0xB38D4852, 0x70DE6579, 0x31EF7E60, + 0xFEF3E6E7, 0xBFC2FDFE, 0x7C91D0D5, 0x3DA0CBCC, + 0xFA368A83, 0xBB07919A, 0x7854BCB1, 0x3965A7A8, + 0x4B98833B, 0x0AA99822, 0xC9FAB509, 0x88CBAE10, + 0x4F5DEF5F, 0x0E6CF446, 0xCD3FD96D, 0x8C0EC274, + 0x43125AF3, 0x022341EA, 0xC1706CC1, 0x804177D8, + 0x47D73697, 0x06E62D8E, 0xC5B500A5, 0x84841BBC, + 0x1A8A4171, 0x5BBB5A68, 0x98E87743, 0xD9D96C5A, + 0x1E4F2D15, 0x5F7E360C, 0x9C2D1B27, 0xDD1C003E, + 0x120098B9, 0x533183A0, 0x9062AE8B, 0xD153B592, + 0x16C5F4DD, 0x57F4EFC4, 0x94A7C2EF, 0xD596D9F6, + 0xE9BC07AE, 0xA88D1CB7, 0x6BDE319C, 0x2AEF2A85, + 0xED796BCA, 0xAC4870D3, 0x6F1B5DF8, 0x2E2A46E1, + 0xE136DE66, 0xA007C57F, 0x6354E854, 0x2265F34D, + 0xE5F3B202, 0xA4C2A91B, 0x67918430, 0x26A09F29, + 0xB8AEC5E4, 0xF99FDEFD, 0x3ACCF3D6, 0x7BFDE8CF, + 0xBC6BA980, 0xFD5AB299, 0x3E099FB2, 0x7F3884AB, + 0xB0241C2C, 0xF1150735, 0x32462A1E, 0x73773107, + 0xB4E17048, 0xF5D06B51, 0x3683467A, 0x77B25D63, + 0x4ED7FACB, 0x0FE6E1D2, 0xCCB5CCF9, 0x8D84D7E0, + 0x4A1296AF, 0x0B238DB6, 0xC870A09D, 0x8941BB84, + 0x465D2303, 0x076C381A, 0xC43F1531, 0x850E0E28, + 0x42984F67, 0x03A9547E, 0xC0FA7955, 0x81CB624C, + 0x1FC53881, 0x5EF42398, 0x9DA70EB3, 0xDC9615AA, + 0x1B0054E5, 0x5A314FFC, 0x996262D7, 0xD85379CE, + 0x174FE149, 0x567EFA50, 
0x952DD77B, 0xD41CCC62, + 0x138A8D2D, 0x52BB9634, 0x91E8BB1F, 0xD0D9A006, + 0xECF37E5E, 0xADC26547, 0x6E91486C, 0x2FA05375, + 0xE836123A, 0xA9070923, 0x6A542408, 0x2B653F11, + 0xE479A796, 0xA548BC8F, 0x661B91A4, 0x272A8ABD, + 0xE0BCCBF2, 0xA18DD0EB, 0x62DEFDC0, 0x23EFE6D9, + 0xBDE1BC14, 0xFCD0A70D, 0x3F838A26, 0x7EB2913F, + 0xB924D070, 0xF815CB69, 0x3B46E642, 0x7A77FD5B, + 0xB56B65DC, 0xF45A7EC5, 0x370953EE, 0x763848F7, + 0xB1AE09B8, 0xF09F12A1, 0x33CC3F8A, 0x72FD2493 + }, { + 0x00000000, 0x376AC201, 0x6ED48403, 0x59BE4602, + 0xDCA80907, 0xEBC2CB06, 0xB27C8D04, 0x85164F05, + 0xB851130E, 0x8F3BD10F, 0xD685970D, 0xE1EF550C, + 0x64F91A09, 0x5393D808, 0x0A2D9E0A, 0x3D475C0B, + 0x70A3261C, 0x47C9E41D, 0x1E77A21F, 0x291D601E, + 0xAC0B2F1B, 0x9B61ED1A, 0xC2DFAB18, 0xF5B56919, + 0xC8F23512, 0xFF98F713, 0xA626B111, 0x914C7310, + 0x145A3C15, 0x2330FE14, 0x7A8EB816, 0x4DE47A17, + 0xE0464D38, 0xD72C8F39, 0x8E92C93B, 0xB9F80B3A, + 0x3CEE443F, 0x0B84863E, 0x523AC03C, 0x6550023D, + 0x58175E36, 0x6F7D9C37, 0x36C3DA35, 0x01A91834, + 0x84BF5731, 0xB3D59530, 0xEA6BD332, 0xDD011133, + 0x90E56B24, 0xA78FA925, 0xFE31EF27, 0xC95B2D26, + 0x4C4D6223, 0x7B27A022, 0x2299E620, 0x15F32421, + 0x28B4782A, 0x1FDEBA2B, 0x4660FC29, 0x710A3E28, + 0xF41C712D, 0xC376B32C, 0x9AC8F52E, 0xADA2372F, + 0xC08D9A70, 0xF7E75871, 0xAE591E73, 0x9933DC72, + 0x1C259377, 0x2B4F5176, 0x72F11774, 0x459BD575, + 0x78DC897E, 0x4FB64B7F, 0x16080D7D, 0x2162CF7C, + 0xA4748079, 0x931E4278, 0xCAA0047A, 0xFDCAC67B, + 0xB02EBC6C, 0x87447E6D, 0xDEFA386F, 0xE990FA6E, + 0x6C86B56B, 0x5BEC776A, 0x02523168, 0x3538F369, + 0x087FAF62, 0x3F156D63, 0x66AB2B61, 0x51C1E960, + 0xD4D7A665, 0xE3BD6464, 0xBA032266, 0x8D69E067, + 0x20CBD748, 0x17A11549, 0x4E1F534B, 0x7975914A, + 0xFC63DE4F, 0xCB091C4E, 0x92B75A4C, 0xA5DD984D, + 0x989AC446, 0xAFF00647, 0xF64E4045, 0xC1248244, + 0x4432CD41, 0x73580F40, 0x2AE64942, 0x1D8C8B43, + 0x5068F154, 0x67023355, 0x3EBC7557, 0x09D6B756, + 0x8CC0F853, 0xBBAA3A52, 0xE2147C50, 0xD57EBE51, + 0xE839E25A, 
0xDF53205B, 0x86ED6659, 0xB187A458, + 0x3491EB5D, 0x03FB295C, 0x5A456F5E, 0x6D2FAD5F, + 0x801B35E1, 0xB771F7E0, 0xEECFB1E2, 0xD9A573E3, + 0x5CB33CE6, 0x6BD9FEE7, 0x3267B8E5, 0x050D7AE4, + 0x384A26EF, 0x0F20E4EE, 0x569EA2EC, 0x61F460ED, + 0xE4E22FE8, 0xD388EDE9, 0x8A36ABEB, 0xBD5C69EA, + 0xF0B813FD, 0xC7D2D1FC, 0x9E6C97FE, 0xA90655FF, + 0x2C101AFA, 0x1B7AD8FB, 0x42C49EF9, 0x75AE5CF8, + 0x48E900F3, 0x7F83C2F2, 0x263D84F0, 0x115746F1, + 0x944109F4, 0xA32BCBF5, 0xFA958DF7, 0xCDFF4FF6, + 0x605D78D9, 0x5737BAD8, 0x0E89FCDA, 0x39E33EDB, + 0xBCF571DE, 0x8B9FB3DF, 0xD221F5DD, 0xE54B37DC, + 0xD80C6BD7, 0xEF66A9D6, 0xB6D8EFD4, 0x81B22DD5, + 0x04A462D0, 0x33CEA0D1, 0x6A70E6D3, 0x5D1A24D2, + 0x10FE5EC5, 0x27949CC4, 0x7E2ADAC6, 0x494018C7, + 0xCC5657C2, 0xFB3C95C3, 0xA282D3C1, 0x95E811C0, + 0xA8AF4DCB, 0x9FC58FCA, 0xC67BC9C8, 0xF1110BC9, + 0x740744CC, 0x436D86CD, 0x1AD3C0CF, 0x2DB902CE, + 0x4096AF91, 0x77FC6D90, 0x2E422B92, 0x1928E993, + 0x9C3EA696, 0xAB546497, 0xF2EA2295, 0xC580E094, + 0xF8C7BC9F, 0xCFAD7E9E, 0x9613389C, 0xA179FA9D, + 0x246FB598, 0x13057799, 0x4ABB319B, 0x7DD1F39A, + 0x3035898D, 0x075F4B8C, 0x5EE10D8E, 0x698BCF8F, + 0xEC9D808A, 0xDBF7428B, 0x82490489, 0xB523C688, + 0x88649A83, 0xBF0E5882, 0xE6B01E80, 0xD1DADC81, + 0x54CC9384, 0x63A65185, 0x3A181787, 0x0D72D586, + 0xA0D0E2A9, 0x97BA20A8, 0xCE0466AA, 0xF96EA4AB, + 0x7C78EBAE, 0x4B1229AF, 0x12AC6FAD, 0x25C6ADAC, + 0x1881F1A7, 0x2FEB33A6, 0x765575A4, 0x413FB7A5, + 0xC429F8A0, 0xF3433AA1, 0xAAFD7CA3, 0x9D97BEA2, + 0xD073C4B5, 0xE71906B4, 0xBEA740B6, 0x89CD82B7, + 0x0CDBCDB2, 0x3BB10FB3, 0x620F49B1, 0x55658BB0, + 0x6822D7BB, 0x5F4815BA, 0x06F653B8, 0x319C91B9, + 0xB48ADEBC, 0x83E01CBD, 0xDA5E5ABF, 0xED3498BE + }, { + 0x00000000, 0x6567BCB8, 0x8BC809AA, 0xEEAFB512, + 0x5797628F, 0x32F0DE37, 0xDC5F6B25, 0xB938D79D, + 0xEF28B4C5, 0x8A4F087D, 0x64E0BD6F, 0x018701D7, + 0xB8BFD64A, 0xDDD86AF2, 0x3377DFE0, 0x56106358, + 0x9F571950, 0xFA30A5E8, 0x149F10FA, 0x71F8AC42, + 0xC8C07BDF, 0xADA7C767, 0x43087275, 0x266FCECD, + 
0x707FAD95, 0x1518112D, 0xFBB7A43F, 0x9ED01887, + 0x27E8CF1A, 0x428F73A2, 0xAC20C6B0, 0xC9477A08, + 0x3EAF32A0, 0x5BC88E18, 0xB5673B0A, 0xD00087B2, + 0x6938502F, 0x0C5FEC97, 0xE2F05985, 0x8797E53D, + 0xD1878665, 0xB4E03ADD, 0x5A4F8FCF, 0x3F283377, + 0x8610E4EA, 0xE3775852, 0x0DD8ED40, 0x68BF51F8, + 0xA1F82BF0, 0xC49F9748, 0x2A30225A, 0x4F579EE2, + 0xF66F497F, 0x9308F5C7, 0x7DA740D5, 0x18C0FC6D, + 0x4ED09F35, 0x2BB7238D, 0xC518969F, 0xA07F2A27, + 0x1947FDBA, 0x7C204102, 0x928FF410, 0xF7E848A8, + 0x3D58149B, 0x583FA823, 0xB6901D31, 0xD3F7A189, + 0x6ACF7614, 0x0FA8CAAC, 0xE1077FBE, 0x8460C306, + 0xD270A05E, 0xB7171CE6, 0x59B8A9F4, 0x3CDF154C, + 0x85E7C2D1, 0xE0807E69, 0x0E2FCB7B, 0x6B4877C3, + 0xA20F0DCB, 0xC768B173, 0x29C70461, 0x4CA0B8D9, + 0xF5986F44, 0x90FFD3FC, 0x7E5066EE, 0x1B37DA56, + 0x4D27B90E, 0x284005B6, 0xC6EFB0A4, 0xA3880C1C, + 0x1AB0DB81, 0x7FD76739, 0x9178D22B, 0xF41F6E93, + 0x03F7263B, 0x66909A83, 0x883F2F91, 0xED589329, + 0x546044B4, 0x3107F80C, 0xDFA84D1E, 0xBACFF1A6, + 0xECDF92FE, 0x89B82E46, 0x67179B54, 0x027027EC, + 0xBB48F071, 0xDE2F4CC9, 0x3080F9DB, 0x55E74563, + 0x9CA03F6B, 0xF9C783D3, 0x176836C1, 0x720F8A79, + 0xCB375DE4, 0xAE50E15C, 0x40FF544E, 0x2598E8F6, + 0x73888BAE, 0x16EF3716, 0xF8408204, 0x9D273EBC, + 0x241FE921, 0x41785599, 0xAFD7E08B, 0xCAB05C33, + 0x3BB659ED, 0x5ED1E555, 0xB07E5047, 0xD519ECFF, + 0x6C213B62, 0x094687DA, 0xE7E932C8, 0x828E8E70, + 0xD49EED28, 0xB1F95190, 0x5F56E482, 0x3A31583A, + 0x83098FA7, 0xE66E331F, 0x08C1860D, 0x6DA63AB5, + 0xA4E140BD, 0xC186FC05, 0x2F294917, 0x4A4EF5AF, + 0xF3762232, 0x96119E8A, 0x78BE2B98, 0x1DD99720, + 0x4BC9F478, 0x2EAE48C0, 0xC001FDD2, 0xA566416A, + 0x1C5E96F7, 0x79392A4F, 0x97969F5D, 0xF2F123E5, + 0x05196B4D, 0x607ED7F5, 0x8ED162E7, 0xEBB6DE5F, + 0x528E09C2, 0x37E9B57A, 0xD9460068, 0xBC21BCD0, + 0xEA31DF88, 0x8F566330, 0x61F9D622, 0x049E6A9A, + 0xBDA6BD07, 0xD8C101BF, 0x366EB4AD, 0x53090815, + 0x9A4E721D, 0xFF29CEA5, 0x11867BB7, 0x74E1C70F, + 0xCDD91092, 0xA8BEAC2A, 0x46111938, 0x2376A580, + 
0x7566C6D8, 0x10017A60, 0xFEAECF72, 0x9BC973CA, + 0x22F1A457, 0x479618EF, 0xA939ADFD, 0xCC5E1145, + 0x06EE4D76, 0x6389F1CE, 0x8D2644DC, 0xE841F864, + 0x51792FF9, 0x341E9341, 0xDAB12653, 0xBFD69AEB, + 0xE9C6F9B3, 0x8CA1450B, 0x620EF019, 0x07694CA1, + 0xBE519B3C, 0xDB362784, 0x35999296, 0x50FE2E2E, + 0x99B95426, 0xFCDEE89E, 0x12715D8C, 0x7716E134, + 0xCE2E36A9, 0xAB498A11, 0x45E63F03, 0x208183BB, + 0x7691E0E3, 0x13F65C5B, 0xFD59E949, 0x983E55F1, + 0x2106826C, 0x44613ED4, 0xAACE8BC6, 0xCFA9377E, + 0x38417FD6, 0x5D26C36E, 0xB389767C, 0xD6EECAC4, + 0x6FD61D59, 0x0AB1A1E1, 0xE41E14F3, 0x8179A84B, + 0xD769CB13, 0xB20E77AB, 0x5CA1C2B9, 0x39C67E01, + 0x80FEA99C, 0xE5991524, 0x0B36A036, 0x6E511C8E, + 0xA7166686, 0xC271DA3E, 0x2CDE6F2C, 0x49B9D394, + 0xF0810409, 0x95E6B8B1, 0x7B490DA3, 0x1E2EB11B, + 0x483ED243, 0x2D596EFB, 0xC3F6DBE9, 0xA6916751, + 0x1FA9B0CC, 0x7ACE0C74, 0x9461B966, 0xF10605DE + }, { + 0x00000000, 0xB029603D, 0x6053C07A, 0xD07AA047, + 0xC0A680F5, 0x708FE0C8, 0xA0F5408F, 0x10DC20B2, + 0xC14B7030, 0x7162100D, 0xA118B04A, 0x1131D077, + 0x01EDF0C5, 0xB1C490F8, 0x61BE30BF, 0xD1975082, + 0x8297E060, 0x32BE805D, 0xE2C4201A, 0x52ED4027, + 0x42316095, 0xF21800A8, 0x2262A0EF, 0x924BC0D2, + 0x43DC9050, 0xF3F5F06D, 0x238F502A, 0x93A63017, + 0x837A10A5, 0x33537098, 0xE329D0DF, 0x5300B0E2, + 0x042FC1C1, 0xB406A1FC, 0x647C01BB, 0xD4556186, + 0xC4894134, 0x74A02109, 0xA4DA814E, 0x14F3E173, + 0xC564B1F1, 0x754DD1CC, 0xA537718B, 0x151E11B6, + 0x05C23104, 0xB5EB5139, 0x6591F17E, 0xD5B89143, + 0x86B821A1, 0x3691419C, 0xE6EBE1DB, 0x56C281E6, + 0x461EA154, 0xF637C169, 0x264D612E, 0x96640113, + 0x47F35191, 0xF7DA31AC, 0x27A091EB, 0x9789F1D6, + 0x8755D164, 0x377CB159, 0xE706111E, 0x572F7123, + 0x4958F358, 0xF9719365, 0x290B3322, 0x9922531F, + 0x89FE73AD, 0x39D71390, 0xE9ADB3D7, 0x5984D3EA, + 0x88138368, 0x383AE355, 0xE8404312, 0x5869232F, + 0x48B5039D, 0xF89C63A0, 0x28E6C3E7, 0x98CFA3DA, + 0xCBCF1338, 0x7BE67305, 0xAB9CD342, 0x1BB5B37F, + 0x0B6993CD, 0xBB40F3F0, 0x6B3A53B7, 
0xDB13338A, + 0x0A846308, 0xBAAD0335, 0x6AD7A372, 0xDAFEC34F, + 0xCA22E3FD, 0x7A0B83C0, 0xAA712387, 0x1A5843BA, + 0x4D773299, 0xFD5E52A4, 0x2D24F2E3, 0x9D0D92DE, + 0x8DD1B26C, 0x3DF8D251, 0xED827216, 0x5DAB122B, + 0x8C3C42A9, 0x3C152294, 0xEC6F82D3, 0x5C46E2EE, + 0x4C9AC25C, 0xFCB3A261, 0x2CC90226, 0x9CE0621B, + 0xCFE0D2F9, 0x7FC9B2C4, 0xAFB31283, 0x1F9A72BE, + 0x0F46520C, 0xBF6F3231, 0x6F159276, 0xDF3CF24B, + 0x0EABA2C9, 0xBE82C2F4, 0x6EF862B3, 0xDED1028E, + 0xCE0D223C, 0x7E244201, 0xAE5EE246, 0x1E77827B, + 0x92B0E6B1, 0x2299868C, 0xF2E326CB, 0x42CA46F6, + 0x52166644, 0xE23F0679, 0x3245A63E, 0x826CC603, + 0x53FB9681, 0xE3D2F6BC, 0x33A856FB, 0x838136C6, + 0x935D1674, 0x23747649, 0xF30ED60E, 0x4327B633, + 0x102706D1, 0xA00E66EC, 0x7074C6AB, 0xC05DA696, + 0xD0818624, 0x60A8E619, 0xB0D2465E, 0x00FB2663, + 0xD16C76E1, 0x614516DC, 0xB13FB69B, 0x0116D6A6, + 0x11CAF614, 0xA1E39629, 0x7199366E, 0xC1B05653, + 0x969F2770, 0x26B6474D, 0xF6CCE70A, 0x46E58737, + 0x5639A785, 0xE610C7B8, 0x366A67FF, 0x864307C2, + 0x57D45740, 0xE7FD377D, 0x3787973A, 0x87AEF707, + 0x9772D7B5, 0x275BB788, 0xF72117CF, 0x470877F2, + 0x1408C710, 0xA421A72D, 0x745B076A, 0xC4726757, + 0xD4AE47E5, 0x648727D8, 0xB4FD879F, 0x04D4E7A2, + 0xD543B720, 0x656AD71D, 0xB510775A, 0x05391767, + 0x15E537D5, 0xA5CC57E8, 0x75B6F7AF, 0xC59F9792, + 0xDBE815E9, 0x6BC175D4, 0xBBBBD593, 0x0B92B5AE, + 0x1B4E951C, 0xAB67F521, 0x7B1D5566, 0xCB34355B, + 0x1AA365D9, 0xAA8A05E4, 0x7AF0A5A3, 0xCAD9C59E, + 0xDA05E52C, 0x6A2C8511, 0xBA562556, 0x0A7F456B, + 0x597FF589, 0xE95695B4, 0x392C35F3, 0x890555CE, + 0x99D9757C, 0x29F01541, 0xF98AB506, 0x49A3D53B, + 0x983485B9, 0x281DE584, 0xF86745C3, 0x484E25FE, + 0x5892054C, 0xE8BB6571, 0x38C1C536, 0x88E8A50B, + 0xDFC7D428, 0x6FEEB415, 0xBF941452, 0x0FBD746F, + 0x1F6154DD, 0xAF4834E0, 0x7F3294A7, 0xCF1BF49A, + 0x1E8CA418, 0xAEA5C425, 0x7EDF6462, 0xCEF6045F, + 0xDE2A24ED, 0x6E0344D0, 0xBE79E497, 0x0E5084AA, + 0x5D503448, 0xED795475, 0x3D03F432, 0x8D2A940F, + 0x9DF6B4BD, 0x2DDFD480, 0xFDA574C7, 
0x4D8C14FA, + 0x9C1B4478, 0x2C322445, 0xFC488402, 0x4C61E43F, + 0x5CBDC48D, 0xEC94A4B0, 0x3CEE04F7, 0x8CC764CA + }, { + 0x00000000, 0xA5D35CCB, 0x0BA1C84D, 0xAE729486, + 0x1642919B, 0xB391CD50, 0x1DE359D6, 0xB830051D, + 0x6D8253EC, 0xC8510F27, 0x66239BA1, 0xC3F0C76A, + 0x7BC0C277, 0xDE139EBC, 0x70610A3A, 0xD5B256F1, + 0x9B02D603, 0x3ED18AC8, 0x90A31E4E, 0x35704285, + 0x8D404798, 0x28931B53, 0x86E18FD5, 0x2332D31E, + 0xF68085EF, 0x5353D924, 0xFD214DA2, 0x58F21169, + 0xE0C21474, 0x451148BF, 0xEB63DC39, 0x4EB080F2, + 0x3605AC07, 0x93D6F0CC, 0x3DA4644A, 0x98773881, + 0x20473D9C, 0x85946157, 0x2BE6F5D1, 0x8E35A91A, + 0x5B87FFEB, 0xFE54A320, 0x502637A6, 0xF5F56B6D, + 0x4DC56E70, 0xE81632BB, 0x4664A63D, 0xE3B7FAF6, + 0xAD077A04, 0x08D426CF, 0xA6A6B249, 0x0375EE82, + 0xBB45EB9F, 0x1E96B754, 0xB0E423D2, 0x15377F19, + 0xC08529E8, 0x65567523, 0xCB24E1A5, 0x6EF7BD6E, + 0xD6C7B873, 0x7314E4B8, 0xDD66703E, 0x78B52CF5, + 0x6C0A580F, 0xC9D904C4, 0x67AB9042, 0xC278CC89, + 0x7A48C994, 0xDF9B955F, 0x71E901D9, 0xD43A5D12, + 0x01880BE3, 0xA45B5728, 0x0A29C3AE, 0xAFFA9F65, + 0x17CA9A78, 0xB219C6B3, 0x1C6B5235, 0xB9B80EFE, + 0xF7088E0C, 0x52DBD2C7, 0xFCA94641, 0x597A1A8A, + 0xE14A1F97, 0x4499435C, 0xEAEBD7DA, 0x4F388B11, + 0x9A8ADDE0, 0x3F59812B, 0x912B15AD, 0x34F84966, + 0x8CC84C7B, 0x291B10B0, 0x87698436, 0x22BAD8FD, + 0x5A0FF408, 0xFFDCA8C3, 0x51AE3C45, 0xF47D608E, + 0x4C4D6593, 0xE99E3958, 0x47ECADDE, 0xE23FF115, + 0x378DA7E4, 0x925EFB2F, 0x3C2C6FA9, 0x99FF3362, + 0x21CF367F, 0x841C6AB4, 0x2A6EFE32, 0x8FBDA2F9, + 0xC10D220B, 0x64DE7EC0, 0xCAACEA46, 0x6F7FB68D, + 0xD74FB390, 0x729CEF5B, 0xDCEE7BDD, 0x793D2716, + 0xAC8F71E7, 0x095C2D2C, 0xA72EB9AA, 0x02FDE561, + 0xBACDE07C, 0x1F1EBCB7, 0xB16C2831, 0x14BF74FA, + 0xD814B01E, 0x7DC7ECD5, 0xD3B57853, 0x76662498, + 0xCE562185, 0x6B857D4E, 0xC5F7E9C8, 0x6024B503, + 0xB596E3F2, 0x1045BF39, 0xBE372BBF, 0x1BE47774, + 0xA3D47269, 0x06072EA2, 0xA875BA24, 0x0DA6E6EF, + 0x4316661D, 0xE6C53AD6, 0x48B7AE50, 0xED64F29B, + 0x5554F786, 0xF087AB4D, 
0x5EF53FCB, 0xFB266300, + 0x2E9435F1, 0x8B47693A, 0x2535FDBC, 0x80E6A177, + 0x38D6A46A, 0x9D05F8A1, 0x33776C27, 0x96A430EC, + 0xEE111C19, 0x4BC240D2, 0xE5B0D454, 0x4063889F, + 0xF8538D82, 0x5D80D149, 0xF3F245CF, 0x56211904, + 0x83934FF5, 0x2640133E, 0x883287B8, 0x2DE1DB73, + 0x95D1DE6E, 0x300282A5, 0x9E701623, 0x3BA34AE8, + 0x7513CA1A, 0xD0C096D1, 0x7EB20257, 0xDB615E9C, + 0x63515B81, 0xC682074A, 0x68F093CC, 0xCD23CF07, + 0x189199F6, 0xBD42C53D, 0x133051BB, 0xB6E30D70, + 0x0ED3086D, 0xAB0054A6, 0x0572C020, 0xA0A19CEB, + 0xB41EE811, 0x11CDB4DA, 0xBFBF205C, 0x1A6C7C97, + 0xA25C798A, 0x078F2541, 0xA9FDB1C7, 0x0C2EED0C, + 0xD99CBBFD, 0x7C4FE736, 0xD23D73B0, 0x77EE2F7B, + 0xCFDE2A66, 0x6A0D76AD, 0xC47FE22B, 0x61ACBEE0, + 0x2F1C3E12, 0x8ACF62D9, 0x24BDF65F, 0x816EAA94, + 0x395EAF89, 0x9C8DF342, 0x32FF67C4, 0x972C3B0F, + 0x429E6DFE, 0xE74D3135, 0x493FA5B3, 0xECECF978, + 0x54DCFC65, 0xF10FA0AE, 0x5F7D3428, 0xFAAE68E3, + 0x821B4416, 0x27C818DD, 0x89BA8C5B, 0x2C69D090, + 0x9459D58D, 0x318A8946, 0x9FF81DC0, 0x3A2B410B, + 0xEF9917FA, 0x4A4A4B31, 0xE438DFB7, 0x41EB837C, + 0xF9DB8661, 0x5C08DAAA, 0xF27A4E2C, 0x57A912E7, + 0x19199215, 0xBCCACEDE, 0x12B85A58, 0xB76B0693, + 0x0F5B038E, 0xAA885F45, 0x04FACBC3, 0xA1299708, + 0x749BC1F9, 0xD1489D32, 0x7F3A09B4, 0xDAE9557F, + 0x62D95062, 0xC70A0CA9, 0x6978982F, 0xCCABC4E4 + }, { + 0x00000000, 0xB40B77A6, 0x29119F97, 0x9D1AE831, + 0x13244FF4, 0xA72F3852, 0x3A35D063, 0x8E3EA7C5, + 0x674EEF33, 0xD3459895, 0x4E5F70A4, 0xFA540702, + 0x746AA0C7, 0xC061D761, 0x5D7B3F50, 0xE97048F6, + 0xCE9CDE67, 0x7A97A9C1, 0xE78D41F0, 0x53863656, + 0xDDB89193, 0x69B3E635, 0xF4A90E04, 0x40A279A2, + 0xA9D23154, 0x1DD946F2, 0x80C3AEC3, 0x34C8D965, + 0xBAF67EA0, 0x0EFD0906, 0x93E7E137, 0x27EC9691, + 0x9C39BDCF, 0x2832CA69, 0xB5282258, 0x012355FE, + 0x8F1DF23B, 0x3B16859D, 0xA60C6DAC, 0x12071A0A, + 0xFB7752FC, 0x4F7C255A, 0xD266CD6B, 0x666DBACD, + 0xE8531D08, 0x5C586AAE, 0xC142829F, 0x7549F539, + 0x52A563A8, 0xE6AE140E, 0x7BB4FC3F, 0xCFBF8B99, + 0x41812C5C, 
0xF58A5BFA, 0x6890B3CB, 0xDC9BC46D, + 0x35EB8C9B, 0x81E0FB3D, 0x1CFA130C, 0xA8F164AA, + 0x26CFC36F, 0x92C4B4C9, 0x0FDE5CF8, 0xBBD52B5E, + 0x79750B44, 0xCD7E7CE2, 0x506494D3, 0xE46FE375, + 0x6A5144B0, 0xDE5A3316, 0x4340DB27, 0xF74BAC81, + 0x1E3BE477, 0xAA3093D1, 0x372A7BE0, 0x83210C46, + 0x0D1FAB83, 0xB914DC25, 0x240E3414, 0x900543B2, + 0xB7E9D523, 0x03E2A285, 0x9EF84AB4, 0x2AF33D12, + 0xA4CD9AD7, 0x10C6ED71, 0x8DDC0540, 0x39D772E6, + 0xD0A73A10, 0x64AC4DB6, 0xF9B6A587, 0x4DBDD221, + 0xC38375E4, 0x77880242, 0xEA92EA73, 0x5E999DD5, + 0xE54CB68B, 0x5147C12D, 0xCC5D291C, 0x78565EBA, + 0xF668F97F, 0x42638ED9, 0xDF7966E8, 0x6B72114E, + 0x820259B8, 0x36092E1E, 0xAB13C62F, 0x1F18B189, + 0x9126164C, 0x252D61EA, 0xB83789DB, 0x0C3CFE7D, + 0x2BD068EC, 0x9FDB1F4A, 0x02C1F77B, 0xB6CA80DD, + 0x38F42718, 0x8CFF50BE, 0x11E5B88F, 0xA5EECF29, + 0x4C9E87DF, 0xF895F079, 0x658F1848, 0xD1846FEE, + 0x5FBAC82B, 0xEBB1BF8D, 0x76AB57BC, 0xC2A0201A, + 0xF2EA1688, 0x46E1612E, 0xDBFB891F, 0x6FF0FEB9, + 0xE1CE597C, 0x55C52EDA, 0xC8DFC6EB, 0x7CD4B14D, + 0x95A4F9BB, 0x21AF8E1D, 0xBCB5662C, 0x08BE118A, + 0x8680B64F, 0x328BC1E9, 0xAF9129D8, 0x1B9A5E7E, + 0x3C76C8EF, 0x887DBF49, 0x15675778, 0xA16C20DE, + 0x2F52871B, 0x9B59F0BD, 0x0643188C, 0xB2486F2A, + 0x5B3827DC, 0xEF33507A, 0x7229B84B, 0xC622CFED, + 0x481C6828, 0xFC171F8E, 0x610DF7BF, 0xD5068019, + 0x6ED3AB47, 0xDAD8DCE1, 0x47C234D0, 0xF3C94376, + 0x7DF7E4B3, 0xC9FC9315, 0x54E67B24, 0xE0ED0C82, + 0x099D4474, 0xBD9633D2, 0x208CDBE3, 0x9487AC45, + 0x1AB90B80, 0xAEB27C26, 0x33A89417, 0x87A3E3B1, + 0xA04F7520, 0x14440286, 0x895EEAB7, 0x3D559D11, + 0xB36B3AD4, 0x07604D72, 0x9A7AA543, 0x2E71D2E5, + 0xC7019A13, 0x730AEDB5, 0xEE100584, 0x5A1B7222, + 0xD425D5E7, 0x602EA241, 0xFD344A70, 0x493F3DD6, + 0x8B9F1DCC, 0x3F946A6A, 0xA28E825B, 0x1685F5FD, + 0x98BB5238, 0x2CB0259E, 0xB1AACDAF, 0x05A1BA09, + 0xECD1F2FF, 0x58DA8559, 0xC5C06D68, 0x71CB1ACE, + 0xFFF5BD0B, 0x4BFECAAD, 0xD6E4229C, 0x62EF553A, + 0x4503C3AB, 0xF108B40D, 0x6C125C3C, 0xD8192B9A, + 0x56278C5F, 
0xE22CFBF9, 0x7F3613C8, 0xCB3D646E, + 0x224D2C98, 0x96465B3E, 0x0B5CB30F, 0xBF57C4A9, + 0x3169636C, 0x856214CA, 0x1878FCFB, 0xAC738B5D, + 0x17A6A003, 0xA3ADD7A5, 0x3EB73F94, 0x8ABC4832, + 0x0482EFF7, 0xB0899851, 0x2D937060, 0x999807C6, + 0x70E84F30, 0xC4E33896, 0x59F9D0A7, 0xEDF2A701, + 0x63CC00C4, 0xD7C77762, 0x4ADD9F53, 0xFED6E8F5, + 0xD93A7E64, 0x6D3109C2, 0xF02BE1F3, 0x44209655, + 0xCA1E3190, 0x7E154636, 0xE30FAE07, 0x5704D9A1, + 0xBE749157, 0x0A7FE6F1, 0x97650EC0, 0x236E7966, + 0xAD50DEA3, 0x195BA905, 0x84414134, 0x304A3692 + }, { + 0x00000000, 0x9E00AACC, 0x7D072542, 0xE3078F8E, + 0xFA0E4A84, 0x640EE048, 0x87096FC6, 0x1909C50A, + 0xB51BE5D3, 0x2B1B4F1F, 0xC81CC091, 0x561C6A5D, + 0x4F15AF57, 0xD115059B, 0x32128A15, 0xAC1220D9, + 0x2B31BB7C, 0xB53111B0, 0x56369E3E, 0xC83634F2, + 0xD13FF1F8, 0x4F3F5B34, 0xAC38D4BA, 0x32387E76, + 0x9E2A5EAF, 0x002AF463, 0xE32D7BED, 0x7D2DD121, + 0x6424142B, 0xFA24BEE7, 0x19233169, 0x87239BA5, + 0x566276F9, 0xC862DC35, 0x2B6553BB, 0xB565F977, + 0xAC6C3C7D, 0x326C96B1, 0xD16B193F, 0x4F6BB3F3, + 0xE379932A, 0x7D7939E6, 0x9E7EB668, 0x007E1CA4, + 0x1977D9AE, 0x87777362, 0x6470FCEC, 0xFA705620, + 0x7D53CD85, 0xE3536749, 0x0054E8C7, 0x9E54420B, + 0x875D8701, 0x195D2DCD, 0xFA5AA243, 0x645A088F, + 0xC8482856, 0x5648829A, 0xB54F0D14, 0x2B4FA7D8, + 0x324662D2, 0xAC46C81E, 0x4F414790, 0xD141ED5C, + 0xEDC29D29, 0x73C237E5, 0x90C5B86B, 0x0EC512A7, + 0x17CCD7AD, 0x89CC7D61, 0x6ACBF2EF, 0xF4CB5823, + 0x58D978FA, 0xC6D9D236, 0x25DE5DB8, 0xBBDEF774, + 0xA2D7327E, 0x3CD798B2, 0xDFD0173C, 0x41D0BDF0, + 0xC6F32655, 0x58F38C99, 0xBBF40317, 0x25F4A9DB, + 0x3CFD6CD1, 0xA2FDC61D, 0x41FA4993, 0xDFFAE35F, + 0x73E8C386, 0xEDE8694A, 0x0EEFE6C4, 0x90EF4C08, + 0x89E68902, 0x17E623CE, 0xF4E1AC40, 0x6AE1068C, + 0xBBA0EBD0, 0x25A0411C, 0xC6A7CE92, 0x58A7645E, + 0x41AEA154, 0xDFAE0B98, 0x3CA98416, 0xA2A92EDA, + 0x0EBB0E03, 0x90BBA4CF, 0x73BC2B41, 0xEDBC818D, + 0xF4B54487, 0x6AB5EE4B, 0x89B261C5, 0x17B2CB09, + 0x909150AC, 0x0E91FA60, 0xED9675EE, 0x7396DF22, + 
0x6A9F1A28, 0xF49FB0E4, 0x17983F6A, 0x899895A6, + 0x258AB57F, 0xBB8A1FB3, 0x588D903D, 0xC68D3AF1, + 0xDF84FFFB, 0x41845537, 0xA283DAB9, 0x3C837075, + 0xDA853B53, 0x4485919F, 0xA7821E11, 0x3982B4DD, + 0x208B71D7, 0xBE8BDB1B, 0x5D8C5495, 0xC38CFE59, + 0x6F9EDE80, 0xF19E744C, 0x1299FBC2, 0x8C99510E, + 0x95909404, 0x0B903EC8, 0xE897B146, 0x76971B8A, + 0xF1B4802F, 0x6FB42AE3, 0x8CB3A56D, 0x12B30FA1, + 0x0BBACAAB, 0x95BA6067, 0x76BDEFE9, 0xE8BD4525, + 0x44AF65FC, 0xDAAFCF30, 0x39A840BE, 0xA7A8EA72, + 0xBEA12F78, 0x20A185B4, 0xC3A60A3A, 0x5DA6A0F6, + 0x8CE74DAA, 0x12E7E766, 0xF1E068E8, 0x6FE0C224, + 0x76E9072E, 0xE8E9ADE2, 0x0BEE226C, 0x95EE88A0, + 0x39FCA879, 0xA7FC02B5, 0x44FB8D3B, 0xDAFB27F7, + 0xC3F2E2FD, 0x5DF24831, 0xBEF5C7BF, 0x20F56D73, + 0xA7D6F6D6, 0x39D65C1A, 0xDAD1D394, 0x44D17958, + 0x5DD8BC52, 0xC3D8169E, 0x20DF9910, 0xBEDF33DC, + 0x12CD1305, 0x8CCDB9C9, 0x6FCA3647, 0xF1CA9C8B, + 0xE8C35981, 0x76C3F34D, 0x95C47CC3, 0x0BC4D60F, + 0x3747A67A, 0xA9470CB6, 0x4A408338, 0xD44029F4, + 0xCD49ECFE, 0x53494632, 0xB04EC9BC, 0x2E4E6370, + 0x825C43A9, 0x1C5CE965, 0xFF5B66EB, 0x615BCC27, + 0x7852092D, 0xE652A3E1, 0x05552C6F, 0x9B5586A3, + 0x1C761D06, 0x8276B7CA, 0x61713844, 0xFF719288, + 0xE6785782, 0x7878FD4E, 0x9B7F72C0, 0x057FD80C, + 0xA96DF8D5, 0x376D5219, 0xD46ADD97, 0x4A6A775B, + 0x5363B251, 0xCD63189D, 0x2E649713, 0xB0643DDF, + 0x6125D083, 0xFF257A4F, 0x1C22F5C1, 0x82225F0D, + 0x9B2B9A07, 0x052B30CB, 0xE62CBF45, 0x782C1589, + 0xD43E3550, 0x4A3E9F9C, 0xA9391012, 0x3739BADE, + 0x2E307FD4, 0xB030D518, 0x53375A96, 0xCD37F05A, + 0x4A146BFF, 0xD414C133, 0x37134EBD, 0xA913E471, + 0xB01A217B, 0x2E1A8BB7, 0xCD1D0439, 0x531DAEF5, + 0xFF0F8E2C, 0x610F24E0, 0x8208AB6E, 0x1C0801A2, + 0x0501C4A8, 0x9B016E64, 0x7806E1EA, 0xE6064B26 + } +}; Index: contrib/xz/src/liblzma/check/crc32_x86.S =================================================================== --- contrib/xz/src/liblzma/check/crc32_x86.S (revision 0) +++ contrib/xz/src/liblzma/check/crc32_x86.S (revision 0) @@ -0,0 
+1,304 @@ +/* + * Speed-optimized CRC32 using slicing-by-eight algorithm + * + * This uses only i386 instructions, but it is optimized for i686 and later + * (including e.g. Pentium II/III/IV, Athlon XP, and Core 2). For i586 + * (e.g. Pentium), slicing-by-four would be better, and even the C version + * of slicing-by-eight built with gcc -march=i586 tends to be a little bit + * better than this. Very few probably run this code on i586 or older x86 + * so this shouldn't be a problem in practice. + * + * Authors: Igor Pavlov (original version) + * Lasse Collin (AT&T syntax, PIC support, better portability) + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * This code needs lzma_crc32_table, which can be created using the + * following C code: + +uint32_t lzma_crc32_table[8][256]; + +void +init_table(void) +{ + // IEEE-802.3 + static const uint32_t poly32 = UINT32_C(0xEDB88320); + + // Castagnoli + // static const uint32_t poly32 = UINT32_C(0x82F63B78); + + // Koopman + // static const uint32_t poly32 = UINT32_C(0xEB31D82E); + + for (size_t s = 0; s < 8; ++s) { + for (size_t b = 0; b < 256; ++b) { + uint32_t r = s == 0 ? b : lzma_crc32_table[s - 1][b]; + + for (size_t i = 0; i < 8; ++i) { + if (r & 1) + r = (r >> 1) ^ poly32; + else + r >>= 1; + } + + lzma_crc32_table[s][b] = r; + } + } +} + + * The prototype of the CRC32 function: + * extern uint32_t lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc); + */ + +/* + * On some systems, the functions need to be prefixed. The prefix is + * usually an underscore. 
+ */ +#ifndef __USER_LABEL_PREFIX__ +# define __USER_LABEL_PREFIX__ +#endif +#define MAKE_SYM_CAT(prefix, sym) prefix ## sym +#define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym) +#define LZMA_CRC32 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32) +#define LZMA_CRC32_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32_table) + +/* + * Solaris assembler doesn't have .p2align, and Darwin uses .align + * differently than GNU/Linux and Solaris. + */ +#if defined(__APPLE__) || defined(__MSDOS__) +# define ALIGN(pow2, abs) .align pow2 +#else +# define ALIGN(pow2, abs) .align abs +#endif + + .text + .globl LZMA_CRC32 + +#if !defined(__APPLE__) && !defined(_WIN32) && !defined(__CYGWIN__) \ + && !defined(__MSDOS__) + .type LZMA_CRC32, @function +#endif + + ALIGN(4, 16) +LZMA_CRC32: + /* + * Register usage: + * %eax crc + * %esi buf + * %edi size or buf + size + * %ebx lzma_crc32_table + * %ebp Table index + * %ecx Temporary + * %edx Temporary + */ + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + movl 0x14(%esp), %esi /* buf */ + movl 0x18(%esp), %edi /* size */ + movl 0x1C(%esp), %eax /* crc */ + + /* + * Store the address of lzma_crc32_table to %ebx. This is needed to + * get position-independent code (PIC). + * + * The PIC macro is defined by libtool, while __PIC__ is defined + * by GCC but only on some systems. Testing for both makes it simpler + * to test this code without libtool, and keeps the code working also + * when built with libtool but using something else than GCC. + * + * I understood that libtool may define PIC on Windows even though + * the code in Windows DLLs is not PIC in sense that it is in ELF + * binaries, so we need a separate check to always use the non-PIC + * code on Windows. 
+ */ +#if (!defined(PIC) && !defined(__PIC__)) \ + || (defined(_WIN32) || defined(__CYGWIN__)) + /* Not PIC */ + movl $ LZMA_CRC32_TABLE, %ebx +#elif defined(__APPLE__) + /* Mach-O */ + call .L_get_pc +.L_pic: + leal .L_lzma_crc32_table$non_lazy_ptr-.L_pic(%ebx), %ebx + movl (%ebx), %ebx +#else + /* ELF */ + call .L_get_pc + addl $_GLOBAL_OFFSET_TABLE_, %ebx + movl LZMA_CRC32_TABLE@GOT(%ebx), %ebx +#endif + + /* Complem