Commit 32643ecd authored by longpanda's avatar longpanda
Browse files

compress 7za.exe

parent d2e59305
/*
* Branch/Call/Jump (BCJ) filter decoders
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#include "xz_private.h"
/*
* The rest of the file is inside this ifdef. It makes things a little more
* convenient when building without support for any BCJ filters.
*/
#ifdef XZ_DEC_BCJ
struct xz_dec_bcj {
/* Type of the BCJ filter being used */
enum {
BCJ_X86 = 4, /* x86 or x86-64 */
BCJ_POWERPC = 5, /* Big endian only */
BCJ_IA64 = 6, /* Big or little endian */
BCJ_ARM = 7, /* Little endian only */
BCJ_ARMTHUMB = 8, /* Little endian only */
BCJ_SPARC = 9 /* Big or little endian */
} type;
/*
* Return value of the next filter in the chain. We need to preserve
* this information across calls, because we must not call the next
* filter anymore once it has returned XZ_STREAM_END.
*/
enum xz_ret ret;
/* True if we are operating in single-call mode. */
bool single_call;
/*
* Absolute position relative to the beginning of the uncompressed
* data (in a single .xz Block). We care only about the lowest 32
* bits so this doesn't need to be uint64_t even with big files.
*/
uint32_t pos;
/* x86 filter state */
uint32_t x86_prev_mask;
/* Temporary space to hold the variables from struct xz_buf */
uint8_t *out;
size_t out_pos;
size_t out_size;
struct {
/* Amount of already filtered data in the beginning of buf */
size_t filtered;
/* Total amount of data currently stored in buf */
size_t size;
/*
* Buffer to hold a mix of filtered and unfiltered data. This
* needs to be big enough to hold Alignment + 2 * Look-ahead:
*
* Type Alignment Look-ahead
* x86 1 4
* PowerPC 4 0
* IA-64 16 0
* ARM 4 0
* ARM-Thumb 2 2
* SPARC 4 0
*/
uint8_t buf[16];
} temp;
};
#ifdef XZ_DEC_X86
/*
* This is used to test the most significant byte of a memory address
* in an x86 instruction.
*/
static inline int bcj_x86_test_msbyte(uint8_t b)
{
return b == 0x00 || b == 0xFF;
}
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
static const bool mask_to_allowed_status[8]
= { true, true, true, false, true, false, false, false };
static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
size_t i;
size_t prev_pos = (size_t)-1;
uint32_t prev_mask = s->x86_prev_mask;
uint32_t src;
uint32_t dest;
uint32_t j;
uint8_t b;
if (size <= 4)
return 0;
size -= 4;
for (i = 0; i < size; ++i) {
if ((buf[i] & 0xFE) != 0xE8)
continue;
prev_pos = i - prev_pos;
if (prev_pos > 3) {
prev_mask = 0;
} else {
prev_mask = (prev_mask << (prev_pos - 1)) & 7;
if (prev_mask != 0) {
b = buf[i + 4 - mask_to_bit_num[prev_mask]];
if (!mask_to_allowed_status[prev_mask]
|| bcj_x86_test_msbyte(b)) {
prev_pos = i;
prev_mask = (prev_mask << 1) | 1;
continue;
}
}
}
prev_pos = i;
if (bcj_x86_test_msbyte(buf[i + 4])) {
src = get_unaligned_le32(buf + i + 1);
while (true) {
dest = src - (s->pos + (uint32_t)i + 5);
if (prev_mask == 0)
break;
j = mask_to_bit_num[prev_mask] * 8;
b = (uint8_t)(dest >> (24 - j));
if (!bcj_x86_test_msbyte(b))
break;
src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
}
dest &= 0x01FFFFFF;
dest |= (uint32_t)0 - (dest & 0x01000000);
put_unaligned_le32(dest, buf + i + 1);
i += 4;
} else {
prev_mask = (prev_mask << 1) | 1;
}
}
prev_pos = i - prev_pos;
s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
return i;
}
#endif
#ifdef XZ_DEC_POWERPC
static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
size_t i;
uint32_t instr;
for (i = 0; i + 4 <= size; i += 4) {
instr = get_unaligned_be32(buf + i);
if ((instr & 0xFC000003) == 0x48000001) {
instr &= 0x03FFFFFC;
instr -= s->pos + (uint32_t)i;
instr &= 0x03FFFFFC;
instr |= 0x48000001;
put_unaligned_be32(instr, buf + i);
}
}
return i;
}
#endif
#ifdef XZ_DEC_IA64
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
static const uint8_t branch_table[32] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
4, 4, 6, 6, 0, 0, 7, 7,
4, 4, 0, 0, 4, 4, 0, 0
};
/*
* The local variables take a little bit stack space, but it's less
* than what LZMA2 decoder takes, so it doesn't make sense to reduce
* stack usage here without doing that for the LZMA2 decoder too.
*/
/* Loop counters */
size_t i;
size_t j;
/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
uint32_t slot;
/* Bitwise offset of the instruction indicated by slot */
uint32_t bit_pos;
/* bit_pos split into byte and bit parts */
uint32_t byte_pos;
uint32_t bit_res;
/* Address part of an instruction */
uint32_t addr;
/* Mask used to detect which instructions to convert */
uint32_t mask;
/* 41-bit instruction stored somewhere in the lowest 48 bits */
uint64_t instr;
/* Instruction normalized with bit_res for easier manipulation */
uint64_t norm;
for (i = 0; i + 16 <= size; i += 16) {
mask = branch_table[buf[i] & 0x1F];
for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
if (((mask >> slot) & 1) == 0)
continue;
byte_pos = bit_pos >> 3;
bit_res = bit_pos & 7;
instr = 0;
for (j = 0; j < 6; ++j)
instr |= (uint64_t)(buf[i + j + byte_pos])
<< (8 * j);
norm = instr >> bit_res;
if (((norm >> 37) & 0x0F) == 0x05
&& ((norm >> 9) & 0x07) == 0) {
addr = (norm >> 13) & 0x0FFFFF;
addr |= ((uint32_t)(norm >> 36) & 1) << 20;
addr <<= 4;
addr -= s->pos + (uint32_t)i;
addr >>= 4;
norm &= ~((uint64_t)0x8FFFFF << 13);
norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
norm |= (uint64_t)(addr & 0x100000)
<< (36 - 20);
instr &= (1 << bit_res) - 1;
instr |= norm << bit_res;
for (j = 0; j < 6; j++)
buf[i + j + byte_pos]
= (uint8_t)(instr >> (8 * j));
}
}
}
return i;
}
#endif
#ifdef XZ_DEC_ARM
static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
size_t i;
uint32_t addr;
for (i = 0; i + 4 <= size; i += 4) {
if (buf[i + 3] == 0xEB) {
addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
| ((uint32_t)buf[i + 2] << 16);
addr <<= 2;
addr -= s->pos + (uint32_t)i + 8;
addr >>= 2;
buf[i] = (uint8_t)addr;
buf[i + 1] = (uint8_t)(addr >> 8);
buf[i + 2] = (uint8_t)(addr >> 16);
}
}
return i;
}
#endif
#ifdef XZ_DEC_ARMTHUMB
static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
size_t i;
uint32_t addr;
for (i = 0; i + 4 <= size; i += 2) {
if ((buf[i + 1] & 0xF8) == 0xF0
&& (buf[i + 3] & 0xF8) == 0xF8) {
addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
| ((uint32_t)buf[i] << 11)
| (((uint32_t)buf[i + 3] & 0x07) << 8)
| (uint32_t)buf[i + 2];
addr <<= 1;
addr -= s->pos + (uint32_t)i + 4;
addr >>= 1;
buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
buf[i] = (uint8_t)(addr >> 11);
buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
buf[i + 2] = (uint8_t)addr;
i += 2;
}
}
return i;
}
#endif
#ifdef XZ_DEC_SPARC
static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
size_t i;
uint32_t instr;
for (i = 0; i + 4 <= size; i += 4) {
instr = get_unaligned_be32(buf + i);
if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
instr <<= 2;
instr -= s->pos + (uint32_t)i;
instr >>= 2;
instr = ((uint32_t)0x40000000 - (instr & 0x400000))
| 0x40000000 | (instr & 0x3FFFFF);
put_unaligned_be32(instr, buf + i);
}
}
return i;
}
#endif
/*
* Apply the selected BCJ filter. Update *pos and s->pos to match the amount
* of data that got filtered.
*
* NOTE: This is implemented as a switch statement to avoid using function
* pointers, which could be problematic in the kernel boot code, which must
* avoid pointers to static data (at least on x86).
*/
static void bcj_apply(struct xz_dec_bcj *s,
uint8_t *buf, size_t *pos, size_t size)
{
size_t filtered;
buf += *pos;
size -= *pos;
switch (s->type) {
#ifdef XZ_DEC_X86
case BCJ_X86:
filtered = bcj_x86(s, buf, size);
break;
#endif
#ifdef XZ_DEC_POWERPC
case BCJ_POWERPC:
filtered = bcj_powerpc(s, buf, size);
break;
#endif
#ifdef XZ_DEC_IA64
case BCJ_IA64:
filtered = bcj_ia64(s, buf, size);
break;
#endif
#ifdef XZ_DEC_ARM
case BCJ_ARM:
filtered = bcj_arm(s, buf, size);
break;
#endif
#ifdef XZ_DEC_ARMTHUMB
case BCJ_ARMTHUMB:
filtered = bcj_armthumb(s, buf, size);
break;
#endif
#ifdef XZ_DEC_SPARC
case BCJ_SPARC:
filtered = bcj_sparc(s, buf, size);
break;
#endif
default:
/* Never reached but silence compiler warnings. */
filtered = 0;
break;
}
*pos += filtered;
s->pos += filtered;
}
/*
* Flush pending filtered data from temp to the output buffer.
* Move the remaining mixture of possibly filtered and unfiltered
* data to the beginning of temp.
*/
static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
{
size_t copy_size;
copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
b->out_pos += copy_size;
s->temp.filtered -= copy_size;
s->temp.size -= copy_size;
memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
}
/*
* The BCJ filter functions are primitive in sense that they process the
* data in chunks of 1-16 bytes. To hide this issue, this function does
* some buffering.
*/
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
struct xz_dec_lzma2 *lzma2,
struct xz_buf *b)
{
size_t out_start;
/*
* Flush pending already filtered data to the output buffer. Return
* immediatelly if we couldn't flush everything, or if the next
* filter in the chain had already returned XZ_STREAM_END.
*/
if (s->temp.filtered > 0) {
bcj_flush(s, b);
if (s->temp.filtered > 0)
return XZ_OK;
if (s->ret == XZ_STREAM_END)
return XZ_STREAM_END;
}
/*
* If we have more output space than what is currently pending in
* temp, copy the unfiltered data from temp to the output buffer
* and try to fill the output buffer by decoding more data from the
* next filter in the chain. Apply the BCJ filter on the new data
* in the output buffer. If everything cannot be filtered, copy it
* to temp and rewind the output buffer position accordingly.
*
* This needs to be always run when temp.size == 0 to handle a special
* case where the output buffer is full and the next filter has no
* more output coming but hasn't returned XZ_STREAM_END yet.
*/
if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
out_start = b->out_pos;
memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
b->out_pos += s->temp.size;
s->ret = xz_dec_lzma2_run(lzma2, b);
if (s->ret != XZ_STREAM_END
&& (s->ret != XZ_OK || s->single_call))
return s->ret;
bcj_apply(s, b->out, &out_start, b->out_pos);
/*
* As an exception, if the next filter returned XZ_STREAM_END,
* we can do that too, since the last few bytes that remain
* unfiltered are meant to remain unfiltered.
*/
if (s->ret == XZ_STREAM_END)
return XZ_STREAM_END;
s->temp.size = b->out_pos - out_start;
b->out_pos -= s->temp.size;
memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
/*
* If there wasn't enough input to the next filter to fill
* the output buffer with unfiltered data, there's no point
* to try decoding more data to temp.
*/
if (b->out_pos + s->temp.size < b->out_size)
return XZ_OK;
}
/*
* We have unfiltered data in temp. If the output buffer isn't full
* yet, try to fill the temp buffer by decoding more data from the
* next filter. Apply the BCJ filter on temp. Then we hopefully can
* fill the actual output buffer by copying filtered data from temp.
* A mix of filtered and unfiltered data may be left in temp; it will
* be taken care on the next call to this function.
*/
if (b->out_pos < b->out_size) {
/* Make b->out{,_pos,_size} temporarily point to s->temp. */
s->out = b->out;
s->out_pos = b->out_pos;
s->out_size = b->out_size;
b->out = s->temp.buf;
b->out_pos = s->temp.size;
b->out_size = sizeof(s->temp.buf);
s->ret = xz_dec_lzma2_run(lzma2, b);
s->temp.size = b->out_pos;
b->out = s->out;
b->out_pos = s->out_pos;
b->out_size = s->out_size;
if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
return s->ret;
bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
/*
* If the next filter returned XZ_STREAM_END, we mark that
* everything is filtered, since the last unfiltered bytes
* of the stream are meant to be left as is.
*/
if (s->ret == XZ_STREAM_END)
s->temp.filtered = s->temp.size;
bcj_flush(s, b);
if (s->temp.filtered > 0)
return XZ_OK;
}
return s->ret;
}
XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
{
struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
if (s != NULL)
s->single_call = single_call;
return s;
}
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
{
switch (id) {
#ifdef XZ_DEC_X86
case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
case BCJ_SPARC:
#endif
break;
default:
/* Unsupported Filter ID */
return XZ_OPTIONS_ERROR;
}
s->type = id;
s->ret = XZ_OK;
s->pos = 0;
s->x86_prev_mask = 0;
s->temp.filtered = 0;
s->temp.size = 0;
return XZ_OK;
}
#endif
/*
* XZ decoder module information
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#include <linux/module.h>
#include <linux/xz.h>
EXPORT_SYMBOL(xz_dec_init);
EXPORT_SYMBOL(xz_dec_reset);
EXPORT_SYMBOL(xz_dec_run);
EXPORT_SYMBOL(xz_dec_end);
MODULE_DESCRIPTION("XZ decompressor");
MODULE_VERSION("1.0");
MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org> and Igor Pavlov");
/*
* This code is in the public domain, but in Linux it's simplest to just
* say it's GPL and consider the authors as the copyright holders.
*/
MODULE_LICENSE("GPL");
/*
* XZ decoder tester
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/crc32.h>
#include <linux/xz.h>
/* Maximum supported dictionary size */
#define DICT_MAX (1 << 20)
/* Device name to pass to register_chrdev(). */
#define DEVICE_NAME "xz_dec_test"
/* Dynamically allocated device major number */
static int device_major;
/*
* We reuse the same decoder state, and thus can decode only one
* file at a time.
*/
static bool device_is_open;
/* XZ decoder state */
static struct xz_dec *state;
/*
* Return value of xz_dec_run(). We need to avoid calling xz_dec_run() after
* it has returned XZ_STREAM_END, so we make this static.
*/
static enum xz_ret ret;
/*
* Input and output buffers. The input buffer is used as a temporary safe
* place for the data coming from the userspace.
*/
static uint8_t buffer_in[1024];
static uint8_t buffer_out[1024];
/*
* Structure to pass the input and output buffers to the XZ decoder.
* A few of the fields are never modified so we initialize them here.
*/
static struct xz_buf buffers = {
.in = buffer_in,
.out = buffer_out,
.out_size = sizeof(buffer_out)
};
/*
* CRC32 of uncompressed data. This is used to give the user a simple way
* to check that the decoder produces correct output.
*/
static uint32_t crc;
static int xz_dec_test_open(struct inode *i, struct file *f)
{
if (device_is_open)
return -EBUSY;
device_is_open = true;
xz_dec_reset(state);
ret = XZ_OK;
crc = 0xFFFFFFFF;
buffers.in_pos = 0;
buffers.in_size = 0;
buffers.out_pos = 0;
printk(KERN_INFO DEVICE_NAME ": opened\n");
return 0;
}
static int xz_dec_test_release(struct inode *i, struct file *f)
{
device_is_open = false;
if (ret == XZ_OK)
printk(KERN_INFO DEVICE_NAME ": input was truncated\n");
printk(KERN_INFO DEVICE_NAME ": closed\n");
return 0;
}
/*
* Decode the data given to us from the userspace. CRC32 of the uncompressed
* data is calculated and is printed at the end of successful decoding. The
* uncompressed data isn't stored anywhere for further use.
*
* The .xz file must have exactly one Stream and no Stream Padding. The data
* after the first Stream is considered to be garbage.
*/
static ssize_t xz_dec_test_write(struct file *file, const char __user *buf,
size_t size, loff_t *pos)
{
size_t remaining;
if (ret != XZ_OK) {
if (size > 0)
printk(KERN_INFO DEVICE_NAME ": %zu bytes of "
"garbage at the end of the file\n",
size);
return -ENOSPC;
}
printk(KERN_INFO DEVICE_NAME ": decoding %zu bytes of input\n",
size);
remaining = size;
while ((remaining > 0 || buffers.out_pos == buffers.out_size)
&& ret == XZ_OK) {
if (buffers.in_pos == buffers.in_size) {
buffers.in_pos = 0;
buffers.in_size = min(remaining, sizeof(buffer_in));
if (copy_from_user(buffer_in, buf, buffers.in_size))
return -EFAULT;
buf += buffers.in_size;
remaining -= buffers.in_size;
}
buffers.out_pos = 0;
ret = xz_dec_run(state, &buffers);
crc = crc32(crc, buffer_out, buffers.out_pos);
}
switch (ret) {
case XZ_OK:
printk(KERN_INFO DEVICE_NAME ": XZ_OK\n");
return size;
case XZ_STREAM_END:
printk(KERN_INFO DEVICE_NAME ": XZ_STREAM_END, "
"CRC32 = 0x%08X\n", ~crc);
return size - remaining - (buffers.in_size - buffers.in_pos);
case XZ_MEMLIMIT_ERROR:
printk(KERN_INFO DEVICE_NAME ": XZ_MEMLIMIT_ERROR\n");
break;
case XZ_FORMAT_ERROR:
printk(KERN_INFO DEVICE_NAME ": XZ_FORMAT_ERROR\n");
break;
case XZ_OPTIONS_ERROR:
printk(KERN_INFO DEVICE_NAME ": XZ_OPTIONS_ERROR\n");
break;
case XZ_DATA_ERROR:
printk(KERN_INFO DEVICE_NAME ": XZ_DATA_ERROR\n");
break;
case XZ_BUF_ERROR:
printk(KERN_INFO DEVICE_NAME ": XZ_BUF_ERROR\n");
break;
default:
printk(KERN_INFO DEVICE_NAME ": Bug detected!\n");
break;
}
return -EIO;
}
/* Allocate the XZ decoder state and register the character device. */
static int __init xz_dec_test_init(void)
{
static const struct file_operations fileops = {
.owner = THIS_MODULE,
.open = &xz_dec_test_open,
.release = &xz_dec_test_release,
.write = &xz_dec_test_write
};
state = xz_dec_init(XZ_PREALLOC, DICT_MAX);
if (state == NULL)
return -ENOMEM;
device_major = register_chrdev(0, DEVICE_NAME, &fileops);
if (device_major < 0) {
xz_dec_end(state);
return device_major;
}
printk(KERN_INFO DEVICE_NAME ": module loaded\n");
printk(KERN_INFO DEVICE_NAME ": Create a device node with "
"'mknod " DEVICE_NAME " c %d 0' and write .xz files "
"to it.\n", device_major);
return 0;
}
static void __exit xz_dec_test_exit(void)
{
unregister_chrdev(device_major, DEVICE_NAME);
xz_dec_end(state);
printk(KERN_INFO DEVICE_NAME ": module unloaded\n");
}
module_init(xz_dec_test_init);
module_exit(xz_dec_test_exit);
MODULE_DESCRIPTION("XZ decompressor tester");
MODULE_VERSION("1.0");
MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org>");
/*
* This code is in the public domain, but in Linux it's simplest to just
* say it's GPL and consider the authors as the copyright holders.
*/
MODULE_LICENSE("GPL");
/*
* LZMA2 definitions
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef XZ_LZMA2_H
#define XZ_LZMA2_H
/* Range coder constants */
#define RC_SHIFT_BITS 8
#define RC_TOP_BITS 24
#define RC_TOP_VALUE (1 << RC_TOP_BITS)
#define RC_BIT_MODEL_TOTAL_BITS 11
#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
#define RC_MOVE_BITS 5
/*
* Maximum number of position states. A position state is the lowest pb
* number of bits of the current uncompressed offset. In some places there
* are different sets of probabilities for different position states.
*/
#define POS_STATES_MAX (1 << 4)
/*
* This enum is used to track which LZMA symbols have occurred most recently
* and in which order. This information is used to predict the next symbol.
*
* Symbols:
* - Literal: One 8-bit byte
* - Match: Repeat a chunk of data at some distance
* - Long repeat: Multi-byte match at a recently seen distance
* - Short repeat: One-byte repeat at a recently seen distance
*
* The symbol names are in from STATE_oldest_older_previous. REP means
* either short or long repeated match, and NONLIT means any non-literal.
*/
enum lzma_state {
STATE_LIT_LIT,
STATE_MATCH_LIT_LIT,
STATE_REP_LIT_LIT,
STATE_SHORTREP_LIT_LIT,
STATE_MATCH_LIT,
STATE_REP_LIT,
STATE_SHORTREP_LIT,
STATE_LIT_MATCH,
STATE_LIT_LONGREP,
STATE_LIT_SHORTREP,
STATE_NONLIT_MATCH,
STATE_NONLIT_REP
};
/* Total number of states */
#define STATES 12
/* The lowest 7 states indicate that the previous state was a literal. */
#define LIT_STATES 7
/* Indicate that the latest symbol was a literal. */
static inline void lzma_state_literal(enum lzma_state *state)
{
if (*state <= STATE_SHORTREP_LIT_LIT)
*state = STATE_LIT_LIT;
else if (*state <= STATE_LIT_SHORTREP)
*state -= 3;
else
*state -= 6;
}
/* Indicate that the latest symbol was a match. */
static inline void lzma_state_match(enum lzma_state *state)
{
*state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
}
/* Indicate that the latest state was a long repeated match. */
static inline void lzma_state_long_rep(enum lzma_state *state)
{
*state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
}
/* Indicate that the latest symbol was a short match. */
static inline void lzma_state_short_rep(enum lzma_state *state)
{
*state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
}
/* Test if the previous symbol was a literal. */
static inline bool lzma_state_is_literal(enum lzma_state state)
{
return state < LIT_STATES;
}
/* Each literal coder is divided in three sections:
* - 0x001-0x0FF: Without match byte
* - 0x101-0x1FF: With match byte; match bit is 0
* - 0x201-0x2FF: With match byte; match bit is 1
*
* Match byte is used when the previous LZMA symbol was something else than
* a literal (that is, it was some kind of match).
*/
#define LITERAL_CODER_SIZE 0x300
/* Maximum number of literal coders */
#define LITERAL_CODERS_MAX (1 << 4)
/* Minimum length of a match is two bytes. */
#define MATCH_LEN_MIN 2
/* Match length is encoded with 4, 5, or 10 bits.
*
* Length Bits
* 2-9 4 = Choice=0 + 3 bits
* 10-17 5 = Choice=1 + Choice2=0 + 3 bits
* 18-273 10 = Choice=1 + Choice2=1 + 8 bits
*/
#define LEN_LOW_BITS 3
#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
#define LEN_MID_BITS 3
#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
#define LEN_HIGH_BITS 8
#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
/*
* Maximum length of a match is 273 which is a result of the encoding
* described above.
*/
#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
/*
* Different sets of probabilities are used for match distances that have
* very short match length: Lengths of 2, 3, and 4 bytes have a separate
* set of probabilities for each length. The matches with longer length
* use a shared set of probabilities.
*/
#define DIST_STATES 4
/*
* Get the index of the appropriate probability array for decoding
* the distance slot.
*/
static inline uint32_t lzma_get_dist_state(uint32_t len)
{
return len < DIST_STATES + MATCH_LEN_MIN
? len - MATCH_LEN_MIN : DIST_STATES - 1;
}
/*
* The highest two bits of a 32-bit match distance are encoded using six bits.
* This six-bit value is called a distance slot. This way encoding a 32-bit
* value takes 6-36 bits, larger values taking more bits.
*/
#define DIST_SLOT_BITS 6
#define DIST_SLOTS (1 << DIST_SLOT_BITS)
/* Match distances up to 127 are fully encoded using probabilities. Since
* the highest two bits (distance slot) are always encoded using six bits,
* the distances 0-3 don't need any additional bits to encode, since the
* distance slot itself is the same as the actual distance. DIST_MODEL_START
* indicates the first distance slot where at least one additional bit is
* needed.
*/
#define DIST_MODEL_START 4
/*
* Match distances greater than 127 are encoded in three pieces:
* - distance slot: the highest two bits
* - direct bits: 2-26 bits below the highest two bits
* - alignment bits: four lowest bits
*
* Direct bits don't use any probabilities.
*
* The distance slot value of 14 is for distances 128-191.
*/
#define DIST_MODEL_END 14
/* Distance slots that indicate a distance <= 127. */
#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
/*
* For match distances greater than 127, only the highest two bits and the
* lowest four bits (alignment) is encoded using probabilities.
*/
#define ALIGN_BITS 4
#define ALIGN_SIZE (1 << ALIGN_BITS)
#define ALIGN_MASK (ALIGN_SIZE - 1)
/* Total number of all probability variables */
#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
/*
* LZMA remembers the four most recent match distances. Reusing these
* distances tends to take less space than re-encoding the actual
* distance value.
*/
#define REPS 4
#endif
/*
* Private includes and definitions
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef XZ_PRIVATE_H
#define XZ_PRIVATE_H
#ifdef __KERNEL__
# include <linux/xz.h>
# include <linux/kernel.h>
# include <asm/unaligned.h>
/* XZ_PREBOOT may be defined only via decompress_unxz.c. */
# ifndef XZ_PREBOOT
# include <linux/slab.h>
# include <linux/vmalloc.h>
# include <linux/string.h>
# ifdef CONFIG_XZ_DEC_X86
# define XZ_DEC_X86
# endif
# ifdef CONFIG_XZ_DEC_POWERPC
# define XZ_DEC_POWERPC
# endif
# ifdef CONFIG_XZ_DEC_IA64
# define XZ_DEC_IA64
# endif
# ifdef CONFIG_XZ_DEC_ARM
# define XZ_DEC_ARM
# endif
# ifdef CONFIG_XZ_DEC_ARMTHUMB
# define XZ_DEC_ARMTHUMB
# endif
# ifdef CONFIG_XZ_DEC_SPARC
# define XZ_DEC_SPARC
# endif
# define memeq(a, b, size) (memcmp(a, b, size) == 0)
# define memzero(buf, size) memset(buf, 0, size)
# endif
# define get_le32(p) le32_to_cpup((const uint32_t *)(p))
#else
/*
* For userspace builds, use a separate header to define the required
* macros and functions. This makes it easier to adapt the code into
* different environments and avoids clutter in the Linux kernel tree.
*/
# include "xz_config.h"
#endif
/* If no specific decoding mode is requested, enable support for all modes. */
#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \
&& !defined(XZ_DEC_DYNALLOC)
# define XZ_DEC_SINGLE
# define XZ_DEC_PREALLOC
# define XZ_DEC_DYNALLOC
#endif
/*
* The DEC_IS_foo(mode) macros are used in "if" statements. If only some
* of the supported modes are enabled, these macros will evaluate to true or
* false at compile time and thus allow the compiler to omit unneeded code.
*/
#ifdef XZ_DEC_SINGLE
# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
#else
# define DEC_IS_SINGLE(mode) (false)
#endif
#ifdef XZ_DEC_PREALLOC
# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
#else
# define DEC_IS_PREALLOC(mode) (false)
#endif
#ifdef XZ_DEC_DYNALLOC
# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
#else
# define DEC_IS_DYNALLOC(mode) (false)
#endif
#if !defined(XZ_DEC_SINGLE)
# define DEC_IS_MULTI(mode) (true)
#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC)
# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
#else
# define DEC_IS_MULTI(mode) (false)
#endif
/*
* If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
* XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
*/
#ifndef XZ_DEC_BCJ
# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
|| defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
|| defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \
|| defined(XZ_DEC_SPARC)
# define XZ_DEC_BCJ
# endif
#endif
/*
* Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
* before calling xz_dec_lzma2_run().
*/
XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
uint32_t dict_max);
/*
* Decode the LZMA2 properties (one byte) and reset the decoder. Return
* XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
* big enough, and XZ_OPTIONS_ERROR if props indicates something that this
* decoder doesn't support.
*/
XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
uint8_t props);
/* Decode raw LZMA2 stream from b->in to b->out. */
XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
struct xz_buf *b);
/* Free the memory allocated for the LZMA2 decoder. */
XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
#ifdef XZ_DEC_BCJ
/*
* Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
* calling xz_dec_bcj_run().
*/
XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call);
/*
* Decode the Filter ID of a BCJ filter. This implementation doesn't
* support custom start offsets, so no decoding of Filter Properties
* is needed. Returns XZ_OK if the given Filter ID is supported.
* Otherwise XZ_OPTIONS_ERROR is returned.
*/
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
/*
* Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
* a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
* must be called directly.
*/
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
struct xz_dec_lzma2 *lzma2,
struct xz_buf *b);
/* Free the memory allocated for the BCJ filters. */
#define xz_dec_bcj_end(s) kfree(s)
#endif
#endif
/*
* Definitions for handling the .xz file format
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef XZ_STREAM_H
#define XZ_STREAM_H
#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
# include <linux/crc32.h>
# undef crc32
# define xz_crc32(buf, size, crc) \
(~crc32_le(~(uint32_t)(crc), buf, size))
#endif
/*
* See the .xz file format specification at
* http://tukaani.org/xz/xz-file-format.txt
* to understand the container format.
*/
#define STREAM_HEADER_SIZE 12
#define HEADER_MAGIC "\3757zXZ"
#define HEADER_MAGIC_SIZE 6
#define FOOTER_MAGIC "YZ"
#define FOOTER_MAGIC_SIZE 2
/*
* Variable-length integer can hold a 63-bit unsigned integer or a special
* value indicating that the value is unknown.
*
* Experimental: vli_type can be defined to uint32_t to save a few bytes
* in code size (no effect on speed). Doing so limits the uncompressed and
* compressed size of the file to less than 256 MiB and may also weaken
* error detection slightly.
*/
typedef uint64_t vli_type;
#define VLI_MAX ((vli_type)-1 / 2)
#define VLI_UNKNOWN ((vli_type)-1)
/* Maximum encoded size of a VLI */
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
/* Integrity Check types */
enum xz_check {
XZ_CHECK_NONE = 0,
XZ_CHECK_CRC32 = 1,
XZ_CHECK_CRC64 = 4,
XZ_CHECK_SHA256 = 10
};
/* Maximum possible Check ID */
#define XZ_CHECK_MAX 15
#endif
#!/bin/sh
#
# This is a wrapper for xz to compress the kernel image using appropriate
# compression options depending on the architecture.
#
# Author: Lasse Collin <lasse.collin@tukaani.org>
#
# This file has been put into the public domain.
# You can do whatever you want with this file.
#
BCJ=
LZMA2OPTS=
case $SRCARCH in
x86) BCJ=--x86 ;;
powerpc) BCJ=--powerpc ;;
ia64) BCJ=--ia64; LZMA2OPTS=pb=4 ;;
arm) BCJ=--arm ;;
sparc) BCJ=--sparc ;;
esac
exec xz --check=crc32 $BCJ --lzma2=$LZMA2OPTS,dict=32MiB
#
# Makefile
#
# Author: Lasse Collin <lasse.collin@tukaani.org>
#
# This file has been put into the public domain.
# You can do whatever you want with this file.
#
CC = gcc -std=gnu89
BCJ_CPPFLAGS = -DXZ_DEC_X86 -DXZ_DEC_POWERPC -DXZ_DEC_IA64 \
-DXZ_DEC_ARM -DXZ_DEC_ARMTHUMB -DXZ_DEC_SPARC
CPPFLAGS = -DXZ_USE_CRC64 -DXZ_DEC_ANY_CHECK
CFLAGS = -ggdb3 -O2 -pedantic -Wall -Wextra
RM = rm -f
VPATH = ../linux/include/linux ../linux/lib/xz
COMMON_SRCS = xz_crc32.c xz_crc64.c xz_dec_stream.c xz_dec_lzma2.c xz_dec_bcj.c
COMMON_OBJS = $(COMMON_SRCS:.c=.o)
XZMINIDEC_OBJS = xzminidec.o
BYTETEST_OBJS = bytetest.o
BUFTEST_OBJS = buftest.o
BOOTTEST_OBJS = boottest.o
XZ_HEADERS = xz.h xz_private.h xz_stream.h xz_lzma2.h xz_config.h
PROGRAMS = xzminidec bytetest buftest boottest
ALL_CPPFLAGS = -I../linux/include/linux -I. $(BCJ_CPPFLAGS) $(CPPFLAGS)
all: $(PROGRAMS)
%.o: %.c $(XZ_HEADERS)
$(CC) $(ALL_CPPFLAGS) $(CFLAGS) -c -o $@ $<
xzminidec: $(COMMON_OBJS) $(XZMINIDEC_OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(COMMON_OBJS) $(XZMINIDEC_OBJS)
bytetest: $(COMMON_OBJS) $(BYTETEST_OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(COMMON_OBJS) $(BYTETEST_OBJS)
buftest: $(COMMON_OBJS) $(BUFTEST_OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(COMMON_OBJS) $(BUFTEST_OBJS)
boottest: $(BOOTTEST_OBJS) $(COMMON_SRCS)
$(CC) $(ALL_CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTTEST_OBJS)
.PHONY: clean
clean:
-$(RM) $(COMMON_OBJS) $(XZMINIDEC_OBJS) $(BUFTEST_OBJS) \
$(BOOTTEST_OBJS) $(PROGRAMS)
/*
* Test application for xz_boot.c
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#define STATIC static
#define INIT
static void error(/*const*/ char *msg)
{
fprintf(stderr, "%s\n", msg);
}
/* Disable the CRC64 support even if it was enabled in the Makefile. */
#undef XZ_USE_CRC64
#include "../linux/lib/decompress_unxz.c"
static uint8_t in[1024 * 1024];
static uint8_t out[1024 * 1024];
static int fill(void *buf, unsigned int size)
{
return fread(buf, 1, size, stdin);
}
static int flush(/*const*/ void *buf, unsigned int size)
{
return fwrite(buf, 1, size, stdout);
}
static void test_buf_to_buf(void)
{
size_t in_size;
int ret;
in_size = fread(in, 1, sizeof(in), stdin);
ret = decompress(in, in_size, NULL, NULL, out, NULL, &error);
/* fwrite(out, 1, FIXME, stdout); */
fprintf(stderr, "ret = %d\n", ret);
}
static void test_buf_to_cb(void)
{
size_t in_size;
int in_used;
int ret;
in_size = fread(in, 1, sizeof(in), stdin);
ret = decompress(in, in_size, NULL, &flush, NULL, &in_used, &error);
fprintf(stderr, "ret = %d; in_used = %d\n", ret, in_used);
}
static void test_cb_to_cb(void)
{
int ret;
ret = decompress(NULL, 0, &fill, &flush, NULL, NULL, &error);
fprintf(stderr, "ret = %d\n", ret);
}
/*
* Not used by Linux <= 2.6.37-rc4 and newer probably won't use it either,
* but this kind of use case is still required to be supported by the API.
*/
static void test_cb_to_buf(void)
{
int in_used;
int ret;
ret = decompress(in, 0, &fill, NULL, out, &in_used, &error);
/* fwrite(out, 1, FIXME, stdout); */
fprintf(stderr, "ret = %d; in_used = %d\n", ret, in_used);
}
int main(int argc, char **argv)
{
if (argc != 2)
fprintf(stderr, "Usage: %s [bb|bc|cc|cb]\n", argv[0]);
else if (strcmp(argv[1], "bb") == 0)
test_buf_to_buf();
else if (strcmp(argv[1], "bc") == 0)
test_buf_to_cb();
else if (strcmp(argv[1], "cc") == 0)
test_cb_to_cb();
else if (strcmp(argv[1], "cb") == 0)
test_cb_to_buf();
else
fprintf(stderr, "Usage: %s [bb|bc|cc|cb]\n", argv[0]);
return 0;
}
/*
* Test application to test buffer-to-buffer decoding
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include "xz.h"
#define BUFFER_SIZE (1024 * 1024)
static uint8_t in[BUFFER_SIZE];
static uint8_t out[BUFFER_SIZE];
int main(void)
{
struct xz_buf b;
struct xz_dec *s;
enum xz_ret ret;
xz_crc32_init();
s = xz_dec_init(XZ_SINGLE, 0);
if (s == NULL) {
fputs("Initialization failed", stderr);
return 1;
}
b.in = in;
b.in_pos = 0;
b.in_size = fread(in, 1, sizeof(in), stdin);
b.out = out;
b.out_pos = 0;
b.out_size = sizeof(out);
ret = xz_dec_run(s, &b);
xz_dec_end(s);
fwrite(out, 1, b.out_pos, stdout);
fprintf(stderr, "%d\n", ret);
return 0;
}
/*
* Lazy test for the case when the output size is known
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "xz.h"
static uint8_t in[1];
static uint8_t out[BUFSIZ];
int main(int argc, char **argv)
{
struct xz_buf b;
struct xz_dec *s;
enum xz_ret ret;
const char *msg;
size_t uncomp_size;
if (argc != 2) {
fputs("Give uncompressed size as the argument", stderr);
return 1;
}
uncomp_size = atoi(argv[1]);
xz_crc32_init();
/*
* Support up to 64 MiB dictionary. The actually needed memory
* is allocated once the headers have been parsed.
*/
s = xz_dec_init(XZ_DYNALLOC, 1 << 26);
if (s == NULL) {
msg = "Memory allocation failed\n";
goto error;
}
b.in = in;
b.in_pos = 0;
b.in_size = 0;
b.out = out;
b.out_pos = 0;
b.out_size = uncomp_size < BUFSIZ ? uncomp_size : BUFSIZ;
while (true) {
if (b.in_pos == b.in_size) {
b.in_size = fread(in, 1, sizeof(in), stdin);
b.in_pos = 0;
}
ret = xz_dec_run(s, &b);
if (b.out_pos == sizeof(out)) {
if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
msg = "Write error\n";
goto error;
}
uncomp_size -= b.out_pos;
b.out_pos = 0;
b.out_size = uncomp_size < BUFSIZ
? uncomp_size : BUFSIZ;
}
if (ret == XZ_OK)
continue;
#ifdef XZ_DEC_ANY_CHECK
if (ret == XZ_UNSUPPORTED_CHECK) {
fputs(argv[0], stderr);
fputs(": ", stderr);
fputs("Unsupported check; not verifying "
"file integrity\n", stderr);
continue;
}
#endif
if (uncomp_size != b.out_pos) {
msg = "Uncompressed size doesn't match\n";
goto error;
}
if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos
|| fclose(stdout)) {
msg = "Write error\n";
goto error;
}
switch (ret) {
case XZ_STREAM_END:
xz_dec_end(s);
return 0;
case XZ_MEM_ERROR:
msg = "Memory allocation failed\n";
goto error;
case XZ_MEMLIMIT_ERROR:
msg = "Memory usage limit reached\n";
goto error;
case XZ_FORMAT_ERROR:
msg = "Not a .xz file\n";
goto error;
case XZ_OPTIONS_ERROR:
msg = "Unsupported options in the .xz headers\n";
goto error;
case XZ_DATA_ERROR:
case XZ_BUF_ERROR:
msg = "File is corrupt\n";
goto error;
default:
msg = "Bug!\n";
goto error;
}
}
error:
xz_dec_end(s);
fputs(argv[0], stderr);
fputs(": ", stderr);
fputs(msg, stderr);
return 1;
}
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment