Externals: Add libLZMA.

This commit is contained in:
degasus 2019-12-30 15:07:54 +01:00
parent 4385afdb0a
commit 9fd03cda9d
156 changed files with 35414 additions and 0 deletions

92
Externals/liblzma/rangecoder/price.h vendored Normal file
View file

@ -0,0 +1,92 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file price.h
/// \brief Probability price calculation
//
// Author: Igor Pavlov
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_PRICE_H
#define LZMA_PRICE_H
#define RC_MOVE_REDUCING_BITS 4
#define RC_BIT_PRICE_SHIFT_BITS 4
#define RC_PRICE_TABLE_SIZE (RC_BIT_MODEL_TOTAL >> RC_MOVE_REDUCING_BITS)
#define RC_INFINITY_PRICE (UINT32_C(1) << 30)
/// Lookup table for the inline functions defined in this file.
extern const uint8_t lzma_rc_prices[RC_PRICE_TABLE_SIZE];
static inline uint32_t
rc_bit_price(const probability prob, const uint32_t bit)
{
return lzma_rc_prices[(prob ^ ((UINT32_C(0) - bit)
& (RC_BIT_MODEL_TOTAL - 1))) >> RC_MOVE_REDUCING_BITS];
}
static inline uint32_t
rc_bit_0_price(const probability prob)
{
return lzma_rc_prices[prob >> RC_MOVE_REDUCING_BITS];
}
static inline uint32_t
rc_bit_1_price(const probability prob)
{
return lzma_rc_prices[(prob ^ (RC_BIT_MODEL_TOTAL - 1))
>> RC_MOVE_REDUCING_BITS];
}
static inline uint32_t
rc_bittree_price(const probability *const probs,
const uint32_t bit_levels, uint32_t symbol)
{
uint32_t price = 0;
symbol += UINT32_C(1) << bit_levels;
do {
const uint32_t bit = symbol & 1;
symbol >>= 1;
price += rc_bit_price(probs[symbol], bit);
} while (symbol != 1);
return price;
}
static inline uint32_t
rc_bittree_reverse_price(const probability *const probs,
uint32_t bit_levels, uint32_t symbol)
{
uint32_t price = 0;
uint32_t model_index = 1;
do {
const uint32_t bit = symbol & 1;
symbol >>= 1;
price += rc_bit_price(probs[model_index], bit);
model_index = (model_index << 1) + bit;
} while (--bit_levels != 0);
return price;
}
static inline uint32_t
rc_direct_price(const uint32_t bits)
{
return bits << RC_BIT_PRICE_SHIFT_BITS;
}
#endif

View file

@ -0,0 +1,22 @@
/* This file has been automatically generated by price_tablegen.c. */
#include "range_encoder.h"
const uint8_t lzma_rc_prices[RC_PRICE_TABLE_SIZE] = {
128, 103, 91, 84, 78, 73, 69, 66,
63, 61, 58, 56, 54, 52, 51, 49,
48, 46, 45, 44, 43, 42, 41, 40,
39, 38, 37, 36, 35, 34, 34, 33,
32, 31, 31, 30, 29, 29, 28, 28,
27, 26, 26, 25, 25, 24, 24, 23,
23, 22, 22, 22, 21, 21, 20, 20,
19, 19, 19, 18, 18, 17, 17, 17,
16, 16, 16, 15, 15, 15, 14, 14,
14, 13, 13, 13, 12, 12, 12, 11,
11, 11, 11, 10, 10, 10, 10, 9,
9, 9, 9, 8, 8, 8, 8, 7,
7, 7, 7, 6, 6, 6, 6, 5,
5, 5, 5, 5, 4, 4, 4, 4,
3, 3, 3, 3, 3, 2, 2, 2,
2, 2, 2, 1, 1, 1, 1, 1
};

View file

@ -0,0 +1,87 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file price_tablegen.c
/// \brief Probability price table generator
///
/// Compiling: gcc -std=c99 -o price_tablegen price_tablegen.c
///
// Authors: Igor Pavlov
// Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include <inttypes.h>
#include <stdio.h>
#include "range_common.h"
#include "price.h"
static uint32_t rc_prices[RC_PRICE_TABLE_SIZE];
static void
init_price_table(void)
{
for (uint32_t i = (UINT32_C(1) << RC_MOVE_REDUCING_BITS) / 2;
i < RC_BIT_MODEL_TOTAL;
i += (UINT32_C(1) << RC_MOVE_REDUCING_BITS)) {
const uint32_t cycles_bits = RC_BIT_PRICE_SHIFT_BITS;
uint32_t w = i;
uint32_t bit_count = 0;
for (uint32_t j = 0; j < cycles_bits; ++j) {
w *= w;
bit_count <<= 1;
while (w >= (UINT32_C(1) << 16)) {
w >>= 1;
++bit_count;
}
}
rc_prices[i >> RC_MOVE_REDUCING_BITS]
= (RC_BIT_MODEL_TOTAL_BITS << cycles_bits)
- 15 - bit_count;
}
return;
}
static void
print_price_table(void)
{
printf("/* This file has been automatically generated by "
"price_tablegen.c. */\n\n"
"#include \"range_encoder.h\"\n\n"
"const uint8_t lzma_rc_prices["
"RC_PRICE_TABLE_SIZE] = {");
const size_t array_size = sizeof(lzma_rc_prices)
/ sizeof(lzma_rc_prices[0]);
for (size_t i = 0; i < array_size; ++i) {
if (i % 8 == 0)
printf("\n\t");
printf("%4" PRIu32, rc_prices[i]);
if (i != array_size - 1)
printf(",");
}
printf("\n};\n");
return;
}
int
main(void)
{
init_price_table();
print_price_table();
return 0;
}

View file

@ -0,0 +1,71 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file range_common.h
/// \brief Common things for range encoder and decoder
///
// Authors: Igor Pavlov
// Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_RANGE_COMMON_H
#define LZMA_RANGE_COMMON_H
#include "common.h"
///////////////
// Constants //
///////////////
#define RC_SHIFT_BITS 8
#define RC_TOP_BITS 24
#define RC_TOP_VALUE (UINT32_C(1) << RC_TOP_BITS)
#define RC_BIT_MODEL_TOTAL_BITS 11
#define RC_BIT_MODEL_TOTAL (UINT32_C(1) << RC_BIT_MODEL_TOTAL_BITS)
#define RC_MOVE_BITS 5
////////////
// Macros //
////////////
// Resets the probability so that both 0 and 1 have probability of 50 %
#define bit_reset(prob) \
prob = RC_BIT_MODEL_TOTAL >> 1
// This does the same for a complete bit tree.
// (A tree represented as an array.)
#define bittree_reset(probs, bit_levels) \
for (uint32_t bt_i = 0; bt_i < (1 << (bit_levels)); ++bt_i) \
bit_reset((probs)[bt_i])
//////////////////////
// Type definitions //
//////////////////////
/// \brief Type of probabilities used with range coder
///
/// This needs to be at least 12-bit integer, so uint16_t is a logical choice.
/// However, on some architecture and compiler combinations, a bigger type
/// may give better speed, because the probability variables are accessed
/// a lot. On the other hand, bigger probability type increases cache
/// footprint, since there are 2 to 14 thousand probability variables in
/// LZMA (assuming the limit of lc + lp <= 4; with lc + lp <= 12 there
/// would be about 1.5 million variables).
///
/// With malicious files, the initialization speed of the LZMA decoder can
/// become important. In that case, smaller probability variables mean that
/// there is less bytes to write to RAM, which makes initialization faster.
/// With big probability type, the initialization can become so slow that it
/// can be a problem e.g. for email servers doing virus scanning.
///
/// I will be sticking to uint16_t unless some specific architectures
/// are *much* faster (20-50 %) with uint32_t.
typedef uint16_t probability;
#endif

View file

@ -0,0 +1,185 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file range_decoder.h
/// \brief Range Decoder
///
// Authors: Igor Pavlov
// Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_RANGE_DECODER_H
#define LZMA_RANGE_DECODER_H
#include "range_common.h"
typedef struct {
uint32_t range;
uint32_t code;
uint32_t init_bytes_left;
} lzma_range_decoder;
/// Reads the first five bytes to initialize the range decoder.
static inline lzma_ret
rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in,
size_t *restrict in_pos, size_t in_size)
{
while (rc->init_bytes_left > 0) {
if (*in_pos == in_size)
return LZMA_OK;
// The first byte is always 0x00. It could have been omitted
// in LZMA2 but it wasn't, so one byte is wasted in every
// LZMA2 chunk.
if (rc->init_bytes_left == 5 && in[*in_pos] != 0x00)
return LZMA_DATA_ERROR;
rc->code = (rc->code << 8) | in[*in_pos];
++*in_pos;
--rc->init_bytes_left;
}
return LZMA_STREAM_END;
}
/// Makes local copies of range decoder and *in_pos variables. Doing this
/// improves speed significantly. The range decoder macros expect also
/// variables `in' and `in_size' to be defined.
#define rc_to_local(range_decoder, in_pos) \
lzma_range_decoder rc = range_decoder; \
size_t rc_in_pos = (in_pos); \
uint32_t rc_bound
/// Stores the local copes back to the range decoder structure.
#define rc_from_local(range_decoder, in_pos) \
do { \
range_decoder = rc; \
in_pos = rc_in_pos; \
} while (0)
/// Resets the range decoder structure.
#define rc_reset(range_decoder) \
do { \
(range_decoder).range = UINT32_MAX; \
(range_decoder).code = 0; \
(range_decoder).init_bytes_left = 5; \
} while (0)
/// When decoding has been properly finished, rc.code is always zero unless
/// the input stream is corrupt. So checking this can catch some corrupt
/// files especially if they don't have any other integrity check.
#define rc_is_finished(range_decoder) \
((range_decoder).code == 0)
/// Read the next input byte if needed. If more input is needed but there is
/// no more input available, "goto out" is used to jump out of the main
/// decoder loop.
#define rc_normalize(seq) \
do { \
if (rc.range < RC_TOP_VALUE) { \
if (unlikely(rc_in_pos == in_size)) { \
coder->sequence = seq; \
goto out; \
} \
rc.range <<= RC_SHIFT_BITS; \
rc.code = (rc.code << RC_SHIFT_BITS) | in[rc_in_pos++]; \
} \
} while (0)
/// Start decoding a bit. This must be used together with rc_update_0()
/// and rc_update_1():
///
/// rc_if_0(prob, seq) {
/// rc_update_0(prob);
/// // Do something
/// } else {
/// rc_update_1(prob);
/// // Do something else
/// }
///
#define rc_if_0(prob, seq) \
rc_normalize(seq); \
rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \
if (rc.code < rc_bound)
/// Update the range decoder state and the used probability variable to
/// match a decoded bit of 0.
#define rc_update_0(prob) \
do { \
rc.range = rc_bound; \
prob += (RC_BIT_MODEL_TOTAL - (prob)) >> RC_MOVE_BITS; \
} while (0)
/// Update the range decoder state and the used probability variable to
/// match a decoded bit of 1.
#define rc_update_1(prob) \
do { \
rc.range -= rc_bound; \
rc.code -= rc_bound; \
prob -= (prob) >> RC_MOVE_BITS; \
} while (0)
/// Decodes one bit and runs action0 or action1 depending on the decoded bit.
/// This macro is used as the last step in bittree reverse decoders since
/// those don't use "symbol" for anything else than indexing the probability
/// arrays.
#define rc_bit_last(prob, action0, action1, seq) \
do { \
rc_if_0(prob, seq) { \
rc_update_0(prob); \
action0; \
} else { \
rc_update_1(prob); \
action1; \
} \
} while (0)
/// Decodes one bit, updates "symbol", and runs action0 or action1 depending
/// on the decoded bit.
#define rc_bit(prob, action0, action1, seq) \
rc_bit_last(prob, \
symbol <<= 1; action0, \
symbol = (symbol << 1) + 1; action1, \
seq);
/// Like rc_bit() but add "case seq:" as a prefix. This makes the unrolled
/// loops more readable because the code isn't littered with "case"
/// statements. On the other hand this also makes it less readable, since
/// spotting the places where the decoder loop may be restarted is less
/// obvious.
#define rc_bit_case(prob, action0, action1, seq) \
case seq: rc_bit(prob, action0, action1, seq)
/// Decode a bit without using a probability.
#define rc_direct(dest, seq) \
do { \
rc_normalize(seq); \
rc.range >>= 1; \
rc.code -= rc.range; \
rc_bound = UINT32_C(0) - (rc.code >> 31); \
rc.code += rc.range & rc_bound; \
dest = (dest << 1) + (rc_bound + 1); \
} while (0)
// NOTE: No macros are provided for bittree decoding. It seems to be simpler
// to just write them open in the code.
#endif

View file

@ -0,0 +1,231 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file range_encoder.h
/// \brief Range Encoder
///
// Authors: Igor Pavlov
// Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_RANGE_ENCODER_H
#define LZMA_RANGE_ENCODER_H
#include "range_common.h"
#include "price.h"
/// Maximum number of symbols that can be put pending into lzma_range_encoder
/// structure between calls to lzma_rc_encode(). For LZMA, 52+5 is enough
/// (match with big distance and length followed by range encoder flush).
#define RC_SYMBOLS_MAX 58
typedef struct {
uint64_t low;
uint64_t cache_size;
uint32_t range;
uint8_t cache;
/// Number of symbols in the tables
size_t count;
/// rc_encode()'s position in the tables
size_t pos;
/// Symbols to encode
enum {
RC_BIT_0,
RC_BIT_1,
RC_DIRECT_0,
RC_DIRECT_1,
RC_FLUSH,
} symbols[RC_SYMBOLS_MAX];
/// Probabilities associated with RC_BIT_0 or RC_BIT_1
probability *probs[RC_SYMBOLS_MAX];
} lzma_range_encoder;
static inline void
rc_reset(lzma_range_encoder *rc)
{
rc->low = 0;
rc->cache_size = 1;
rc->range = UINT32_MAX;
rc->cache = 0;
rc->count = 0;
rc->pos = 0;
}
static inline void
rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
{
rc->symbols[rc->count] = bit;
rc->probs[rc->count] = prob;
++rc->count;
}
static inline void
rc_bittree(lzma_range_encoder *rc, probability *probs,
uint32_t bit_count, uint32_t symbol)
{
uint32_t model_index = 1;
do {
const uint32_t bit = (symbol >> --bit_count) & 1;
rc_bit(rc, &probs[model_index], bit);
model_index = (model_index << 1) + bit;
} while (bit_count != 0);
}
static inline void
rc_bittree_reverse(lzma_range_encoder *rc, probability *probs,
uint32_t bit_count, uint32_t symbol)
{
uint32_t model_index = 1;
do {
const uint32_t bit = symbol & 1;
symbol >>= 1;
rc_bit(rc, &probs[model_index], bit);
model_index = (model_index << 1) + bit;
} while (--bit_count != 0);
}
static inline void
rc_direct(lzma_range_encoder *rc,
uint32_t value, uint32_t bit_count)
{
do {
rc->symbols[rc->count++]
= RC_DIRECT_0 + ((value >> --bit_count) & 1);
} while (bit_count != 0);
}
static inline void
rc_flush(lzma_range_encoder *rc)
{
for (size_t i = 0; i < 5; ++i)
rc->symbols[rc->count++] = RC_FLUSH;
}
static inline bool
rc_shift_low(lzma_range_encoder *rc,
uint8_t *out, size_t *out_pos, size_t out_size)
{
if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000)
|| (uint32_t)(rc->low >> 32) != 0) {
do {
if (*out_pos == out_size)
return true;
out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
++*out_pos;
rc->cache = 0xFF;
} while (--rc->cache_size != 0);
rc->cache = (rc->low >> 24) & 0xFF;
}
++rc->cache_size;
rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS;
return false;
}
static inline bool
rc_encode(lzma_range_encoder *rc,
uint8_t *out, size_t *out_pos, size_t out_size)
{
assert(rc->count <= RC_SYMBOLS_MAX);
while (rc->pos < rc->count) {
// Normalize
if (rc->range < RC_TOP_VALUE) {
if (rc_shift_low(rc, out, out_pos, out_size))
return true;
rc->range <<= RC_SHIFT_BITS;
}
// Encode a bit
switch (rc->symbols[rc->pos]) {
case RC_BIT_0: {
probability prob = *rc->probs[rc->pos];
rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS)
* prob;
prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS;
*rc->probs[rc->pos] = prob;
break;
}
case RC_BIT_1: {
probability prob = *rc->probs[rc->pos];
const uint32_t bound = prob * (rc->range
>> RC_BIT_MODEL_TOTAL_BITS);
rc->low += bound;
rc->range -= bound;
prob -= prob >> RC_MOVE_BITS;
*rc->probs[rc->pos] = prob;
break;
}
case RC_DIRECT_0:
rc->range >>= 1;
break;
case RC_DIRECT_1:
rc->range >>= 1;
rc->low += rc->range;
break;
case RC_FLUSH:
// Prevent further normalizations.
rc->range = UINT32_MAX;
// Flush the last five bytes (see rc_flush()).
do {
if (rc_shift_low(rc, out, out_pos, out_size))
return true;
} while (++rc->pos < rc->count);
// Reset the range encoder so we are ready to continue
// encoding if we weren't finishing the stream.
rc_reset(rc);
return false;
default:
assert(0);
break;
}
++rc->pos;
}
rc->count = 0;
rc->pos = 0;
return false;
}
static inline uint64_t
rc_pending(const lzma_range_encoder *rc)
{
return rc->cache_size + 5 - 1;
}
#endif