2
0
mirror of https://github.com/dpethes/rerogue.git synced 2025-06-07 18:58:32 +02:00
rerogue/model_viewer/util/dc2core.pas
2020-05-01 07:43:32 +02:00

1958 lines
55 KiB
ObjectPascal

unit dc2core;
interface
uses
sysutils, math;
type
TBitstreamBufferState = record
current_bits: longword;
mask: longword;
end;
{ TBitstreamWriter }
TBitstreamWriter = class
private
buffer: plongword;
cur: plongword;
mask: longword;
closed: boolean;
public
constructor Create(const memory_buffer: pbyte);
destructor Destroy; override;
procedure Close;
function IsByteAligned: boolean;
procedure ByteAlign;
function GetBitSize: longword;
function GetByteSize: longword;
function GetUnbufferedByteSize: longword;
function GetDataStart: pbyte;
procedure Write(const bit: integer);
procedure Write(const bits, length: longword); //write multiple bits, lsb first
function GetState: TBitstreamBufferState;
procedure SetState(const state: TBitstreamBufferState);
procedure ResetBufferPosition;
end;
{ TBitstreamReader }
TBitstreamReader = class
private
buffer: plongword;
cur: plongword;
used: longword;
public
constructor Create(const memory_buffer: pbyte);
function GetPosition(): longword;
function GetBitCount(): longword;
function GetUncachedPosition(): longword;
function IsByteAligned(): boolean;
function Read(): longword; {$ifdef bs_inline} inline; {$endif}
function Read(count: longword): longword;
function Show(const count: longword): longword;
procedure Skip(const count: longword);
function ReadInverse(bit_count: longword): longword;
function GetState: TBitstreamBufferState;
procedure SetState(const state: TBitstreamBufferState);
procedure ResetBufferPosition;
function GetInternalState: TBitstreamBufferState;
procedure SetInternalState(const state: TBitstreamBufferState);
end;
function SwapBits (const bits, bit_count: longword): longword;
const
MAX_BLOCK_SIZE = 32 * 1024;
type
TBlockTypeEnum = (BTRaw := 0, BTFixed, BTDynamic, BTError);
//statistiky pre enkodovanie
stats_t = record
offsets_sum, offsets_used, //sucet/pocet offsetov
matches_sum, elements_encoded: int64; // -||- zhod
blocks: array[TBlockTypeEnum] of longword;
end;
type
{ TSlidingBuffer }
TSlidingBuffer = object
private
_buffer: pbyte;
_previous_bytes_count: integer;
public
constructor Init();
destructor Done;
function GetWindow: pbyte; inline;
procedure InsertData(const data: pbyte; const size: integer);
end;
const
MAX_COMPRESSION_LEVEL = 7;
DICT_SIZE = MAX_BLOCK_SIZE;
MAX_DEFLATE_MATCH_LENGTH = 258;
type
TSearchResult = record
distance,
length: word;
end;
{ TMatchSearcher }
TMatchSearcher = class
private
_max_search_depth: integer; //limit how many positions we want to check
_max_search_match_length: integer; //limit how long match needs to be to satisfy search conditions
_links: pinteger; //linked list of hash occurences
_last_seen_idx: pinteger; //last known position of a hash in the stream
_bytes_processed: integer;
_current_chunk_size: integer;
_sbuffer: TSlidingBuffer; //sliding buffer for search window data
function Search(const window_end_ptr, str: pbyte; const current_idx, max_match_length: integer
): TSearchResult;
public
constructor Create;
destructor Destroy; override;
procedure SetCompressionLevel(const level: integer);
{ New chunk of data that to be processed. }
procedure NewData (const data: pbyte; const size: integer);
{
Find previous occurence of bytes in str.
str - searched data pointer
data_index - searched data index relative to current chunk
}
function FindMatch(const str: pbyte; const data_index: integer): TSearchResult;
end;
const
//konstanty pre strom
END_OF_STREAM = 285;
TOP_NODE = END_OF_STREAM * 2 + 1;
type
//huff tree definitions
tree_node_t = record
weight: longword;
child_0: word;
child_1: word;
end;
tree_node_p = ^tree_node_t;
vlc_code_t = record
bits: word;
code_len: byte;
end;
vlc_code_p = ^vlc_code_t;
huff_tree_t = record
counts: plongword;
nodes: tree_node_p;
codes: vlc_code_p;
root_node: longword;
end;
//fixed huffcodes are actually constructed using codes <0..287>, but last 2 are never used
TDecodeTable = record
codes_of_legth: array[0..15] of word; //number of codes for given length
code_value: array[0..END_OF_STREAM + 2] of word; //map code to literal/length value
end;
PDecodeTable = ^TDecodeTable;
procedure huff_FillCanonDecodingTable(var tab: TDecodeTable; const code_lengths: pbyte; const count: integer);
procedure huff_code2canon(const codes: vlc_code_p);
procedure huff_init (out h: huff_tree_t);
procedure huff_free (h: huff_tree_t);
procedure huff_raise_count (var h: huff_tree_t; const val: word); inline;
procedure huff_build_tree (var h: huff_tree_t; max_cnt: word = 255);
procedure huff_build_distance_tree (var h: huff_tree_t; max_cnt: word = 255);
const
END_OF_BLOCK = 1000; //must not collide with any valid Deflate values
END_OF_BLOCK_CODE = 256;
type
{ TVlcWriter }
TVlcWriter = object
private
bs: TBitstreamWriter;
len_tree, dist_tree: vlc_code_p;
public
procedure SetTrees(const bitstream: TBitstreamWriter; const length_tree, distance_tree: vlc_code_p);
procedure WriteMatch (const len, dist: longword);
procedure WriteLiteral (const c: byte);
procedure WriteBlockEnd ();
end;
TSymbolBits = record
symbol: word;
nbits: byte;
end;
const
TAB0_BITS = 9; //LUT bits, must be less or equal to maximum bit length of huff codes allowed by Deflate
type
TDecodeLookupTables = record
codes_t0: array[0..511] of TSymbolBits; //9 bits = 512 values
canon_table: TDecodeTable;
end;
{ TVlcReader }
TVlcReader = class
private
bs: TBitstreamReader;
literal_dectable, distance_dectable: TDecodeLookupTables;
public
procedure SetTables(const bitreader: TBitstreamReader; const literal_table, distance_table: TDecodeLookupTables);
procedure ReadCodePair (out length, distance: word);
end;
function Length2code (const len: longword): longword;
function Distance2code(const dist: longword): longword;
function vlc_ReadCode(const bs: TBitstreamReader; const table: TDecodeTable): integer;
function vlc_ReadCode(const bs: TBitstreamReader; const dectable: TDecodeLookupTables): integer;
function InitDecodeLut(const code_lengths: pbyte; const count: integer): TDecodeLookupTables;
const
MIN_MATCH_LENGTH = 3;
type
TLiteralMatch = record
match_length: word; //match length
offset: word; //match offset
literal: byte; //byte from input stream
end;
PLiteralMatch = ^TLiteralMatch;
{ TBlockWriter }
TBlockWriter = class
private
bitWriter: TBitstreamWriter; //bitstream writer
literal_match_stats: pinteger;
distance_stats: pinteger;
literal_codes: vlc_code_p;
distance_codes: vlc_code_p;
_block_type: TBlockTypeEnum;
_last: boolean; //last block in stream
bs_cache: TBitstreamBufferState; //state of the buffer at beginning of the block
procedure BeginBlock;
procedure BuildHuffCodes;
procedure BuildFixedHuffCodes;
procedure WriteCodingTrees;
procedure WriteBlockEncoded(const search_results: PLiteralMatch; const size: integer);
procedure WriteBlockRaw(const rawdata: pbyte; const rawsize: integer);
public
constructor Create(const output_buffer: pbyte);
destructor Destroy; override;
procedure InitNewBlock(const block_type: TBlockTypeEnum);
procedure SetLast;
procedure UpdateStatsMatch(const len, dist: longword); inline;
procedure UpdateStatsLiteral(const literal: byte); inline;
procedure WriteBlock(const rawdata: pbyte; const rawsize: integer;
const search_results: PLiteralMatch; const size: integer; const keep_buffer: boolean = false);
procedure Done;
function GetStreamSize: integer;
end;
TBlockContext = record
btype: TBlockTypeEnum;
size: integer;
unfinished: boolean;
last: boolean; //last block flag
end;
{ TBlockReader }
TBlockReader = class
private
_block_type: TBlockTypeEnum;
_vlc: TVlcReader;
procedure ReadHeaderCodes(const bs: TBitstreamReader);
procedure InitFixedCodes(const bs: TBitstreamReader);
public
constructor Create;
destructor Destroy; override;
function ReadBlockHeader(const bs: TBitstreamReader): TBlockContext;
function GetVlcReader: TVlcReader; inline;
end;
(*******************************************************************************
*******************************************************************************)
implementation
{ SwapBits
Swap bit ordering in source pattern. Swaps up to 16 bits.
}
function SwapBits (const bits, bit_count: longword): longword;
var
x: longword;
begin
x := bits;
x := ((x and $aaaaaaaa) >> 1) or ((x and $55555555) << 1);
x := ((x and $cccccccc) >> 2) or ((x and $33333333) << 2);
x := ((x and $f0f0f0f0) >> 4) or ((x and $0f0f0f0f) << 4);
x := ((x and $ff00ff00) >> 8) or ((x and $00ff00ff) << 8);
result := x >> (16 - bit_count);
end;
{ TBitstreamWriter }
constructor TBitstreamWriter.Create(const memory_buffer: pbyte);
begin
buffer := plongword (memory_buffer);
cur := buffer;
cur^ := 0;
mask := 0;
end;
destructor TBitstreamWriter.Destroy;
begin
if not closed then
Close;
inherited Destroy;
end;
function TBitstreamWriter.GetBitSize: longword;
begin
result := 32 * (cur - buffer) + mask;
end;
function TBitstreamWriter.GetByteSize: longword;
begin
result := (cur - buffer) * 4;
result += (mask + 7) div 8; //+ buffer
end;
function TBitstreamWriter.GetUnbufferedByteSize: longword;
begin
result := (cur - buffer) * 4;
end;
function TBitstreamWriter.GetDataStart: pbyte;
begin
result := pbyte(buffer);
end;
procedure TBitstreamWriter.Close;
begin
end;
function TBitstreamWriter.IsByteAligned: boolean;
begin
result := mask mod 8 = 0;
end;
procedure TBitstreamWriter.ByteAlign;
begin
while not IsByteAligned do
Write(0);
end;
procedure TBitstreamWriter.Write(const bit: integer);
begin
cur^ := cur^ or longword((bit and 1) shl mask);
mask += 1;
if mask = 32 then begin
cur += 1;
cur^ := 0;
mask := 0;
end;
end;
procedure TBitstreamWriter.Write(const bits, length: longword);
var
bits_: longword;
begin
Assert(length <= 32, 'bit_count over 32');
//clear unused bits
bits_ := bits and ($ffffffff shr (32 - length));
cur^ := cur^ or (bits_ shl mask);
mask += length;
if mask >= 32 then begin
mask -= 32; //number of bits that didn't fit into buffer
cur += 1;
cur^ := 0;
if mask > 0 then
cur^ := bits_ shr (length - mask);
end;
end;
function TBitstreamWriter.GetState: TBitstreamBufferState;
begin
Result.mask := mask;
Result.current_bits := cur^;
end;
procedure TBitstreamWriter.SetState(const state: TBitstreamBufferState);
begin
mask := state.mask;
cur^ := state.current_bits;
end;
procedure TBitstreamWriter.ResetBufferPosition;
var
cache: TBitstreamBufferState;
begin
cache := GetState;
cur := buffer;
SetState(cache);
end;
{ TBitstreamReader }
constructor TBitstreamReader.Create(const memory_buffer: pbyte);
begin
buffer := plongword (memory_buffer);
cur := buffer;
used := 0;
end;
function TBitstreamReader.GetPosition: longword;
begin
result := (cur - buffer) << 2; //used dword count
result += (used + 7) shr 3; //+ buffer
end;
function TBitstreamReader.GetBitCount: longword;
begin
result := 32 * longword(cur - buffer) + used;
end;
function TBitstreamReader.GetUncachedPosition: longword;
begin
result := (cur - buffer) * 4; //used dword count
end;
function TBitstreamReader.IsByteAligned: boolean;
begin
result := true;
if used mod 8 > 0 then result := false;
end;
function TBitstreamReader.Read: longword;
begin
result := (cur^ shr used) and 1;
used += 1;
if used = 32 then begin
cur += 1;
used := 0;
end;
end;
function TBitstreamReader.Read(count: longword): longword;
var
bits_left: integer;
begin
result := cur^ shr used;
if count < (32 - used) then begin
result := result and ($ffffffff shr (32 - count));
used += count;
end else begin
bits_left := count - (32 - used);
cur += 1;
if bits_left > 0 then
result := result or (cur^ and ($ffffffff shr (32 - bits_left))) shl (32 - used);
used := bits_left;
end;
end;
function TBitstreamReader.Show(const count: longword): longword;
var
bits_left: integer;
begin
result := cur^ shr used;
if count < (32 - used) then begin
result := result and ($ffffffff shr (32 - count));
end else begin
bits_left := count - (32 - used);
if bits_left > 0 then
result := result or ((cur + 1)^ and ($ffffffff shr (32 - bits_left))) shl (32 - used);
end;
end;
procedure TBitstreamReader.Skip(const count: longword);
begin
if count < (32 - used) then begin
used += count;
end else begin
cur += 1;
used := count - (32 - used);
end;
end;
function TBitstreamReader.ReadInverse(bit_count: longword): longword;
var
i: integer;
begin
result := 0;
for i := bit_count - 1 downto 0 do
result := result or Read() shl i;
end;
function TBitstreamReader.GetState: TBitstreamBufferState;
begin
result.current_bits := cur^;
end;
procedure TBitstreamReader.SetState(const state: TBitstreamBufferState);
begin
cur^ := state.current_bits;
end;
procedure TBitstreamReader.ResetBufferPosition;
begin
cur := buffer;
end;
function TBitstreamReader.GetInternalState: TBitstreamBufferState;
begin
result.current_bits := cur - buffer;
result.mask := used;
end;
procedure TBitstreamReader.SetInternalState(const state: TBitstreamBufferState);
begin
cur := buffer + state.current_bits;
used := state.mask;
end;
{ TSlidingBuffer }
constructor TSlidingBuffer.Init();
begin
_buffer := getmem(2 * MAX_BLOCK_SIZE);
_buffer += MAX_BLOCK_SIZE;
_previous_bytes_count := 0;
end;
destructor TSlidingBuffer.Done;
begin
freemem(_buffer - MAX_BLOCK_SIZE);
end;
function TSlidingBuffer.GetWindow: pbyte;
begin
result := _buffer;
end;
procedure TSlidingBuffer.InsertData(const data: pbyte; const size: integer);
begin
Assert(size <= MAX_BLOCK_SIZE, 'cannot insert more data than allocated range');
if _previous_bytes_count > 0 then
move((_buffer + _previous_bytes_count - MAX_BLOCK_SIZE)^,
(_buffer - MAX_BLOCK_SIZE)^,
MAX_BLOCK_SIZE);
move(data^, _buffer^, size);
_previous_bytes_count := size;
end;
const
SEARCH_DEPTH: array[0..MAX_COMPRESSION_LEVEL] of Integer = (0, 1, 8, 16, 32, 48, 64, 32*1024);
SEARCH_MATCH_DIVIDER: array[0..MAX_COMPRESSION_LEVEL] of Integer = (1, 4, 4, 4, 4, 4, 2, 1);
HASH_BITS = 18;
{
Generate 16bit hash from first 3 bytes of a given pointer
}
function hash3(const x: pbyte): integer; inline;
begin
result := ((x+2)^ shl 10) xor ((x+1)^ shl 5) xor x^;
end;
{ TMatchSearcher }
constructor TMatchSearcher.Create;
begin
_sbuffer.Init();
_max_search_depth := SEARCH_DEPTH[0];
_max_search_match_length := MAX_DEFLATE_MATCH_LENGTH div SEARCH_MATCH_DIVIDER[0];
_links := getmem(2 * DICT_SIZE * sizeof(integer));
_last_seen_idx := getmem(1 shl HASH_BITS * sizeof(integer)); //must be equal to hash bits
Filldword(_last_seen_idx^, 1 shl HASH_BITS, $ffffffff ); //negative indices don't get searched, so use -1
_current_chunk_size := 0;
_bytes_processed := 0;
end;
destructor TMatchSearcher.Destroy;
begin
freemem(_links);
freemem(_last_seen_idx);
_sbuffer.Done;
inherited;
end;
procedure TMatchSearcher.SetCompressionLevel(const level: integer);
begin
Assert(level <= MAX_COMPRESSION_LEVEL, 'invalid compression level');
_max_search_depth := SEARCH_DEPTH[level];
_max_search_match_length := MAX_DEFLATE_MATCH_LENGTH div SEARCH_MATCH_DIVIDER[level];
end;
{
Take next data chunk and create links between the occurences of the same hash
}
procedure TMatchSearcher.NewData(const data: pbyte; const size: integer);
var
i, key, last_seen: integer;
begin
_sbuffer.InsertData(data, size);
_bytes_processed += _current_chunk_size;
_current_chunk_size := size;
move((_links + DICT_SIZE)^, _links^, DICT_SIZE * sizeof(integer));
for i := 0 to size - 1 do begin
key := hash3(data + i);
last_seen := _last_seen_idx[key];
_links[DICT_SIZE + i] := last_seen;
_last_seen_idx[key] := i + _bytes_processed;
end;
end;
{
Compare strings, return length of the match. Loop at the last byte of the window.
}
function compare_strings_loop(const window, string_data: pbyte;
const max_match_length, window_size: integer): integer;
var
i, k: integer;
begin
result := 0;
i := 0;
for k := 0 to max_match_length - 1 do begin
if window[i] = string_data[k] then
result += 1
else
exit;
i += 1;
if i = window_size then
i := 0;
end;
end;
{
Compare strings, return length of the match.
There must be at least max_match_length valid bytes in window.
}
function compare_strings(const window, string_data: pbyte;
const max_match_length: integer): integer;
var
i: integer;
begin
result := 0;
for i := 0 to max_match_length - 1 do
if window[i] = string_data[i] then
result += 1
else
exit;
end;
{
Compare last byte of the window against current string.
}
function compare_strings_rle(const string_data: pbyte; const byte_value, max_match_length: integer): integer;
var
i: integer;
begin
result := 0;
for i := 0 to max_match_length - 1 do
if byte_value = string_data[i] then
result += 1
else
exit;
end;
function InitSearchResult(const distance, best_match: longword): TSearchResult; inline;
begin
longword(result) := longword( best_match << 16 or distance );
end;
function TMatchSearcher.Search(const window_end_ptr, str: pbyte;
const current_idx, max_match_length: integer): TSearchResult;
var
i: integer;
links: pinteger;
best_match_distance: integer;
best_match_length: integer;
last_seen_idx: integer;
min_allowed_idx: integer;
previous_idx: integer;
length: integer;
distance: integer;
begin
Assert(max_match_length >= 3);
//test if searched string is a repetition of the last byte before full search
best_match_length := compare_strings_rle(str, window_end_ptr[-1], max_match_length);
result := InitSearchResult(1, best_match_length);
if best_match_length >= _max_search_match_length then
exit;
last_seen_idx := current_idx - _bytes_processed;
links := _links + DICT_SIZE;
best_match_distance := 1;
min_allowed_idx := max(0, current_idx - DICT_SIZE);
//early termination if links of the next searched position are much closer than current ones
if links[last_seen_idx] < links[last_seen_idx + 1] - (DICT_SIZE shr 1) then
exit;
for i := _max_search_depth - 1 downto 0 do begin
//if the position falls out of the sliding window_end_ptr range, it's too old and cannot be searched
previous_idx := links[last_seen_idx];
if previous_idx < min_allowed_idx then begin
break;
end;
last_seen_idx := previous_idx - _bytes_processed;
//compare data at given positions
distance := current_idx - previous_idx;
if previous_idx + max_match_length < current_idx then
length := compare_strings(window_end_ptr - distance, str, max_match_length)
else
length := compare_strings_loop(window_end_ptr - distance, str, max_match_length, distance);
if length > best_match_length then begin
best_match_length := length;
best_match_distance := distance;
if length >= _max_search_match_length then
break;
end;
end;
Assert(best_match_distance >= 0);
result := InitSearchResult(best_match_distance, best_match_length);
end;
{
Find best match between current bytes and bytes already seen.
If distance = 0 & length = 0 - no occurences were found
}
function TMatchSearcher.FindMatch(const str: pbyte; const data_index: integer): TSearchResult;
var
max_match_length: integer;
current_idx: integer;
window_end_ptr: pbyte;
begin
result := InitSearchResult(0, 0);
//reduce maximum possible match length at the end of the stream
//we need at least 3 bytes to be able to run search (hash function takes 3 bytes as input)
max_match_length := min(MAX_DEFLATE_MATCH_LENGTH, _current_chunk_size - data_index);
if max_match_length <= 2 then
exit;
//beginning of a stream, nothing to search
if _bytes_processed + data_index = 0 then
exit;
//get proper search window and currently searched string's file index
window_end_ptr := _sbuffer.GetWindow + data_index;
current_idx := _bytes_processed + data_index;
result := Search(window_end_ptr, str, current_idx, max_match_length);
end;
{ huff_FillCanonDecodingTable
code_lengths - array of lengths, indexed by code
count - number of codes to fill
}
procedure huff_FillCanonDecodingTable(var tab: TDecodeTable; const code_lengths: pbyte; const count: integer);
var
len: integer; //current length; all deflate code lengths are between 1 and 15
same_length_count: integer;
i, j: integer;
begin
j := 0;
tab.codes_of_legth[0] := 0;
for len := 1 to 15 do begin
same_length_count := 0;
for i := 0 to count - 1 do begin
if code_lengths[i] = len then begin
tab.code_value[j] := i;
j += 1;
same_length_count += 1;
end;
end;
tab.codes_of_legth[len] := same_length_count;
end;
end;
(*
huff_code2canon
zmen huffkody na kanonicke huffkody
*)
procedure huff_code2canon(const codes: vlc_code_p);
var
len: integer;
b, i: integer;
begin
b := 0;
for len := 1 to 15 do begin
for i := 0 to END_OF_STREAM do begin
if codes[i].code_len = len then begin
codes[i].bits := b;
b += 1;
end;
end;
b := b shl 1;
end;
end;
(*******************************************************************************
generovanie stromu
*)
//huff_init
procedure huff_init (out h: huff_tree_t);
var
i: integer;
begin
i := sizeof(longword) * (END_OF_STREAM + 1);
h.counts := getmem(i);
FillByte(h.counts^, i, 0);
i := sizeof(tree_node_t) * (END_OF_STREAM + 1) * 2;
h.nodes := getmem(i);
FillByte(h.nodes^, i, 0);
i := sizeof(vlc_code_t) * (END_OF_STREAM + 1);
h.codes := getmem(i);
FillByte(h.codes^, i, 0);
h.root_node := 0;
end;
//huff_free
procedure huff_free (h: huff_tree_t);
begin
freemem( h.counts );
freemem( h.nodes );
freemem( h.codes );
end;
procedure huff_raise_count(var h: huff_tree_t; const val: word);
begin
h.counts[val] += 1;
end;
(*******************************************************************************
scale_counts
*)
procedure scale_counts(var params: huff_tree_t; const max_cnt: word);
var
counts: plongword;
b: integer;
max: longword;
new: longword;
ratio: single;
begin
counts := params.counts;
max := 0;
for b := 0 to END_OF_STREAM do
if counts[b] > max then max := counts[b];
if max <= max_cnt then exit;
ratio := single( max ) / max_cnt;
for b := 0 to END_OF_STREAM do
if counts[b] > 0 then begin
new := round( counts[b] / ratio );
if new = 0 then
counts[b] := 1
else
counts[b] := new;
end;
end;
(*******************************************************************************
build_tree
zostavenie huffmanovho stromu, nastavenie indexu korenoveho uzla stromu
*)
procedure build_tree (var params: huff_tree_t);
var
nodes: tree_node_p;
counts: plongword;
next_free: integer;
i: integer;
min_1,
min_2: integer;
begin
nodes := params.nodes;
counts := params.counts;
for i := 0 to END_OF_STREAM do nodes[i].weight := counts[i];
nodes[TOP_NODE].weight := High(word);
next_free := END_OF_STREAM;
while true do begin
next_free := next_free + 1;
min_1 := TOP_NODE;
min_2 := TOP_NODE;
for i := 0 to next_free - 1 do
if nodes[i].weight > 0 then begin
if nodes[i].weight < nodes[min_1].weight then begin
min_2 := min_1 ;
min_1 := i ;
end else
if nodes[i].weight < nodes[min_2].weight then min_2 := i;
end;
if min_2 = TOP_NODE then break;
nodes[next_free].weight := nodes[min_1].weight + nodes[min_2].weight;
nodes[next_free].child_0 := min_1;
nodes[next_free].child_1 := min_2;
nodes[min_1].weight := 0;
nodes[min_2].weight := 0;
end;
params.root_node := next_free - 1;
end;
(*******************************************************************************
tree_to_code
z korenoveho uzlu rekurzivne postupuj cez jednotlive listy
a zapis cestu ako VLC kod + pocet jeho bitov do tabulky
*)
procedure tree_to_code (var p: huff_tree_t; code_len_current, node: integer);
begin
if node <= END_OF_STREAM then
p.codes[node].code_len := code_len_current
else begin
code_len_current := code_len_current + 1;
tree_to_code (p, code_len_current, p.nodes[node].child_0 );
tree_to_code (p, code_len_current, p.nodes[node].child_1 );
end;
end;
(*******************************************************************************
huff_build_tree
zostav huffmanov strom, vrat index korenoveho uzla stromu
*)
procedure huff_build_tree (var h: huff_tree_t; max_cnt: word = 255);
begin
scale_counts (h, max_cnt);
build_tree (h);
tree_to_code (h, 0, h.root_node);
huff_code2canon (h.codes);
end;
//special case, when there is only one symbol in alphabet - can happen with distance trees
procedure huff_build_distance_tree (var h: huff_tree_t; max_cnt: word = 255);
const
MAX_DIST_CODES = 32;
var
i: integer;
last_nonzero_idx: integer;
used_symbols_count: integer;
begin
used_symbols_count := 0;
last_nonzero_idx := 0;
for i := 0 to MAX_DIST_CODES - 1 do
if h.counts[i] > 0 then begin
used_symbols_count += 1;
if used_symbols_count > 1 then
break;
last_nonzero_idx := i;
end;
if used_symbols_count > 1 then begin
huff_build_tree(h, max_cnt);
end else begin
h.codes[last_nonzero_idx].code_len := 1;
h.codes[last_nonzero_idx].bits := 0;
h.root_node := last_nonzero_idx;
end
end;
{ Length2code
Map match length value to length code for huff encoding.
}
function Length2code (const len: longword): longword;
const
table: array[byte] of byte = (
1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14,
14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29
);
begin
Assert(len >= 3);
result := 256 + table[len-3]; //0..255 = literals, 256 = block end
end;
{ Code2length
Map decoded length code to length value.
}
function Code2length(const code: longword): longword; inline;
const
table: array[0..28] of byte = (
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67,
83, 99, 115, 131, 163, 195, 227, 0
);
begin
result := table[ code - 257 ];
if result = 0 then result := 258;
end;
{ Distance2code
Map distance value to distance code for huff encoding.
}
function Distance2code(const dist: longword): longword;
const
table_512: array [0..511] of byte = (
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9,
9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17
);
table_128: array[2..127] of byte = (
18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24,
24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29
);
begin
if dist <= 512 then
result := table_512[dist - 1]
else begin
result := table_128[(dist - 1) shr 8];
end;
end;
{ Code2distance
Map decoded distance code to distance value.
}
function Code2distance(const code: longword): longword; inline;
const
table: array[0..29] of word = (
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
);
begin
result := table[ code ];
end;
{ TVlcWriter }
procedure TVlcWriter.SetTrees(const bitstream: TBitstreamWriter;
const length_tree, distance_tree: vlc_code_p);
begin
bs := bitstream;
len_tree := length_tree;
dist_tree := distance_tree;
end;
procedure TVlcWriter.WriteMatch(const len, dist: longword);
var
code, bits: longword;
begin
//length
code := Length2code(len);
bs.Write(len_tree[code].bits, len_tree[code].code_len);
if (code >= 265) and (code < 285) then begin //extra bits
bits := 5 - (284 - code) div 4;
bs.Write(len - 3, bits);
end;
//offset / distance
code := Distance2code(dist);
bs.Write(dist_tree[code].bits, dist_tree[code].code_len);
if code >= 4 then begin
bits := code div 2 - 1;
bs.Write(dist - 1, bits);
end;
end;
procedure TVlcWriter.WriteLiteral(const c: byte);
begin
bs.Write(len_tree[c].bits, len_tree[c].code_len);
end;
procedure TVlcWriter.WriteBlockEnd;
begin
bs.Write(len_tree[END_OF_BLOCK_CODE].bits, len_tree[END_OF_BLOCK_CODE].code_len);
end;
{ vlc_ReadCode
Read one canonical huffman code using the given decoding table. Maximum symbol length cannot
exceed 15 bits (maximum allowed by Deflate), otherwise reading fails and bad things happen.
}
function vlc_ReadCode(const bs: TBitstreamReader; const table: TDecodeTable): integer;
var
i, codes,
diff, value: longword;
value_low: longword; //lowest value for code of given length
codes_skipped: longword; //how many codes we already skipped
number_of_codes: pword; //# codes of given length
begin
i := 0;
value := 0;
codes_skipped := 0;
value_low := 0;
codes := 0;
number_of_codes := @table.codes_of_legth[0];
repeat
codes_skipped += codes;
value_low += codes;
value_low := value_low shl 1;
i += 1;
Assert(i < 16, 'could not read vlc code');
codes := number_of_codes[i];
value := (value shl 1) or bs.read();
diff := value - value_low;
until codes > diff;
result := table.code_value[ codes_skipped + diff ];
end;
{ vlc_ReadCode
Read one variable-length code using the given lookup table. If the code couldn't be read, try
to read with the canon huff decoding table.
}
function vlc_ReadCode(const bs: TBitstreamReader; const dectable: TDecodeLookupTables): integer;
var
bits: integer;
sb: TSymbolBits;
begin
bits := bs.Show(TAB0_BITS);
sb := dectable.codes_t0[bits];
result := sb.symbol;
if (sb.nbits = 0) then begin
result := vlc_ReadCode(bs, dectable.canon_table);
end else
bs.Skip(sb.nbits);
end;
{ InitDecodeLut
Assign canonical huff code bits to each code by its length and build a look-up table for fast
decoding. Uses separate code bits runs for each code length. Makes 2 passes over input data,
one pass could be removed if code length stats were provided beforehand, but it doesn't gain much.
}
function InitDecodeLut(const code_lengths: pbyte; const count: integer): TDecodeLookupTables;
var
i, len, code_bits: integer;
value, k, b: integer;
sb: TSymbolBits;
num_lengths: array[0..15] of integer; //# of codes of given length
length_bits: array[0..15] of integer; //canonical bits for codes of given length
begin
FillByte(num_lengths, sizeof(num_lengths), 0);
FillByte(length_bits, sizeof(length_bits), 0);
for i := 0 to count - 1 do begin
num_lengths[code_lengths[i]] += 1;
end;
b := 0;
for i := 1 to 15 do begin
length_bits[i] := b;
b += num_lengths[i];
b := b << 1;
end;
FillByte(result.codes_t0, sizeof(result.codes_t0), 0);
for i := 0 to count - 1 do begin
len := code_lengths[i];
if not (len in [1..TAB0_BITS]) then
continue;
code_bits := length_bits[len];
length_bits[len] += 1;
sb.symbol := i;
sb.nbits := len;
//insert each code length + junk code_bits combination
code_bits := SwapBits(code_bits, len);
for k := 0 to 1 << (TAB0_BITS - len) - 1 do begin
value := (k << len) or code_bits;
result.codes_t0[value] := sb;
end;
end;
end;
{ TVlcReader }
procedure TVlcReader.SetTables(const bitreader: TBitstreamReader;
const literal_table, distance_table: TDecodeLookupTables);
begin
bs := bitreader;
literal_dectable := literal_table;
distance_dectable := distance_table;
end;
procedure TVlcReader.ReadCodePair(out length, distance: word);
const
LITERAL_EXTRA_BITS: array[257..285] of byte = (
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
);
var
code, extra_bits: longword;
begin
length := 1;
code := vlc_ReadCode(bs, literal_dectable);
//decode literals, length / distance, end of block
if code < 256 then begin
distance := code;
end
else if code > 256 then begin
length := Code2length(code);
extra_bits := LITERAL_EXTRA_BITS[code];
if extra_bits > 0 then begin
length += bs.Read(extra_bits);
end;
code := vlc_ReadCode(bs, distance_dectable);
distance := Code2distance(code);
if code >= 4 then begin
distance += bs.Read(code >> 1 - 1);
end;
end
else begin
length := END_OF_BLOCK;
distance := 0;
end;
end;
const
//code ordering for header code length alphabet
//see RFC1951 section 3.2.7. Compression with dynamic Huffman codes (BTYPE=10)
HeaderCodeLengthOrder: array[0..18] of byte = (
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
);
LITERAL_MATCH_ELEMENTS = END_OF_STREAM + 1;
DISTANCE_ELEMENTS = 30;
type
TRleResult = record
size: integer;
nonzero: integer;
rl_pairs: array[0..LITERAL_MATCH_ELEMENTS-1] of record
code_len, repeats: byte;
end;
code_lengths: array[0..LITERAL_MATCH_ELEMENTS-1] of byte;
end;
{
Build length-limited huffman code tree.
Length is limited by reducing the code occurence's statistics.
Less accuracy means that the differences between code lengths are reduced, too.
This is somewhat suboptimal.
Distance trees are special, because there are cases where they contain only one used symbol.
}
procedure build_limited_tree(var tree: huff_tree_t; limit, size: word; const for_distance: boolean = false);
var
i: integer;
tree_ok: boolean;
freq_limit: integer;
begin
freq_limit := 256*2;
repeat
if for_distance then
huff_build_distance_tree(tree, freq_limit)
else
huff_build_tree(tree, freq_limit);
tree_ok := true;
for i := 0 to size - 1 do
if tree.codes[i].code_len > limit then begin
tree_ok := false;
freq_limit := freq_limit shr 1;
break;
end;
until tree_ok;
//reverse bits for faster bitwriting
for i := 0 to size - 1 do begin
tree.codes[i].bits := SwapBits(tree.codes[i].bits, tree.codes[i].code_len);
end;
end;
{
WriteCodeLengths
Write lengths of tree's codes using, encoded by header_codes
}
procedure WriteCodeLengths
(const bs: TBitstreamWriter; const header_codes, tree_codes: vlc_code_p; const size: integer);
var
i, k: integer;
bits, length: longword;
begin
for i := 0 to size - 1 do begin
k := tree_codes[i].code_len;
bits := header_codes[k].bits;
length := header_codes[k].code_len;
bs.Write(bits, length);
end;
end;
procedure WriteCodeLengthsRle
(const bs: TBitstreamWriter; const header_codes: vlc_code_p; const rle_res: TRleResult);
var
i, k: integer;
bits, length: longword;
size: integer;
begin
size := rle_res.size;
i := 0;
while i < size do begin
k := rle_res.rl_pairs[i].code_len;
bits := header_codes[k].bits;
length := header_codes[k].code_len;
bs.Write(bits, length);
if k = 16 then begin //Copy the previous code length, the next 2 bits indicate repeat length
bs.Write(rle_res.rl_pairs[i].repeats, 2);
end
else if k = 17 then begin //Repeat a code length of 0, 3 bits of length
bs.Write(rle_res.rl_pairs[i].repeats, 3);
end
else if k = 18 then begin //Repeat a code length of 0, 7 bits of length
bs.Write(rle_res.rl_pairs[i].repeats, 7);
end;
i += 1;
end;
end;
{
GetHclen
Reduce the number of header tree code lengths that need to be stored.
Saves a couple of bits per block.
}
function GetHclen(const min_length, max_length: integer): integer;
var
i, length: integer;
begin
result := 19;
for i := 18 downto 5 do begin
length := HeaderCodeLengthOrder[i];
if (length >= min_length) and (length <= max_length) then
break
else
result -= 1;
end;
result -= 4;
end;
function RleCodeBuffer(const src: array of byte; const size: integer): TRleResult;
var
i, k, m: integer;
value, next_value: byte;
max_lookahead: integer;
run_length: integer;
nonzero: integer;
procedure AddRl(const code_len, repeats: byte);
begin
result.rl_pairs[m].code_len := code_len;
result.rl_pairs[m].repeats := repeats;
m += 1;
end;
begin
nonzero := 0;
i := 0;
m := 0;
while i < size do begin
value := src[i];
//zero runs
if value = 0 then begin
max_lookahead := 137;
if size - i < max_lookahead then
max_lookahead := size - i;
run_length := 1;
for k := i + 1 to i + max_lookahead - 1 do begin
next_value := src[k];
if value = next_value then
run_length += 1
else
break;
end;
if run_length < 3 then begin
AddRl(0, 0);
i += 1;
end else begin
i += run_length;
if run_length < 11 then
AddRl(17, run_length - 3) //Repeat a code length of 0 for 3 - 10 times.
else
AddRl(18, run_length - 11); //Repeat a code length of 0 for 11 - 138 times.
end;
end
//nonzero runs
else begin
max_lookahead := 6;
if size - i <= max_lookahead then
max_lookahead := size - i - 1;
run_length := 0;
for k := i + 1 to i + max_lookahead do begin
next_value := src[k];
if value = next_value then
run_length += 1
else
break;
end;
if run_length >= 3 then begin
AddRl(value, 0);
AddRl(16, run_length - 3); //Copy the previous code length 3 - 6 times.
i += run_length + 1;
nonzero += run_length + 1;
end else begin
AddRl(value, 0);
i += 1;
nonzero += 1;
end;
end;
end;
result.size := m;
result.nonzero := nonzero;
for i := 0 to result.size - 1 do
result.code_lengths[i] := result.rl_pairs[i].code_len;
end;
{ TBlockWriter }
{
BeginBlock
Write block header.
header bytes:
BFINAL - 1 bit
BTYPE - 2 bits
}
procedure TBlockWriter.BeginBlock;
begin
bitWriter.Write(longword( _last ) and 1);
bitWriter.Write(longword( _block_type ), 2);
end;
{ BuildFixedHuffCodes
Create vlc trees for blocks compressed using fixed Huffman codes
}
procedure TBlockWriter.BuildFixedHuffCodes;
var
i, bits: integer;
function vlc(const b, len: integer): vlc_code_t;
begin
result.bits := SwapBits(b, len);
result.code_len := len;
bits += 1;
end;
begin
bits := 0;
for i := 256 to 279 do literal_codes[i] := vlc(bits, 7);
bits := bits << 1;
for i := 0 to 143 do literal_codes[i] := vlc(bits, 8);
for i := 280 to 287 do literal_codes[i] := vlc(bits, 8);
bits := bits << 1;
for i := 144 to 255 do literal_codes[i] := vlc(bits, 9);
for i := 0 to 29 do distance_codes[i] := vlc(i, 5);
end;
{ BuildHuffCodes
Create vlc trees from accumulated statistics for literal/length and distance coding
}
procedure TBlockWriter.BuildHuffCodes;
var
i: integer;
tree: huff_tree_t;
begin
//generate literal/match codes
huff_init(tree);
for i := 0 to LITERAL_MATCH_ELEMENTS - 1 do
tree.counts[i] := literal_match_stats[i];
tree.counts[END_OF_BLOCK_CODE] := 1;
build_limited_tree(tree, 15, LITERAL_MATCH_ELEMENTS);
Move(tree.codes ^, literal_codes ^, LITERAL_MATCH_ELEMENTS * sizeof(vlc_code_t));
huff_free(tree);
//generate distance codes
huff_init(tree);
for i := 0 to DISTANCE_ELEMENTS - 1 do
tree.counts[i] := distance_stats[i];
build_limited_tree(tree, 15, DISTANCE_ELEMENTS, true);
Move(tree.codes ^, distance_codes ^, DISTANCE_ELEMENTS * sizeof(vlc_code_t));
huff_free(tree);
end;
{
WriteCodingTrees
Store literal/length and distance trees.
Canonical huff coding is used, so code lengths are enough to store the trees.
}
procedure TBlockWriter.WriteCodingTrees;
var
max_used_codelength, min_used_codelength: integer;
block_header_tree: huff_tree_t;
procedure UpdateCodeLengthStats(const code_lengths: array of byte; const size: integer);
var
i: integer;
length: integer;
begin
i := 0;
while i < size do begin
length := code_lengths[i];
huff_raise_count(block_header_tree, length);
i += 1;
if (length < min_used_codelength) and (length > 0) then min_used_codelength := length;
if length > max_used_codelength then max_used_codelength := length;
end;
end;
var
header_codes: vlc_code_p;
hclen, hlit, hdist: integer;
i: integer;
codelen_buffer: array[0..LITERAL_MATCH_ELEMENTS - 1] of byte;
distance_rle: TRleResult;
literal_rle: TRleResult;
begin
//build header tree for coding the literal/length and distance tree code lengths
max_used_codelength := 0;
min_used_codelength := 15;
huff_init(block_header_tree);
for i := 0 to LITERAL_MATCH_ELEMENTS - 1 do
codelen_buffer[i] := literal_codes[i].code_len;
literal_rle := RleCodeBuffer(codelen_buffer, LITERAL_MATCH_ELEMENTS);
UpdateCodeLengthStats(literal_rle.code_lengths, literal_rle.size);
for i := 0 to DISTANCE_ELEMENTS - 1 do
codelen_buffer[i] := distance_codes[i].code_len;
distance_rle := RleCodeBuffer(codelen_buffer, DISTANCE_ELEMENTS);
UpdateCodeLengthStats(distance_rle.code_lengths, distance_rle.size);
build_limited_tree(block_header_tree, 7, 19);
//store the header tree code lengths
//when do we want to define a smaller hdist? if we used a smaller encoding window, thus limiting the distances?
hlit := 286 - 257;
hdist := 29;
hclen := GetHclen(min_used_codelength, max_used_codelength);
bitWriter.Write(hlit, 5);
bitWriter.Write(hdist, 5);
bitWriter.Write(hclen, 4);
header_codes := block_header_tree.codes;
for i := 0 to hclen + 4 - 1 do
bitWriter.Write( header_codes[ HeaderCodeLengthOrder[i] ].code_len, 3 );
//store codes of literal/length and distance trees
WriteCodeLengthsRle(bitWriter, header_codes, literal_rle);
WriteCodeLengthsRle(bitWriter, header_codes, distance_rle);
huff_free(block_header_tree);
end;
{ WriteBlockEncoded
Write a complete block into bitstream: literals and match length / distance pairs, END_OF_BLOCK symbol.
Handles distinction between blocks compressed using fixed or dynamic huff codes
}
procedure TBlockWriter.WriteBlockEncoded(const search_results: PLiteralMatch; const size: integer);
var
i: integer;
lm: TLiteralMatch;
vlc: TVlcWriter;
begin
if _block_type = BTDynamic then begin
BuildHuffCodes;
WriteCodingTrees();
end else
BuildFixedHuffCodes();
vlc.SetTrees(bitWriter, literal_codes, distance_codes);
for i := 0 to size - 1 do begin
lm := search_results[i];
if lm.match_length > 0 then
vlc.WriteMatch(lm.match_length, lm.offset)
else
vlc.WriteLiteral(lm.literal);
end;
vlc.WriteBlockEnd();
end;
{
WriteBlockRaw
Write a raw block into bitstream: copy input values.
Raw block header:
n bits - byte alignment
16 bits - data length
16 bits - inverted data length
}
procedure TBlockWriter.WriteBlockRaw(const rawdata: pbyte; const rawsize: integer);
var
i: integer;
begin
bitWriter.ByteAlign;
bitWriter.Write(rawsize, 16);
bitWriter.Write(rawsize xor $ffff, 16);
for i := 0 to rawsize - 1 do
bitWriter.Write(rawdata[i], 8); //todo: memcpy
end;
constructor TBlockWriter.Create(const output_buffer: pbyte);
begin
bitWriter := TBitstreamWriter.Create(output_buffer);
bs_cache := bitWriter.GetState;
literal_match_stats := GetMem(LITERAL_MATCH_ELEMENTS * sizeof(integer));
distance_stats := Getmem(DISTANCE_ELEMENTS * sizeof(integer));
literal_codes := GetMem(LITERAL_MATCH_ELEMENTS * sizeof(vlc_code_t));
distance_codes := GetMem(DISTANCE_ELEMENTS * sizeof(vlc_code_t));
end;
destructor TBlockWriter.Destroy;
begin
inherited Destroy;
bitWriter.Free;
Freemem(literal_match_stats);
Freemem(distance_stats);
Freemem(literal_codes);
Freemem(distance_codes);
end;
procedure TBlockWriter.InitNewBlock(const block_type: TBlockTypeEnum);
begin
FillDWord(literal_match_stats^, LITERAL_MATCH_ELEMENTS, 0);
FillDWord(distance_stats^, DISTANCE_ELEMENTS, 0);
bitWriter.SetState(bs_cache);
_block_type := block_type;
_last := false;
end;
procedure TBlockWriter.SetLast;
begin
_last := true;
end;
procedure TBlockWriter.UpdateStatsMatch(const len, dist: longword);
begin
literal_match_stats[ Length2code(len) ] += 1;
distance_stats[ Distance2code(dist) ] += 1;
end;
procedure TBlockWriter.UpdateStatsLiteral(const literal: byte);
begin
literal_match_stats[ literal ] += 1;
end;
procedure TBlockWriter.WriteBlock(const rawdata: pbyte; const rawsize: integer;
const search_results: PLiteralMatch; const size: integer; const keep_buffer: boolean);
begin
if not keep_buffer then
bitWriter.ResetBufferPosition;
BeginBlock();
if _block_type <> BTRaw then
WriteBlockEncoded(search_results, size)
else
WriteBlockRaw(rawdata, rawsize);
end;
procedure TBlockWriter.Done;
begin
//throw away bs cache
bs_cache := bitWriter.GetState;
end;
{
GetStreamSize
Returns number of whole bytes that were written into bitstream for current block.
The last written bit doesn't have to be at a byte aligned position,
so we need to cache the write buffer and mask to put the bits in the next processed block.
If the current block is the last one processed, the outstanding bits must be counted into the stream size
(they would be lost otherwise).
}
function TBlockWriter.GetStreamSize: integer;
begin
if not _last then begin
result := bitWriter.GetUnbufferedByteSize;
end else begin
bitWriter.Close;
result := bitWriter.GetByteSize;
end;
end;
{ TBlockReader }
{ ReadHeaderCodes
Read code lengths and generate tables for dynamic block decoding
}
procedure TBlockReader.ReadHeaderCodes(const bs: TBitstreamReader);
const
MAX_CODE_LENGTHS = 286 + 32; //# of Literal/Length codes + # of Distance codes
var
literal_dectable, //literal/length decoding table
distance_dectable: TDecodeLookupTables; //distance decoding table
hlit, hdist, hclen: word;
len, last_len: integer; //code length, previous code length
i, k, extra_bits: longword;
code_lengths: array[0..MAX_CODE_LENGTHS-1] of byte;
dt: TDecodeLookupTables;
begin
hlit := bs.Read(5) + 257;
hdist := bs.Read(5) + 1;
hclen := bs.Read(4) + 4;
//get code_len codes
FillByte(code_lengths, 19, 0);
for i := 0 to hclen - 1 do begin
k := HeaderCodeLengthOrder[i];
code_lengths[k] := bs.Read(3);
end;
dt := InitDecodeLut(code_lengths, 19);
//decode symbols
FillByte(code_lengths, MAX_CODE_LENGTHS, 0);
i := 0;
last_len := 16;
while i < hlit + hdist do begin
len := vlc_ReadCode(bs, dt);
if len < 16 then begin
code_lengths[i] := len;
i += 1;
last_len := len;
end
else
case len of
16: begin //rep previous length
Assert(last_len <> 16, 'dynamic block header error');
extra_bits := bs.Read(2);
k := i;
i += extra_bits + 3;
for k := k to i - 1 do
code_lengths[k] := last_len;
end;
17: begin //rep zero length
extra_bits := bs.Read(3);
i += extra_bits + 3;
end;
18: begin //rep zero length
extra_bits := bs.Read(7);
i += extra_bits + 11;
end
end;
end;
literal_dectable := InitDecodeLut(pbyte(@code_lengths), hlit);
huff_FillCanonDecodingTable(literal_dectable.canon_table, pbyte(@code_lengths), hlit);
distance_dectable := InitDecodeLut(pbyte(@code_lengths) + hlit, hdist);
huff_FillCanonDecodingTable(distance_dectable.canon_table, pbyte(@code_lengths) + hlit, hdist);
_vlc.SetTables(bs, literal_dectable, distance_dectable);
end;
procedure TBlockReader.InitFixedCodes(const bs: TBitstreamReader);
var
literal_dectable, //literal/length decoding table
distance_dectable: TDecodeLookupTables; //distance decoding table
code_lengths: array[0..287] of byte;
i: integer;
begin
for i := 256 to 279 do code_lengths[i] := 7;
for i := 0 to 143 do code_lengths[i] := 8;
for i := 280 to 287 do code_lengths[i] := 8;
for i := 144 to 255 do code_lengths[i] := 9;
literal_dectable := InitDecodeLut(pbyte(@code_lengths), 288);
huff_FillCanonDecodingTable(literal_dectable.canon_table, pbyte(@code_lengths), 288);
for i := 0 to 31 do code_lengths[i] := 5;
distance_dectable := InitDecodeLut(pbyte(@code_lengths), 32);
huff_FillCanonDecodingTable(distance_dectable.canon_table, pbyte(@code_lengths), 32);
_vlc.SetTables(bs, literal_dectable, distance_dectable);
end;
constructor TBlockReader.Create;
begin
_vlc := TVlcReader.Create;
end;
destructor TBlockReader.Destroy;
begin
_vlc.Free;
inherited;
end;
{ ReadBlockHeader
Reads block header including length trees for codes
}
function TBlockReader.ReadBlockHeader(const bs: TBitstreamReader): TBlockContext;
var
block: TBlockContext;
t: integer;
begin
block.last := bs.Read() = 1;
block.btype := TBlockTypeEnum( bs.Read(2) );
_block_type := block.btype;
case block.btype of
BTRaw: begin
while not bs.IsByteAligned() do
bs.Read();
block.size := bs.Read(16);
t := not integer(bs.Read(16));
Assert(block.size = t, 'blk size mismatch');
end;
BTDynamic: begin
ReadHeaderCodes(bs);
end;
BTFixed: begin
InitFixedCodes(bs);
end;
end;
Result := block;
end;
function TBlockReader.GetVlcReader(): TVlcReader;
begin
result := _vlc;
end;
end.