unit dc2core; interface uses sysutils, math; type TBitstreamBufferState = record current_bits: longword; mask: longword; end; { TBitstreamWriter } TBitstreamWriter = class private buffer: plongword; cur: plongword; mask: longword; closed: boolean; public constructor Create(const memory_buffer: pbyte); destructor Destroy; override; procedure Close; function IsByteAligned: boolean; procedure ByteAlign; function GetBitSize: longword; function GetByteSize: longword; function GetUnbufferedByteSize: longword; function GetDataStart: pbyte; procedure Write(const bit: integer); procedure Write(const bits, length: longword); //write multiple bits, lsb first function GetState: TBitstreamBufferState; procedure SetState(const state: TBitstreamBufferState); procedure ResetBufferPosition; end; { TBitstreamReader } TBitstreamReader = class private buffer: plongword; cur: plongword; used: longword; public constructor Create(const memory_buffer: pbyte); function GetPosition(): longword; function GetBitCount(): longword; function GetUncachedPosition(): longword; function IsByteAligned(): boolean; function Read(): longword; {$ifdef bs_inline} inline; {$endif} function Read(count: longword): longword; function Show(const count: longword): longword; procedure Skip(const count: longword); function ReadInverse(bit_count: longword): longword; function GetState: TBitstreamBufferState; procedure SetState(const state: TBitstreamBufferState); procedure ResetBufferPosition; function GetInternalState: TBitstreamBufferState; procedure SetInternalState(const state: TBitstreamBufferState); end; function SwapBits (const bits, bit_count: longword): longword; const MAX_BLOCK_SIZE = 32 * 1024; type TBlockTypeEnum = (BTRaw := 0, BTFixed, BTDynamic, BTError); //statistiky pre enkodovanie stats_t = record offsets_sum, offsets_used, //sucet/pocet offsetov matches_sum, elements_encoded: int64; // -||- zhod blocks: array[TBlockTypeEnum] of longword; end; type { TSlidingBuffer } TSlidingBuffer = object private _buffer: pbyte; _previous_bytes_count: integer; public constructor Init(); destructor Done; function GetWindow: pbyte; inline; procedure InsertData(const data: pbyte; const size: integer); end; const MAX_COMPRESSION_LEVEL = 7; DICT_SIZE = MAX_BLOCK_SIZE; MAX_DEFLATE_MATCH_LENGTH = 258; type TSearchResult = record distance, length: word; end; { TMatchSearcher } TMatchSearcher = class private _max_search_depth: integer; //limit how many positions we want to check _max_search_match_length: integer; //limit how long match needs to be to satisfy search conditions _links: pinteger; //linked list of hash occurences _last_seen_idx: pinteger; //last known position of a hash in the stream _bytes_processed: integer; _current_chunk_size: integer; _sbuffer: TSlidingBuffer; //sliding buffer for search window data function Search(const window_end_ptr, str: pbyte; const current_idx, max_match_length: integer ): TSearchResult; public constructor Create; destructor Destroy; override; procedure SetCompressionLevel(const level: integer); { New chunk of data that to be processed. } procedure NewData (const data: pbyte; const size: integer); { Find previous occurence of bytes in str. str - searched data pointer data_index - searched data index relative to current chunk } function FindMatch(const str: pbyte; const data_index: integer): TSearchResult; end; const //konstanty pre strom END_OF_STREAM = 285; TOP_NODE = END_OF_STREAM * 2 + 1; type //huff tree definitions tree_node_t = record weight: longword; child_0: word; child_1: word; end; tree_node_p = ^tree_node_t; vlc_code_t = record bits: word; code_len: byte; end; vlc_code_p = ^vlc_code_t; huff_tree_t = record counts: plongword; nodes: tree_node_p; codes: vlc_code_p; root_node: longword; end; //fixed huffcodes are actually constructed using codes <0..287>, but last 2 are never used TDecodeTable = record codes_of_legth: array[0..15] of word; //number of codes for given length code_value: array[0..END_OF_STREAM + 2] of word; //map code to literal/length value end; PDecodeTable = ^TDecodeTable; procedure huff_FillCanonDecodingTable(var tab: TDecodeTable; const code_lengths: pbyte; const count: integer); procedure huff_code2canon(const codes: vlc_code_p); procedure huff_init (out h: huff_tree_t); procedure huff_free (h: huff_tree_t); procedure huff_raise_count (var h: huff_tree_t; const val: word); inline; procedure huff_build_tree (var h: huff_tree_t; max_cnt: word = 255); procedure huff_build_distance_tree (var h: huff_tree_t; max_cnt: word = 255); const END_OF_BLOCK = 1000; //must not collide with any valid Deflate values END_OF_BLOCK_CODE = 256; type { TVlcWriter } TVlcWriter = object private bs: TBitstreamWriter; len_tree, dist_tree: vlc_code_p; public procedure SetTrees(const bitstream: TBitstreamWriter; const length_tree, distance_tree: vlc_code_p); procedure WriteMatch (const len, dist: longword); procedure WriteLiteral (const c: byte); procedure WriteBlockEnd (); end; TSymbolBits = record symbol: word; nbits: byte; end; const TAB0_BITS = 9; //LUT bits, must be less or equal to maximum bit length of huff codes allowed by Deflate type TDecodeLookupTables = record codes_t0: array[0..511] of TSymbolBits; //9 bits = 512 values canon_table: TDecodeTable; end; { TVlcReader } TVlcReader = class private bs: TBitstreamReader; literal_dectable, distance_dectable: TDecodeLookupTables; public procedure SetTables(const bitreader: TBitstreamReader; const literal_table, distance_table: TDecodeLookupTables); procedure ReadCodePair (out length, distance: word); end; function Length2code (const len: longword): longword; function Distance2code(const dist: longword): longword; function vlc_ReadCode(const bs: TBitstreamReader; const table: TDecodeTable): integer; function vlc_ReadCode(const bs: TBitstreamReader; const dectable: TDecodeLookupTables): integer; function InitDecodeLut(const code_lengths: pbyte; const count: integer): TDecodeLookupTables; const MIN_MATCH_LENGTH = 3; type TLiteralMatch = record match_length: word; //match length offset: word; //match offset literal: byte; //byte from input stream end; PLiteralMatch = ^TLiteralMatch; { TBlockWriter } TBlockWriter = class private bitWriter: TBitstreamWriter; //bitstream writer literal_match_stats: pinteger; distance_stats: pinteger; literal_codes: vlc_code_p; distance_codes: vlc_code_p; _block_type: TBlockTypeEnum; _last: boolean; //last block in stream bs_cache: TBitstreamBufferState; //state of the buffer at beginning of the block procedure BeginBlock; procedure BuildHuffCodes; procedure BuildFixedHuffCodes; procedure WriteCodingTrees; procedure WriteBlockEncoded(const search_results: PLiteralMatch; const size: integer); procedure WriteBlockRaw(const rawdata: pbyte; const rawsize: integer); public constructor Create(const output_buffer: pbyte); destructor Destroy; override; procedure InitNewBlock(const block_type: TBlockTypeEnum); procedure SetLast; procedure UpdateStatsMatch(const len, dist: longword); inline; procedure UpdateStatsLiteral(const literal: byte); inline; procedure WriteBlock(const rawdata: pbyte; const rawsize: integer; const search_results: PLiteralMatch; const size: integer; const keep_buffer: boolean = false); procedure Done; function GetStreamSize: integer; end; TBlockContext = record btype: TBlockTypeEnum; size: integer; unfinished: boolean; last: boolean; //last block flag end; { TBlockReader } TBlockReader = class private _block_type: TBlockTypeEnum; _vlc: TVlcReader; procedure ReadHeaderCodes(const bs: TBitstreamReader); procedure InitFixedCodes(const bs: TBitstreamReader); public constructor Create; destructor Destroy; override; function ReadBlockHeader(const bs: TBitstreamReader): TBlockContext; function GetVlcReader: TVlcReader; inline; end; (******************************************************************************* *******************************************************************************) implementation { SwapBits Swap bit ordering in source pattern. Swaps up to 16 bits. } function SwapBits (const bits, bit_count: longword): longword; var x: longword; begin x := bits; x := ((x and $aaaaaaaa) >> 1) or ((x and $55555555) << 1); x := ((x and $cccccccc) >> 2) or ((x and $33333333) << 2); x := ((x and $f0f0f0f0) >> 4) or ((x and $0f0f0f0f) << 4); x := ((x and $ff00ff00) >> 8) or ((x and $00ff00ff) << 8); result := x >> (16 - bit_count); end; { TBitstreamWriter } constructor TBitstreamWriter.Create(const memory_buffer: pbyte); begin buffer := plongword (memory_buffer); cur := buffer; cur^ := 0; mask := 0; end; destructor TBitstreamWriter.Destroy; begin if not closed then Close; inherited Destroy; end; function TBitstreamWriter.GetBitSize: longword; begin result := 32 * (cur - buffer) + mask; end; function TBitstreamWriter.GetByteSize: longword; begin result := (cur - buffer) * 4; result += (mask + 7) div 8; //+ buffer end; function TBitstreamWriter.GetUnbufferedByteSize: longword; begin result := (cur - buffer) * 4; end; function TBitstreamWriter.GetDataStart: pbyte; begin result := pbyte(buffer); end; procedure TBitstreamWriter.Close; begin end; function TBitstreamWriter.IsByteAligned: boolean; begin result := mask mod 8 = 0; end; procedure TBitstreamWriter.ByteAlign; begin while not IsByteAligned do Write(0); end; procedure TBitstreamWriter.Write(const bit: integer); begin cur^ := cur^ or longword((bit and 1) shl mask); mask += 1; if mask = 32 then begin cur += 1; cur^ := 0; mask := 0; end; end; procedure TBitstreamWriter.Write(const bits, length: longword); var bits_: longword; begin Assert(length <= 32, 'bit_count over 32'); //clear unused bits bits_ := bits and ($ffffffff shr (32 - length)); cur^ := cur^ or (bits_ shl mask); mask += length; if mask >= 32 then begin mask -= 32; //number of bits that didn't fit into buffer cur += 1; cur^ := 0; if mask > 0 then cur^ := bits_ shr (length - mask); end; end; function TBitstreamWriter.GetState: TBitstreamBufferState; begin Result.mask := mask; Result.current_bits := cur^; end; procedure TBitstreamWriter.SetState(const state: TBitstreamBufferState); begin mask := state.mask; cur^ := state.current_bits; end; procedure TBitstreamWriter.ResetBufferPosition; var cache: TBitstreamBufferState; begin cache := GetState; cur := buffer; SetState(cache); end; { TBitstreamReader } constructor TBitstreamReader.Create(const memory_buffer: pbyte); begin buffer := plongword (memory_buffer); cur := buffer; used := 0; end; function TBitstreamReader.GetPosition: longword; begin result := (cur - buffer) << 2; //used dword count result += (used + 7) shr 3; //+ buffer end; function TBitstreamReader.GetBitCount: longword; begin result := 32 * longword(cur - buffer) + used; end; function TBitstreamReader.GetUncachedPosition: longword; begin result := (cur - buffer) * 4; //used dword count end; function TBitstreamReader.IsByteAligned: boolean; begin result := true; if used mod 8 > 0 then result := false; end; function TBitstreamReader.Read: longword; begin result := (cur^ shr used) and 1; used += 1; if used = 32 then begin cur += 1; used := 0; end; end; function TBitstreamReader.Read(count: longword): longword; var bits_left: integer; begin result := cur^ shr used; if count < (32 - used) then begin result := result and ($ffffffff shr (32 - count)); used += count; end else begin bits_left := count - (32 - used); cur += 1; if bits_left > 0 then result := result or (cur^ and ($ffffffff shr (32 - bits_left))) shl (32 - used); used := bits_left; end; end; function TBitstreamReader.Show(const count: longword): longword; var bits_left: integer; begin result := cur^ shr used; if count < (32 - used) then begin result := result and ($ffffffff shr (32 - count)); end else begin bits_left := count - (32 - used); if bits_left > 0 then result := result or ((cur + 1)^ and ($ffffffff shr (32 - bits_left))) shl (32 - used); end; end; procedure TBitstreamReader.Skip(const count: longword); begin if count < (32 - used) then begin used += count; end else begin cur += 1; used := count - (32 - used); end; end; function TBitstreamReader.ReadInverse(bit_count: longword): longword; var i: integer; begin result := 0; for i := bit_count - 1 downto 0 do result := result or Read() shl i; end; function TBitstreamReader.GetState: TBitstreamBufferState; begin result.current_bits := cur^; end; procedure TBitstreamReader.SetState(const state: TBitstreamBufferState); begin cur^ := state.current_bits; end; procedure TBitstreamReader.ResetBufferPosition; begin cur := buffer; end; function TBitstreamReader.GetInternalState: TBitstreamBufferState; begin result.current_bits := cur - buffer; result.mask := used; end; procedure TBitstreamReader.SetInternalState(const state: TBitstreamBufferState); begin cur := buffer + state.current_bits; used := state.mask; end; { TSlidingBuffer } constructor TSlidingBuffer.Init(); begin _buffer := getmem(2 * MAX_BLOCK_SIZE); _buffer += MAX_BLOCK_SIZE; _previous_bytes_count := 0; end; destructor TSlidingBuffer.Done; begin freemem(_buffer - MAX_BLOCK_SIZE); end; function TSlidingBuffer.GetWindow: pbyte; begin result := _buffer; end; procedure TSlidingBuffer.InsertData(const data: pbyte; const size: integer); begin Assert(size <= MAX_BLOCK_SIZE, 'cannot insert more data than allocated range'); if _previous_bytes_count > 0 then move((_buffer + _previous_bytes_count - MAX_BLOCK_SIZE)^, (_buffer - MAX_BLOCK_SIZE)^, MAX_BLOCK_SIZE); move(data^, _buffer^, size); _previous_bytes_count := size; end; const SEARCH_DEPTH: array[0..MAX_COMPRESSION_LEVEL] of Integer = (0, 1, 8, 16, 32, 48, 64, 32*1024); SEARCH_MATCH_DIVIDER: array[0..MAX_COMPRESSION_LEVEL] of Integer = (1, 4, 4, 4, 4, 4, 2, 1); HASH_BITS = 18; { Generate 16bit hash from first 3 bytes of a given pointer } function hash3(const x: pbyte): integer; inline; begin result := ((x+2)^ shl 10) xor ((x+1)^ shl 5) xor x^; end; { TMatchSearcher } constructor TMatchSearcher.Create; begin _sbuffer.Init(); _max_search_depth := SEARCH_DEPTH[0]; _max_search_match_length := MAX_DEFLATE_MATCH_LENGTH div SEARCH_MATCH_DIVIDER[0]; _links := getmem(2 * DICT_SIZE * sizeof(integer)); _last_seen_idx := getmem(1 shl HASH_BITS * sizeof(integer)); //must be equal to hash bits Filldword(_last_seen_idx^, 1 shl HASH_BITS, $ffffffff ); //negative indices don't get searched, so use -1 _current_chunk_size := 0; _bytes_processed := 0; end; destructor TMatchSearcher.Destroy; begin freemem(_links); freemem(_last_seen_idx); _sbuffer.Done; inherited; end; procedure TMatchSearcher.SetCompressionLevel(const level: integer); begin Assert(level <= MAX_COMPRESSION_LEVEL, 'invalid compression level'); _max_search_depth := SEARCH_DEPTH[level]; _max_search_match_length := MAX_DEFLATE_MATCH_LENGTH div SEARCH_MATCH_DIVIDER[level]; end; { Take next data chunk and create links between the occurences of the same hash } procedure TMatchSearcher.NewData(const data: pbyte; const size: integer); var i, key, last_seen: integer; begin _sbuffer.InsertData(data, size); _bytes_processed += _current_chunk_size; _current_chunk_size := size; move((_links + DICT_SIZE)^, _links^, DICT_SIZE * sizeof(integer)); for i := 0 to size - 1 do begin key := hash3(data + i); last_seen := _last_seen_idx[key]; _links[DICT_SIZE + i] := last_seen; _last_seen_idx[key] := i + _bytes_processed; end; end; { Compare strings, return length of the match. Loop at the last byte of the window. } function compare_strings_loop(const window, string_data: pbyte; const max_match_length, window_size: integer): integer; var i, k: integer; begin result := 0; i := 0; for k := 0 to max_match_length - 1 do begin if window[i] = string_data[k] then result += 1 else exit; i += 1; if i = window_size then i := 0; end; end; { Compare strings, return length of the match. There must be at least max_match_length valid bytes in window. } function compare_strings(const window, string_data: pbyte; const max_match_length: integer): integer; var i: integer; begin result := 0; for i := 0 to max_match_length - 1 do if window[i] = string_data[i] then result += 1 else exit; end; { Compare last byte of the window against current string. } function compare_strings_rle(const string_data: pbyte; const byte_value, max_match_length: integer): integer; var i: integer; begin result := 0; for i := 0 to max_match_length - 1 do if byte_value = string_data[i] then result += 1 else exit; end; function InitSearchResult(const distance, best_match: longword): TSearchResult; inline; begin longword(result) := longword( best_match << 16 or distance ); end; function TMatchSearcher.Search(const window_end_ptr, str: pbyte; const current_idx, max_match_length: integer): TSearchResult; var i: integer; links: pinteger; best_match_distance: integer; best_match_length: integer; last_seen_idx: integer; min_allowed_idx: integer; previous_idx: integer; length: integer; distance: integer; begin Assert(max_match_length >= 3); //test if searched string is a repetition of the last byte before full search best_match_length := compare_strings_rle(str, window_end_ptr[-1], max_match_length); result := InitSearchResult(1, best_match_length); if best_match_length >= _max_search_match_length then exit; last_seen_idx := current_idx - _bytes_processed; links := _links + DICT_SIZE; best_match_distance := 1; min_allowed_idx := max(0, current_idx - DICT_SIZE); //early termination if links of the next searched position are much closer than current ones if links[last_seen_idx] < links[last_seen_idx + 1] - (DICT_SIZE shr 1) then exit; for i := _max_search_depth - 1 downto 0 do begin //if the position falls out of the sliding window_end_ptr range, it's too old and cannot be searched previous_idx := links[last_seen_idx]; if previous_idx < min_allowed_idx then begin break; end; last_seen_idx := previous_idx - _bytes_processed; //compare data at given positions distance := current_idx - previous_idx; if previous_idx + max_match_length < current_idx then length := compare_strings(window_end_ptr - distance, str, max_match_length) else length := compare_strings_loop(window_end_ptr - distance, str, max_match_length, distance); if length > best_match_length then begin best_match_length := length; best_match_distance := distance; if length >= _max_search_match_length then break; end; end; Assert(best_match_distance >= 0); result := InitSearchResult(best_match_distance, best_match_length); end; { Find best match between current bytes and bytes already seen. If distance = 0 & length = 0 - no occurences were found } function TMatchSearcher.FindMatch(const str: pbyte; const data_index: integer): TSearchResult; var max_match_length: integer; current_idx: integer; window_end_ptr: pbyte; begin result := InitSearchResult(0, 0); //reduce maximum possible match length at the end of the stream //we need at least 3 bytes to be able to run search (hash function takes 3 bytes as input) max_match_length := min(MAX_DEFLATE_MATCH_LENGTH, _current_chunk_size - data_index); if max_match_length <= 2 then exit; //beginning of a stream, nothing to search if _bytes_processed + data_index = 0 then exit; //get proper search window and currently searched string's file index window_end_ptr := _sbuffer.GetWindow + data_index; current_idx := _bytes_processed + data_index; result := Search(window_end_ptr, str, current_idx, max_match_length); end; { huff_FillCanonDecodingTable code_lengths - array of lengths, indexed by code count - number of codes to fill } procedure huff_FillCanonDecodingTable(var tab: TDecodeTable; const code_lengths: pbyte; const count: integer); var len: integer; //current length; all deflate code lengths are between 1 and 15 same_length_count: integer; i, j: integer; begin j := 0; tab.codes_of_legth[0] := 0; for len := 1 to 15 do begin same_length_count := 0; for i := 0 to count - 1 do begin if code_lengths[i] = len then begin tab.code_value[j] := i; j += 1; same_length_count += 1; end; end; tab.codes_of_legth[len] := same_length_count; end; end; (* huff_code2canon zmen huffkody na kanonicke huffkody *) procedure huff_code2canon(const codes: vlc_code_p); var len: integer; b, i: integer; begin b := 0; for len := 1 to 15 do begin for i := 0 to END_OF_STREAM do begin if codes[i].code_len = len then begin codes[i].bits := b; b += 1; end; end; b := b shl 1; end; end; (******************************************************************************* generovanie stromu *) //huff_init procedure huff_init (out h: huff_tree_t); var i: integer; begin i := sizeof(longword) * (END_OF_STREAM + 1); h.counts := getmem(i); FillByte(h.counts^, i, 0); i := sizeof(tree_node_t) * (END_OF_STREAM + 1) * 2; h.nodes := getmem(i); FillByte(h.nodes^, i, 0); i := sizeof(vlc_code_t) * (END_OF_STREAM + 1); h.codes := getmem(i); FillByte(h.codes^, i, 0); h.root_node := 0; end; //huff_free procedure huff_free (h: huff_tree_t); begin freemem( h.counts ); freemem( h.nodes ); freemem( h.codes ); end; procedure huff_raise_count(var h: huff_tree_t; const val: word); begin h.counts[val] += 1; end; (******************************************************************************* scale_counts *) procedure scale_counts(var params: huff_tree_t; const max_cnt: word); var counts: plongword; b: integer; max: longword; new: longword; ratio: single; begin counts := params.counts; max := 0; for b := 0 to END_OF_STREAM do if counts[b] > max then max := counts[b]; if max <= max_cnt then exit; ratio := single( max ) / max_cnt; for b := 0 to END_OF_STREAM do if counts[b] > 0 then begin new := round( counts[b] / ratio ); if new = 0 then counts[b] := 1 else counts[b] := new; end; end; (******************************************************************************* build_tree zostavenie huffmanovho stromu, nastavenie indexu korenoveho uzla stromu *) procedure build_tree (var params: huff_tree_t); var nodes: tree_node_p; counts: plongword; next_free: integer; i: integer; min_1, min_2: integer; begin nodes := params.nodes; counts := params.counts; for i := 0 to END_OF_STREAM do nodes[i].weight := counts[i]; nodes[TOP_NODE].weight := High(word); next_free := END_OF_STREAM; while true do begin next_free := next_free + 1; min_1 := TOP_NODE; min_2 := TOP_NODE; for i := 0 to next_free - 1 do if nodes[i].weight > 0 then begin if nodes[i].weight < nodes[min_1].weight then begin min_2 := min_1 ; min_1 := i ; end else if nodes[i].weight < nodes[min_2].weight then min_2 := i; end; if min_2 = TOP_NODE then break; nodes[next_free].weight := nodes[min_1].weight + nodes[min_2].weight; nodes[next_free].child_0 := min_1; nodes[next_free].child_1 := min_2; nodes[min_1].weight := 0; nodes[min_2].weight := 0; end; params.root_node := next_free - 1; end; (******************************************************************************* tree_to_code z korenoveho uzlu rekurzivne postupuj cez jednotlive listy a zapis cestu ako VLC kod + pocet jeho bitov do tabulky *) procedure tree_to_code (var p: huff_tree_t; code_len_current, node: integer); begin if node <= END_OF_STREAM then p.codes[node].code_len := code_len_current else begin code_len_current := code_len_current + 1; tree_to_code (p, code_len_current, p.nodes[node].child_0 ); tree_to_code (p, code_len_current, p.nodes[node].child_1 ); end; end; (******************************************************************************* huff_build_tree zostav huffmanov strom, vrat index korenoveho uzla stromu *) procedure huff_build_tree (var h: huff_tree_t; max_cnt: word = 255); begin scale_counts (h, max_cnt); build_tree (h); tree_to_code (h, 0, h.root_node); huff_code2canon (h.codes); end; //special case, when there is only one symbol in alphabet - can happen with distance trees procedure huff_build_distance_tree (var h: huff_tree_t; max_cnt: word = 255); const MAX_DIST_CODES = 32; var i: integer; last_nonzero_idx: integer; used_symbols_count: integer; begin used_symbols_count := 0; last_nonzero_idx := 0; for i := 0 to MAX_DIST_CODES - 1 do if h.counts[i] > 0 then begin used_symbols_count += 1; if used_symbols_count > 1 then break; last_nonzero_idx := i; end; if used_symbols_count > 1 then begin huff_build_tree(h, max_cnt); end else begin h.codes[last_nonzero_idx].code_len := 1; h.codes[last_nonzero_idx].bits := 0; h.root_node := last_nonzero_idx; end end; { Length2code Map match length value to length code for huff encoding. } function Length2code (const len: longword): longword; const table: array[byte] of byte = ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29 ); begin Assert(len >= 3); result := 256 + table[len-3]; //0..255 = literals, 256 = block end end; { Code2length Map decoded length code to length value. } function Code2length(const code: longword): longword; inline; const table: array[0..28] of byte = ( 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 0 ); begin result := table[ code - 257 ]; if result = 0 then result := 258; end; { Distance2code Map distance value to distance code for huff encoding. } function Distance2code(const dist: longword): longword; const table_512: array [0..511] of byte = ( 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17 ); table_128: array[2..127] of byte = ( 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 ); begin if dist <= 512 then result := table_512[dist - 1] else begin result := table_128[(dist - 1) shr 8]; end; end; { Code2distance Map decoded distance code to distance value. } function Code2distance(const code: longword): longword; inline; const table: array[0..29] of word = ( 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 ); begin result := table[ code ]; end; { TVlcWriter } procedure TVlcWriter.SetTrees(const bitstream: TBitstreamWriter; const length_tree, distance_tree: vlc_code_p); begin bs := bitstream; len_tree := length_tree; dist_tree := distance_tree; end; procedure TVlcWriter.WriteMatch(const len, dist: longword); var code, bits: longword; begin //length code := Length2code(len); bs.Write(len_tree[code].bits, len_tree[code].code_len); if (code >= 265) and (code < 285) then begin //extra bits bits := 5 - (284 - code) div 4; bs.Write(len - 3, bits); end; //offset / distance code := Distance2code(dist); bs.Write(dist_tree[code].bits, dist_tree[code].code_len); if code >= 4 then begin bits := code div 2 - 1; bs.Write(dist - 1, bits); end; end; procedure TVlcWriter.WriteLiteral(const c: byte); begin bs.Write(len_tree[c].bits, len_tree[c].code_len); end; procedure TVlcWriter.WriteBlockEnd; begin bs.Write(len_tree[END_OF_BLOCK_CODE].bits, len_tree[END_OF_BLOCK_CODE].code_len); end; { vlc_ReadCode Read one canonical huffman code using the given decoding table. Maximum symbol length cannot exceed 15 bits (maximum allowed by Deflate), otherwise reading fails and bad things happen. } function vlc_ReadCode(const bs: TBitstreamReader; const table: TDecodeTable): integer; var i, codes, diff, value: longword; value_low: longword; //lowest value for code of given length codes_skipped: longword; //how many codes we already skipped number_of_codes: pword; //# codes of given length begin i := 0; value := 0; codes_skipped := 0; value_low := 0; codes := 0; number_of_codes := @table.codes_of_legth[0]; repeat codes_skipped += codes; value_low += codes; value_low := value_low shl 1; i += 1; Assert(i < 16, 'could not read vlc code'); codes := number_of_codes[i]; value := (value shl 1) or bs.read(); diff := value - value_low; until codes > diff; result := table.code_value[ codes_skipped + diff ]; end; { vlc_ReadCode Read one variable-length code using the given lookup table. If the code couldn't be read, try to read with the canon huff decoding table. } function vlc_ReadCode(const bs: TBitstreamReader; const dectable: TDecodeLookupTables): integer; var bits: integer; sb: TSymbolBits; begin bits := bs.Show(TAB0_BITS); sb := dectable.codes_t0[bits]; result := sb.symbol; if (sb.nbits = 0) then begin result := vlc_ReadCode(bs, dectable.canon_table); end else bs.Skip(sb.nbits); end; { InitDecodeLut Assign canonical huff code bits to each code by its length and build a look-up table for fast decoding. Uses separate code bits runs for each code length. Makes 2 passes over input data, one pass could be removed if code length stats were provided beforehand, but it doesn't gain much. } function InitDecodeLut(const code_lengths: pbyte; const count: integer): TDecodeLookupTables; var i, len, code_bits: integer; value, k, b: integer; sb: TSymbolBits; num_lengths: array[0..15] of integer; //# of codes of given length length_bits: array[0..15] of integer; //canonical bits for codes of given length begin FillByte(num_lengths, sizeof(num_lengths), 0); FillByte(length_bits, sizeof(length_bits), 0); for i := 0 to count - 1 do begin num_lengths[code_lengths[i]] += 1; end; b := 0; for i := 1 to 15 do begin length_bits[i] := b; b += num_lengths[i]; b := b << 1; end; FillByte(result.codes_t0, sizeof(result.codes_t0), 0); for i := 0 to count - 1 do begin len := code_lengths[i]; if not (len in [1..TAB0_BITS]) then continue; code_bits := length_bits[len]; length_bits[len] += 1; sb.symbol := i; sb.nbits := len; //insert each code length + junk code_bits combination code_bits := SwapBits(code_bits, len); for k := 0 to 1 << (TAB0_BITS - len) - 1 do begin value := (k << len) or code_bits; result.codes_t0[value] := sb; end; end; end; { TVlcReader } procedure TVlcReader.SetTables(const bitreader: TBitstreamReader; const literal_table, distance_table: TDecodeLookupTables); begin bs := bitreader; literal_dectable := literal_table; distance_dectable := distance_table; end; procedure TVlcReader.ReadCodePair(out length, distance: word); const LITERAL_EXTRA_BITS: array[257..285] of byte = ( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 ); var code, extra_bits: longword; begin length := 1; code := vlc_ReadCode(bs, literal_dectable); //decode literals, length / distance, end of block if code < 256 then begin distance := code; end else if code > 256 then begin length := Code2length(code); extra_bits := LITERAL_EXTRA_BITS[code]; if extra_bits > 0 then begin length += bs.Read(extra_bits); end; code := vlc_ReadCode(bs, distance_dectable); distance := Code2distance(code); if code >= 4 then begin distance += bs.Read(code >> 1 - 1); end; end else begin length := END_OF_BLOCK; distance := 0; end; end; const //code ordering for header code length alphabet //see RFC1951 section 3.2.7. Compression with dynamic Huffman codes (BTYPE=10) HeaderCodeLengthOrder: array[0..18] of byte = ( 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 ); LITERAL_MATCH_ELEMENTS = END_OF_STREAM + 1; DISTANCE_ELEMENTS = 30; type TRleResult = record size: integer; nonzero: integer; rl_pairs: array[0..LITERAL_MATCH_ELEMENTS-1] of record code_len, repeats: byte; end; code_lengths: array[0..LITERAL_MATCH_ELEMENTS-1] of byte; end; { Build length-limited huffman code tree. Length is limited by reducing the code occurence's statistics. Less accuracy means that the differences between code lengths are reduced, too. This is somewhat suboptimal. Distance trees are special, because there are cases where they contain only one used symbol. } procedure build_limited_tree(var tree: huff_tree_t; limit, size: word; const for_distance: boolean = false); var i: integer; tree_ok: boolean; freq_limit: integer; begin freq_limit := 256*2; repeat if for_distance then huff_build_distance_tree(tree, freq_limit) else huff_build_tree(tree, freq_limit); tree_ok := true; for i := 0 to size - 1 do if tree.codes[i].code_len > limit then begin tree_ok := false; freq_limit := freq_limit shr 1; break; end; until tree_ok; //reverse bits for faster bitwriting for i := 0 to size - 1 do begin tree.codes[i].bits := SwapBits(tree.codes[i].bits, tree.codes[i].code_len); end; end; { WriteCodeLengths Write lengths of tree's codes using, encoded by header_codes } procedure WriteCodeLengths (const bs: TBitstreamWriter; const header_codes, tree_codes: vlc_code_p; const size: integer); var i, k: integer; bits, length: longword; begin for i := 0 to size - 1 do begin k := tree_codes[i].code_len; bits := header_codes[k].bits; length := header_codes[k].code_len; bs.Write(bits, length); end; end; procedure WriteCodeLengthsRle (const bs: TBitstreamWriter; const header_codes: vlc_code_p; const rle_res: TRleResult); var i, k: integer; bits, length: longword; size: integer; begin size := rle_res.size; i := 0; while i < size do begin k := rle_res.rl_pairs[i].code_len; bits := header_codes[k].bits; length := header_codes[k].code_len; bs.Write(bits, length); if k = 16 then begin //Copy the previous code length, the next 2 bits indicate repeat length bs.Write(rle_res.rl_pairs[i].repeats, 2); end else if k = 17 then begin //Repeat a code length of 0, 3 bits of length bs.Write(rle_res.rl_pairs[i].repeats, 3); end else if k = 18 then begin //Repeat a code length of 0, 7 bits of length bs.Write(rle_res.rl_pairs[i].repeats, 7); end; i += 1; end; end; { GetHclen Reduce the number of header tree code lengths that need to be stored. Saves a couple of bits per block. } function GetHclen(const min_length, max_length: integer): integer; var i, length: integer; begin result := 19; for i := 18 downto 5 do begin length := HeaderCodeLengthOrder[i]; if (length >= min_length) and (length <= max_length) then break else result -= 1; end; result -= 4; end; function RleCodeBuffer(const src: array of byte; const size: integer): TRleResult; var i, k, m: integer; value, next_value: byte; max_lookahead: integer; run_length: integer; nonzero: integer; procedure AddRl(const code_len, repeats: byte); begin result.rl_pairs[m].code_len := code_len; result.rl_pairs[m].repeats := repeats; m += 1; end; begin nonzero := 0; i := 0; m := 0; while i < size do begin value := src[i]; //zero runs if value = 0 then begin max_lookahead := 137; if size - i < max_lookahead then max_lookahead := size - i; run_length := 1; for k := i + 1 to i + max_lookahead - 1 do begin next_value := src[k]; if value = next_value then run_length += 1 else break; end; if run_length < 3 then begin AddRl(0, 0); i += 1; end else begin i += run_length; if run_length < 11 then AddRl(17, run_length - 3) //Repeat a code length of 0 for 3 - 10 times. else AddRl(18, run_length - 11); //Repeat a code length of 0 for 11 - 138 times. end; end //nonzero runs else begin max_lookahead := 6; if size - i <= max_lookahead then max_lookahead := size - i - 1; run_length := 0; for k := i + 1 to i + max_lookahead do begin next_value := src[k]; if value = next_value then run_length += 1 else break; end; if run_length >= 3 then begin AddRl(value, 0); AddRl(16, run_length - 3); //Copy the previous code length 3 - 6 times. i += run_length + 1; nonzero += run_length + 1; end else begin AddRl(value, 0); i += 1; nonzero += 1; end; end; end; result.size := m; result.nonzero := nonzero; for i := 0 to result.size - 1 do result.code_lengths[i] := result.rl_pairs[i].code_len; end; { TBlockWriter } { BeginBlock Write block header. header bytes: BFINAL - 1 bit BTYPE - 2 bits } procedure TBlockWriter.BeginBlock; begin bitWriter.Write(longword( _last ) and 1); bitWriter.Write(longword( _block_type ), 2); end; { BuildFixedHuffCodes Create vlc trees for blocks compressed using fixed Huffman codes } procedure TBlockWriter.BuildFixedHuffCodes; var i, bits: integer; function vlc(const b, len: integer): vlc_code_t; begin result.bits := SwapBits(b, len); result.code_len := len; bits += 1; end; begin bits := 0; for i := 256 to 279 do literal_codes[i] := vlc(bits, 7); bits := bits << 1; for i := 0 to 143 do literal_codes[i] := vlc(bits, 8); for i := 280 to 287 do literal_codes[i] := vlc(bits, 8); bits := bits << 1; for i := 144 to 255 do literal_codes[i] := vlc(bits, 9); for i := 0 to 29 do distance_codes[i] := vlc(i, 5); end; { BuildHuffCodes Create vlc trees from accumulated statistics for literal/length and distance coding } procedure TBlockWriter.BuildHuffCodes; var i: integer; tree: huff_tree_t; begin //generate literal/match codes huff_init(tree); for i := 0 to LITERAL_MATCH_ELEMENTS - 1 do tree.counts[i] := literal_match_stats[i]; tree.counts[END_OF_BLOCK_CODE] := 1; build_limited_tree(tree, 15, LITERAL_MATCH_ELEMENTS); Move(tree.codes ^, literal_codes ^, LITERAL_MATCH_ELEMENTS * sizeof(vlc_code_t)); huff_free(tree); //generate distance codes huff_init(tree); for i := 0 to DISTANCE_ELEMENTS - 1 do tree.counts[i] := distance_stats[i]; build_limited_tree(tree, 15, DISTANCE_ELEMENTS, true); Move(tree.codes ^, distance_codes ^, DISTANCE_ELEMENTS * sizeof(vlc_code_t)); huff_free(tree); end; { WriteCodingTrees Store literal/length and distance trees. Canonical huff coding is used, so code lengths are enough to store the trees. } procedure TBlockWriter.WriteCodingTrees; var max_used_codelength, min_used_codelength: integer; block_header_tree: huff_tree_t; procedure UpdateCodeLengthStats(const code_lengths: array of byte; const size: integer); var i: integer; length: integer; begin i := 0; while i < size do begin length := code_lengths[i]; huff_raise_count(block_header_tree, length); i += 1; if (length < min_used_codelength) and (length > 0) then min_used_codelength := length; if length > max_used_codelength then max_used_codelength := length; end; end; var header_codes: vlc_code_p; hclen, hlit, hdist: integer; i: integer; codelen_buffer: array[0..LITERAL_MATCH_ELEMENTS - 1] of byte; distance_rle: TRleResult; literal_rle: TRleResult; begin //build header tree for coding the literal/length and distance tree code lengths max_used_codelength := 0; min_used_codelength := 15; huff_init(block_header_tree); for i := 0 to LITERAL_MATCH_ELEMENTS - 1 do codelen_buffer[i] := literal_codes[i].code_len; literal_rle := RleCodeBuffer(codelen_buffer, LITERAL_MATCH_ELEMENTS); UpdateCodeLengthStats(literal_rle.code_lengths, literal_rle.size); for i := 0 to DISTANCE_ELEMENTS - 1 do codelen_buffer[i] := distance_codes[i].code_len; distance_rle := RleCodeBuffer(codelen_buffer, DISTANCE_ELEMENTS); UpdateCodeLengthStats(distance_rle.code_lengths, distance_rle.size); build_limited_tree(block_header_tree, 7, 19); //store the header tree code lengths //when do we want to define a smaller hdist? if we used a smaller encoding window, thus limiting the distances? hlit := 286 - 257; hdist := 29; hclen := GetHclen(min_used_codelength, max_used_codelength); bitWriter.Write(hlit, 5); bitWriter.Write(hdist, 5); bitWriter.Write(hclen, 4); header_codes := block_header_tree.codes; for i := 0 to hclen + 4 - 1 do bitWriter.Write( header_codes[ HeaderCodeLengthOrder[i] ].code_len, 3 ); //store codes of literal/length and distance trees WriteCodeLengthsRle(bitWriter, header_codes, literal_rle); WriteCodeLengthsRle(bitWriter, header_codes, distance_rle); huff_free(block_header_tree); end; { WriteBlockEncoded Write a complete block into bitstream: literals and match length / distance pairs, END_OF_BLOCK symbol. Handles distinction between blocks compressed using fixed or dynamic huff codes } procedure TBlockWriter.WriteBlockEncoded(const search_results: PLiteralMatch; const size: integer); var i: integer; lm: TLiteralMatch; vlc: TVlcWriter; begin if _block_type = BTDynamic then begin BuildHuffCodes; WriteCodingTrees(); end else BuildFixedHuffCodes(); vlc.SetTrees(bitWriter, literal_codes, distance_codes); for i := 0 to size - 1 do begin lm := search_results[i]; if lm.match_length > 0 then vlc.WriteMatch(lm.match_length, lm.offset) else vlc.WriteLiteral(lm.literal); end; vlc.WriteBlockEnd(); end; { WriteBlockRaw Write a raw block into bitstream: copy input values. Raw block header: n bits - byte alignment 16 bits - data length 16 bits - inverted data length } procedure TBlockWriter.WriteBlockRaw(const rawdata: pbyte; const rawsize: integer); var i: integer; begin bitWriter.ByteAlign; bitWriter.Write(rawsize, 16); bitWriter.Write(rawsize xor $ffff, 16); for i := 0 to rawsize - 1 do bitWriter.Write(rawdata[i], 8); //todo: memcpy end; constructor TBlockWriter.Create(const output_buffer: pbyte); begin bitWriter := TBitstreamWriter.Create(output_buffer); bs_cache := bitWriter.GetState; literal_match_stats := GetMem(LITERAL_MATCH_ELEMENTS * sizeof(integer)); distance_stats := Getmem(DISTANCE_ELEMENTS * sizeof(integer)); literal_codes := GetMem(LITERAL_MATCH_ELEMENTS * sizeof(vlc_code_t)); distance_codes := GetMem(DISTANCE_ELEMENTS * sizeof(vlc_code_t)); end; destructor TBlockWriter.Destroy; begin inherited Destroy; bitWriter.Free; Freemem(literal_match_stats); Freemem(distance_stats); Freemem(literal_codes); Freemem(distance_codes); end; procedure TBlockWriter.InitNewBlock(const block_type: TBlockTypeEnum); begin FillDWord(literal_match_stats^, LITERAL_MATCH_ELEMENTS, 0); FillDWord(distance_stats^, DISTANCE_ELEMENTS, 0); bitWriter.SetState(bs_cache); _block_type := block_type; _last := false; end; procedure TBlockWriter.SetLast; begin _last := true; end; procedure TBlockWriter.UpdateStatsMatch(const len, dist: longword); begin literal_match_stats[ Length2code(len) ] += 1; distance_stats[ Distance2code(dist) ] += 1; end; procedure TBlockWriter.UpdateStatsLiteral(const literal: byte); begin literal_match_stats[ literal ] += 1; end; procedure TBlockWriter.WriteBlock(const rawdata: pbyte; const rawsize: integer; const search_results: PLiteralMatch; const size: integer; const keep_buffer: boolean); begin if not keep_buffer then bitWriter.ResetBufferPosition; BeginBlock(); if _block_type <> BTRaw then WriteBlockEncoded(search_results, size) else WriteBlockRaw(rawdata, rawsize); end; procedure TBlockWriter.Done; begin //throw away bs cache bs_cache := bitWriter.GetState; end; { GetStreamSize Returns number of whole bytes that were written into bitstream for current block. The last written bit doesn't have to be at a byte aligned position, so we need to cache the write buffer and mask to put the bits in the next processed block. If the current block is the last one processed, the outstanding bits must be counted into the stream size (they would be lost otherwise). } function TBlockWriter.GetStreamSize: integer; begin if not _last then begin result := bitWriter.GetUnbufferedByteSize; end else begin bitWriter.Close; result := bitWriter.GetByteSize; end; end; { TBlockReader } { ReadHeaderCodes Read code lengths and generate tables for dynamic block decoding } procedure TBlockReader.ReadHeaderCodes(const bs: TBitstreamReader); const MAX_CODE_LENGTHS = 286 + 32; //# of Literal/Length codes + # of Distance codes var literal_dectable, //literal/length decoding table distance_dectable: TDecodeLookupTables; //distance decoding table hlit, hdist, hclen: word; len, last_len: integer; //code length, previous code length i, k, extra_bits: longword; code_lengths: array[0..MAX_CODE_LENGTHS-1] of byte; dt: TDecodeLookupTables; begin hlit := bs.Read(5) + 257; hdist := bs.Read(5) + 1; hclen := bs.Read(4) + 4; //get code_len codes FillByte(code_lengths, 19, 0); for i := 0 to hclen - 1 do begin k := HeaderCodeLengthOrder[i]; code_lengths[k] := bs.Read(3); end; dt := InitDecodeLut(code_lengths, 19); //decode symbols FillByte(code_lengths, MAX_CODE_LENGTHS, 0); i := 0; last_len := 16; while i < hlit + hdist do begin len := vlc_ReadCode(bs, dt); if len < 16 then begin code_lengths[i] := len; i += 1; last_len := len; end else case len of 16: begin //rep previous length Assert(last_len <> 16, 'dynamic block header error'); extra_bits := bs.Read(2); k := i; i += extra_bits + 3; for k := k to i - 1 do code_lengths[k] := last_len; end; 17: begin //rep zero length extra_bits := bs.Read(3); i += extra_bits + 3; end; 18: begin //rep zero length extra_bits := bs.Read(7); i += extra_bits + 11; end end; end; literal_dectable := InitDecodeLut(pbyte(@code_lengths), hlit); huff_FillCanonDecodingTable(literal_dectable.canon_table, pbyte(@code_lengths), hlit); distance_dectable := InitDecodeLut(pbyte(@code_lengths) + hlit, hdist); huff_FillCanonDecodingTable(distance_dectable.canon_table, pbyte(@code_lengths) + hlit, hdist); _vlc.SetTables(bs, literal_dectable, distance_dectable); end; procedure TBlockReader.InitFixedCodes(const bs: TBitstreamReader); var literal_dectable, //literal/length decoding table distance_dectable: TDecodeLookupTables; //distance decoding table code_lengths: array[0..287] of byte; i: integer; begin for i := 256 to 279 do code_lengths[i] := 7; for i := 0 to 143 do code_lengths[i] := 8; for i := 280 to 287 do code_lengths[i] := 8; for i := 144 to 255 do code_lengths[i] := 9; literal_dectable := InitDecodeLut(pbyte(@code_lengths), 288); huff_FillCanonDecodingTable(literal_dectable.canon_table, pbyte(@code_lengths), 288); for i := 0 to 31 do code_lengths[i] := 5; distance_dectable := InitDecodeLut(pbyte(@code_lengths), 32); huff_FillCanonDecodingTable(distance_dectable.canon_table, pbyte(@code_lengths), 32); _vlc.SetTables(bs, literal_dectable, distance_dectable); end; constructor TBlockReader.Create; begin _vlc := TVlcReader.Create; end; destructor TBlockReader.Destroy; begin _vlc.Free; inherited; end; { ReadBlockHeader Reads block header including length trees for codes } function TBlockReader.ReadBlockHeader(const bs: TBitstreamReader): TBlockContext; var block: TBlockContext; t: integer; begin block.last := bs.Read() = 1; block.btype := TBlockTypeEnum( bs.Read(2) ); _block_type := block.btype; case block.btype of BTRaw: begin while not bs.IsByteAligned() do bs.Read(); block.size := bs.Read(16); t := not integer(bs.Read(16)); Assert(block.size = t, 'blk size mismatch'); end; BTDynamic: begin ReadHeaderCodes(bs); end; BTFixed: begin InitFixedCodes(bs); end; end; Result := block; end; function TBlockReader.GetVlcReader(): TVlcReader; begin result := _vlc; end; end.