From 6ea0ea6a1afd3d0ae5f0e656be5d5b078a643dfe Mon Sep 17 00:00:00 2001 From: dpethes Date: Fri, 1 May 2020 07:43:32 +0200 Subject: [PATCH] model viewer: texture export --- model_viewer/hob_mesh.pas | 112 +- model_viewer/model_viewer.lpi | 37 +- model_viewer/model_viewer.pas | 89 +- model_viewer/util/crc32fast.pas | 78 + model_viewer/util/dc2_encoder.pas | 278 ++++ model_viewer/util/dc2_simple_api.pas | 93 ++ model_viewer/util/dc2core.pas | 1957 ++++++++++++++++++++++++++ model_viewer/util/png_writer.pas | 192 +++ model_viewer/util/prediction.pas | 204 +++ 9 files changed, 2987 insertions(+), 53 deletions(-) create mode 100644 model_viewer/util/crc32fast.pas create mode 100644 model_viewer/util/dc2_encoder.pas create mode 100644 model_viewer/util/dc2_simple_api.pas create mode 100644 model_viewer/util/dc2core.pas create mode 100644 model_viewer/util/png_writer.pas create mode 100644 model_viewer/util/prediction.pas diff --git a/model_viewer/hob_mesh.pas b/model_viewer/hob_mesh.pas index c2067c4..d61c889 100644 --- a/model_viewer/hob_mesh.pas +++ b/model_viewer/hob_mesh.pas @@ -4,7 +4,7 @@ unit hob_mesh; interface uses - Classes, SysUtils, gl, GLext, math, gvector, fpimgui, + Classes, SysUtils, gl, GLext, math, gvector, fpimgui, png_writer, hob_parser, hmt_parser; type @@ -25,6 +25,7 @@ type gl_tex_id: integer; width, height: integer; pixels: pbyte; + name: string; end; TVertexList = specialize TVector; @@ -40,6 +41,10 @@ type textures: boolean; end; + TMeshOpts = record + export_png_textures: boolean; + end; + { TModel single HOB mesh } @@ -54,12 +59,13 @@ type procedure HmtRead(stream: TMemoryStream); procedure HobRead(stream: TMemoryStream); procedure HobReadMesh(const mesh: THobObject); + procedure SaveMaterials(const mtl_name: string; const png_textures: boolean); public destructor Destroy; override; procedure Load(hob, hmt: TMemoryStream); procedure InitGL; procedure DrawGL(var opts: TRenderOpts); - procedure ExportObj(const obj_name: string); + procedure ExportObj(const obj_name: string; const png_textures: boolean); end; implementation @@ -194,11 +200,16 @@ procedure TModel.HmtRead(stream: TMemoryStream); end; var i: integer; + name: string; begin _hmt := ParseHmtFile(stream); SetLength(_materials, _hmt.material_count); - for i := 0 to _hmt.material_count - 1 do - SetTexByName(_materials[i], _hmt.materials[i].name_string); + for i := 0 to _hmt.material_count - 1 do begin + name := _hmt.materials[i].name_string; //preserve for obj/mtl export + _materials[i].name := name; + writeln('material: ', name); + SetTexByName(_materials[i], name); + end; end; @@ -365,8 +376,7 @@ const HeaderComment = 'Exported with HOB viewer'; DefaultMaterial = 'default'; - -procedure TModel.ExportObj(const obj_name: string); +procedure TModel.ExportObj(const obj_name: string; const png_textures: boolean); const DesiredUnitSize = 2; var @@ -384,6 +394,8 @@ var i,j,k: integer; vertex_counter: Integer; + mat: TMaterial; + mtl_name: string; function GetMaxCoord: double; var @@ -407,7 +419,8 @@ begin Rewrite(objfile); writeln(objfile, '# ' + HeaderComment); - writeln(objfile, 'mtllib ', obj_name + '.mtl'); + mtl_name := obj_name + '.mtl'; + writeln(objfile, 'mtllib ', mtl_name); //scale pass scaling_factor := 1; @@ -453,8 +466,10 @@ begin if face.material_index <> last_material_index then begin if face.material_index = -1 then writeln(objfile, 'usemtl ' + DefaultMaterial) - else - writeln(objfile, 'usemtl material_id', face.material_index); + else begin + mat := _materials[face.material_index]; + writeln(objfile, 
'usemtl ' + mat.name); + end; last_material_index := face.material_index; end; @@ -471,8 +486,85 @@ begin end; CloseFile(objfile); + SaveMaterials(mtl_name, png_textures); +end; - //SaveMaterials(pdo, obj_name); + +procedure TModel.SaveMaterials(const mtl_name: string; const png_textures: boolean); +var + mtl_file:TextFile; + tex_name: string; + mat: TMaterial; + pixbuf: pbyte; + + procedure Flip(const samples: integer); + var + y: Integer; + src, dst: Pbyte; + begin + src := mat.pixels + (mat.height - 1) * mat.width * samples; + dst := pixbuf; + for y := 0 to mat.height - 1 do begin + move(src^, dst^, mat.width * samples); + src -= mat.width * samples; + dst += mat.width * samples; + end; + end; + + procedure WriteMaterial; + begin + writeln(mtl_file, 'newmtl ', mat.name); //begin new material + if mat.has_texture then begin //texture + tex_name := ''; + if mat.bpp = 24 then begin + tex_name := mat.name+'.pnm'; + Flip(3); + if not png_textures then begin + pnm_save(tex_name, pixbuf, mat.width, mat.height) + end + else begin + tex_name := mat.name+'.png'; + png_write(tex_name, pixbuf, mat.width, mat.height, 24); + end; + end + else if mat.bpp = 8 then begin + tex_name := mat.name+'.pgm'; + Flip(1); + if not png_textures then begin + pgm_save(tex_name, pixbuf, mat.width, mat.height) + end + else begin + tex_name := mat.name+'.png'; + png_write(tex_name, pixbuf, mat.width, mat.height, 8); + end; + end; + if tex_name <> '' then + writeln(mtl_file, 'map_Kd ' + tex_name); + end; + writeln(mtl_file, 'Ka 1.000 1.000 1.000'); //ambient color + writeln(mtl_file, 'Kd 1.000 1.000 1.000'); //diffuse color + writeln(mtl_file, 'Ks 1.000 1.000 1.000'); //specular color + writeln(mtl_file, 'Ns 100.0'); //specular weight + writeln(mtl_file, 'illum 2'); //Color on and Ambient on, Highlight on + writeln(mtl_file); + end; + +begin + pixbuf := GetMem(512*512); //overkill for RS texture sizes + + AssignFile(mtl_file, mtl_name); + Rewrite(mtl_file); + + writeln(mtl_file, '# ' + HeaderComment); + mat.name := DefaultMaterial; + mat.has_texture := false; + WriteMaterial(); + for mat in _materials do begin + WriteMaterial(); + end; + + CloseFile(mtl_file); + Freemem(pixbuf); end; end. diff --git a/model_viewer/model_viewer.lpi b/model_viewer/model_viewer.lpi index 08c2ee1..bcc536c 100644 --- a/model_viewer/model_viewer.lpi +++ b/model_viewer/model_viewer.lpi @@ -1,7 +1,7 @@ - + @@ -28,7 +28,7 @@ - + @@ -51,16 +51,23 @@ - + + + + + + + + - + @@ -98,6 +105,26 @@ + + + + + + + + + + + + + + + + + + + + @@ -108,7 +135,7 @@ - + diff --git a/model_viewer/model_viewer.pas b/model_viewer/model_viewer.pas index e363308..9f0376a 100644 --- a/model_viewer/model_viewer.pas +++ b/model_viewer/model_viewer.pas @@ -21,7 +21,8 @@ program model_viewer; uses sysutils, classes, math, strutils, gvector, gl, glu, glext, sdl2, fpimgui, fpimgui_impl_sdlgl2, - rs_dat, hob_mesh; + rs_dat, hob_mesh, + png_writer, dc2core; const SCR_W_INIT = 1280; @@ -56,7 +57,8 @@ var pitch: single; x, y: single; autorotate: boolean; - opts: TRenderOpts; + render: TRenderOpts; + mesh: TMeshOpts; end; key_pressed: record @@ -124,9 +126,8 @@ end; // The main drawing function. 
-procedure DrawGLScene; +procedure DrawModel; begin - glClear( GL_COLOR_BUFFER_BIT or GL_DEPTH_BUFFER_BIT ); glLoadIdentity; if view.distance < ZoomIncrement then @@ -142,12 +143,8 @@ begin view.rotation_angle -= 360; if g_model <> nil then begin - g_model.DrawGL(view.opts); + g_model.DrawGL(view.render); end; - - igRender; - - SDL_GL_SwapWindow(g_window); end; @@ -237,12 +234,12 @@ begin view.y := 0; view.autorotate := true; - view.opts.fg_all := true; - view.opts.fg_to_draw := 0; - view.opts.wireframe := false; - view.opts.points := false; - view.opts.vcolors := true; - view.opts.textures := true; + view.render.fg_all := true; + view.render.fg_to_draw := 0; + view.render.wireframe := false; + view.render.points := false; + view.render.vcolors := true; + view.render.textures := true; end; @@ -272,7 +269,7 @@ begin except g_model_loading_failed := true; end; - view.opts.fg_to_draw := 0; + view.render.fg_to_draw := 0; end; @@ -318,30 +315,30 @@ begin //g_model rendering opts SDLK_w: if not key_pressed.wireframe then begin - view.opts.wireframe := not view.opts.wireframe; + view.render.wireframe := not view.render.wireframe; key_pressed.wireframe := true; end; SDLK_v: if not key_pressed.vcolors then begin - view.opts.vcolors := not view.opts.vcolors; + view.render.vcolors := not view.render.vcolors; key_pressed.vcolors := true; end; SDLK_p: if not key_pressed.points then begin - view.opts.points := not view.opts.points; + view.render.points := not view.render.points; key_pressed.points := true; end; SDLK_t: if not key_pressed.textures then begin - view.opts.textures := not view.opts.textures; + view.render.textures := not view.render.textures; key_pressed.textures := true; end; SDLK_f: - view.opts.fg_all := not view.opts.fg_all; + view.render.fg_all := not view.render.fg_all; SDLK_LEFT: - view.opts.fg_to_draw := max(0, view.opts.fg_to_draw - 1); + view.render.fg_to_draw := max(0, view.render.fg_to_draw - 1); SDLK_RIGHT: - view.opts.fg_to_draw += 1; + view.render.fg_to_draw += 1; SDLK_UP: if (g_selected_file_idx > 0) then begin g_selected_file_idx -= 1; @@ -420,34 +417,33 @@ end; procedure DrawGui; var - style: PImGuiStyle; file_item: TFileListItem; selected_item_idx: integer; selected_item: TFileListItem; i: Integer; + do_export: boolean; begin - ImGui_ImplSdlGL2_NewFrame(g_window); - - style := Imgui.GetStyle(); - style^.WindowRounding := 0; - if g_selected_file_idx >= 0 then begin Imgui.Begin_('Mesh'); //window used in hob_mesh as well if not g_model_loading_failed then begin Imgui.Text(g_filelist[g_selected_file_idx].name); end else Imgui.Text('mesh loading failed :('); + if g_model <> nil then begin + do_export := Imgui.Button('Export to obj'); + Imgui.SameLine(0, 15); + Imgui.Checkbox('with png textures', @view.mesh.export_png_textures); + if do_export then + g_model.ExportObj('rs_exported.obj', view.mesh.export_png_textures); + end; Imgui.End_; end; Imgui.Begin_('Rendering options'); - Imgui.Checkbox('points', @view.opts.points); - Imgui.Checkbox('wireframe', @view.opts.wireframe); - Imgui.Checkbox('textures', @view.opts.textures); - Imgui.Checkbox('vertex colors', @view.opts.vcolors); - if g_model <> nil then - if Imgui.Button('Export to obj') then - g_model.ExportObj('rs_exported.obj'); + Imgui.Checkbox('points', @view.render.points); + Imgui.Checkbox('wireframe', @view.render.wireframe); + Imgui.Checkbox('textures', @view.render.textures); + Imgui.Checkbox('vertex colors', @view.render.vcolors); Imgui.End_; if g_filelist.Size = 0 then @@ -505,6 +501,22 @@ begin end; +procedure 
BeginScene; +var + style: PImGuiStyle; +begin + ImGui_ImplSdlGL2_NewFrame(g_window); + style := Imgui.GetStyle(); + style^.WindowRounding := 0; + glClear( GL_COLOR_BUFFER_BIT or GL_DEPTH_BUFFER_BIT ); +end; + +procedure EndScene; +begin + igRender; + SDL_GL_SwapWindow(g_window); +end; + //****************************************************************************** var sec, frames: integer; @@ -542,9 +554,10 @@ begin key_pressed.wireframe := false; key_pressed.fullscreen := false; while not Done do begin - + BeginScene; + DrawModel; DrawGui; - DrawGLScene; + EndScene; while SDL_PollEvent(@event) > 0 do HandleEvent(event, done); diff --git a/model_viewer/util/crc32fast.pas b/model_viewer/util/crc32fast.pas new file mode 100644 index 0000000..dff2375 --- /dev/null +++ b/model_viewer/util/crc32fast.pas @@ -0,0 +1,78 @@ +(******************************************************************************* +crc32 slicing by 4 - http://create.stephan-brumme.com/crc32/ + +*******************************************************************************) +unit crc32fast; +{$mode objfpc}{$H+} + +interface + +function crc32(crc: longword; const data: Pbyte; length: longword): longword; + + +implementation + +var + Crc32Table: array[0..3, 0..255] of longword; + +procedure InitTable; +const + POLY = $EDB88320; +var + i, j: integer; + crc: longword; +begin + for i := 0 to 255 do begin + crc := i; + for j := 0 to 7 do begin + crc := (crc >> 1) xor ((crc and 1) * POLY); + end; + Crc32Table[0, i] := crc; + end; + + for i := 0 to 255 do begin + Crc32Table[1, i] := (Crc32Table[0, i] >> 8) xor Crc32Table[0, Crc32Table[0, i] and $ff]; + Crc32Table[2, i] := (Crc32Table[1, i] >> 8) xor Crc32Table[0, Crc32Table[1, i] and $ff]; + Crc32Table[3, i] := (Crc32Table[2, i] >> 8) xor Crc32Table[0, Crc32Table[2, i] and $ff]; + end; +end; + + +function crc32(crc: longword; const data: Pbyte; length: longword): longword; +var + current: plongword; + current_byte: pbyte; + x: longword; +begin + if data = nil then + exit(0); + + crc := crc xor $FFFFFFFF; + current := plongword(data); + while length >= 4 do begin + x := crc xor current^; + current += 1; + crc := Crc32Table[0, (x >> 24) and $ff] xor + Crc32Table[1, (x >> 16) and $ff] xor + Crc32Table[2, (x >> 8) and $ff] xor + Crc32Table[3, (x >> 0) and $ff]; + length -= 4; + end; + + current_byte := pbyte(current); + while length > 0 do begin + crc := (crc >> 8) xor Crc32Table[0, (crc and $ff) xor current_byte^]; + current_byte += 1; + length -= 1; + end; + + crc := crc xor $FFFFFFFF; + result := crc; +end; + + +initialization +InitTable; + +end. + diff --git a/model_viewer/util/dc2_encoder.pas b/model_viewer/util/dc2_encoder.pas new file mode 100644 index 0000000..dda2d10 --- /dev/null +++ b/model_viewer/util/dc2_encoder.pas @@ -0,0 +1,278 @@ +(******************************************************************************* +dc2_encoder.pas +Copyright (c) 2014-2015 David Pethes + +This file is part of Dc2. + +Dc2 is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Dc2 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Dc2. If not, see . 
+ +*******************************************************************************) +unit dc2_encoder; +{$mode objfpc}{$H+} + +interface + +uses + SysUtils, dc2core; + +const + DefaultCompLevel = 5; + +type + TEncodedSlice = record + data: pbyte; + size: integer; + end; + + { TDc2Encoder } + + TDc2Encoder = class + private type + TEncoderState = (stStartBlock, stProcessData, stWriteBlock); + private + dict: TMatchSearcher; //string dictionary + search_results: PLiteralMatch; //dictionary search results buffer + blockCoder: TBlockWriter; //deflate block writing backend + + output_buffer: pbyte; //buffer for encoded data, used by bitstream writer + encoded_items: integer; //encoded item count + slices_stored: integer; //number of input slices in current block + + block: record //current deflate block data + btype: TBlockTypeEnum; //type (fixed/dynamic/raw) + size: integer; //size in bytes for raw blocks, coding elements otherwise + last: boolean; //last block in deflate stream + end; + state: TEncoderState; + clevel: byte; //compression level + use_fixed_huff_only: boolean; //static huffcodes only + stats: stats_t; //block statistics + + procedure SearchMatches(const stream: pbyte; const size: integer); + + public + constructor Create(const compression_level: byte; const fixed_code_only: boolean = false); + destructor Destroy; override; + //Encode given block of data + procedure EncodeSlice(const data: pbyte; const input_size: longword; out slice: TEncodedSlice); + //Signal the last input block + procedure SetLastSlice; + //encoded data statistics + function GetStats: stats_t; + end; + +implementation + +{ TDc2Encoder } + +constructor TDc2Encoder.Create(const compression_level: byte; const fixed_code_only: boolean); +begin + clevel := compression_level; + if clevel > MAX_COMPRESSION_LEVEL then + clevel := MAX_COMPRESSION_LEVEL; + use_fixed_huff_only := fixed_code_only; + + dict := TMatchSearcher.Create; + dict.SetCompressionLevel(clevel); + + output_buffer := getmem(MAX_BLOCK_SIZE * 2); //padding for the case of data expansion + blockCoder := TBlockWriter.Create(output_buffer); + + search_results := getmem(MAX_BLOCK_SIZE * 2 * sizeof(TLiteralMatch)); + + state := stStartBlock; + block.last := false; + Fillbyte(stats, sizeof(stats_t), 0); +end; + +destructor TDc2Encoder.Destroy; +begin + dict.Free; + blockCoder.Free; + freemem(output_buffer); + freemem(search_results); + inherited; +end; + +{ + EncodeSlice + + Encode given piece of data: + - dictionary frontend: search for duplicate strings + - block coder backend: store search results in Deflate format +} +procedure TDc2Encoder.EncodeSlice(const data: pbyte; const input_size: longword; out slice: TEncodedSlice); +var + processed_size: integer; + + procedure WriteBlock; + var + append_bs_buffer: boolean; + begin + //write literals/matches to bitstream + if block.last then blockCoder.SetLast; + append_bs_buffer := slice.size > 0; + blockCoder.WriteBlock(data, input_size, search_results, encoded_items, append_bs_buffer); + + //failed to compress the block - use raw block (if all data is available) + if (blockCoder.GetStreamSize >= input_size) and (slices_stored = 1) and not block.last then begin + block.btype := BTRaw; + blockCoder.InitNewBlock(block.btype); + blockCoder.WriteBlock(data, input_size, search_results, encoded_items); + encoded_items := input_size; + end; + + //commit block data + blockCoder.Done; + slice.data := output_buffer; + slice.size += blockCoder.GetStreamSize(); + + stats.elements_encoded += encoded_items; + 
stats.blocks[block.btype] += 1; + end; + +begin + Assert(input_size <= MAX_BLOCK_SIZE, 'unexpected input data size'); + + //if no compression is specified, process the data as raw block + if clevel = 0 then begin + blockCoder.InitNewBlock(BTRaw); + if block.last then blockCoder.SetLast; + blockCoder.WriteBlock(data, input_size, search_results, 0); + blockCoder.Done; + slice.data := output_buffer; + slice.size := blockCoder.GetStreamSize(); + stats.blocks[block.btype] += 1; + exit; + end; + + processed_size := 0; + slice.data := nil; + slice.size := 0; + + repeat + case state of + stStartBlock: begin + //use fixed huffcode block if the input stream is too small, the overhead of dynamic block header would kill any compression + block.btype := BTDynamic; + if use_fixed_huff_only or (input_size < 200) then + block.btype := BTFixed; + + blockCoder.InitNewBlock(block.btype); + state := stProcessData; + encoded_items := 0; + slices_stored := 0; + end; + stProcessData: begin + //search input buffer for matches + SearchMatches(data, input_size); + processed_size := input_size; + slices_stored += 1; + //save block if no more data should go to block or last data was processed + if (encoded_items > MAX_BLOCK_SIZE div 8) or (slices_stored = 4) or block.last then + state := stWriteBlock; + end; + stWriteBlock: begin + WriteBlock; + state := stStartBlock; + end; + end; + until (state in [stProcessData, stStartBlock]) and (processed_size = input_size); +end; + +procedure TDc2Encoder.SetLastSlice; +begin + block.last := true; +end; + +function TDc2Encoder.GetStats: stats_t; +begin + result := stats; +end; + +function InitMatch(const sr: TSearchResult): TLiteralMatch; inline; +begin + result.match_length := sr.length; + result.offset := sr.distance; + result.literal := 0; +end; + +function InitMatch(const l: byte): TLiteralMatch; inline; +begin + result.match_length := 0; + result.offset := 0; + result.literal := l; +end; + +{ SearchMatches + Search stream for duplicate strings using a dictionary built from gathered data +} +procedure TDc2Encoder.SearchMatches(const stream: pbyte; const size: integer); +var + lm: TLiteralMatch; + lms: PLiteralMatch; + match, match_lazy: TSearchResult; + i: integer; + literal: byte; +begin + dict.NewData(stream, size); + i := 0; + lms := search_results + encoded_items; + while i < size do begin + match := dict.FindMatch(stream + i, i); + literal := stream[i]; + + if match.length >= MIN_MATCH_LENGTH then begin + + //lazy matching, biases are experimental and not tuned much + if (clevel >= 5) and (i < size - 1) then begin + if (match.length > 1) and (match.length < MAX_DEFLATE_MATCH_LENGTH-3) then begin + + match_lazy := dict.FindMatch(stream + i + 1, i + 1); + if match_lazy.length > match.length + 2 then begin + i += 1; + lms^ := InitMatch(literal); + lms += 1; + blockCoder.UpdateStatsLiteral(literal); + + i += match_lazy.length; + lms^ := InitMatch(match_lazy); + lms += 1; + blockCoder.UpdateStatsMatch(match_lazy.length, match_lazy.distance); + + Continue; + end; + + end; + end; + //end lazy matching + + i += match.length; + lm := InitMatch(match); + blockCoder.UpdateStatsMatch(match.length, match.distance); + end + else begin + i += 1; + lm := InitMatch(literal); + blockCoder.UpdateStatsLiteral(literal); + end; + + lms^ := lm; + lms += 1; + end; + encoded_items := lms - search_results; +end; + +end. 
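To make the slice interface above concrete, here is a minimal sketch of driving TDc2Encoder by hand, assuming the caller already holds its whole input in memory; DeflateBuffer and the variable names are illustrative, and the TLzEncoder wrapper in dc2_simple_api.pas below encapsulates the same loop:

    //requires: uses classes, dc2core, dc2_encoder;
    procedure DeflateBuffer(src: pbyte; size: integer; dest: TMemoryStream);
    var
      enc: TDc2Encoder;
      slice: TEncodedSlice;
      chunk: integer;
    begin
      enc := TDc2Encoder.Create(DefaultCompLevel);
      while size > 0 do begin
        chunk := MAX_BLOCK_SIZE;              //EncodeSlice accepts at most MAX_BLOCK_SIZE bytes per call
        if size <= chunk then begin
          chunk := size;
          enc.SetLastSlice;                   //must be signalled before encoding the final slice
        end;
        enc.EncodeSlice(src, chunk, slice);
        if slice.size > 0 then                //a call may emit nothing until a whole deflate block is flushed
          dest.Write(slice.data^, slice.size);
        src += chunk;
        size -= chunk;
      end;
      enc.Free;
    end;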
+ diff --git a/model_viewer/util/dc2_simple_api.pas b/model_viewer/util/dc2_simple_api.pas new file mode 100644 index 0000000..9f4d403 --- /dev/null +++ b/model_viewer/util/dc2_simple_api.pas @@ -0,0 +1,93 @@ +unit dc2_simple_api; +{$mode objfpc}{$H+} +{define use_zstream} + +interface + +uses + Classes, SysUtils, + {$ifdef use_zstream}zstream,{$endif} + dc2core, dc2_encoder; + +type + { TLzEncoder } + TLzEncoder = class + private + _ctx: TDc2Encoder; + _encoded_size: integer; + + public + constructor Create(const compression_level: byte = 2); + destructor Destroy; override; + function EncodeBytesToStream(const src: pbyte; const size: integer; var dest: TMemoryStream): integer; + procedure WriteStats; + end; + +implementation + +{ TLzEncoder } + +constructor TLzEncoder.Create(const compression_level: byte); +begin + _ctx := TDc2Encoder.Create(compression_level); + _encoded_size := 0; +end; + +destructor TLzEncoder.Destroy; +begin + inherited Destroy; + _ctx.free; +end; + +function TLzEncoder.EncodeBytesToStream(const src: pbyte; const size: integer; var dest: TMemoryStream): integer; +var + src_buffer: pbyte; + chunk_size: integer; + bytes_to_process: integer; + encdata: TEncodedSlice; + {$ifdef use_zstream} + zs: Tcompressionstream; + {$endif} +begin + {$ifdef use_zstream} + zs := Tcompressionstream.create(cldefault, dest, true); + zs.WriteBuffer(src^, size); + zs.Free; + result := _encoded_size; + exit; + {$endif} + _encoded_size := 0; + src_buffer := src; + chunk_size := MAX_BLOCK_SIZE; + bytes_to_process := size; + + while bytes_to_process > 0 do begin + if bytes_to_process <= chunk_size then begin + chunk_size := bytes_to_process; + _ctx.SetLastSlice(); + end; + + _ctx.EncodeSlice(src_buffer, chunk_size, encdata); + dest.Write(encdata.data^, encdata.size); + _encoded_size += encdata.size; + + src_buffer += chunk_size; + bytes_to_process -= chunk_size; + end; + + result := _encoded_size; +end; + +procedure TLzEncoder.WriteStats; +begin + //if _ctx^.stats.onb > 0 then + // writeln('avg. offset: ', _ctx^.stats.osum / _ctx^.stats.onb:8:1); + //if _ctx^.stats.mnb > 0 then + // writeln('avg. match : ', _ctx^.stats.msum / _ctx^.stats.mnb:8:2); + //with _ctx^.stats do + // writeln('block types (raw/fix/dyn): ', blocks[0]:6, blocks[1]:6, blocks[2]:6); + //writeln('deflate bytes: ', _encoded_size); +end; + +end. 
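A short usage sketch for the TLzEncoder convenience API above, assuming an in-memory source buffer; CompressToStream and the output file name are illustrative only:

    //requires: uses classes, dc2_simple_api;
    procedure CompressToStream(const data: pbyte; size: integer);
    var
      enc: TLzEncoder;
      dest: TMemoryStream;
      deflate_bytes: integer;
    begin
      dest := TMemoryStream.Create;
      enc := TLzEncoder.Create(5);            //compression level 0..7; 0 emits raw (stored) blocks only
      deflate_bytes := enc.EncodeBytesToStream(data, size, dest);
      writeln('deflate bytes: ', deflate_bytes, ' of ', size);
      dest.SaveToFile('compressed.bin');      //illustrative output path
      enc.Free;
      dest.Free;
    end;

Note that the stream filled by this path contains bare Deflate block data as produced by TBlockWriter; the optional use_zstream build goes through Tcompressionstream instead.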
+ diff --git a/model_viewer/util/dc2core.pas b/model_viewer/util/dc2core.pas new file mode 100644 index 0000000..b962260 --- /dev/null +++ b/model_viewer/util/dc2core.pas @@ -0,0 +1,1957 @@ +unit dc2core; + +interface + +uses + sysutils, math; + +type + +TBitstreamBufferState = record + current_bits: longword; + mask: longword; +end; + +{ TBitstreamWriter } + +TBitstreamWriter = class + private + buffer: plongword; + cur: plongword; + mask: longword; + closed: boolean; + + public + constructor Create(const memory_buffer: pbyte); + destructor Destroy; override; + + procedure Close; + function IsByteAligned: boolean; + procedure ByteAlign; + function GetBitSize: longword; + function GetByteSize: longword; + function GetUnbufferedByteSize: longword; + function GetDataStart: pbyte; + + procedure Write(const bit: integer); + procedure Write(const bits, length: longword); //write multiple bits, lsb first + + function GetState: TBitstreamBufferState; + procedure SetState(const state: TBitstreamBufferState); + + procedure ResetBufferPosition; +end; + + +{ TBitstreamReader } + +TBitstreamReader = class + private + buffer: plongword; + cur: plongword; + used: longword; + public + constructor Create(const memory_buffer: pbyte); + function GetPosition(): longword; + function GetBitCount(): longword; + function GetUncachedPosition(): longword; + function IsByteAligned(): boolean; + function Read(): longword; {$ifdef bs_inline} inline; {$endif} + function Read(count: longword): longword; + function Show(const count: longword): longword; + procedure Skip(const count: longword); + function ReadInverse(bit_count: longword): longword; + + function GetState: TBitstreamBufferState; + procedure SetState(const state: TBitstreamBufferState); + procedure ResetBufferPosition; + + function GetInternalState: TBitstreamBufferState; + procedure SetInternalState(const state: TBitstreamBufferState); +end; + +function SwapBits (const bits, bit_count: longword): longword; + + +const + MAX_BLOCK_SIZE = 32 * 1024; + +type + TBlockTypeEnum = (BTRaw := 0, BTFixed, BTDynamic, BTError); + + //statistiky pre enkodovanie + stats_t = record + offsets_sum, offsets_used, //sucet/pocet offsetov + matches_sum, elements_encoded: int64; // -||- zhod + blocks: array[TBlockTypeEnum] of longword; + end; + + +type + { TSlidingBuffer } + TSlidingBuffer = object + private + _buffer: pbyte; + _previous_bytes_count: integer; + public + constructor Init(); + destructor Done; + function GetWindow: pbyte; inline; + procedure InsertData(const data: pbyte; const size: integer); + end; + + +const + MAX_COMPRESSION_LEVEL = 7; + DICT_SIZE = MAX_BLOCK_SIZE; + MAX_DEFLATE_MATCH_LENGTH = 258; + +type + TSearchResult = record + distance, + length: word; + end; + + { TMatchSearcher } + TMatchSearcher = class + private + _max_search_depth: integer; //limit how many positions we want to check + _max_search_match_length: integer; //limit how long match needs to be to satisfy search conditions + _links: pinteger; //linked list of hash occurences + _last_seen_idx: pinteger; //last known position of a hash in the stream + _bytes_processed: integer; + _current_chunk_size: integer; + _sbuffer: TSlidingBuffer; //sliding buffer for search window data + + function Search(const window_end_ptr, str: pbyte; const current_idx, max_match_length: integer + ): TSearchResult; + + public + constructor Create; + destructor Destroy; override; + procedure SetCompressionLevel(const level: integer); + + { New chunk of data that to be processed. 
} + procedure NewData (const data: pbyte; const size: integer); + + { + Find previous occurence of bytes in str. + str - searched data pointer + data_index - searched data index relative to current chunk + } + function FindMatch(const str: pbyte; const data_index: integer): TSearchResult; + end; + + +const + //konstanty pre strom + END_OF_STREAM = 285; + TOP_NODE = END_OF_STREAM * 2 + 1; + +type + //huff tree definitions + tree_node_t = record + weight: longword; + child_0: word; + child_1: word; + end; + tree_node_p = ^tree_node_t; + + vlc_code_t = record + bits: word; + code_len: byte; + end; + vlc_code_p = ^vlc_code_t; + + huff_tree_t = record + counts: plongword; + nodes: tree_node_p; + codes: vlc_code_p; + root_node: longword; + end; + + //fixed huffcodes are actually constructed using codes <0..287>, but last 2 are never used + TDecodeTable = record + codes_of_legth: array[0..15] of word; //number of codes for given length + code_value: array[0..END_OF_STREAM + 2] of word; //map code to literal/length value + end; + PDecodeTable = ^TDecodeTable; + +procedure huff_FillCanonDecodingTable(var tab: TDecodeTable; const code_lengths: pbyte; const count: integer); +procedure huff_code2canon(const codes: vlc_code_p); + +procedure huff_init (out h: huff_tree_t); +procedure huff_free (h: huff_tree_t); + +procedure huff_raise_count (var h: huff_tree_t; const val: word); inline; +procedure huff_build_tree (var h: huff_tree_t; max_cnt: word = 255); +procedure huff_build_distance_tree (var h: huff_tree_t; max_cnt: word = 255); + + +const + END_OF_BLOCK = 1000; //must not collide with any valid Deflate values + END_OF_BLOCK_CODE = 256; + +type + + { TVlcWriter } + TVlcWriter = object + private + bs: TBitstreamWriter; + len_tree, dist_tree: vlc_code_p; + public + procedure SetTrees(const bitstream: TBitstreamWriter; const length_tree, distance_tree: vlc_code_p); + procedure WriteMatch (const len, dist: longword); + procedure WriteLiteral (const c: byte); + procedure WriteBlockEnd (); + end; + + TSymbolBits = record + symbol: word; + nbits: byte; + end; + +const + TAB0_BITS = 9; //LUT bits, must be less or equal to maximum bit length of huff codes allowed by Deflate + +type + TDecodeLookupTables = record + codes_t0: array[0..511] of TSymbolBits; //9 bits = 512 values + canon_table: TDecodeTable; + end; + + { TVlcReader } + + TVlcReader = class + private + bs: TBitstreamReader; + literal_dectable, distance_dectable: TDecodeLookupTables; + public + procedure SetTables(const bitreader: TBitstreamReader; const literal_table, distance_table: TDecodeLookupTables); + procedure ReadCodePair (out length, distance: word); + end; + + +function Length2code (const len: longword): longword; +function Distance2code(const dist: longword): longword; + +function vlc_ReadCode(const bs: TBitstreamReader; const table: TDecodeTable): integer; +function vlc_ReadCode(const bs: TBitstreamReader; const dectable: TDecodeLookupTables): integer; +function InitDecodeLut(const code_lengths: pbyte; const count: integer): TDecodeLookupTables; + + +const + MIN_MATCH_LENGTH = 3; + +type + TLiteralMatch = record + match_length: word; //match length + offset: word; //match offset + literal: byte; //byte from input stream + end; + PLiteralMatch = ^TLiteralMatch; + + { TBlockWriter } + TBlockWriter = class + private + bitWriter: TBitstreamWriter; //bitstream writer + literal_match_stats: pinteger; + distance_stats: pinteger; + literal_codes: vlc_code_p; + distance_codes: vlc_code_p; + + _block_type: TBlockTypeEnum; + _last: boolean; 
//last block in stream + bs_cache: TBitstreamBufferState; //state of the buffer at beginning of the block + + procedure BeginBlock; + procedure BuildHuffCodes; + procedure BuildFixedHuffCodes; + procedure WriteCodingTrees; + procedure WriteBlockEncoded(const search_results: PLiteralMatch; const size: integer); + procedure WriteBlockRaw(const rawdata: pbyte; const rawsize: integer); + + public + constructor Create(const output_buffer: pbyte); + destructor Destroy; override; + + procedure InitNewBlock(const block_type: TBlockTypeEnum); + procedure SetLast; + procedure UpdateStatsMatch(const len, dist: longword); inline; + procedure UpdateStatsLiteral(const literal: byte); inline; + + procedure WriteBlock(const rawdata: pbyte; const rawsize: integer; + const search_results: PLiteralMatch; const size: integer; const keep_buffer: boolean = false); + procedure Done; + + function GetStreamSize: integer; + end; + + + TBlockContext = record + btype: TBlockTypeEnum; + size: integer; + unfinished: boolean; + last: boolean; //last block flag + end; + + { TBlockReader } + TBlockReader = class + private + _block_type: TBlockTypeEnum; + _vlc: TVlcReader; + procedure ReadHeaderCodes(const bs: TBitstreamReader); + procedure InitFixedCodes(const bs: TBitstreamReader); + public + constructor Create; + destructor Destroy; override; + function ReadBlockHeader(const bs: TBitstreamReader): TBlockContext; + function GetVlcReader: TVlcReader; inline; + end; + + + + + + + + + + + + +(******************************************************************************* +*******************************************************************************) +implementation + + + +{ SwapBits + Swap bit ordering in source pattern. Swaps up to 16 bits. +} +function SwapBits (const bits, bit_count: longword): longword; +var + x: longword; +begin + x := bits; + x := ((x and $aaaaaaaa) >> 1) or ((x and $55555555) << 1); + x := ((x and $cccccccc) >> 2) or ((x and $33333333) << 2); + x := ((x and $f0f0f0f0) >> 4) or ((x and $0f0f0f0f) << 4); + x := ((x and $ff00ff00) >> 8) or ((x and $00ff00ff) << 8); + result := x >> (16 - bit_count); +end; + +{ TBitstreamWriter } + +constructor TBitstreamWriter.Create(const memory_buffer: pbyte); +begin + buffer := plongword (memory_buffer); + cur := buffer; + cur^ := 0; + mask := 0; +end; + +destructor TBitstreamWriter.Destroy; +begin + if not closed then + Close; + + inherited Destroy; +end; + +function TBitstreamWriter.GetBitSize: longword; +begin + result := 32 * (cur - buffer) + mask; +end; + +function TBitstreamWriter.GetByteSize: longword; +begin + result := (cur - buffer) * 4; + result += (mask + 7) div 8; //+ buffer +end; + +function TBitstreamWriter.GetUnbufferedByteSize: longword; +begin + result := (cur - buffer) * 4; +end; + +function TBitstreamWriter.GetDataStart: pbyte; +begin + result := pbyte(buffer); +end; + +procedure TBitstreamWriter.Close; +begin +end; + +function TBitstreamWriter.IsByteAligned: boolean; +begin + result := mask mod 8 = 0; +end; + +procedure TBitstreamWriter.ByteAlign; +begin + while not IsByteAligned do + Write(0); +end; + +procedure TBitstreamWriter.Write(const bit: integer); +begin + cur^ := cur^ or longword((bit and 1) shl mask); + mask += 1; + + if mask = 32 then begin + cur += 1; + cur^ := 0; + mask := 0; + end; +end; + +procedure TBitstreamWriter.Write(const bits, length: longword); +var + bits_: longword; +begin + Assert(length <= 32, 'bit_count over 32'); + + //clear unused bits + bits_ := bits and ($ffffffff shr (32 - length)); + + cur^ := cur^ or (bits_ 
shl mask); + mask += length; + if mask >= 32 then begin + mask -= 32; //number of bits that didn't fit into buffer + cur += 1; + cur^ := 0; + + if mask > 0 then + cur^ := bits_ shr (length - mask); + end; +end; + +function TBitstreamWriter.GetState: TBitstreamBufferState; +begin + Result.mask := mask; + Result.current_bits := cur^; +end; + +procedure TBitstreamWriter.SetState(const state: TBitstreamBufferState); +begin + mask := state.mask; + cur^ := state.current_bits; +end; + +procedure TBitstreamWriter.ResetBufferPosition; +var + cache: TBitstreamBufferState; +begin + cache := GetState; + cur := buffer; + SetState(cache); +end; + + +{ TBitstreamReader } + +constructor TBitstreamReader.Create(const memory_buffer: pbyte); +begin + buffer := plongword (memory_buffer); + cur := buffer; + used := 0; +end; + +function TBitstreamReader.GetPosition: longword; +begin + result := (cur - buffer) << 2; //used dword count + result += (used + 7) shr 3; //+ buffer +end; + +function TBitstreamReader.GetBitCount: longword; +begin + result := 32 * longword(cur - buffer) + used; +end; + +function TBitstreamReader.GetUncachedPosition: longword; +begin + result := (cur - buffer) * 4; //used dword count +end; + +function TBitstreamReader.IsByteAligned: boolean; +begin + result := true; + if used mod 8 > 0 then result := false; +end; + +function TBitstreamReader.Read: longword; +begin + result := (cur^ shr used) and 1; + used += 1; + if used = 32 then begin + cur += 1; + used := 0; + end; +end; + +function TBitstreamReader.Read(count: longword): longword; +var + bits_left: integer; +begin + result := cur^ shr used; + if count < (32 - used) then begin + result := result and ($ffffffff shr (32 - count)); + used += count; + end else begin + bits_left := count - (32 - used); + cur += 1; + if bits_left > 0 then + result := result or (cur^ and ($ffffffff shr (32 - bits_left))) shl (32 - used); + used := bits_left; + end; +end; + +function TBitstreamReader.Show(const count: longword): longword; +var + bits_left: integer; +begin + result := cur^ shr used; + if count < (32 - used) then begin + result := result and ($ffffffff shr (32 - count)); + end else begin + bits_left := count - (32 - used); + if bits_left > 0 then + result := result or ((cur + 1)^ and ($ffffffff shr (32 - bits_left))) shl (32 - used); + end; +end; + +procedure TBitstreamReader.Skip(const count: longword); +begin + if count < (32 - used) then begin + used += count; + end else begin + cur += 1; + used := count - (32 - used); + end; +end; + +function TBitstreamReader.ReadInverse(bit_count: longword): longword; +var + i: integer; +begin + result := 0; + for i := bit_count - 1 downto 0 do + result := result or Read() shl i; +end; + +function TBitstreamReader.GetState: TBitstreamBufferState; +begin + result.current_bits := cur^; +end; + +procedure TBitstreamReader.SetState(const state: TBitstreamBufferState); +begin + cur^ := state.current_bits; +end; + +procedure TBitstreamReader.ResetBufferPosition; +begin + cur := buffer; +end; + +function TBitstreamReader.GetInternalState: TBitstreamBufferState; +begin + result.current_bits := cur - buffer; + result.mask := used; +end; + +procedure TBitstreamReader.SetInternalState(const state: TBitstreamBufferState); +begin + cur := buffer + state.current_bits; + used := state.mask; +end; + + +{ TSlidingBuffer } + +constructor TSlidingBuffer.Init(); +begin + _buffer := getmem(2 * MAX_BLOCK_SIZE); + _buffer += MAX_BLOCK_SIZE; + _previous_bytes_count := 0; +end; + +destructor TSlidingBuffer.Done; +begin + 
freemem(_buffer - MAX_BLOCK_SIZE); +end; + +function TSlidingBuffer.GetWindow: pbyte; +begin + result := _buffer; +end; + +procedure TSlidingBuffer.InsertData(const data: pbyte; const size: integer); +begin + Assert(size <= MAX_BLOCK_SIZE, 'cannot insert more data than allocated range'); + + if _previous_bytes_count > 0 then + move((_buffer + _previous_bytes_count - MAX_BLOCK_SIZE)^, + (_buffer - MAX_BLOCK_SIZE)^, + MAX_BLOCK_SIZE); + + move(data^, _buffer^, size); + _previous_bytes_count := size; +end; + +const + SEARCH_DEPTH: array[0..MAX_COMPRESSION_LEVEL] of Integer = (0, 1, 8, 16, 32, 48, 64, 32*1024); + SEARCH_MATCH_DIVIDER: array[0..MAX_COMPRESSION_LEVEL] of Integer = (1, 4, 4, 4, 4, 4, 2, 1); + HASH_BITS = 18; + +{ + Generate 16bit hash from first 3 bytes of a given pointer +} +function hash3(const x: pbyte): integer; inline; +begin + result := ((x+2)^ shl 10) xor ((x+1)^ shl 5) xor x^; +end; + + +{ TMatchSearcher } + +constructor TMatchSearcher.Create; +begin + _sbuffer.Init(); + _max_search_depth := SEARCH_DEPTH[0]; + _max_search_match_length := MAX_DEFLATE_MATCH_LENGTH div SEARCH_MATCH_DIVIDER[0]; + + _links := getmem(2 * DICT_SIZE * sizeof(integer)); + _last_seen_idx := getmem(1 shl HASH_BITS * sizeof(integer)); //must be equal to hash bits + Filldword(_last_seen_idx^, 1 shl HASH_BITS, $ffffffff ); //negative indices don't get searched, so use -1 + _current_chunk_size := 0; + _bytes_processed := 0; +end; + +destructor TMatchSearcher.Destroy; +begin + freemem(_links); + freemem(_last_seen_idx); + _sbuffer.Done; + inherited; +end; + +procedure TMatchSearcher.SetCompressionLevel(const level: integer); +begin + Assert(level <= MAX_COMPRESSION_LEVEL, 'invalid compression level'); + _max_search_depth := SEARCH_DEPTH[level]; + _max_search_match_length := MAX_DEFLATE_MATCH_LENGTH div SEARCH_MATCH_DIVIDER[level]; +end; + +{ + Take next data chunk and create links between the occurences of the same hash +} +procedure TMatchSearcher.NewData(const data: pbyte; const size: integer); +var + i, key, last_seen: integer; +begin + _sbuffer.InsertData(data, size); + _bytes_processed += _current_chunk_size; + _current_chunk_size := size; + + move((_links + DICT_SIZE)^, _links^, DICT_SIZE * sizeof(integer)); + for i := 0 to size - 1 do begin + key := hash3(data + i); + last_seen := _last_seen_idx[key]; + _links[DICT_SIZE + i] := last_seen; + _last_seen_idx[key] := i + _bytes_processed; + end; +end; + +{ + Compare strings, return length of the match. Loop at the last byte of the window. +} +function compare_strings_loop(const window, string_data: pbyte; + const max_match_length, window_size: integer): integer; +var + i, k: integer; +begin + result := 0; + i := 0; + for k := 0 to max_match_length - 1 do begin + if window[i] = string_data[k] then + result += 1 + else + exit; + i += 1; + if i = window_size then + i := 0; + end; +end; + +{ + Compare strings, return length of the match. + There must be at least max_match_length valid bytes in window. +} +function compare_strings(const window, string_data: pbyte; + const max_match_length: integer): integer; +var + i: integer; +begin + result := 0; + for i := 0 to max_match_length - 1 do + if window[i] = string_data[i] then + result += 1 + else + exit; +end; + +{ + Compare last byte of the window against current string. 
+} +function compare_strings_rle(const string_data: pbyte; const byte_value, max_match_length: integer): integer; +var + i: integer; +begin + result := 0; + for i := 0 to max_match_length - 1 do + if byte_value = string_data[i] then + result += 1 + else + exit; +end; + + +function InitSearchResult(const distance, best_match: longword): TSearchResult; inline; +begin + longword(result) := longword( best_match << 16 or distance ); +end; + +function TMatchSearcher.Search(const window_end_ptr, str: pbyte; + const current_idx, max_match_length: integer): TSearchResult; +var + i: integer; + links: pinteger; + best_match_distance: integer; + best_match_length: integer; + last_seen_idx: integer; + min_allowed_idx: integer; + previous_idx: integer; + length: integer; + distance: integer; +begin + Assert(max_match_length >= 3); + + //test if searched string is a repetition of the last byte before full search + best_match_length := compare_strings_rle(str, window_end_ptr[-1], max_match_length); + result := InitSearchResult(1, best_match_length); + if best_match_length >= _max_search_match_length then + exit; + + last_seen_idx := current_idx - _bytes_processed; + links := _links + DICT_SIZE; + best_match_distance := 1; + min_allowed_idx := max(0, current_idx - DICT_SIZE); + + //early termination if links of the next searched position are much closer than current ones + if links[last_seen_idx] < links[last_seen_idx + 1] - (DICT_SIZE shr 1) then + exit; + + for i := _max_search_depth - 1 downto 0 do begin + //if the position falls out of the sliding window_end_ptr range, it's too old and cannot be searched + previous_idx := links[last_seen_idx]; + if previous_idx < min_allowed_idx then begin + break; + end; + last_seen_idx := previous_idx - _bytes_processed; + + //compare data at given positions + distance := current_idx - previous_idx; + if previous_idx + max_match_length < current_idx then + length := compare_strings(window_end_ptr - distance, str, max_match_length) + else + length := compare_strings_loop(window_end_ptr - distance, str, max_match_length, distance); + + if length > best_match_length then begin + best_match_length := length; + best_match_distance := distance; + if length >= _max_search_match_length then + break; + end; + end; + + Assert(best_match_distance >= 0); + result := InitSearchResult(best_match_distance, best_match_length); +end; + +{ + Find best match between current bytes and bytes already seen. 
+ If distance = 0 & length = 0 - no occurences were found +} +function TMatchSearcher.FindMatch(const str: pbyte; const data_index: integer): TSearchResult; +var + max_match_length: integer; + current_idx: integer; + window_end_ptr: pbyte; +begin + result := InitSearchResult(0, 0); + + //reduce maximum possible match length at the end of the stream + //we need at least 3 bytes to be able to run search (hash function takes 3 bytes as input) + max_match_length := min(MAX_DEFLATE_MATCH_LENGTH, _current_chunk_size - data_index); + if max_match_length <= 2 then + exit; + + //beginning of a stream, nothing to search + if _bytes_processed + data_index = 0 then + exit; + + //get proper search window and currently searched string's file index + window_end_ptr := _sbuffer.GetWindow + data_index; + current_idx := _bytes_processed + data_index; + + result := Search(window_end_ptr, str, current_idx, max_match_length); +end; + + +{ huff_FillCanonDecodingTable + + code_lengths - array of lengths, indexed by code + count - number of codes to fill +} +procedure huff_FillCanonDecodingTable(var tab: TDecodeTable; const code_lengths: pbyte; const count: integer); +var + len: integer; //current length; all deflate code lengths are between 1 and 15 + same_length_count: integer; + i, j: integer; +begin + j := 0; + tab.codes_of_legth[0] := 0; + for len := 1 to 15 do begin + same_length_count := 0; + + for i := 0 to count - 1 do begin + if code_lengths[i] = len then begin + tab.code_value[j] := i; + j += 1; + same_length_count += 1; + end; + end; + + tab.codes_of_legth[len] := same_length_count; + end; +end; + + + +(* +huff_code2canon +zmen huffkody na kanonicke huffkody +*) +procedure huff_code2canon(const codes: vlc_code_p); +var + len: integer; + b, i: integer; +begin + b := 0; + for len := 1 to 15 do begin + for i := 0 to END_OF_STREAM do begin + if codes[i].code_len = len then begin + codes[i].bits := b; + b += 1; + end; + end; + b := b shl 1; + end; +end; + + + +(******************************************************************************* +generovanie stromu +*) +//huff_init +procedure huff_init (out h: huff_tree_t); +var + i: integer; +begin + i := sizeof(longword) * (END_OF_STREAM + 1); + h.counts := getmem(i); + FillByte(h.counts^, i, 0); + + i := sizeof(tree_node_t) * (END_OF_STREAM + 1) * 2; + h.nodes := getmem(i); + FillByte(h.nodes^, i, 0); + + i := sizeof(vlc_code_t) * (END_OF_STREAM + 1); + h.codes := getmem(i); + FillByte(h.codes^, i, 0); + + h.root_node := 0; +end; + + + +//huff_free +procedure huff_free (h: huff_tree_t); +begin + freemem( h.counts ); + freemem( h.nodes ); + freemem( h.codes ); +end; + + + +procedure huff_raise_count(var h: huff_tree_t; const val: word); +begin + h.counts[val] += 1; +end; + + +(******************************************************************************* +scale_counts +*) +procedure scale_counts(var params: huff_tree_t; const max_cnt: word); +var + counts: plongword; + b: integer; + max: longword; + new: longword; + ratio: single; +begin + counts := params.counts; + max := 0; + for b := 0 to END_OF_STREAM do + if counts[b] > max then max := counts[b]; + + if max <= max_cnt then exit; + + ratio := single( max ) / max_cnt; + for b := 0 to END_OF_STREAM do + if counts[b] > 0 then begin + new := round( counts[b] / ratio ); + if new = 0 then + counts[b] := 1 + else + counts[b] := new; + end; +end; + + + +(******************************************************************************* +build_tree +zostavenie huffmanovho stromu, nastavenie indexu korenoveho uzla 
stromu +*) +procedure build_tree (var params: huff_tree_t); +var + nodes: tree_node_p; + counts: plongword; + next_free: integer; + i: integer; + min_1, + min_2: integer; +begin + nodes := params.nodes; + counts := params.counts; + for i := 0 to END_OF_STREAM do nodes[i].weight := counts[i]; + nodes[TOP_NODE].weight := High(word); + next_free := END_OF_STREAM; + + while true do begin + next_free := next_free + 1; + min_1 := TOP_NODE; + min_2 := TOP_NODE; + + for i := 0 to next_free - 1 do + if nodes[i].weight > 0 then begin + if nodes[i].weight < nodes[min_1].weight then begin + min_2 := min_1 ; + min_1 := i ; + end else + if nodes[i].weight < nodes[min_2].weight then min_2 := i; + end; + + if min_2 = TOP_NODE then break; + nodes[next_free].weight := nodes[min_1].weight + nodes[min_2].weight; + nodes[next_free].child_0 := min_1; + nodes[next_free].child_1 := min_2; + nodes[min_1].weight := 0; + nodes[min_2].weight := 0; + end; + + params.root_node := next_free - 1; +end; + + + +(******************************************************************************* +tree_to_code +z korenoveho uzlu rekurzivne postupuj cez jednotlive listy +a zapis cestu ako VLC kod + pocet jeho bitov do tabulky +*) +procedure tree_to_code (var p: huff_tree_t; code_len_current, node: integer); +begin + if node <= END_OF_STREAM then + p.codes[node].code_len := code_len_current + else begin + code_len_current := code_len_current + 1; + tree_to_code (p, code_len_current, p.nodes[node].child_0 ); + tree_to_code (p, code_len_current, p.nodes[node].child_1 ); + end; +end; + + + +(******************************************************************************* +huff_build_tree +zostav huffmanov strom, vrat index korenoveho uzla stromu +*) +procedure huff_build_tree (var h: huff_tree_t; max_cnt: word = 255); +begin + scale_counts (h, max_cnt); + build_tree (h); + tree_to_code (h, 0, h.root_node); + huff_code2canon (h.codes); +end; + +//special case, when there is only one symbol in alphabet - can happen with distance trees +procedure huff_build_distance_tree (var h: huff_tree_t; max_cnt: word = 255); +const + MAX_DIST_CODES = 32; +var + i: integer; + last_nonzero_idx: integer; + used_symbols_count: integer; +begin + used_symbols_count := 0; + last_nonzero_idx := 0; + for i := 0 to MAX_DIST_CODES - 1 do + if h.counts[i] > 0 then begin + used_symbols_count += 1; + if used_symbols_count > 1 then + break; + last_nonzero_idx := i; + end; + + if used_symbols_count > 1 then begin + huff_build_tree(h, max_cnt); + end else begin + h.codes[last_nonzero_idx].code_len := 1; + h.codes[last_nonzero_idx].bits := 0; + h.root_node := last_nonzero_idx; + end +end; + + +{ Length2code + Map match length value to length code for huff encoding. 
+} +function Length2code (const len: longword): longword; +const + table: array[byte] of byte = ( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, + 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29 + ); +begin + Assert(len >= 3); + result := 256 + table[len-3]; //0..255 = literals, 256 = block end +end; + + +{ Code2length + Map decoded length code to length value. +} +function Code2length(const code: longword): longword; inline; +const + table: array[0..28] of byte = ( + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, + 83, 99, 115, 131, 163, 195, 227, 0 + ); +begin + result := table[ code - 257 ]; + if result = 0 then result := 258; +end; + + +{ Distance2code + Map distance value to distance code for huff encoding. +} +function Distance2code(const dist: longword): longword; +const + table_512: array [0..511] of byte = ( + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, + 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17 + ); + table_128: array[2..127] of byte = ( + 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, + 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29 + ); +begin + if dist <= 512 then + result := table_512[dist - 1] + else begin + result := table_128[(dist - 1) shr 8]; + end; +end; + + +{ Code2distance + Map decoded distance code to distance value. +} +function Code2distance(const code: longword): longword; inline; +const + table: array[0..29] of word = ( + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, + 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 + ); +begin + result := table[ code ]; +end; + +{ TVlcWriter } + +procedure TVlcWriter.SetTrees(const bitstream: TBitstreamWriter; + const length_tree, distance_tree: vlc_code_p); +begin + bs := bitstream; + len_tree := length_tree; + dist_tree := distance_tree; +end; + +procedure TVlcWriter.WriteMatch(const len, dist: longword); +var + code, bits: longword; +begin + //length + code := Length2code(len); + bs.Write(len_tree[code].bits, len_tree[code].code_len); + if (code >= 265) and (code < 285) then begin //extra bits + bits := 5 - (284 - code) div 4; + bs.Write(len - 3, bits); + end; + + //offset / distance + code := Distance2code(dist); + bs.Write(dist_tree[code].bits, dist_tree[code].code_len); + if code >= 4 then begin + bits := code div 2 - 1; + bs.Write(dist - 1, bits); + end; +end; + +procedure TVlcWriter.WriteLiteral(const c: byte); +begin + bs.Write(len_tree[c].bits, len_tree[c].code_len); +end; + +procedure TVlcWriter.WriteBlockEnd; +begin + bs.Write(len_tree[END_OF_BLOCK_CODE].bits, len_tree[END_OF_BLOCK_CODE].code_len); +end; + +{ vlc_ReadCode + Read one canonical huffman code using the given decoding table. Maximum symbol length cannot + exceed 15 bits (maximum allowed by Deflate), otherwise reading fails and bad things happen. +} +function vlc_ReadCode(const bs: TBitstreamReader; const table: TDecodeTable): integer; +var + i, codes, + diff, value: longword; + value_low: longword; //lowest value for code of given length + codes_skipped: longword; //how many codes we already skipped + number_of_codes: pword; //# codes of given length +begin + i := 0; + value := 0; + codes_skipped := 0; + value_low := 0; + codes := 0; + number_of_codes := @table.codes_of_legth[0]; + repeat + codes_skipped += codes; + value_low += codes; + value_low := value_low shl 1; + + i += 1; + Assert(i < 16, 'could not read vlc code'); + codes := number_of_codes[i]; + + value := (value shl 1) or bs.read(); + diff := value - value_low; + until codes > diff; + + result := table.code_value[ codes_skipped + diff ]; +end; + + +{ vlc_ReadCode + Read one variable-length code using the given lookup table. If the code couldn't be read, try + to read with the canon huff decoding table. 
+} +function vlc_ReadCode(const bs: TBitstreamReader; const dectable: TDecodeLookupTables): integer; +var + bits: integer; + sb: TSymbolBits; +begin + bits := bs.Show(TAB0_BITS); + sb := dectable.codes_t0[bits]; + result := sb.symbol; + + if (sb.nbits = 0) then begin + result := vlc_ReadCode(bs, dectable.canon_table); + end else + bs.Skip(sb.nbits); +end; + + +{ InitDecodeLut + Assign canonical huff code bits to each code by its length and build a look-up table for fast + decoding. Uses separate code bits runs for each code length. Makes 2 passes over input data, + one pass could be removed if code length stats were provided beforehand, but it doesn't gain much. +} +function InitDecodeLut(const code_lengths: pbyte; const count: integer): TDecodeLookupTables; +var + i, len, code_bits: integer; + value, k, b: integer; + sb: TSymbolBits; + num_lengths: array[0..15] of integer; //# of codes of given length + length_bits: array[0..15] of integer; //canonical bits for codes of given length +begin + FillByte(num_lengths, sizeof(num_lengths), 0); + FillByte(length_bits, sizeof(length_bits), 0); + for i := 0 to count - 1 do begin + num_lengths[code_lengths[i]] += 1; + end; + b := 0; + for i := 1 to 15 do begin + length_bits[i] := b; + b += num_lengths[i]; + b := b << 1; + end; + + FillByte(result.codes_t0, sizeof(result.codes_t0), 0); + for i := 0 to count - 1 do begin + len := code_lengths[i]; + if not (len in [1..TAB0_BITS]) then + continue; + + code_bits := length_bits[len]; + length_bits[len] += 1; + sb.symbol := i; + sb.nbits := len; + + //insert each code length + junk code_bits combination + code_bits := SwapBits(code_bits, len); + for k := 0 to 1 << (TAB0_BITS - len) - 1 do begin + value := (k << len) or code_bits; + result.codes_t0[value] := sb; + end; + end; +end; + +{ TVlcReader } + +procedure TVlcReader.SetTables(const bitreader: TBitstreamReader; + const literal_table, distance_table: TDecodeLookupTables); +begin + bs := bitreader; + literal_dectable := literal_table; + distance_dectable := distance_table; +end; + +procedure TVlcReader.ReadCodePair(out length, distance: word); +const + LITERAL_EXTRA_BITS: array[257..285] of byte = ( + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 + ); +var + code, extra_bits: longword; +begin + length := 1; + code := vlc_ReadCode(bs, literal_dectable); + + //decode literals, length / distance, end of block + if code < 256 then begin + distance := code; + end + else if code > 256 then begin + length := Code2length(code); + extra_bits := LITERAL_EXTRA_BITS[code]; + if extra_bits > 0 then begin + length += bs.Read(extra_bits); + end; + + code := vlc_ReadCode(bs, distance_dectable); + distance := Code2distance(code); + if code >= 4 then begin + distance += bs.Read(code >> 1 - 1); + end; + end + else begin + length := END_OF_BLOCK; + distance := 0; + end; +end; + + +const + //code ordering for header code length alphabet + //see RFC1951 section 3.2.7. Compression with dynamic Huffman codes (BTYPE=10) + HeaderCodeLengthOrder: array[0..18] of byte = ( + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + ); + + LITERAL_MATCH_ELEMENTS = END_OF_STREAM + 1; + DISTANCE_ELEMENTS = 30; + +type + TRleResult = record + size: integer; + nonzero: integer; + rl_pairs: array[0..LITERAL_MATCH_ELEMENTS-1] of record + code_len, repeats: byte; + end; + code_lengths: array[0..LITERAL_MATCH_ELEMENTS-1] of byte; + end; + +{ + Build length-limited huffman code tree. 
+ Length is limited by reducing the code occurence's statistics. + Less accuracy means that the differences between code lengths are reduced, too. + This is somewhat suboptimal. + + Distance trees are special, because there are cases where they contain only one used symbol. +} +procedure build_limited_tree(var tree: huff_tree_t; limit, size: word; const for_distance: boolean = false); +var + i: integer; + tree_ok: boolean; + freq_limit: integer; +begin + freq_limit := 256*2; + repeat + if for_distance then + huff_build_distance_tree(tree, freq_limit) + else + huff_build_tree(tree, freq_limit); + tree_ok := true; + for i := 0 to size - 1 do + if tree.codes[i].code_len > limit then begin + tree_ok := false; + freq_limit := freq_limit shr 1; + break; + end; + until tree_ok; + //reverse bits for faster bitwriting + for i := 0 to size - 1 do begin + tree.codes[i].bits := SwapBits(tree.codes[i].bits, tree.codes[i].code_len); + end; +end; + +{ + WriteCodeLengths + + Write lengths of tree's codes using, encoded by header_codes +} +procedure WriteCodeLengths + (const bs: TBitstreamWriter; const header_codes, tree_codes: vlc_code_p; const size: integer); +var + i, k: integer; + bits, length: longword; +begin + for i := 0 to size - 1 do begin + k := tree_codes[i].code_len; + bits := header_codes[k].bits; + length := header_codes[k].code_len; + bs.Write(bits, length); + end; +end; + + +procedure WriteCodeLengthsRle + (const bs: TBitstreamWriter; const header_codes: vlc_code_p; const rle_res: TRleResult); +var + i, k: integer; + bits, length: longword; + size: integer; +begin + size := rle_res.size; + i := 0; + while i < size do begin + k := rle_res.rl_pairs[i].code_len; + bits := header_codes[k].bits; + length := header_codes[k].code_len; + bs.Write(bits, length); + + if k = 16 then begin //Copy the previous code length, the next 2 bits indicate repeat length + bs.Write(rle_res.rl_pairs[i].repeats, 2); + end + else if k = 17 then begin //Repeat a code length of 0, 3 bits of length + bs.Write(rle_res.rl_pairs[i].repeats, 3); + end + else if k = 18 then begin //Repeat a code length of 0, 7 bits of length + bs.Write(rle_res.rl_pairs[i].repeats, 7); + end; + i += 1; + end; +end; + + +{ + GetHclen + + Reduce the number of header tree code lengths that need to be stored. + Saves a couple of bits per block. 
+} +function GetHclen(const min_length, max_length: integer): integer; +var + i, length: integer; +begin + result := 19; + for i := 18 downto 5 do begin + length := HeaderCodeLengthOrder[i]; + if (length >= min_length) and (length <= max_length) then + break + else + result -= 1; + end; + result -= 4; +end; + + +function RleCodeBuffer(const src: array of byte; const size: integer): TRleResult; +var + i, k, m: integer; + value, next_value: byte; + max_lookahead: integer; + run_length: integer; + nonzero: integer; + + procedure AddRl(const code_len, repeats: byte); + begin + result.rl_pairs[m].code_len := code_len; + result.rl_pairs[m].repeats := repeats; + m += 1; + end; + +begin + nonzero := 0; + i := 0; + m := 0; + while i < size do begin + value := src[i]; + //zero runs + if value = 0 then begin + max_lookahead := 137; + if size - i < max_lookahead then + max_lookahead := size - i; + + run_length := 1; + for k := i + 1 to i + max_lookahead - 1 do begin + next_value := src[k]; + if value = next_value then + run_length += 1 + else + break; + end; + if run_length < 3 then begin + AddRl(0, 0); + i += 1; + end else begin + i += run_length; + if run_length < 11 then + AddRl(17, run_length - 3) //Repeat a code length of 0 for 3 - 10 times. + else + AddRl(18, run_length - 11); //Repeat a code length of 0 for 11 - 138 times. + end; + end + //nonzero runs + else begin + max_lookahead := 6; + if size - i <= max_lookahead then + max_lookahead := size - i - 1; + + run_length := 0; + for k := i + 1 to i + max_lookahead do begin + next_value := src[k]; + if value = next_value then + run_length += 1 + else + break; + end; + if run_length >= 3 then begin + AddRl(value, 0); + AddRl(16, run_length - 3); //Copy the previous code length 3 - 6 times. + i += run_length + 1; + nonzero += run_length + 1; + end else begin + AddRl(value, 0); + i += 1; + nonzero += 1; + end; + end; + end; + + result.size := m; + result.nonzero := nonzero; + + for i := 0 to result.size - 1 do + result.code_lengths[i] := result.rl_pairs[i].code_len; +end; + + +{ TBlockWriter } + +{ + BeginBlock + + Write block header. 
+ header bytes: + BFINAL - 1 bit + BTYPE - 2 bits +} +procedure TBlockWriter.BeginBlock; +begin + bitWriter.Write(longword( _last ) and 1); + bitWriter.Write(longword( _block_type ), 2); +end; + +{ BuildFixedHuffCodes + Create vlc trees for blocks compressed using fixed Huffman codes +} +procedure TBlockWriter.BuildFixedHuffCodes; +var + i, bits: integer; + + function vlc(const b, len: integer): vlc_code_t; + begin + result.bits := SwapBits(b, len); + result.code_len := len; + bits += 1; + end; + +begin + bits := 0; + for i := 256 to 279 do literal_codes[i] := vlc(bits, 7); + bits := bits << 1; + for i := 0 to 143 do literal_codes[i] := vlc(bits, 8); + for i := 280 to 287 do literal_codes[i] := vlc(bits, 8); + bits := bits << 1; + for i := 144 to 255 do literal_codes[i] := vlc(bits, 9); + for i := 0 to 29 do distance_codes[i] := vlc(i, 5); +end; + +{ BuildHuffCodes + Create vlc trees from accumulated statistics for literal/length and distance coding +} +procedure TBlockWriter.BuildHuffCodes; +var + i: integer; + tree: huff_tree_t; +begin + //generate literal/match codes + huff_init(tree); + for i := 0 to LITERAL_MATCH_ELEMENTS - 1 do + tree.counts[i] := literal_match_stats[i]; + tree.counts[END_OF_BLOCK_CODE] := 1; + + build_limited_tree(tree, 15, LITERAL_MATCH_ELEMENTS); + Move(tree.codes ^, literal_codes ^, LITERAL_MATCH_ELEMENTS * sizeof(vlc_code_t)); + huff_free(tree); + + //generate distance codes + huff_init(tree); + for i := 0 to DISTANCE_ELEMENTS - 1 do + tree.counts[i] := distance_stats[i]; + + build_limited_tree(tree, 15, DISTANCE_ELEMENTS, true); + Move(tree.codes ^, distance_codes ^, DISTANCE_ELEMENTS * sizeof(vlc_code_t)); + huff_free(tree); +end; + +{ + WriteCodingTrees + + Store literal/length and distance trees. + Canonical huff coding is used, so code lengths are enough to store the trees. +} +procedure TBlockWriter.WriteCodingTrees; +var + max_used_codelength, min_used_codelength: integer; + block_header_tree: huff_tree_t; + + procedure UpdateCodeLengthStats(const code_lengths: array of byte; const size: integer); + var + i: integer; + length: integer; + begin + i := 0; + while i < size do begin + length := code_lengths[i]; + huff_raise_count(block_header_tree, length); + i += 1; + if (length < min_used_codelength) and (length > 0) then min_used_codelength := length; + if length > max_used_codelength then max_used_codelength := length; + end; + end; + +var + header_codes: vlc_code_p; + hclen, hlit, hdist: integer; + i: integer; + + codelen_buffer: array[0..LITERAL_MATCH_ELEMENTS - 1] of byte; + distance_rle: TRleResult; + literal_rle: TRleResult; +begin + //build header tree for coding the literal/length and distance tree code lengths + max_used_codelength := 0; + min_used_codelength := 15; + huff_init(block_header_tree); + + for i := 0 to LITERAL_MATCH_ELEMENTS - 1 do + codelen_buffer[i] := literal_codes[i].code_len; + literal_rle := RleCodeBuffer(codelen_buffer, LITERAL_MATCH_ELEMENTS); + UpdateCodeLengthStats(literal_rle.code_lengths, literal_rle.size); + + for i := 0 to DISTANCE_ELEMENTS - 1 do + codelen_buffer[i] := distance_codes[i].code_len; + distance_rle := RleCodeBuffer(codelen_buffer, DISTANCE_ELEMENTS); + UpdateCodeLengthStats(distance_rle.code_lengths, distance_rle.size); + + build_limited_tree(block_header_tree, 7, 19); + + //store the header tree code lengths + //when do we want to define a smaller hdist? if we used a smaller encoding window, thus limiting the distances? 
+ hlit := 286 - 257; + hdist := 29; + hclen := GetHclen(min_used_codelength, max_used_codelength); + bitWriter.Write(hlit, 5); + bitWriter.Write(hdist, 5); + bitWriter.Write(hclen, 4); + header_codes := block_header_tree.codes; + for i := 0 to hclen + 4 - 1 do + bitWriter.Write( header_codes[ HeaderCodeLengthOrder[i] ].code_len, 3 ); + + //store codes of literal/length and distance trees + WriteCodeLengthsRle(bitWriter, header_codes, literal_rle); + WriteCodeLengthsRle(bitWriter, header_codes, distance_rle); + + huff_free(block_header_tree); +end; + +{ WriteBlockEncoded + Write a complete block into bitstream: literals and match length / distance pairs, END_OF_BLOCK symbol. + Handles distinction between blocks compressed using fixed or dynamic huff codes +} +procedure TBlockWriter.WriteBlockEncoded(const search_results: PLiteralMatch; const size: integer); +var + i: integer; + lm: TLiteralMatch; + vlc: TVlcWriter; +begin + if _block_type = BTDynamic then begin + BuildHuffCodes; + WriteCodingTrees(); + end else + BuildFixedHuffCodes(); + vlc.SetTrees(bitWriter, literal_codes, distance_codes); + + for i := 0 to size - 1 do begin + lm := search_results[i]; + if lm.match_length > 0 then + vlc.WriteMatch(lm.match_length, lm.offset) + else + vlc.WriteLiteral(lm.literal); + end; + + vlc.WriteBlockEnd(); +end; + +{ + WriteBlockRaw + + Write a raw block into bitstream: copy input values. + Raw block header: + n bits - byte alignment + 16 bits - data length + 16 bits - inverted data length +} +procedure TBlockWriter.WriteBlockRaw(const rawdata: pbyte; const rawsize: integer); +var + i: integer; +begin + bitWriter.ByteAlign; + bitWriter.Write(rawsize, 16); + bitWriter.Write(rawsize xor $ffff, 16); + + for i := 0 to rawsize - 1 do + bitWriter.Write(rawdata[i], 8); //todo: memcpy +end; + +constructor TBlockWriter.Create(const output_buffer: pbyte); +begin + bitWriter := TBitstreamWriter.Create(output_buffer); + bs_cache := bitWriter.GetState; + + literal_match_stats := GetMem(LITERAL_MATCH_ELEMENTS * sizeof(integer)); + distance_stats := Getmem(DISTANCE_ELEMENTS * sizeof(integer)); + + literal_codes := GetMem(LITERAL_MATCH_ELEMENTS * sizeof(vlc_code_t)); + distance_codes := GetMem(DISTANCE_ELEMENTS * sizeof(vlc_code_t)); +end; + +destructor TBlockWriter.Destroy; +begin + inherited Destroy; + bitWriter.Free; + Freemem(literal_match_stats); + Freemem(distance_stats); + Freemem(literal_codes); + Freemem(distance_codes); +end; + +procedure TBlockWriter.InitNewBlock(const block_type: TBlockTypeEnum); +begin + FillDWord(literal_match_stats^, LITERAL_MATCH_ELEMENTS, 0); + FillDWord(distance_stats^, DISTANCE_ELEMENTS, 0); + + bitWriter.SetState(bs_cache); + _block_type := block_type; + _last := false; +end; + +procedure TBlockWriter.SetLast; +begin + _last := true; +end; + +procedure TBlockWriter.UpdateStatsMatch(const len, dist: longword); +begin + literal_match_stats[ Length2code(len) ] += 1; + distance_stats[ Distance2code(dist) ] += 1; +end; + +procedure TBlockWriter.UpdateStatsLiteral(const literal: byte); +begin + literal_match_stats[ literal ] += 1; +end; + +procedure TBlockWriter.WriteBlock(const rawdata: pbyte; const rawsize: integer; + const search_results: PLiteralMatch; const size: integer; const keep_buffer: boolean); +begin + if not keep_buffer then + bitWriter.ResetBufferPosition; + BeginBlock(); + if _block_type <> BTRaw then + WriteBlockEncoded(search_results, size) + else + WriteBlockRaw(rawdata, rawsize); +end; + +procedure TBlockWriter.Done; +begin + //throw away bs cache + bs_cache := 
bitWriter.GetState; +end; + +{ + GetStreamSize + + Returns number of whole bytes that were written into bitstream for current block. + The last written bit doesn't have to be at a byte aligned position, + so we need to cache the write buffer and mask to put the bits in the next processed block. + If the current block is the last one processed, the outstanding bits must be counted into the stream size + (they would be lost otherwise). +} +function TBlockWriter.GetStreamSize: integer; +begin + if not _last then begin + result := bitWriter.GetUnbufferedByteSize; + end else begin + bitWriter.Close; + result := bitWriter.GetByteSize; + end; +end; + + +{ TBlockReader } + +{ ReadHeaderCodes + Read code lengths and generate tables for dynamic block decoding +} +procedure TBlockReader.ReadHeaderCodes(const bs: TBitstreamReader); +const + MAX_CODE_LENGTHS = 286 + 32; //# of Literal/Length codes + # of Distance codes +var + literal_dectable, //literal/length decoding table + distance_dectable: TDecodeLookupTables; //distance decoding table + hlit, hdist, hclen: word; + len, last_len: integer; //code length, previous code length + i, k, extra_bits: longword; + code_lengths: array[0..MAX_CODE_LENGTHS-1] of byte; + dt: TDecodeLookupTables; +begin + hlit := bs.Read(5) + 257; + hdist := bs.Read(5) + 1; + hclen := bs.Read(4) + 4; + + //get code_len codes + FillByte(code_lengths, 19, 0); + for i := 0 to hclen - 1 do begin + k := HeaderCodeLengthOrder[i]; + code_lengths[k] := bs.Read(3); + end; + dt := InitDecodeLut(code_lengths, 19); + + //decode symbols + FillByte(code_lengths, MAX_CODE_LENGTHS, 0); + i := 0; + last_len := 16; + while i < hlit + hdist do begin + len := vlc_ReadCode(bs, dt); + + if len < 16 then begin + code_lengths[i] := len; + i += 1; + last_len := len; + end + else + case len of + 16: begin //rep previous length + Assert(last_len <> 16, 'dynamic block header error'); + extra_bits := bs.Read(2); + k := i; + i += extra_bits + 3; + for k := k to i - 1 do + code_lengths[k] := last_len; + end; + 17: begin //rep zero length + extra_bits := bs.Read(3); + i += extra_bits + 3; + end; + 18: begin //rep zero length + extra_bits := bs.Read(7); + i += extra_bits + 11; + end + end; + end; + + literal_dectable := InitDecodeLut(pbyte(@code_lengths), hlit); + huff_FillCanonDecodingTable(literal_dectable.canon_table, pbyte(@code_lengths), hlit); + + distance_dectable := InitDecodeLut(pbyte(@code_lengths) + hlit, hdist); + huff_FillCanonDecodingTable(distance_dectable.canon_table, pbyte(@code_lengths) + hlit, hdist); + + _vlc.SetTables(bs, literal_dectable, distance_dectable); +end; + +procedure TBlockReader.InitFixedCodes(const bs: TBitstreamReader); +var + literal_dectable, //literal/length decoding table + distance_dectable: TDecodeLookupTables; //distance decoding table + code_lengths: array[0..287] of byte; + i: integer; +begin + for i := 256 to 279 do code_lengths[i] := 7; + for i := 0 to 143 do code_lengths[i] := 8; + for i := 280 to 287 do code_lengths[i] := 8; + for i := 144 to 255 do code_lengths[i] := 9; + literal_dectable := InitDecodeLut(pbyte(@code_lengths), 288); + huff_FillCanonDecodingTable(literal_dectable.canon_table, pbyte(@code_lengths), 288); + + for i := 0 to 31 do code_lengths[i] := 5; + distance_dectable := InitDecodeLut(pbyte(@code_lengths), 32); + huff_FillCanonDecodingTable(distance_dectable.canon_table, pbyte(@code_lengths), 32); + + _vlc.SetTables(bs, literal_dectable, distance_dectable); +end; + +constructor TBlockReader.Create; +begin + _vlc := TVlcReader.Create; +end; + 
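An illustrative sketch, not part of the patch: how TBlockReader and TVlcReader above are meant to cooperate when inflating one block. The TBitstreamReader construction and the output-buffer handling (dst, dst_pos) are assumptions here; dc2's own decode loop is not shown in this diff, so treat this as an illustration of the API, not its actual caller.

procedure DecodeOneBlock(const bs: TBitstreamReader; dst: pbyte; var dst_pos: integer);
var
  reader: TBlockReader;
  block: TBlockContext;
  vlc: TVlcReader;
  len, dist: word;
  i: integer;
begin
  reader := TBlockReader.Create;
  block := reader.ReadBlockHeader(bs);   //also builds the decode tables for BTFixed/BTDynamic
  if block.btype = BTRaw then begin
    //stored block: ReadBlockHeader already byte-aligned the stream and read the size
    for i := 0 to block.size - 1 do begin
      dst[dst_pos] := byte(bs.Read(8));
      dst_pos += 1;
    end;
  end else begin
    vlc := reader.GetVlcReader();
    repeat
      vlc.ReadCodePair(len, dist);
      if len = 1 then begin
        //literal: ReadCodePair reports length 1 and stores the byte in 'dist'
        dst[dst_pos] := byte(dist);
        dst_pos += 1;
      end else if len <> END_OF_BLOCK then begin
        //match: copy 'len' bytes starting 'dist' bytes back in the output
        for i := 0 to len - 1 do begin
          dst[dst_pos] := dst[dst_pos - dist];
          dst_pos += 1;
        end;
      end;
    until len = END_OF_BLOCK;
  end;
  reader.Free;
  //a complete stream repeats this until a block with block.last = true has been decoded
end;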
+destructor TBlockReader.Destroy; +begin + _vlc.Free; + inherited; +end; + +{ ReadBlockHeader + Reads block header including length trees for codes +} +function TBlockReader.ReadBlockHeader(const bs: TBitstreamReader): TBlockContext; +var + block: TBlockContext; + t: integer; +begin + block.last := bs.Read() = 1; + block.btype := TBlockTypeEnum( bs.Read(2) ); + _block_type := block.btype; + + case block.btype of + BTRaw: begin + while not bs.IsByteAligned() do + bs.Read(); + block.size := bs.Read(16); + t := not integer(bs.Read(16)); + Assert(block.size = t, 'blk size mismatch'); + end; + BTDynamic: begin + ReadHeaderCodes(bs); + end; + BTFixed: begin + InitFixedCodes(bs); + end; + end; + + Result := block; +end; + + +function TBlockReader.GetVlcReader(): TVlcReader; +begin + result := _vlc; +end; + +end. + diff --git a/model_viewer/util/png_writer.pas b/model_viewer/util/png_writer.pas new file mode 100644 index 0000000..945a1f5 --- /dev/null +++ b/model_viewer/util/png_writer.pas @@ -0,0 +1,192 @@ +unit png_writer; +{$mode objfpc}{$H+} + +interface + +uses + sysutils, classes, + crc32fast, prediction, dc2_simple_api; + +function ImageToPngStream(const p: pbyte; const w, h: integer): TMemoryStream; +procedure png_write(const fname: string; const p: pbyte; const w, h, bitdepth: integer); + +implementation + +const + CompressionLevel = 2; + +var + //le global hack + g_SamplesPerPixel: byte; //use 3 for rgb, 1 for grayscale + +function Adler32(const checksum: longword; const data: pbyte; const size: longword): longword; +const + BASE = 65521; + BLOCK_SIZE = 4*1024; +var + i: integer; + s1, s2: longword; + k, blocks: integer; + p: pbyte; + +begin + s1 := checksum and $ffff; + s2 := (checksum shr 16) and $ffff; + p := data; + + //process stream in blocks + blocks := size div BLOCK_SIZE; + for k := 0 to blocks - 1 do begin + for i := 0 to BLOCK_SIZE - 1 do begin + s1 += p[i]; + s2 += s1; + end; + s1 := s1 mod BASE; + s2 := s2 mod BASE; + p += BLOCK_SIZE; + end; + + //final bytes + for i := 0 to (size mod BLOCK_SIZE) - 1 do begin + s1 += p[i]; + s2 += s1; + end; + s1 := s1 mod BASE; + s2 := s2 mod BASE; + + result := s2 shl 16 + s1; +end; + + +procedure StreamWriteDataCRC(var f: TMemoryStream; const p: pbyte; const size: longword); +var + checksum: longword; +begin + checksum := crc32(0, nil, 0); + checksum := NtoBE( crc32(checksum, p, size) ); + f.Write(checksum, 4); //chunk crc +end; + + +procedure WriteHeader(var f: TMemoryStream; const width, height: integer); +const + CHUNK_IHDR: array[0..3] of char = ('I','H','D','R'); + HEADER_DATA_SIZE: longword = 13; +var + chunk_start_position: integer; + chunk_data: pbyte; + chunk_size: integer; +begin + f.WriteDWord(NtoBE(HEADER_DATA_SIZE)); + chunk_start_position := f.Position; + f.WriteBuffer(CHUNK_IHDR, 4); //id + f.WriteDWord(longword(NtoBE(width))); //width + f.WriteDWord(longword(NtoBE(height))); //height + f.WriteByte(8); //bits per component + //color type: rgb, grayscale + if g_SamplesPerPixel = 3 then + f.WriteByte(2) + else + f.WriteByte(0); + f.WriteByte(0); //Compression method, Filter method, Interlace method + f.WriteByte(0); + f.WriteByte(0); + chunk_data := pbyte(f.Memory) + chunk_start_position; + chunk_size := f.Position - chunk_start_position; + StreamWriteDataCRC(f, chunk_data, chunk_size); +end; + + +procedure WriteEnding(var f: TMemoryStream); +const + CHUNK_IEND: array[0..3] of char = ('I','E','N','D'); +begin + f.WriteDWord(0); + f.Write(CHUNK_IEND, 4); + StreamWriteDataCRC(f, @CHUNK_IEND, 4); +end; + + +procedure 
CompressAndWriteData(var f: TMemoryStream; const pixels: pbyte; const width, height: integer); +var + predictor: TPngPredict; + data: pbyte; + data_size: integer; + stride: integer; + checksum: longword; + encoder: TLzEncoder; +begin + //pixel prediction & proper data layout + stride := width * g_SamplesPerPixel; + data := predictor.PredictData(pixels, width, height, g_SamplesPerPixel); + data_size := height * (stride + 1); + + //checksum for raw data + checksum := 1; + checksum := Adler32(checksum, data, data_size); + + //compression + encoder := TLzEncoder.Create(CompressionLevel); + encoder.EncodeBytesToStream(data, data_size, f); + encoder.Free; + freemem(data); + + f.WriteDWord(NtoBE(checksum)); //adler32 +end; + + +procedure WritePixelData(var f: TMemoryStream; const w, h: integer; const p: pbyte); +const + CHUNK_IDAT: array[0..3] of char = ('I','D','A','T'); +var + chunk_start_position: integer; + chunk_data: pbyte; + chunk_size: integer; +begin + f.WriteDWord(0); //data size placeholder + chunk_start_position := f.Position; + f.WriteBuffer(CHUNK_IDAT, 4); //id + f.WriteByte($78); //zlib headers: compression method, flags + f.WriteByte($DA); //clevel, fdict, parity + CompressAndWriteData(f, p, w, h); + + chunk_data := pbyte(f.Memory) + chunk_start_position; + chunk_size := f.Position - chunk_start_position; + StreamWriteDataCRC(f, chunk_data, chunk_size); + + //set proper chunk data size + f.Seek(chunk_start_position - 4, soBeginning); + chunk_size -= 4; + f.WriteDWord(longword(NtoBE(chunk_size))); + f.Seek(0, soEnd); +end; + + +function ImageToPngStream(const p: pbyte; const w, h: integer): TMemoryStream; +const + HEAD_MAGIC: array[0..7] of byte = (137, 80, 78, 71, 13, 10, 26, 10); +var + stream: TMemoryStream; +begin + stream := TMemoryStream.Create; + stream.Write(HEAD_MAGIC, 8); + WriteHeader(stream, w, h); + WritePixelData(stream, w, h, p); + WriteEnding(stream); + result := stream; +end; + +procedure png_write(const fname: string; const p: pbyte; const w, h, + bitdepth: integer); +var + stream: TMemoryStream; +begin + g_SamplesPerPixel := bitdepth div 8; + stream := ImageToPngStream(p, w, h); + stream.SaveToFile(fname); + stream.Free; +end; + + +end. + diff --git a/model_viewer/util/prediction.pas b/model_viewer/util/prediction.pas new file mode 100644 index 0000000..f60ed57 --- /dev/null +++ b/model_viewer/util/prediction.pas @@ -0,0 +1,204 @@ +unit prediction; +{$mode objfpc} + +interface + +type + { TPngPredict } + //why object? to cache row_data buffer in the future perhaps? 
+ TPngPredict = object + public + function PredictData(const pixels: pbyte; const width, height, samples_per_pixel: integer): pbyte; + end; + + +//************************************************************************************************** +implementation + +function filter_sub(dst, src: pbyte; const stride: integer): integer; +var + i: integer; + delta: integer; +begin + result := 0; + dst += 3; + for i := 0 to stride - 4 do begin + delta := dst^ - src^; + dst^ := byte(delta); + result += abs(delta); + dst += 1; + src += 1; + end; +end; + +function filter_up(dst, src_above: pbyte; const stride: integer): integer; +var + i: integer; + delta: integer; +begin + result := 0; + for i := 0 to stride - 1 do begin + delta := dst^ - src_above^; + dst^ := byte(delta); + result += abs(delta); + dst += 1; + src_above += 1; + end; +end; + +function filter_average(dst, src, src_above: pbyte; const stride: integer): integer; +var + i: integer; + delta: integer; +begin + result := 0; + for i := 0 to 2 do begin + delta := dst[i] - src_above[i] shr 1; + dst[i] := byte(delta); + result += abs(delta); + end; + dst += 3; + src_above += 3; + for i := 0 to stride - 4 do begin + delta := integer(dst^) - integer(src_above^ + src^) shr 1; + dst^ := byte(delta); + result += abs(delta); + dst += 1; + src += 1; + src_above += 1; + end; +end; + +//a,b,c = left, above, upper left +function paeth_predictor (a, b, c: integer): integer; inline; +var + p, pa, pb, pc: integer; +begin + p := a + b - c; + pa := abs(p - a); + pb := abs(p - b); + pc := abs(p - c); + if (pa <= pb) and (pa <= pc) then + result := a + else + if (pb <= pc) then + result := b + else + result := c; +end; + +//for the first pixel, take the difference against top only. then do paeth +function filter_paeth(dst, src, src_above: pbyte; const stride: integer): integer; +var + i: integer; + delta: integer; +begin + result := 0; + for i := 0 to 2 do begin + delta := dst[i] - src_above[i]; + dst[i] := byte(delta); + result += abs(delta); + end; + dst += 3; + src_above += 3; + for i := 0 to stride - 4 do begin + delta := dst^ - paeth_predictor(src^, src_above^, (src_above - 3)^); + dst^ := byte(delta); + result += abs(delta); + dst += 1; + src += 1; + src_above += 1; + end; +end; + +function sum_line(p: pbyte; length: integer): integer; +var + i: integer; +begin + result := p[0]; + for i := 1 to length - 1 do begin + if (p[i-1] <> p[i]) then + result += p[i]; + end; +end; + + +function Predict(const pixels: pbyte; const width, height, samples_per_pixel: integer): pbyte; +var + stride: integer; + best_score: integer; + best_prediction: integer; + row_buffer: array[0..4] of pbyte; + row_src: pbyte; + + procedure TestPrediction(const pred_mode: integer); + var + row_score: integer; + begin + case pred_mode of + 0: row_score := sum_line(row_buffer[0], stride); + //1: row_score := filter_sub(row_buffer[1], row_src, stride); + 2: row_score := filter_up (row_buffer[2], row_src - stride, stride); + //3: row_score := filter_average(row_buffer[3], row_src, row_src - stride, stride); + //4: row_score := filter_paeth (row_buffer[4], row_src, row_src - stride, stride); + else + row_score := MaxInt; + end; + if row_score < best_score then begin + best_prediction := pred_mode; + best_score := row_score; + end; + end; + +var + i, y, pred_mode: integer; + row_dst: pbyte; +begin + stride := width * samples_per_pixel; + result := getmem(height * (stride + 1)); + //buffer for filter = none is set to original data, so no allocation is needed + row_buffer[1] := getmem(stride 
* 4); + for i := 1 to 3 do + row_buffer[i + 1] := row_buffer[1] + stride * i; + + for y := 0 to height - 1 do begin + //setup pointers and copy input data to processing buffers: TestPrediction is destructive + row_src := pixels + y * stride; + row_buffer[0] := row_src; + for i := 1 to 4 do + move(row_src^, row_buffer[i]^, stride); + + //only filter = none, filter = sub can be used on the first pixel row: there's no other row to predict from + //Also use early termination if score gets below treshold to gain some speed. + best_score := MaxInt; + best_prediction := 0; + TestPrediction(1); + if y > 0 then begin + for pred_mode := 2 to 4 do begin + if best_score < stride then + break; + TestPrediction(pred_mode); + end; + end; + if best_prediction = 1 then + TestPrediction(0); + + //save best prediction mode and predicted data to output + row_dst := result + y * (1 + stride); + row_dst^ := best_prediction; + move(row_buffer[best_prediction]^, (row_dst + 1)^, stride); + end; + + freemem(row_buffer[1]); +end; + +{ TPngPredict } + +function TPngPredict.PredictData(const pixels: pbyte; const width, height, + samples_per_pixel: integer): pbyte; +begin + result := Predict(pixels, width, height, samples_per_pixel); +end; + +end. +
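The predictor above produces exactly the byte layout the IDAT payload expects: for each scanline one filter-type byte followed by stride (width times samples) filtered bytes, height * (stride + 1) bytes in total, which is what CompressAndWriteData hands to the LZ encoder. With the Sub/Average/Paeth branches commented out in TestPrediction, in practice only None (first row) and Up (remaining rows) end up being selected. For reference, a PNG reader undoes the Up filter by adding the reconstructed byte directly above, modulo 256; a minimal sketch of that inverse, not part of the patch:

procedure unfilter_up(curr, above: pbyte; const stride: integer);
var
  i: integer;
begin
  //first scanline: there is no row above, the "above" bytes count as zero,
  //so the filtered bytes already equal the raw ones
  if above = nil then exit;
  for i := 0 to stride - 1 do
    curr[i] := byte(curr[i] + above[i]);  //Recon(x) = Filt(x) + Recon(above), modulo 256
end;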
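Finally, png_write is the file-writing entry point of png_writer: bitdepth is bits per pixel, so 24 selects 3 samples per pixel (RGB, PNG colour type 2) and 8 selects grayscale, via g_SamplesPerPixel := bitdepth div 8. A minimal caller sketch, not part of the patch; it assumes png_writer is in the uses clause and the file name is arbitrary:

//write a 256x256 RGB gradient to a PNG file
procedure WriteTestPng;
var
  pixels, p: pbyte;
  x, y: integer;
begin
  pixels := GetMem(256 * 256 * 3);
  p := pixels;
  for y := 0 to 255 do
    for x := 0 to 255 do begin
      p[0] := byte(x);              //R
      p[1] := byte(y);              //G
      p[2] := byte((x + y) div 2);  //B
      p += 3;
    end;
  png_write('gradient.png', pixels, 256, 256, 24);  //24 bpp -> 3 samples per pixel (RGB)
  FreeMem(pixels);
end;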