From 27c61dcd5f6a7c03df075a27c48909f3df2139aa Mon Sep 17 00:00:00 2001
From: teachmain <teachmain@outlook.com>
Date: Tue, 18 Jun 2024 16:10:04 +0800
Subject: [PATCH 01/24] move VolumeBox node to projects/zenvdb

---
 {zeno/src/nodes/prim => projects/zenvdb}/VolumeBox.cpp | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {zeno/src/nodes/prim => projects/zenvdb}/VolumeBox.cpp (100%)
diff --git a/zeno/src/nodes/prim/VolumeBox.cpp b/projects/zenvdb/VolumeBox.cpp
similarity index 100%
rename from zeno/src/nodes/prim/VolumeBox.cpp
rename to projects/zenvdb/VolumeBox.cpp

From 9339dd70c35e4ea4849ccae0caae3bd3c0fc417b Mon Sep 17 00:00:00 2001
From: teachmain <teachmain@outlook.com>
Date: Tue, 18 Jun 2024 16:32:47 +0800
Subject: [PATCH 02/24] fix Linux compile error (std::max argument type mismatch)

---
 zenovis/xinxinoptix/optixPathTracer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index 04354801e4..b33f92b89f 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -2132,7 +2132,7 @@ void updatePortalLights(const std::vector<Portal>& portals) {
     auto& pll = state.plights;
     auto& pls = pll.list;
     pls.clear();
-    pls.reserve(std::max(portals.size(), 0llu) );
+    pls.reserve(std::max(portals.size(), size_t(0)) );
 
     glm::mat4 rotation = glm::mat4(1.0f);
     rotation = glm::rotate(rotation, glm::radians(state.params.sky_rot_y), glm::vec3(0,1,0));

From c98b5862b7c37fcb9449e87a519c8eda465c5ac7 Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Wed, 19 Jun 2024 20:23:12 +0800
Subject: [PATCH 03/24] float4 -> float3

---
 zenovis/xinxinoptix/PTKernel.cu         | 20 ++++++++++----------
 zenovis/xinxinoptix/optixPathTracer.cpp | 24 ++++++++++++------------
 zenovis/xinxinoptix/optixPathTracer.h   | 10 +++++-----
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu
index e5ee736af3..edd4ee0247 100644
--- a/zenovis/xinxinoptix/PTKernel.cu
+++ b/zenovis/xinxinoptix/PTKernel.cu
@@ -357,11 +357,11 @@ extern "C" __global__ void __raygen__rg()
     if( subframe_index > 0 )
     {
         const float                 a = 1.0f / static_cast<float>( subframe_index+1 );
-        const float3 accum_color_prev = make_float3( params.accum_buffer[ image_index ]);
-        const float3 accum_color_prev_d = make_float3( params.accum_buffer_D[ image_index ]);
-        const float3 accum_color_prev_s = make_float3( params.accum_buffer_S[ image_index ]);
-        const float3 accum_color_prev_t = make_float3( params.accum_buffer_T[ image_index ]);
-        const float3 accum_color_prev_b = make_float3( params.accum_buffer_B[ image_index ]);
+        const float3 accum_color_prev   = params.accum_buffer[ image_index ];
+        const float3 accum_color_prev_d = params.accum_buffer_D[ image_index ];
+        const float3 accum_color_prev_s = params.accum_buffer_S[ image_index ];
+        const float3 accum_color_prev_t = params.accum_buffer_T[ image_index ];
+        const float3 accum_color_prev_b = params.accum_buffer_B[ image_index ];
         const float3 accum_mask_prev    = params.frame_buffer_M[ image_index ];
         accum_color   = mix( vec3(accum_color_prev), accum_color, a );
         accum_color_d = mix( vec3(accum_color_prev_d), accum_color_d, a );
@@ -380,11 +380,11 @@ extern "C" __global__ void __raygen__rg()
         }
     }
 
-    params.accum_buffer[ image_index ] = make_float4( accum_color.x, accum_color.y, accum_color.z, 1.0f);
-    params.accum_buffer_D[ image_index ] = make_float4( accum_color_d.x,accum_color_d.y,accum_color_d.z, 1.0f);
-    params.accum_buffer_S[ image_index ] = make_float4( accum_color_s.x,accum_color_s.y, accum_color_s.z, 1.0f);
-    params.accum_buffer_T[ image_index ] = make_float4( accum_color_t.x,accum_color_t.y,accum_color_t.z, 1.0f);
-    params.accum_buffer_B[ image_index ] = make_float4( accum_color_b, 1.0f);
+    params.accum_buffer[ image_index ]   = make_float3( accum_color.x, accum_color.y, accum_color.z);
+    params.accum_buffer_D[ image_index ] = make_float3( accum_color_d.x,accum_color_d.y,accum_color_d.z);
+    params.accum_buffer_S[ image_index ] = make_float3( accum_color_s.x,accum_color_s.y, accum_color_s.z);
+    params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z);
+    params.accum_buffer_B[ image_index ] = accum_color_b;
 
 
     vec3 rgb_mapped = PhysicalCamera(vec3(accum_color), aperture, shutter_speed, iso, midGray, false, false);
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index b33f92b89f..e462a536f3 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -568,9 +568,9 @@ static void initLaunchParams( PathTracerState& state )
 
     CUDA_CHECK( cudaMalloc(
                 reinterpret_cast<void**>( &state.accum_buffer_p.reset() ),
-                state.params.width * state.params.height * sizeof( float4 )
+                state.params.width * state.params.height * sizeof( float3 )
                 ) );
-    state.params.accum_buffer = (float4*)(CUdeviceptr)state.accum_buffer_p;
+    state.params.accum_buffer = (float3*)(CUdeviceptr)state.accum_buffer_p;
 
     auto& params = state.params;
 
@@ -628,25 +628,25 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
     // Realloc accumulation buffer
     CUDA_CHECK( cudaMalloc(
         reinterpret_cast<void**>( &state.accum_buffer_p .reset()),
-        params.width * params.height * sizeof( float4 )
+        params.width * params.height * sizeof( float3 )
             ) );
     CUDA_CHECK( cudaMalloc(
         reinterpret_cast<void**>( &state.accum_buffer_d .reset()),
-        params.width * params.height * sizeof( float4 )
+        params.width * params.height * sizeof( float3 )
             ) );
     CUDA_CHECK( cudaMalloc(
         reinterpret_cast<void**>( &state.accum_buffer_s .reset()),
-        params.width * params.height * sizeof( float4 )
+        params.width * params.height * sizeof( float3 )
             ) );
     CUDA_CHECK( cudaMalloc(
         reinterpret_cast<void**>( &state.accum_buffer_t .reset()),
-        params.width * params.height * sizeof( float4 )
+        params.width * params.height * sizeof( float3 )
             ) );
     CUDA_CHECK( cudaMalloc(
         reinterpret_cast<void**>( &state.accum_buffer_b .reset()),
-        params.width * params.height * sizeof( float4 )
+        params.width * params.height * sizeof( float3 )
             ) );
-    state.params.accum_buffer = (float4*)(CUdeviceptr)state.accum_buffer_p;
+    state.params.accum_buffer = (float3*)(CUdeviceptr)state.accum_buffer_p;
 
     CUDA_CHECK( cudaMallocManaged(
                 reinterpret_cast<void**>( &state.albedo_buffer_p.reset()),
@@ -660,10 +660,10 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
                 ) );
     state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p;
     
-    state.params.accum_buffer_D = (float4*)(CUdeviceptr)state.accum_buffer_d;
-    state.params.accum_buffer_S = (float4*)(CUdeviceptr)state.accum_buffer_s;
-    state.params.accum_buffer_T = (float4*)(CUdeviceptr)state.accum_buffer_t;
-    state.params.accum_buffer_B = (float4*)(CUdeviceptr)state.accum_buffer_b;
+    state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d;
+    state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s;
+    state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t;
+    state.params.accum_buffer_B = (float3*)(CUdeviceptr)state.accum_buffer_b;
     state.params.subframe_index = 0;
 }
 
diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h
index 7df84d652e..9e034d9f86 100644
--- a/zenovis/xinxinoptix/optixPathTracer.h
+++ b/zenovis/xinxinoptix/optixPathTracer.h
@@ -154,11 +154,11 @@ struct CameraInfo
 struct Params
 {
     unsigned int subframe_index;
-    float4*      accum_buffer;
-    float4*      accum_buffer_D;
-    float4*      accum_buffer_S;
-    float4*      accum_buffer_T;
-    float4*      accum_buffer_B;
+    float3*      accum_buffer;
+    float3*      accum_buffer_D;
+    float3*      accum_buffer_S;
+    float3*      accum_buffer_T;
+    float3*      accum_buffer_B;
     uchar4*      frame_buffer;
     float3*      frame_buffer_C;
     float3*      frame_buffer_D;

From e1f83f21b168728204e320fc7d8e729db6a8253a Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Thu, 20 Jun 2024 15:55:33 +0800
Subject: [PATCH 04/24] fix show background

---
 zenovis/xinxinoptix/optixPathTracer.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index e462a536f3..2d1fb17761 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -2123,6 +2123,7 @@ void using_hdr_sky(bool enable) {
 
 void show_background(bool enable) {
     state.params.show_background = enable;
+    state.params.subframe_index = 0;
 }
 
 void updatePortalLights(const std::vector<Portal>& portals) {

From bc3c08dbc87892d8b89f3db21b44441ce30e6842 Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Thu, 20 Jun 2024 16:17:43 +0800
Subject: [PATCH 05/24] use accum buffers directly for AOV output

---
 zenovis/xinxinoptix/optixPathTracer.cpp | 46 ++++++++++++++-----------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index 2d1fb17761..e630028244 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -3785,26 +3785,30 @@ void set_outside_random_number(int32_t outside_random_number) {
     state.params.outside_random_number = outside_random_number;
 }
 
-void *optixgetimg_extra(std::string name) {
+std::vector<float> optixgetimg_extra2(std::string name, int w, int h) {
+    std::vector<float> tex_data(w * h * 3);
     if (name == "diffuse") {
-        return output_buffer_diffuse->getHostPointer();
+        cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_d.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
     }
     else if (name == "specular") {
-        return output_buffer_specular->getHostPointer();
+        cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_s.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
     }
     else if (name == "transmit") {
-        return output_buffer_transmit->getHostPointer();
+        cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_t.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
     }
     else if (name == "background") {
-        return output_buffer_background->getHostPointer();
+        cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_b.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
     }
     else if (name == "mask") {
-        return output_buffer_mask->getHostPointer();
+        std::copy_n((float*) output_buffer_mask->getHostPointer(), tex_data.size(), tex_data.data());
     }
     else if (name == "color") {
-        return output_buffer_color->getHostPointer();
+        cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_p.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
     }
-    throw std::runtime_error("invalid optixgetimg_extra name: " + name);
+    else {
+        throw std::runtime_error("invalid optixgetimg_extra name: " + name);
+    }
+    return tex_data;
 }
 static void save_exr(float3* ptr, int w, int h, std::string path) {
     std::vector<float3> data(w * h);
@@ -3880,7 +3884,7 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) {
         auto exr_path = path.substr(0, path.size() - 4) + ".exr";
         if (enable_output_mask) {
             path = path.substr(0, path.size() - 4);
-            save_png_data(path + "_mask.png", w, h,  (float*)optixgetimg_extra("mask"));
+            save_png_data(path + "_mask.png", w, h,  optixgetimg_extra2("mask", w, h).data());
         }
         // AOV
         if (enable_output_aov) {
@@ -3888,12 +3892,12 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) {
                 zeno::create_directories_when_write_file(exr_path);
                 SaveMultiLayerEXR(
                         {
-                                (float*)optixgetimg_extra("color"),
-                                (float*)optixgetimg_extra("diffuse"),
-                                (float*)optixgetimg_extra("specular"),
-                                (float*)optixgetimg_extra("transmit"),
-                                (float*)optixgetimg_extra("background"),
-                                (float*)optixgetimg_extra("mask"),
+                                optixgetimg_extra2("color", w, h).data(),
+                                optixgetimg_extra2("diffuse", w, h).data(),
+                                optixgetimg_extra2("specular", w, h).data(),
+                                optixgetimg_extra2("transmit", w, h).data(),
+                                optixgetimg_extra2("background", w, h).data(),
+                                optixgetimg_extra2("mask", w, h).data(),
                         },
                         w,
                         h,
@@ -3911,17 +3915,17 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) {
             }
             else {
                 path = path.substr(0, path.size() - 4);
-                save_png_color(path + ".aov.diffuse.png", w, h,  (float*)optixgetimg_extra("diffuse"));
-                save_png_color(path + ".aov.specular.png", w, h,  (float*)optixgetimg_extra("specular"));
-                save_png_color(path + ".aov.transmit.png", w, h,  (float*)optixgetimg_extra("transmit"));
-                save_png_data(path + ".aov.background.png", w, h,  (float*)optixgetimg_extra("background"));
-                save_png_data(path + ".aov.mask.png", w, h,  (float*)optixgetimg_extra("mask"));
+                save_png_color(path + ".aov.diffuse.png",   w, h,  optixgetimg_extra2("diffuse", w, h).data());
+                save_png_color(path + ".aov.specular.png",  w, h,  optixgetimg_extra2("specular", w, h).data());
+                save_png_color(path + ".aov.transmit.png",  w, h,  optixgetimg_extra2("transmit", w, h).data());
+                save_png_data(path + ".aov.background.png", w, h,  optixgetimg_extra2("background", w, h).data());
+                save_png_data(path + ".aov.mask.png",       w, h,  optixgetimg_extra2("mask", w, h).data());
             }
         }
         else {
             if (enable_output_exr) {
                 zeno::create_directories_when_write_file(exr_path);
-                save_exr((float3 *)optixgetimg_extra("color"), w, h, exr_path);
+                save_exr((float3 *)optixgetimg_extra2("color", w, h).data(), w, h, exr_path);
             }
             else {
                 std::string jpg_native_path = zeno::create_directories_when_write_file(path);

From 5dce05da579105c2ed7b67562aa0927bf3823401 Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Thu, 20 Jun 2024 16:30:49 +0800
Subject: [PATCH 06/24] remove temp buffer

---
 zenovis/xinxinoptix/PTKernel.cu         | 19 +-------
 zenovis/xinxinoptix/optixPathTracer.cpp | 64 -------------------------
 zenovis/xinxinoptix/optixPathTracer.h   |  5 --
 3 files changed, 1 insertion(+), 87 deletions(-)

diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu
index edd4ee0247..bec7c39fd5 100644
--- a/zenovis/xinxinoptix/PTKernel.cu
+++ b/zenovis/xinxinoptix/PTKernel.cu
@@ -386,24 +386,7 @@ extern "C" __global__ void __raygen__rg()
     params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z);
     params.accum_buffer_B[ image_index ] = accum_color_b;
 
-
-    vec3 rgb_mapped = PhysicalCamera(vec3(accum_color), aperture, shutter_speed, iso, midGray, false, false);
-    vec3 d_mapped = PhysicalCamera(vec3(accum_color_d), aperture, shutter_speed, iso, midGray, false, false);
-    vec3 s_mapped = PhysicalCamera(vec3(accum_color_s), aperture, shutter_speed, iso, midGray, false, false);
-    vec3 t_mapped = PhysicalCamera(vec3(accum_color_t), aperture, shutter_speed, iso, midGray, false, false);
-
-
-    float3 out_color = rgb_mapped;
-    float3 out_color_d = d_mapped;
-    float3 out_color_s = s_mapped;
-    float3 out_color_t = t_mapped;
-    float3 out_color_b = accum_color_b;
-    params.frame_buffer[ image_index ] = make_color ( out_color );
-    params.frame_buffer_C[ image_index ] = out_color;
-    params.frame_buffer_D[ image_index ] = out_color_d;
-    params.frame_buffer_S[ image_index ] = out_color_s;
-    params.frame_buffer_T[ image_index ] = out_color_t;
-    params.frame_buffer_B[ image_index ] = accum_color_b;
+    params.frame_buffer[ image_index ] = make_color ( accum_color );
     params.frame_buffer_M[ image_index ] = accum_mask;
 
     if (params.denoise) {
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index e630028244..d01df32bdd 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -253,11 +253,6 @@ ushort2 halfNormal(float4 in)
 #endif
 
 std::optional<sutil::CUDAOutputBuffer<uchar4>> output_buffer_o;
-std::optional<sutil::CUDAOutputBuffer<float3>> output_buffer_color;
-std::optional<sutil::CUDAOutputBuffer<float3>> output_buffer_diffuse;
-std::optional<sutil::CUDAOutputBuffer<float3>> output_buffer_specular;
-std::optional<sutil::CUDAOutputBuffer<float3>> output_buffer_transmit;
-std::optional<sutil::CUDAOutputBuffer<float3>> output_buffer_background;
 std::optional<sutil::CUDAOutputBuffer<float3>> output_buffer_mask;
 using Vertex = float4;
 
@@ -618,11 +613,6 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
     resize_dirty = false;
 
     output_buffer.resize( params.width, params.height );
-    (*output_buffer_color).resize( params.width, params.height );
-    (*output_buffer_diffuse).resize( params.width, params.height );
-    (*output_buffer_specular).resize( params.width, params.height );
-    (*output_buffer_transmit).resize( params.width, params.height );
-    (*output_buffer_background).resize( params.width, params.height );
     (*output_buffer_mask).resize( params.width, params.height );
 
     // Realloc accumulation buffer
@@ -687,11 +677,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Path
     // Launch
     uchar4* result_buffer_data = output_buffer.map();
     state.params.frame_buffer  = result_buffer_data;
-    state.params.frame_buffer_C = (*output_buffer_color     ).map();
-    state.params.frame_buffer_D = (*output_buffer_diffuse   ).map();
-    state.params.frame_buffer_S = (*output_buffer_specular  ).map();
-    state.params.frame_buffer_T = (*output_buffer_transmit  ).map();
-    state.params.frame_buffer_B = (*output_buffer_background).map();
     state.params.frame_buffer_M = (*output_buffer_mask      ).map();
     state.params.num_lights = lightsWrapper.g_lights.size();
     state.params.denoise = denoise;
@@ -725,11 +710,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Path
       }
     }
     output_buffer.unmap();
-    (*output_buffer_color   ).unmap();
-    (*output_buffer_diffuse   ).unmap();
-    (*output_buffer_specular  ).unmap();
-    (*output_buffer_transmit  ).unmap();
-    (*output_buffer_background).unmap();
     (*output_buffer_mask      ).unmap();
 
     try {
@@ -1627,46 +1607,6 @@ void optixinit( int argc, char* argv[] )
       );
       output_buffer_o->setStream( 0 );
     }
-    if (!output_buffer_color) {
-      output_buffer_color.emplace(
-          output_buffer_type,
-          state.params.width,
-          state.params.height
-      );
-      output_buffer_color->setStream( 0 );
-    }
-    if (!output_buffer_diffuse) {
-      output_buffer_diffuse.emplace(
-          output_buffer_type,
-          state.params.width,
-          state.params.height
-      );
-      output_buffer_diffuse->setStream( 0 );
-    }
-    if (!output_buffer_specular) {
-      output_buffer_specular.emplace(
-          output_buffer_type,
-          state.params.width,
-          state.params.height
-      );
-      output_buffer_specular->setStream( 0 );
-    }
-    if (!output_buffer_transmit) {
-      output_buffer_transmit.emplace(
-          output_buffer_type,
-          state.params.width,
-          state.params.height
-      );
-      output_buffer_transmit->setStream( 0 );
-    }
-    if (!output_buffer_background) {
-      output_buffer_background.emplace(
-          output_buffer_type,
-          state.params.width,
-          state.params.height
-      );
-      output_buffer_background->setStream( 0 );
-    }
     if (!output_buffer_mask) {
       output_buffer_mask.emplace(
           output_buffer_type,
@@ -4025,10 +3965,6 @@ void optixDestroy() {
     OptixUtil::shaderCoreLUT.clear();
 
     output_buffer_o           .reset();
-    output_buffer_diffuse     .reset();
-    output_buffer_specular    .reset();
-    output_buffer_transmit    .reset();
-    output_buffer_background  .reset();
     output_buffer_mask        .reset();
     g_StaticMeshPieces        .clear();
     g_meshPieces              .clear();
diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h
index 9e034d9f86..d1a59cdfb5 100644
--- a/zenovis/xinxinoptix/optixPathTracer.h
+++ b/zenovis/xinxinoptix/optixPathTracer.h
@@ -160,11 +160,6 @@ struct Params
     float3*      accum_buffer_T;
     float3*      accum_buffer_B;
     uchar4*      frame_buffer;
-    float3*      frame_buffer_C;
-    float3*      frame_buffer_D;
-    float3*      frame_buffer_S;
-    float3*      frame_buffer_T;
-    float3*      frame_buffer_B;
     float3*      frame_buffer_M;
 
     float3*      debug_buffer;

From 384e45a3c617c7e216e6d43a27cb0c8cb88e1447 Mon Sep 17 00:00:00 2001
From: iaomw <iaomw@live.com>
Date: Thu, 20 Jun 2024 16:50:45 +0800
Subject: [PATCH 07/24] Improve Optix cleanup

---
 zenovis/xinxinoptix/optixPathTracer.cpp | 56 +++++++++++--------------
 zenovis/xinxinoptix/xinxinoptixapi.h    |  2 +-
 2 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index b33f92b89f..172da61aa9 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -1507,20 +1507,6 @@ static void cleanupState( PathTracerState& state )
     OPTIX_CHECK(optixModuleDestroy(OptixUtil::ray_module));
     OPTIX_CHECK(optixModuleDestroy(OptixUtil::sphere_module));
 
-    cleanupSpheresGPU();
-    lightsWrapper.reset();
-    
-    for (auto& ele : list_volume) {
-        cleanupVolume(*ele);
-    }
-    list_volume.clear();
-
-    for (auto const& [key, val] : OptixUtil::g_vdb_cached_map) {
-        cleanupVolume(*val);
-    }
-    OptixUtil::g_vdb_cached_map.clear();
-    OptixUtil::g_ies.clear();
-
     std::cout << "optix cleanup" << std::endl;
 }
 
@@ -3984,31 +3970,42 @@ void optixCleanup() {
     }
    
     OptixUtil::sky_tex = OptixUtil::default_sky_tex;
+
+    cleanupSpheresGPU();
+    lightsWrapper.reset();
+    
+    for (auto& ele : list_volume) {
+        cleanupVolume(*ele);
+    }
+    list_volume.clear();
+
+    for (auto const& [key, val] : OptixUtil::g_vdb_cached_map) {
+        cleanupVolume(*val);
+    }
+    OptixUtil::g_vdb_cached_map.clear();
+    OptixUtil::g_ies.clear();
+
+    g_StaticMeshPieces.clear();
+    g_meshPieces.clear();
 }
 
 void optixDestroy() {
     using namespace OptixUtil;
     try {
         CUDA_SYNC_CHECK();
+        optixCleanup();
         cleanupState( state );
         rtMaterialShaders.clear();
 
+        OptixUtil::shaderCoreLUT.clear();
+
         OPTIX_CHECK(optixPipelineDestroy(state.pipeline));
         OPTIX_CHECK(optixDeviceContextDestroy(state.context));
     }
     catch (sutil::Exception const& e) {
         std::cout << "OptixCleanupError: " << e.what() << std::endl;
     }
-////    state.d_vertices.reset();
-////    state.d_clr.reset();
-////    state.d_mat_indices.reset();
-////    state.d_nrm.reset();
-////    state.d_tan.reset();
-////    state.d_uv.reset();
-//        std::memset((void *)&state, 0, sizeof(state));
-//        //std::memset((void *)&rtMaterialShaders[0], 0, sizeof(rtMaterialShaders[0]) * rtMaterialShaders.size());
-//
-//
+
     context                  .handle=0;
     pipeline                 .handle=0;
     ray_module               .handle=0;
@@ -4017,22 +4014,17 @@ void optixDestroy() {
     radiance_miss_group      .handle=0;
     occlusion_miss_group     .handle=0;
 
-    OptixUtil::shaderCoreLUT.clear();
-
     output_buffer_o           .reset();
     output_buffer_diffuse     .reset();
     output_buffer_specular    .reset();
     output_buffer_transmit    .reset();
     output_buffer_background  .reset();
     output_buffer_mask        .reset();
-    g_StaticMeshPieces        .clear();
-    g_meshPieces              .clear();
-    state = {};
-    isPipelineCreated               = false;
 
-
-            
+    state = {};
+    isPipelineCreated = false;         
 }
+
 #if 0
         if( outfile.empty() )
         {
diff --git a/zenovis/xinxinoptix/xinxinoptixapi.h b/zenovis/xinxinoptix/xinxinoptixapi.h
index 135cd460db..a75af3a313 100644
--- a/zenovis/xinxinoptix/xinxinoptixapi.h
+++ b/zenovis/xinxinoptix/xinxinoptixapi.h
@@ -34,8 +34,8 @@ namespace xinxinoptix {
 std::set<std::string> uniqueMatsForMesh();
 
 void optixCleanup();
-
 void optixDestroy();
+
 void optixrender(int fbo = 0, int samples = 1, bool denoise = false, bool simpleRender = false);
 void *optixgetimg(int &w, int &h);
 void optixinit(int argc, char* argv[]);

From cc75adf3f8997418c605a2d006ac6d661a90ac99 Mon Sep 17 00:00:00 2001
From: iaomw <iaomw@live.com>
Date: Thu, 20 Jun 2024 16:52:17 +0800
Subject: [PATCH 08/24] dirty fix: reset subframe_index only when show_background changes

---
 zenovis/xinxinoptix/optixPathTracer.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index 172da61aa9..ad38fddad9 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -2108,7 +2108,10 @@ void using_hdr_sky(bool enable) {
 }
 
 void show_background(bool enable) {
-    state.params.show_background = enable;
+    if (enable != state.params.show_background) {
+        state.params.show_background = enable;
+        state.params.subframe_index = 0;
+    }
 }
 
 void updatePortalLights(const std::vector<Portal>& portals) {

From b3473e45277f4dcf54a61d0844b8c9189973019c Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Thu, 20 Jun 2024 17:12:07 +0800
Subject: [PATCH 09/24] Revert "fix show background"

This reverts commit e1f83f21b168728204e320fc7d8e729db6a8253a.
---
 zenovis/xinxinoptix/optixPathTracer.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index d01df32bdd..2e01f3d04e 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -2063,7 +2063,6 @@ void using_hdr_sky(bool enable) {
 
 void show_background(bool enable) {
     state.params.show_background = enable;
-    state.params.subframe_index = 0;
 }
 
 void updatePortalLights(const std::vector<Portal>& portals) {

From 06c0b5e0ebc45386d882a65084c952420827139c Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Thu, 20 Jun 2024 18:23:27 +0800
Subject: [PATCH 10/24] store accum_buffer_B as half3 (ushort3)

---
 zenovis/xinxinoptix/PTKernel.cu         | 31 +++++++++++++++++++++++--
 zenovis/xinxinoptix/TypeCaster.cpp      | 11 +++++++++
 zenovis/xinxinoptix/TypeCaster.h        |  3 ++-
 zenovis/xinxinoptix/optixPathTracer.cpp | 13 ++++++++---
 zenovis/xinxinoptix/optixPathTracer.h   |  2 +-
 5 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu
index bec7c39fd5..8f6f340d4f 100644
--- a/zenovis/xinxinoptix/PTKernel.cu
+++ b/zenovis/xinxinoptix/PTKernel.cu
@@ -92,6 +92,33 @@ vec3 PhysicalCamera(vec3 in,
   mapped = in * exposure;
   return  enableExposure? (enableACES? ACESFilm(mapped):mapped ) : (enableACES? ACESFilm(in) : in);
 }
+
+static __inline__ __device__
+ushort3 float3_to_half3(float3 in)
+{
+    half x = __float2half(in.x);
+    half y = __float2half(in.y);
+    half z = __float2half(in.z);
+    ushort3 v;
+    v.x = reinterpret_cast<unsigned short&>(x);
+    v.y = reinterpret_cast<unsigned short&>(y);
+    v.z = reinterpret_cast<unsigned short&>(z);
+    return v;
+}
+
+static __inline__ __device__
+float3 half3_to_float3(ushort3 in)
+{
+    half x = reinterpret_cast<half&>(in.x);
+    half y = reinterpret_cast<half&>(in.y);
+    half z = reinterpret_cast<half&>(in.z);
+    float3 v;
+    v.x = __half2float(x);
+    v.y = __half2float(y);
+    v.z = __half2float(z);
+    return v;
+}
+
 extern "C" __global__ void __raygen__rg()
 {
 
@@ -361,7 +388,7 @@ extern "C" __global__ void __raygen__rg()
         const float3 accum_color_prev_d = params.accum_buffer_D[ image_index ];
         const float3 accum_color_prev_s = params.accum_buffer_S[ image_index ];
         const float3 accum_color_prev_t = params.accum_buffer_T[ image_index ];
-        const float3 accum_color_prev_b = params.accum_buffer_B[ image_index ];
+        const float3 accum_color_prev_b = half3_to_float3(params.accum_buffer_B[ image_index ]);
         const float3 accum_mask_prev    = params.frame_buffer_M[ image_index ];
         accum_color   = mix( vec3(accum_color_prev), accum_color, a );
         accum_color_d = mix( vec3(accum_color_prev_d), accum_color_d, a );
@@ -384,7 +411,7 @@ extern "C" __global__ void __raygen__rg()
     params.accum_buffer_D[ image_index ] = make_float3( accum_color_d.x,accum_color_d.y,accum_color_d.z);
     params.accum_buffer_S[ image_index ] = make_float3( accum_color_s.x,accum_color_s.y, accum_color_s.z);
     params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z);
-    params.accum_buffer_B[ image_index ] = accum_color_b;
+    params.accum_buffer_B[ image_index ] = float3_to_half3(accum_color_b);
 
     params.frame_buffer[ image_index ] = make_color ( accum_color );
     params.frame_buffer_M[ image_index ] = accum_mask;
diff --git a/zenovis/xinxinoptix/TypeCaster.cpp b/zenovis/xinxinoptix/TypeCaster.cpp
index eba49af49d..1d19df7fc3 100644
--- a/zenovis/xinxinoptix/TypeCaster.cpp
+++ b/zenovis/xinxinoptix/TypeCaster.cpp
@@ -18,4 +18,15 @@ ushort3 toHalf(float4 in)
 ushort3 toHalf(float3 in)
 {
   return toHalf({in.x, in.y, in.z, 0.0f});
+}
+
+float3 toFloat(ushort3 in) {
+    half x = reinterpret_cast<half&>(in.x);
+    half y = reinterpret_cast<half&>(in.y);
+    half z = reinterpret_cast<half&>(in.z);
+    return {
+        __half2float(x),
+        __half2float(y),
+        __half2float(z),
+    };
 }
\ No newline at end of file
diff --git a/zenovis/xinxinoptix/TypeCaster.h b/zenovis/xinxinoptix/TypeCaster.h
index 519fb09e8e..3c5db83790 100644
--- a/zenovis/xinxinoptix/TypeCaster.h
+++ b/zenovis/xinxinoptix/TypeCaster.h
@@ -3,4 +3,5 @@
 #include <vector_types.h>
 
 ushort3 toHalf(float4 in);
-ushort3 toHalf(float3 in);
\ No newline at end of file
+ushort3 toHalf(float3 in);
+float3  toFloat(ushort3 in);
\ No newline at end of file
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index 2e01f3d04e..81e4649197 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -634,7 +634,7 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
             ) );
     CUDA_CHECK( cudaMalloc(
         reinterpret_cast<void**>( &state.accum_buffer_b .reset()),
-        params.width * params.height * sizeof( float3 )
+        params.width * params.height * sizeof( ushort3 )
             ) );
     state.params.accum_buffer = (float3*)(CUdeviceptr)state.accum_buffer_p;
 
@@ -653,7 +653,7 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
     state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d;
     state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s;
     state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t;
-    state.params.accum_buffer_B = (float3*)(CUdeviceptr)state.accum_buffer_b;
+    state.params.accum_buffer_B = (ushort3*)(CUdeviceptr)state.accum_buffer_b;
     state.params.subframe_index = 0;
 }
 
@@ -3736,7 +3736,14 @@ std::vector<float> optixgetimg_extra2(std::string name, int w, int h) {
         cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_t.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
     }
     else if (name == "background") {
-        cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_b.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
+        std::vector<ushort3> temp_buffer(w * h);
+        cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_b.handle, sizeof(ushort3) * temp_buffer.size(), cudaMemcpyDeviceToHost);
+        for (auto i = 0; i < temp_buffer.size(); i++) {
+            float3 v = toFloat(temp_buffer[i]);
+            tex_data[i * 3 + 0] = v.x;
+            tex_data[i * 3 + 1] = v.y;
+            tex_data[i * 3 + 2] = v.z;
+        }
     }
     else if (name == "mask") {
         std::copy_n((float*) output_buffer_mask->getHostPointer(), tex_data.size(), tex_data.data());
diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h
index d1a59cdfb5..011ed43623 100644
--- a/zenovis/xinxinoptix/optixPathTracer.h
+++ b/zenovis/xinxinoptix/optixPathTracer.h
@@ -158,7 +158,7 @@ struct Params
     float3*      accum_buffer_D;
     float3*      accum_buffer_S;
     float3*      accum_buffer_T;
-    float3*      accum_buffer_B;
+    ushort3*     accum_buffer_B;
     uchar4*      frame_buffer;
     float3*      frame_buffer_M;
 

From f39e944f08618193cc76b935bb991da16778da34 Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Thu, 20 Jun 2024 19:16:00 +0800
Subject: [PATCH 11/24] albedo normal buffer half3

---
 zenovis/xinxinoptix/PTKernel.cu         |  8 +++----
 zenovis/xinxinoptix/optixPathTracer.cpp | 31 +++++++++++++++----------
 zenovis/xinxinoptix/optixPathTracer.h   |  4 ++--
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu
index 8f6f340d4f..7e0286cece 100644
--- a/zenovis/xinxinoptix/PTKernel.cu
+++ b/zenovis/xinxinoptix/PTKernel.cu
@@ -399,10 +399,10 @@ extern "C" __global__ void __raygen__rg()
 
         if (params.denoise) {
 
-            const float3 accum_albedo_prev = params.albedo_buffer[ image_index ];
+            const float3 accum_albedo_prev = half3_to_float3(params.albedo_buffer[ image_index ]);
             tmp_albedo = lerp(accum_albedo_prev, tmp_albedo, a);
 
-            const float3 accum_normal_prev = params.normal_buffer[ image_index ];
+            const float3 accum_normal_prev = half3_to_float3(params.normal_buffer[ image_index ]);
             tmp_normal = lerp(accum_normal_prev, tmp_normal, a);
         }
     }
@@ -417,8 +417,8 @@ extern "C" __global__ void __raygen__rg()
     params.frame_buffer_M[ image_index ] = accum_mask;
 
     if (params.denoise) {
-        params.albedo_buffer[ image_index ] = tmp_albedo;
-        params.normal_buffer[ image_index ] = tmp_normal;
+        params.albedo_buffer[ image_index ] = float3_to_half3(tmp_albedo);
+        params.normal_buffer[ image_index ] = float3_to_half3(tmp_normal);
     }
 }
 
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index 81e4649197..9dd092136c 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -571,15 +571,15 @@ static void initLaunchParams( PathTracerState& state )
 
     CUDA_CHECK( cudaMallocManaged(
             reinterpret_cast<void**>( &state.albedo_buffer_p.reset()),
-            params.width * params.height * sizeof( float3 )
+            params.width * params.height * sizeof( ushort3 )
             ) );
-    state.params.albedo_buffer = (float3*)(CUdeviceptr)state.albedo_buffer_p;
+    state.params.albedo_buffer = (ushort3*)(CUdeviceptr)state.albedo_buffer_p;
     
     CUDA_CHECK( cudaMallocManaged(
             reinterpret_cast<void**>( &state.normal_buffer_p.reset()),
-            params.width * params.height * sizeof( float3 )
+            params.width * params.height * sizeof( ushort3 )
             ) );
-    state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p;
+    state.params.normal_buffer = (ushort3*)(CUdeviceptr)state.normal_buffer_p;
     
     state.params.frame_buffer = nullptr;  // Will be set when output buffer is mapped
 
@@ -640,15 +640,15 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
 
     CUDA_CHECK( cudaMallocManaged(
                 reinterpret_cast<void**>( &state.albedo_buffer_p.reset()),
-                params.width * params.height * sizeof( float3 )
+                params.width * params.height * sizeof( ushort3 )
                 ) );
-    state.params.albedo_buffer = (float3*)(CUdeviceptr)state.albedo_buffer_p;
+    state.params.albedo_buffer = (ushort3*)(CUdeviceptr)state.albedo_buffer_p;
     
     CUDA_CHECK( cudaMallocManaged(
                 reinterpret_cast<void**>( &state.normal_buffer_p.reset()),
-                params.width * params.height * sizeof( float3 )
+                params.width * params.height * sizeof( ushort3 )
                 ) );
-    state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p;
+    state.params.normal_buffer = (ushort3*)(CUdeviceptr)state.normal_buffer_p;
     
     state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d;
     state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s;
@@ -3877,17 +3877,24 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) {
                 std::string jpg_native_path = zeno::create_directories_when_write_file(path);
                 stbi_write_jpg(jpg_native_path.c_str(), w, h, 4, p, 100);
                 if (denoise) {
-                    const float* _albedo_buffer = reinterpret_cast<float*>(state.albedo_buffer_p.handle);
+                    std::vector<float3> temp_buffer(w * h);
+                    const ushort3* _albedo_buffer = reinterpret_cast<ushort3 *>(state.albedo_buffer_p.handle);
+                    for (auto i = 0; i < w * h; i++) {
+                        temp_buffer[i] = toFloat(_albedo_buffer[i]);
+                    }
                     //SaveEXR(_albedo_buffer, w, h, 4, 0, (path+".albedo.exr").c_str(), nullptr);
                     auto a_path = path + ".albedo.pfm";
                     std::string native_a_path = zeno::create_directories_when_write_file(a_path);
-                    zeno::write_pfm(native_a_path.c_str(), w, h, _albedo_buffer);
+                    zeno::write_pfm(native_a_path.c_str(), w, h, (float*)temp_buffer.data());
 
-                    const float* _normal_buffer = reinterpret_cast<float*>(state.normal_buffer_p.handle);
+                    const ushort3* _normal_buffer = reinterpret_cast<ushort3*>(state.normal_buffer_p.handle);
+                    for (auto i = 0; i < w * h; i++) {
+                        temp_buffer[i] = toFloat(_normal_buffer[i]);
+                    }
                     //SaveEXR(_normal_buffer, w, h, 4, 0, (path+".normal.exr").c_str(), nullptr);
                     auto n_path = path + ".normal.pfm";
                     std::string native_n_path = zeno::create_directories_when_write_file(n_path);
-                    zeno::write_pfm(native_n_path.c_str(), w, h, _normal_buffer);
+                    zeno::write_pfm(native_n_path.c_str(), w, h, (float*)temp_buffer.data());
                 }
             }
         }
diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h
index 011ed43623..4e2e186756 100644
--- a/zenovis/xinxinoptix/optixPathTracer.h
+++ b/zenovis/xinxinoptix/optixPathTracer.h
@@ -163,8 +163,8 @@ struct Params
     float3*      frame_buffer_M;
 
     float3*      debug_buffer;
-    float3*      albedo_buffer;
-    float3*      normal_buffer;
+    ushort3*     albedo_buffer;
+    ushort3*     normal_buffer;
 
     unsigned int width;
     unsigned int height;

From 7f09426df789bf20bd4009e38f084253ed30c2b5 Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Thu, 20 Jun 2024 19:37:39 +0800
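Subject: [PATCH 12/24] accum_buffer_B: store a single half per pixel

The buffer already held half values (ushort3); it now holds one ushort1
per pixel. Only accum_color_b.x is written, and readers replicate that
value into all three channels.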

---
 zenovis/xinxinoptix/PTKernel.cu         | 22 ++++++++++++++++++++--
 zenovis/xinxinoptix/TypeCaster.cpp      |  4 ++++
 zenovis/xinxinoptix/TypeCaster.h        |  3 ++-
 zenovis/xinxinoptix/optixPathTracer.cpp | 16 ++++++++--------
 zenovis/xinxinoptix/optixPathTracer.h   |  2 +-
 5 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu
index 7e0286cece..a90bb6e5ad 100644
--- a/zenovis/xinxinoptix/PTKernel.cu
+++ b/zenovis/xinxinoptix/PTKernel.cu
@@ -119,6 +119,20 @@ float3 half3_to_float3(ushort3 in)
     return v;
 }
 
+static __inline__ __device__
+ushort1 float_to_half(float in)
+{
+    half x = __float2half(in);
+    return reinterpret_cast<ushort1&>(x);
+}
+
+static __inline__ __device__
+float half_to_float(ushort1 in)
+{
+    half x = reinterpret_cast<half&>(in);
+    return __half2float(x);
+}
+
 extern "C" __global__ void __raygen__rg()
 {
 
@@ -388,7 +402,11 @@ extern "C" __global__ void __raygen__rg()
         const float3 accum_color_prev_d = params.accum_buffer_D[ image_index ];
         const float3 accum_color_prev_s = params.accum_buffer_S[ image_index ];
         const float3 accum_color_prev_t = params.accum_buffer_T[ image_index ];
-        const float3 accum_color_prev_b = half3_to_float3(params.accum_buffer_B[ image_index ]);
+        const float3 accum_color_prev_b = {
+                half_to_float(params.accum_buffer_B[ image_index ]),
+                half_to_float(params.accum_buffer_B[ image_index ]),
+                half_to_float(params.accum_buffer_B[ image_index ]),
+        };
         const float3 accum_mask_prev    = params.frame_buffer_M[ image_index ];
         accum_color   = mix( vec3(accum_color_prev), accum_color, a );
         accum_color_d = mix( vec3(accum_color_prev_d), accum_color_d, a );
@@ -411,7 +429,7 @@ extern "C" __global__ void __raygen__rg()
     params.accum_buffer_D[ image_index ] = make_float3( accum_color_d.x,accum_color_d.y,accum_color_d.z);
     params.accum_buffer_S[ image_index ] = make_float3( accum_color_s.x,accum_color_s.y, accum_color_s.z);
     params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z);
-    params.accum_buffer_B[ image_index ] = float3_to_half3(accum_color_b);
+    params.accum_buffer_B[ image_index ] = float_to_half(accum_color_b.x);
 
     params.frame_buffer[ image_index ] = make_color ( accum_color );
     params.frame_buffer_M[ image_index ] = accum_mask;
diff --git a/zenovis/xinxinoptix/TypeCaster.cpp b/zenovis/xinxinoptix/TypeCaster.cpp
index 1d19df7fc3..6f60aea0bc 100644
--- a/zenovis/xinxinoptix/TypeCaster.cpp
+++ b/zenovis/xinxinoptix/TypeCaster.cpp
@@ -29,4 +29,8 @@ float3 toFloat(ushort3 in) {
         __half2float(y),
         __half2float(z),
     };
+}
+float toFloat(ushort1 in) {
+    half x = reinterpret_cast<half&>(in);
+    return __half2float(x);
 }
\ No newline at end of file
diff --git a/zenovis/xinxinoptix/TypeCaster.h b/zenovis/xinxinoptix/TypeCaster.h
index 3c5db83790..c208adc72a 100644
--- a/zenovis/xinxinoptix/TypeCaster.h
+++ b/zenovis/xinxinoptix/TypeCaster.h
@@ -4,4 +4,5 @@
 
 ushort3 toHalf(float4 in);
 ushort3 toHalf(float3 in);
-float3  toFloat(ushort3 in);
\ No newline at end of file
+float3  toFloat(ushort3 in);
+float  toFloat(ushort1 in);
\ No newline at end of file
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index 9dd092136c..f752a5bd80 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -634,7 +634,7 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
             ) );
     CUDA_CHECK( cudaMalloc(
         reinterpret_cast<void**>( &state.accum_buffer_b .reset()),
-        params.width * params.height * sizeof( ushort3 )
+        params.width * params.height * sizeof( ushort1 )
             ) );
     state.params.accum_buffer = (float3*)(CUdeviceptr)state.accum_buffer_p;
 
@@ -653,7 +653,7 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
     state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d;
     state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s;
     state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t;
-    state.params.accum_buffer_B = (ushort3*)(CUdeviceptr)state.accum_buffer_b;
+    state.params.accum_buffer_B = (ushort1*)(CUdeviceptr)state.accum_buffer_b;
     state.params.subframe_index = 0;
 }
 
@@ -3736,13 +3736,13 @@ std::vector<float> optixgetimg_extra2(std::string name, int w, int h) {
         cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_t.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
     }
     else if (name == "background") {
-        std::vector<ushort3> temp_buffer(w * h);
-        cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_b.handle, sizeof(ushort3) * temp_buffer.size(), cudaMemcpyDeviceToHost);
+        std::vector<ushort1> temp_buffer(w * h);
+        cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_b.handle, sizeof(ushort1) * temp_buffer.size(), cudaMemcpyDeviceToHost);
         for (auto i = 0; i < temp_buffer.size(); i++) {
-            float3 v = toFloat(temp_buffer[i]);
-            tex_data[i * 3 + 0] = v.x;
-            tex_data[i * 3 + 1] = v.y;
-            tex_data[i * 3 + 2] = v.z;
+            float v = toFloat(temp_buffer[i]);
+            tex_data[i * 3 + 0] = v;
+            tex_data[i * 3 + 1] = v;
+            tex_data[i * 3 + 2] = v;
         }
     }
     else if (name == "mask") {
diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h
index 4e2e186756..bcf6e90a63 100644
--- a/zenovis/xinxinoptix/optixPathTracer.h
+++ b/zenovis/xinxinoptix/optixPathTracer.h
@@ -158,7 +158,7 @@ struct Params
     float3*      accum_buffer_D;
     float3*      accum_buffer_S;
     float3*      accum_buffer_T;
-    ushort3*     accum_buffer_B;
+    ushort1*     accum_buffer_B;
     uchar4*      frame_buffer;
     float3*      frame_buffer_M;
 

From 0551d2021a4bab2d549565be03d87405c95f5b08 Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Thu, 20 Jun 2024 20:04:18 +0800
Subject: [PATCH 13/24] frame_buffer_M use half3

---
 zenovis/xinxinoptix/PTKernel.cu         |  4 ++--
 zenovis/xinxinoptix/optixPathTracer.cpp | 28 ++++++++++++-------------
 zenovis/xinxinoptix/optixPathTracer.h   |  2 +-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu
index a90bb6e5ad..b7791706ee 100644
--- a/zenovis/xinxinoptix/PTKernel.cu
+++ b/zenovis/xinxinoptix/PTKernel.cu
@@ -407,7 +407,7 @@ extern "C" __global__ void __raygen__rg()
                 half_to_float(params.accum_buffer_B[ image_index ]),
                 half_to_float(params.accum_buffer_B[ image_index ]),
         };
-        const float3 accum_mask_prev    = params.frame_buffer_M[ image_index ];
+        const float3 accum_mask_prev    = half3_to_float3(params.frame_buffer_M[ image_index ]);
         accum_color   = mix( vec3(accum_color_prev), accum_color, a );
         accum_color_d = mix( vec3(accum_color_prev_d), accum_color_d, a );
         accum_color_s = mix( vec3(accum_color_prev_s), accum_color_s, a );
@@ -432,7 +432,7 @@ extern "C" __global__ void __raygen__rg()
     params.accum_buffer_B[ image_index ] = float_to_half(accum_color_b.x);
 
     params.frame_buffer[ image_index ] = make_color ( accum_color );
-    params.frame_buffer_M[ image_index ] = accum_mask;
+    params.frame_buffer_M[ image_index ] = float3_to_half3(accum_mask);
 
     if (params.denoise) {
         params.albedo_buffer[ image_index ] = float3_to_half3(tmp_albedo);
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index f752a5bd80..b0e5abce31 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -253,7 +253,6 @@ ushort2 halfNormal(float4 in)
 #endif
 
 std::optional<sutil::CUDAOutputBuffer<uchar4>> output_buffer_o;
-std::optional<sutil::CUDAOutputBuffer<float3>> output_buffer_mask;
 using Vertex = float4;
 
 struct PathTracerState
@@ -300,6 +299,7 @@ struct PathTracerState
     raii<CUdeviceptr> accum_buffer_s;
     raii<CUdeviceptr> accum_buffer_t;
     raii<CUdeviceptr> accum_buffer_b;
+    raii<CUdeviceptr> accum_buffer_m;
 
     raii<CUdeviceptr> finite_lights_ptr;
 
@@ -613,7 +613,6 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
     resize_dirty = false;
 
     output_buffer.resize( params.width, params.height );
-    (*output_buffer_mask).resize( params.width, params.height );
 
     // Realloc accumulation buffer
     CUDA_CHECK( cudaMalloc(
@@ -632,6 +631,10 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
         reinterpret_cast<void**>( &state.accum_buffer_t .reset()),
         params.width * params.height * sizeof( float3 )
             ) );
+    CUDA_CHECK( cudaMalloc(
+        reinterpret_cast<void**>( &state.accum_buffer_m .reset()),
+        params.width * params.height * sizeof( ushort3 )
+            ) );
     CUDA_CHECK( cudaMalloc(
         reinterpret_cast<void**>( &state.accum_buffer_b .reset()),
         params.width * params.height * sizeof( ushort1 )
@@ -653,6 +656,7 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
     state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d;
     state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s;
     state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t;
+    state.params.frame_buffer_M = (ushort3*)(CUdeviceptr)state.accum_buffer_m;
     state.params.accum_buffer_B = (ushort1*)(CUdeviceptr)state.accum_buffer_b;
     state.params.subframe_index = 0;
 }
@@ -677,7 +681,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Path
     // Launch
     uchar4* result_buffer_data = output_buffer.map();
     state.params.frame_buffer  = result_buffer_data;
-    state.params.frame_buffer_M = (*output_buffer_mask      ).map();
     state.params.num_lights = lightsWrapper.g_lights.size();
     state.params.denoise = denoise;
     for(int j=0;j<1;j++){
@@ -710,7 +713,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Path
       }
     }
     output_buffer.unmap();
-    (*output_buffer_mask      ).unmap();
 
     try {
         CUDA_SYNC_CHECK();
@@ -1607,14 +1609,6 @@ void optixinit( int argc, char* argv[] )
       );
       output_buffer_o->setStream( 0 );
     }
-    if (!output_buffer_mask) {
-      output_buffer_mask.emplace(
-          output_buffer_type,
-          state.params.width,
-          state.params.height
-      );
-      output_buffer_mask->setStream( 0 );
-    }
 #ifdef OPTIX_BASE_GL
         if (!gl_display_o) {
             gl_display_o.emplace(sutil::BufferImageFormat::UNSIGNED_BYTE4);
@@ -3746,7 +3740,14 @@ std::vector<float> optixgetimg_extra2(std::string name, int w, int h) {
         }
     }
     else if (name == "mask") {
-        std::copy_n((float*) output_buffer_mask->getHostPointer(), tex_data.size(), tex_data.data());
+        std::vector<ushort3> temp_buffer(w * h);
+        cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_m.handle, sizeof(ushort3) * temp_buffer.size(), cudaMemcpyDeviceToHost);
+        for (auto i = 0; i < temp_buffer.size(); i++) {
+            float3 v = toFloat(temp_buffer[i]);
+            tex_data[i * 3 + 0] = v.x;
+            tex_data[i * 3 + 1] = v.y;
+            tex_data[i * 3 + 2] = v.z;
+        }
     }
     else if (name == "color") {
         cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_p.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
@@ -3978,7 +3979,6 @@ void optixDestroy() {
     OptixUtil::shaderCoreLUT.clear();
 
     output_buffer_o           .reset();
-    output_buffer_mask        .reset();
     g_StaticMeshPieces        .clear();
     g_meshPieces              .clear();
     state = {};
diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h
index bcf6e90a63..78e26f8b88 100644
--- a/zenovis/xinxinoptix/optixPathTracer.h
+++ b/zenovis/xinxinoptix/optixPathTracer.h
@@ -160,7 +160,7 @@ struct Params
     float3*      accum_buffer_T;
     ushort1*     accum_buffer_B;
     uchar4*      frame_buffer;
-    float3*      frame_buffer_M;
+    ushort3*     frame_buffer_M;
 
     float3*      debug_buffer;
     ushort3*     albedo_buffer;

From 28d5df2691561033c1b4b9aa79df8a99b57030a0 Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Mon, 24 Jun 2024 13:58:45 +0800
Subject: [PATCH 14/24] Revert "albedo normal buffer half3"

This reverts commit f39e944f08618193cc76b935bb991da16778da34.
---
 zenovis/xinxinoptix/PTKernel.cu         |  8 +++----
 zenovis/xinxinoptix/optixPathTracer.cpp | 31 ++++++++++---------------
 zenovis/xinxinoptix/optixPathTracer.h   |  4 ++--
 3 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu
index b7791706ee..843ce5bf47 100644
--- a/zenovis/xinxinoptix/PTKernel.cu
+++ b/zenovis/xinxinoptix/PTKernel.cu
@@ -417,10 +417,10 @@ extern "C" __global__ void __raygen__rg()
 
         if (params.denoise) {
 
-            const float3 accum_albedo_prev = half3_to_float3(params.albedo_buffer[ image_index ]);
+            const float3 accum_albedo_prev = params.albedo_buffer[ image_index ];
             tmp_albedo = lerp(accum_albedo_prev, tmp_albedo, a);
 
-            const float3 accum_normal_prev = half3_to_float3(params.normal_buffer[ image_index ]);
+            const float3 accum_normal_prev = params.normal_buffer[ image_index ];
             tmp_normal = lerp(accum_normal_prev, tmp_normal, a);
         }
     }
@@ -435,8 +435,8 @@ extern "C" __global__ void __raygen__rg()
     params.frame_buffer_M[ image_index ] = float3_to_half3(accum_mask);
 
     if (params.denoise) {
-        params.albedo_buffer[ image_index ] = float3_to_half3(tmp_albedo);
-        params.normal_buffer[ image_index ] = float3_to_half3(tmp_normal);
+        params.albedo_buffer[ image_index ] = tmp_albedo;
+        params.normal_buffer[ image_index ] = tmp_normal;
     }
 }
 
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index 231e7e3be7..ee314e3efb 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -571,15 +571,15 @@ static void initLaunchParams( PathTracerState& state )
 
     CUDA_CHECK( cudaMallocManaged(
             reinterpret_cast<void**>( &state.albedo_buffer_p.reset()),
-            params.width * params.height * sizeof( ushort3 )
+            params.width * params.height * sizeof( float3 )
             ) );
-    state.params.albedo_buffer = (ushort3*)(CUdeviceptr)state.albedo_buffer_p;
+    state.params.albedo_buffer = (float3*)(CUdeviceptr)state.albedo_buffer_p;
     
     CUDA_CHECK( cudaMallocManaged(
             reinterpret_cast<void**>( &state.normal_buffer_p.reset()),
-            params.width * params.height * sizeof( ushort3 )
+            params.width * params.height * sizeof( float3 )
             ) );
-    state.params.normal_buffer = (ushort3*)(CUdeviceptr)state.normal_buffer_p;
+    state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p;
     
     state.params.frame_buffer = nullptr;  // Will be set when output buffer is mapped
 
@@ -643,15 +643,15 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
 
     CUDA_CHECK( cudaMallocManaged(
                 reinterpret_cast<void**>( &state.albedo_buffer_p.reset()),
-                params.width * params.height * sizeof( ushort3 )
+                params.width * params.height * sizeof( float3 )
                 ) );
-    state.params.albedo_buffer = (ushort3*)(CUdeviceptr)state.albedo_buffer_p;
+    state.params.albedo_buffer = (float3*)(CUdeviceptr)state.albedo_buffer_p;
     
     CUDA_CHECK( cudaMallocManaged(
                 reinterpret_cast<void**>( &state.normal_buffer_p.reset()),
-                params.width * params.height * sizeof( ushort3 )
+                params.width * params.height * sizeof( float3 )
                 ) );
-    state.params.normal_buffer = (ushort3*)(CUdeviceptr)state.normal_buffer_p;
+    state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p;
     
     state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d;
     state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s;
@@ -3867,24 +3867,17 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) {
                 std::string jpg_native_path = zeno::create_directories_when_write_file(path);
                 stbi_write_jpg(jpg_native_path.c_str(), w, h, 4, p, 100);
                 if (denoise) {
-                    std::vector<float3> temp_buffer(w * h);
-                    const ushort3* _albedo_buffer = reinterpret_cast<ushort3 *>(state.albedo_buffer_p.handle);
-                    for (auto i = 0; i < w * h; i++) {
-                        temp_buffer[i] = toFloat(_albedo_buffer[i]);
-                    }
+                    const float* _albedo_buffer = reinterpret_cast<float*>(state.albedo_buffer_p.handle);
                     //SaveEXR(_albedo_buffer, w, h, 4, 0, (path+".albedo.exr").c_str(), nullptr);
                     auto a_path = path + ".albedo.pfm";
                     std::string native_a_path = zeno::create_directories_when_write_file(a_path);
-                    zeno::write_pfm(native_a_path.c_str(), w, h, (float*)temp_buffer.data());
+                    zeno::write_pfm(native_a_path.c_str(), w, h, _albedo_buffer);
 
-                    const ushort3* _normal_buffer = reinterpret_cast<ushort3*>(state.normal_buffer_p.handle);
-                    for (auto i = 0; i < w * h; i++) {
-                        temp_buffer[i] = toFloat(_normal_buffer[i]);
-                    }
+                    const float* _normal_buffer = reinterpret_cast<float*>(state.normal_buffer_p.handle);
                     //SaveEXR(_normal_buffer, w, h, 4, 0, (path+".normal.exr").c_str(), nullptr);
                     auto n_path = path + ".normal.pfm";
                     std::string native_n_path = zeno::create_directories_when_write_file(n_path);
-                    zeno::write_pfm(native_n_path.c_str(), w, h, (float*)temp_buffer.data());
+                    zeno::write_pfm(native_n_path.c_str(), w, h, _normal_buffer);
                 }
             }
         }
diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h
index 78e26f8b88..636366da22 100644
--- a/zenovis/xinxinoptix/optixPathTracer.h
+++ b/zenovis/xinxinoptix/optixPathTracer.h
@@ -163,8 +163,8 @@ struct Params
     ushort3*     frame_buffer_M;
 
     float3*      debug_buffer;
-    ushort3*     albedo_buffer;
-    ushort3*     normal_buffer;
+    float3*      albedo_buffer;
+    float3*      normal_buffer;
 
     unsigned int width;
     unsigned int height;

From 0b105366f00ec6d6d25f8d463bee6ccdf8e93f97 Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Mon, 24 Jun 2024 19:28:18 +0800
Subject: [PATCH 15/24] add SaveMultiLayerEXR_half and optixgetimg_extra3 (half-precision export)

---
 zenovis/xinxinoptix/ChiefDesignerEXR.h  | 45 ++++++++++++++++++
 zenovis/xinxinoptix/optixPathTracer.cpp | 63 ++++++++++++++++++++++---
 2 files changed, 101 insertions(+), 7 deletions(-)

diff --git a/zenovis/xinxinoptix/ChiefDesignerEXR.h b/zenovis/xinxinoptix/ChiefDesignerEXR.h
index d7c1835176..e72966418f 100644
--- a/zenovis/xinxinoptix/ChiefDesignerEXR.h
+++ b/zenovis/xinxinoptix/ChiefDesignerEXR.h
@@ -161,4 +161,49 @@ inline void SaveMultiLayerEXR(
     file.writePixels (height);
 }
 
+inline void SaveMultiLayerEXR_half(
+    std::vector<half*> pixels
+    , int width
+    , int height
+    , std::vector<std::string> channels
+    , const char* exrFilePath
+) {
+    using namespace Imath;
+    using namespace Imf;
+
+    Header header(width, height);
+    ChannelList channelList;
+
+    const char *std_suffix = "RGB";
+    for (auto channel: channels) {
+        for (int i = 0; i < 3; i++) {
+            std::string name = zeno::format("{}{}", channel, std_suffix[i]);
+            channelList.insert(name, Channel(HALF));
+        }
+    }
+
+    header.channels() = channelList;
+
+    OutputFile file (exrFilePath, header);
+    FrameBuffer frameBuffer;
+
+    std::vector<std::vector<half>> data;
+    for (half *rgb: pixels) {
+        std::vector<half> half_rgb(width * height * 3);
+        for (auto i = 0; i < half_rgb.size(); i++) {
+            half_rgb[i] = rgb[i];
+        }
+        data.push_back(std::move(half_rgb));
+    }
+
+    for (auto i = 0; i < channels.size(); i++) {
+        frameBuffer.insert (zeno::format("{}R", channels[i]), Slice ( HALF, (char*) &data[i][0], sizeof (half) * 3, sizeof (half) * width * 3));
+        frameBuffer.insert (zeno::format("{}G", channels[i]), Slice ( HALF, (char*) &data[i][1], sizeof (half) * 3, sizeof (half) * width * 3));
+        frameBuffer.insert (zeno::format("{}B", channels[i]), Slice ( HALF, (char*) &data[i][2], sizeof (half) * 3, sizeof (half) * width * 3));
+    }
+
+    file.setFrameBuffer (frameBuffer);
+    file.writePixels (height);
+}
+
 }
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index ee314e3efb..ad0d2ffcc8 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -3746,6 +3746,55 @@ std::vector<float> optixgetimg_extra2(std::string name, int w, int h) {
     }
     return tex_data;
 }
+
+std::vector<half> optixgetimg_extra3(std::string name, int w, int h) {
+    std::vector<half> tex_data(w * h * 3);
+    if (name == "diffuse") {
+        std::vector<float> temp_buffer(w * h * 3);
+        cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_d.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost);
+        for (auto i = 0; i < temp_buffer.size(); i++) {
+            tex_data[i] = temp_buffer[i];
+        }
+    }
+    else if (name == "specular") {
+        std::vector<float> temp_buffer(w * h * 3);
+        cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_s.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost);
+        for (auto i = 0; i < temp_buffer.size(); i++) {
+            tex_data[i] = temp_buffer[i];
+        }
+    }
+    else if (name == "transmit") {
+        std::vector<float> temp_buffer(w * h * 3);
+        cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_t.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost);
+        for (auto i = 0; i < temp_buffer.size(); i++) {
+            tex_data[i] = temp_buffer[i];
+        }
+    }
+    else if (name == "background") {
+        std::vector<half> temp_buffer(w * h);
+        cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_b.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost);
+        for (auto i = 0; i < temp_buffer.size(); i++) {
+            tex_data[i * 3 + 0] = temp_buffer[i];
+            tex_data[i * 3 + 1] = temp_buffer[i];
+            tex_data[i * 3 + 2] = temp_buffer[i];
+        }
+    }
+    else if (name == "mask") {
+        cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_m.handle, sizeof(half) * tex_data.size(), cudaMemcpyDeviceToHost);
+    }
+    else if (name == "color") {
+        std::vector<float> temp_buffer(w * h * 3);
+        cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_p.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost);
+        for (auto i = 0; i < temp_buffer.size(); i++) {
+            tex_data[i] = temp_buffer[i];
+        }
+    }
+    else {
+        throw std::runtime_error("invalid optixgetimg_extra name: " + name);
+    }
+    zeno::image_flip_vertical((ushort3*)tex_data.data(), w, h);
+    return tex_data;
+}
 static void save_exr(float3* ptr, int w, int h, std::string path) {
     std::vector<float3> data(w * h);
     std::copy_n(ptr, w * h, data.data());
@@ -3826,14 +3875,14 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) {
         if (enable_output_aov) {
             if (enable_output_exr) {
                 zeno::create_directories_when_write_file(exr_path);
-                SaveMultiLayerEXR(
+                SaveMultiLayerEXR_half(
                         {
-                                optixgetimg_extra2("color", w, h).data(),
-                                optixgetimg_extra2("diffuse", w, h).data(),
-                                optixgetimg_extra2("specular", w, h).data(),
-                                optixgetimg_extra2("transmit", w, h).data(),
-                                optixgetimg_extra2("background", w, h).data(),
-                                optixgetimg_extra2("mask", w, h).data(),
+                                optixgetimg_extra3("color", w, h).data(),
+                                optixgetimg_extra3("diffuse", w, h).data(),
+                                optixgetimg_extra3("specular", w, h).data(),
+                                optixgetimg_extra3("transmit", w, h).data(),
+                                optixgetimg_extra3("background", w, h).data(),
+                                optixgetimg_extra3("mask", w, h).data(),
                         },
                         w,
                         h,

From 4c446cb45c93e940d350a487cf34e41996b598fc Mon Sep 17 00:00:00 2001
From: zhouhang95 <765229842@qq.com>
Date: Mon, 24 Jun 2024 19:39:07 +0800
Subject: [PATCH 16/24] improve

---
 zenovis/xinxinoptix/ChiefDesignerEXR.h | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/zenovis/xinxinoptix/ChiefDesignerEXR.h b/zenovis/xinxinoptix/ChiefDesignerEXR.h
index e72966418f..31e270a318 100644
--- a/zenovis/xinxinoptix/ChiefDesignerEXR.h
+++ b/zenovis/xinxinoptix/ChiefDesignerEXR.h
@@ -187,19 +187,10 @@ inline void SaveMultiLayerEXR_half(
     OutputFile file (exrFilePath, header);
     FrameBuffer frameBuffer;
 
-    std::vector<std::vector<half>> data;
-    for (half *rgb: pixels) {
-        std::vector<half> half_rgb(width * height * 3);
-        for (auto i = 0; i < half_rgb.size(); i++) {
-            half_rgb[i] = rgb[i];
-        }
-        data.push_back(std::move(half_rgb));
-    }
-
     for (auto i = 0; i < channels.size(); i++) {
-        frameBuffer.insert (zeno::format("{}R", channels[i]), Slice ( HALF, (char*) &data[i][0], sizeof (half) * 3, sizeof (half) * width * 3));
-        frameBuffer.insert (zeno::format("{}G", channels[i]), Slice ( HALF, (char*) &data[i][1], sizeof (half) * 3, sizeof (half) * width * 3));
-        frameBuffer.insert (zeno::format("{}B", channels[i]), Slice ( HALF, (char*) &data[i][2], sizeof (half) * 3, sizeof (half) * width * 3));
+        frameBuffer.insert (zeno::format("{}R", channels[i]), Slice ( HALF, (char*) &pixels[i][0], sizeof (half) * 3, sizeof (half) * width * 3));
+        frameBuffer.insert (zeno::format("{}G", channels[i]), Slice ( HALF, (char*) &pixels[i][1], sizeof (half) * 3, sizeof (half) * width * 3));
+        frameBuffer.insert (zeno::format("{}B", channels[i]), Slice ( HALF, (char*) &pixels[i][2], sizeof (half) * 3, sizeof (half) * width * 3));
     }
 
     file.setFrameBuffer (frameBuffer);

From 0ba6b63fd9c54cf4bdcc938488e5aebc08c928d9 Mon Sep 17 00:00:00 2001
From: iaomw <iaomw@live.com>
Date: Tue, 25 Jun 2024 16:37:31 +0800
Subject: [PATCH 17/24] fix warning

---
 zenovis/xinxinoptix/LightTree.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/zenovis/xinxinoptix/LightTree.cpp b/zenovis/xinxinoptix/LightTree.cpp
index f4d99ef417..1787d6e0ce 100644
--- a/zenovis/xinxinoptix/LightTree.cpp
+++ b/zenovis/xinxinoptix/LightTree.cpp
@@ -20,7 +20,7 @@ LightTreeSampler::LightTreeSampler(std::vector<GenericLight> &lights) {
         LightBounds lightBounds = light.bounds();
 
         if (lightBounds.phi > 0) {
-            bvhLights.push_back(std::make_pair(i, lightBounds));
+            bvhLights.push_back(std::make_pair((int)i, lightBounds));
             rootBounds = Union(rootBounds, lightBounds.bounds);
         }
     }

From 15fa735d0d8fb3f78c3f8b58605228f3c8a0174b Mon Sep 17 00:00:00 2001
From: iaomw <iaomw@live.com>
Date: Tue, 25 Jun 2024 16:55:49 +0800
Subject: [PATCH 18/24] improve ptx compiling

---
 zenovis/xinxinoptix/OptiXStuff.h        | 38 ++++++------
 zenovis/xinxinoptix/SDK/sutil/sutil.cpp | 81 ++++++-------------------
 zenovis/xinxinoptix/SDK/sutil/sutil.h   |  2 +-
 zenovis/xinxinoptix/optixPathTracer.cpp | 18 ++----
 4 files changed, 44 insertions(+), 95 deletions(-)

diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h
index 6e0dd2a653..4df7b9ea61 100644
--- a/zenovis/xinxinoptix/OptiXStuff.h
+++ b/zenovis/xinxinoptix/OptiXStuff.h
@@ -76,6 +76,19 @@ inline raii<OptixProgramGroup>              radiance_miss_group      ;
 inline raii<OptixProgramGroup>              occlusion_miss_group     ;
 inline bool isPipelineCreated = false;
 ////end material independent stuffs
+
+inline static auto DefaultCompileOptions() {
+    OptixModuleCompileOptions module_compile_options = {};
+#if defined( NDEBUG )
+    module_compile_options.optLevel   = OPTIX_COMPILE_OPTIMIZATION_DEFAULT;
+    module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
+#else 
+    module_compile_options.optLevel   = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
+    module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
+#endif
+    return module_compile_options;
+}
+
 inline void createContext()
 {
     // Initialize CUDA
@@ -85,7 +98,11 @@ inline void createContext()
     OPTIX_CHECK( optixInit() );
     OptixDeviceContextOptions options = {};
     options.logCallbackFunction       = &context_log_cb;
+#if defined( NDEBUG )
+    options.logCallbackLevel          = 0;
+#else
     options.logCallbackLevel          = 4;
+#endif
     options.validationMode            = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
     OPTIX_CHECK( optixDeviceContextCreate( cu_ctx, &options, &context ) );
     pipeline_compile_options = {};
@@ -98,14 +115,7 @@ inline void createContext()
     pipeline_compile_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW | OPTIX_EXCEPTION_FLAG_TRACE_DEPTH | OPTIX_EXCEPTION_FLAG_DEBUG;
     pipeline_compile_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE | OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE | OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
 
-    OptixModuleCompileOptions module_compile_options = {};
-    #if defined( NDEBUG )
-        module_compile_options.optLevel   = OPTIX_COMPILE_OPTIMIZATION_DEFAULT;
-        module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL;
-    #else 
-        module_compile_options.optLevel   = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
-        module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
-    #endif
+    OptixModuleCompileOptions module_compile_options = DefaultCompileOptions();
 
     OptixBuiltinISOptions builtin_is_options {};
 
@@ -169,16 +179,8 @@ static std::vector<char> readData(std::string const& filename)
 
 inline bool createModule(OptixModule &module, OptixDeviceContext &context, const char *source, const char *name, const char *macro=nullptr, tbb::task_group* _c_group = nullptr)
 {
-    OptixModuleCompileOptions module_compile_options = {};
+    OptixModuleCompileOptions module_compile_options = DefaultCompileOptions();
     module_compile_options.maxRegisterCount  = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT;
-#if defined( NDEBUG )
-    module_compile_options.optLevel          = OPTIX_COMPILE_OPTIMIZATION_DEFAULT;
-    module_compile_options.debugLevel        = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL;
-#else
-    module_compile_options.optLevel          = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
-    module_compile_options.debugLevel        = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
-
-#endif
 
     char log[2048];
     size_t sizeof_log = sizeof( log );
@@ -203,7 +205,7 @@ inline bool createModule(OptixModule &module, OptixDeviceContext &context, const
         compilerOptions.push_back(macro);
     }
 
-    const char* input = sutil::getInputData( source, macro, name, inputSize, is_success, nullptr, compilerOptions);
+    const char* input = sutil::getCodePTX( source, macro, name, inputSize, is_success, nullptr, compilerOptions);
 
     if(is_success==false)
     {
diff --git a/zenovis/xinxinoptix/SDK/sutil/sutil.cpp b/zenovis/xinxinoptix/SDK/sutil/sutil.cpp
index c5f72571a6..d8d687724b 100644
--- a/zenovis/xinxinoptix/SDK/sutil/sutil.cpp
+++ b/zenovis/xinxinoptix/SDK/sutil/sutil.cpp
@@ -881,53 +881,12 @@ inline bool getPtxFromCuString( std::string&                    ptx,
                                 const char*                     cu_source,
                                 const char*                     name,
                                 const char**                    log_string,
-                                const std::vector<const char*>& compiler_options)
+                                const std::vector<const char*>& options)
 {
     // Create program
     nvrtcProgram prog;
     NVRTC_CHECK_ERROR( nvrtcCreateProgram( &prog, cu_source, name, getIncFileTab().size(), getIncFileTab().data(), getIncPathTab().data() ) );
 
-    // Gather NVRTC options
-    std::vector<const char*> options;
-
-    //const char *abs_dirs[] = {SAMPLES_ABSOLUTE_INCLUDE_DIRS};
-    //const std::string base_dir = getSampleDir();
-
-    //// Set sample dir as the primary include path
-    //std::string sample_dir;
-    //if( sample_directory )
-    //{
-        //sample_dir = std::string( "-I" ) + base_dir + '/' + sample_directory;
-        //options.push_back( sample_dir.c_str() );
-    //}
-
-    //// Collect include dirs
-    //std::vector<std::string> include_dirs;
-    //const char*              abs_dirs[] = {SAMPLES_ABSOLUTE_INCLUDE_DIRS};
-    //const char*              rel_dirs[] = {SAMPLES_RELATIVE_INCLUDE_DIRS};
-
-    //for( const char* dir : abs_dirs )
-    //{
-        //include_dirs.push_back( std::string( "-I" ) + dir );
-    //}
-    //for( const char* dir : rel_dirs )
-    //{
-        //include_dirs.push_back( "-I" + base_dir + '/' + dir );
-    //}
-    //for( const std::string& dir : include_dirs)
-    //{
-        //options.push_back( dir.c_str() );
-    //}
-    //std::vector<std::string> fuckcpp;
-    //for( const char* dir : abs_dirs )
-    //{
-        //fuckcpp.push_back(std::string( "-I" ) + dir);
-        //options.push_back( fuckcpp.back().c_str() );
-    //}
-    
-    // Collect NVRTC options
-    std::copy( std::begin( compiler_options ), std::end( compiler_options ), std::back_inserter( options ) );
-
     // JIT compile CU to PTX
     const nvrtcResult compileRes = nvrtcCompileProgram( prog, (int)options.size(), options.data() );
 
@@ -1049,13 +1008,13 @@ static void getInputDataFromFile( std::string& ptx, const char* sample_name, con
 
 struct PtxSourceCache
 {
-    std::map<std::string, std::string*> map;
+    std::map< std::string, std::shared_ptr<std::string> > map;
     ~PtxSourceCache()
     {
-        for( std::map<std::string, std::string*>::const_iterator it = map.begin(); it != map.end(); ++it )
-            delete it->second;
+        map = {};
     }
 };
+
 static PtxSourceCache g_ptxSourceCache;
 
 static std::string ridincs(std::string s) {
@@ -1092,37 +1051,31 @@ static const char* getOptixHeader() {
 }
 #endif
 
-const char* getInputData( const char*                     source,
-                          const char*                     macro,
-                          const char*                     name,
-                          size_t&                         dataSize,
-                          bool &                          is_success,
-                          const char**                    log,
-                          const std::vector<const char*>& compilerOptions)
+const char* getCodePTX( const char*                     source,
+                        const char*                     macro,
+                        const char*                     name,
+                        size_t&                         dataSize,
+                        bool &                          is_success,
+                        const char**                    log,
+                        const std::vector<const char*>& compilerOptions)
 {
     if( log )
         *log = NULL;
 
-    std::string *                                 ptx, cu;
-    std::string                                   key  = std::string( source ) + (macro!=nullptr? std::string(macro):"");
-    std::map<std::string, std::string*>::iterator elem = g_ptxSourceCache.map.find( key );
+    std::shared_ptr<std::string> ptx {};
+    std::string key = std::string( source ) + (macro!=nullptr? std::string(macro):"");
 
-    if( elem == g_ptxSourceCache.map.end() )
+    if( g_ptxSourceCache.map.count(key) == 0 )
     {
-        ptx = new std::string();
-#if CUDA_NVRTC_ENABLED
-        //getCuStringFromFile( cu, location, sampleDir, filename );
-        //cu.replace(cu.find("#include <optix.h>\n"), strlen("#include <optix.h>\n"), getOptixHeader());
+        ptx = std::make_shared<std::string>();
         is_success = getPtxFromCuString( *ptx, source, name, log, compilerOptions );
-#else
-        getInputDataFromFile( *ptx, sample, filename );
-#endif
+
         if(is_success==true)
             g_ptxSourceCache.map[key] = ptx;
     }
     else
     {
-        ptx = elem->second;
+        ptx = g_ptxSourceCache.map[key];
         is_success = true;
     }
     dataSize = ptx->size();
diff --git a/zenovis/xinxinoptix/SDK/sutil/sutil.h b/zenovis/xinxinoptix/SDK/sutil/sutil.h
index 107287e83b..6807250940 100644
--- a/zenovis/xinxinoptix/SDK/sutil/sutil.h
+++ b/zenovis/xinxinoptix/SDK/sutil/sutil.h
@@ -144,7 +144,7 @@ SUTILAPI void calculateCameraVariables(
 double SUTILAPI currentTime();
 
 // Get input data, either pre-compiled with NVCC or JIT compiled by NVRTC.
-SUTILAPI const char* getInputData( const char* source,
+SUTILAPI const char* getCodePTX( const char* source,
                                    const char* macro,
                                    const char* name,
                                    size_t&     dataSize,
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index ad38fddad9..8cd1bf4861 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -709,9 +709,7 @@ static void launchSubframe( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Path
                     ) );
 
         //CUDA_SYNC_CHECK();
-
-            /* printf("mama%d\n", std::this_thread::get_id()); */
-            /* fflush(stdout); */
+        
         OPTIX_CHECK( optixLaunch(
                     state.pipeline,
                     0,
@@ -2626,7 +2624,7 @@ void optixupdatematerial(std::vector<std::shared_ptr<ShaderPrepared>> &shaders)
 
             auto shaderCore = std::make_shared<OptixUtil::OptixShaderCore>(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout");
             shaderCore->moduleIS = &OptixUtil::sphere_module;
-            shaderCore->loadProgram(0, "--define-macro=_SPHERE_");
+            shaderCore->loadProgram(1, "--define-macro=_SPHERE_");
             shaderCoreLUT.emplace(std::tuple{"DeflMatShader.cu", ShaderMaker::Sphere}, shaderCore);
         });
 
@@ -2634,16 +2632,12 @@ void optixupdatematerial(std::vector<std::shared_ptr<ShaderPrepared>> &shaders)
             auto shader_string = sutil::lookupIncFile("Light.cu");
 
             auto shaderCore = std::make_shared<OptixUtil::OptixShaderCore>(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout");
-            shaderCore->loadProgram(0);
+            shaderCore->loadProgram(2);
             shaderCoreLUT.emplace(std::tuple{"Light.cu", ShaderMaker::Mesh}, shaderCore);
-        });
-
-        OptixUtil::_compile_group.run([&] () {
-            auto shader_string = sutil::lookupIncFile("Light.cu");
 
-            auto shaderCore = std::make_shared<OptixUtil::OptixShaderCore>(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout");
+            shaderCore = std::make_shared<OptixUtil::OptixShaderCore>(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout");
             shaderCore->moduleIS = &OptixUtil::sphere_module;
-            shaderCore->loadProgram(0);
+            shaderCore->loadProgram(3);
             shaderCoreLUT.emplace(std::tuple{"Light.cu", ShaderMaker::Sphere}, shaderCore);
         });
 
@@ -2652,7 +2646,7 @@ void optixupdatematerial(std::vector<std::shared_ptr<ShaderPrepared>> &shaders)
 
             auto shaderCore = std::make_shared<OptixUtil::OptixShaderCore>(shader_string, 
                                                     "__closesthit__radiance_volume", "__anyhit__occlusion_volume", "__intersection__volume");
-            shaderCore->loadProgram(0);
+            shaderCore->loadProgram(4);
             shaderCoreLUT.emplace(std::tuple{"volume.cu", ShaderMaker::Volume}, shaderCore);
         });
 

From 6942696b7ab0815c62997b0409ccbd98e21fbb08 Mon Sep 17 00:00:00 2001
From: iaomw <iaomw@live.com>
Date: Tue, 25 Jun 2024 17:38:30 +0800
Subject: [PATCH 19/24] reduce optix debug level

---
 zenovis/xinxinoptix/OptiXStuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h
index 4df7b9ea61..310d015263 100644
--- a/zenovis/xinxinoptix/OptiXStuff.h
+++ b/zenovis/xinxinoptix/OptiXStuff.h
@@ -84,7 +84,7 @@ inline static auto DefaultCompileOptions() {
     module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
 #else 
     module_compile_options.optLevel   = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
-    module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
+    module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MODERATE;
 #endif
     return module_compile_options;
 }

From 9ba5ef443c345075f9eb1ab12f2cc67168f927b5 Mon Sep 17 00:00:00 2001
From: iaomw <iaomw@live.com>
Date: Tue, 25 Jun 2024 18:22:47 +0800
Subject: [PATCH 20/24] fix crash for debug build

---
 zenovis/xinxinoptix/optixPathTracer.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index 08795f21d0..6575430c19 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -3961,11 +3961,17 @@ void optixCleanup() {
     state.params.sky_strength = 1.0f;
     state.params.sky_texture;
 
-    for (auto& [k, v] : OptixUtil::g_tex) {
+    std::vector<std::string> keys;
+
+    for (auto& [k, _] : OptixUtil::g_tex) {
         if (k != OptixUtil::default_sky_tex) {
-            OptixUtil::removeTexture(k);
+            keys.push_back(k);
         }
     }
+
+    for (auto& k : keys) {
+        OptixUtil::removeTexture(k);
+    }
    
     OptixUtil::sky_tex = OptixUtil::default_sky_tex;
 

From ea8f963b2aba93622a8e5ddca2e223e363769e87 Mon Sep 17 00:00:00 2001
From: Zhou Hang <765229842@qq.com>
Date: Mon, 1 Jul 2024 14:51:49 +0800
Subject: [PATCH 21/24] Improve camera (#1952)

* camera quat

* improve

* roll

* roll ok

* work

* use quat

* rot

* translate

* focus

* refactor

* remove ZxxHappyLookParam

* refactor

* CameraLookToDir

* refactor

* getDepth

* some bug

* fix

* depth-aware

* mouse wheel

* glDepthMask

* snap surface

* blank rot

* blank dolly

* depth-aware button

* fix camera move

* First Person Navigation: movement speed

* statusbarShowMessage

* refactor

* wasd move step

* FPN remove roll

* auto flip

* refactor

* click pos

* pos channel

* optix click pos

* exchange ctrl alt

* Revert "optix click pos"

This reverts commit 7bdc1533e8fd35c3c420b2c66531eb8cebc26c33.

* Revert "pos channel"

This reverts commit 835cfec569640d33b340db590e2b6cab0caec61b.

* pos channel 2

* optix click pos 2

* refactor

* camera reset

* remove view proj matrix
---
 projects/FBX/MayaCamera.cpp                   |  54 ---
 ui/zenoedit/dock/docktabcontent.cpp           |  29 ++
 ui/zenoedit/dock/docktabcontent.h             |   3 +
 ui/zenoedit/nodesys/cameranode.cpp            |  22 +-
 ui/zenoedit/viewport/cameracontrol.cpp        | 431 +++++++++---------
 ui/zenoedit/viewport/cameracontrol.h          |  37 +-
 ui/zenoedit/viewport/displaywidget.cpp        |  23 +-
 ui/zenoedit/viewport/displaywidget.h          |   2 +-
 ui/zenoedit/viewport/optixviewport.cpp        |  16 +-
 ui/zenoedit/viewport/optixviewport.h          |   2 +-
 ui/zenoedit/viewport/viewportwidget.cpp       |  16 +-
 ui/zenoedit/viewport/viewportwidget.h         |   3 +-
 ui/zenoedit/viewport/zenovis.cpp              |   1 -
 ui/zenoedit/viewport/zoptixviewport.cpp       |  16 +-
 ui/zenoedit/viewport/zoptixviewport.h         |   3 +-
 ui/zenoedit/viewportinteraction/transform.cpp |  22 +-
 ui/zenoedit/viewportinteraction/transform.h   |  10 +-
 ui/zenoedit/zenomainwindow.cpp                |   4 +
 ui/zenoedit/zenomainwindow.h                  |   1 +
 zeno/include/zeno/types/CameraObject.h        |  22 +-
 zeno/src/nodes/CameraNodes.cpp                |  48 ++
 zenovis/include/zenovis/Camera.h              |  91 ++--
 zenovis/include/zenovis/RenderEngine.h        |   1 +
 .../include/zenovis/bate/FrameBufferRender.h  |  43 +-
 zenovis/src/Camera.cpp                        | 106 +----
 zenovis/src/bate/GraphicPrimitive.cpp         |   2 +-
 zenovis/src/bate/GraphicRotateHandler.cpp     |   2 +-
 zenovis/src/bate/GraphicScaleHandler.cpp      |   6 +-
 zenovis/src/bate/GraphicTransHandler.cpp      |   2 +-
 zenovis/src/bate/HudGraphicGrid.cpp           |   6 +-
 zenovis/src/bate/RenderEngineBate.cpp         |  27 +-
 zenovis/src/optx/RenderEngineOptx.cpp         |  19 +-
 zenovis/src/zhxx/RenderEngineZhxx.cpp         |   5 +-
 zenovis/xinxinoptix/DeflMatShader.cu          |   1 +
 zenovis/xinxinoptix/PTKernel.cu               |   5 +-
 zenovis/xinxinoptix/TraceStuff.h              |   1 +
 zenovis/xinxinoptix/optixPathTracer.cpp       |  30 +-
 zenovis/xinxinoptix/optixPathTracer.h         |   1 +
 zenovis/xinxinoptix/xinxinoptixapi.h          |   2 +
 39 files changed, 610 insertions(+), 505 deletions(-)

diff --git a/projects/FBX/MayaCamera.cpp b/projects/FBX/MayaCamera.cpp
index f5fc3cb4ed..e100cdec00 100644
--- a/projects/FBX/MayaCamera.cpp
+++ b/projects/FBX/MayaCamera.cpp
@@ -5,7 +5,6 @@
 #include <zeno/utils/log.h>
 
 #include <zeno/zeno.h>
-#include <zeno/utils/eulerangle.h>
 #include <zeno/utils/logger.h>
 #include <zeno/extra/GlobalState.h>
 #include <zeno/types/NumericObject.h>
@@ -32,7 +31,6 @@
 #include <glm/gtx/matrix_decompose.hpp>
 
 #include <fstream>
-#include <regex>
 
 #define SET_CAMERA_DATA                         \
     out_pos = (n->pos);                       \
@@ -88,58 +86,6 @@ ZENO_DEFNODE(CihouMayaCameraFov)({
     {"FBX"},
 });
 
-struct CameraNode: zeno::INode{
-    virtual void apply() override {
-        auto camera = std::make_unique<zeno::CameraObject>();
-
-        camera->pos = get_input2<zeno::vec3f>("pos");
-        camera->up = get_input2<zeno::vec3f>("up");
-        camera->view = get_input2<zeno::vec3f>("view");
-        camera->fov = get_input2<float>("fov");
-        camera->aperture = get_input2<float>("aperture");
-        camera->focalPlaneDistance = get_input2<float>("focalPlaneDistance");
-        camera->userData().set2("frame", get_input2<float>("frame"));
-
-        auto other_props = get_input2<std::string>("other");
-        std::regex reg(",");
-        std::sregex_token_iterator p(other_props.begin(), other_props.end(), reg, -1);
-        std::sregex_token_iterator end;
-        std::vector<float> prop_vals;
-        while (p != end) {
-            prop_vals.push_back(std::stof(*p));
-            p++;
-        }
-        if (prop_vals.size() == 6) {
-            camera->isSet = true;
-            camera->center = {prop_vals[0], prop_vals[1], prop_vals[2]};
-            camera->theta = prop_vals[3];
-            camera->phi = prop_vals[4];
-            camera->radius = prop_vals[5];
-        }
-
-        set_output("camera", std::move(camera));
-    }
-};
-
-ZENO_DEFNODE(CameraNode)({
-    {
-        {"vec3f", "pos", "0,0,5"},
-        {"vec3f", "up", "0,1,0"},
-        {"vec3f", "view", "0,0,-1"},
-        {"float", "fov", "45"},
-        {"float", "aperture", "11"},
-        {"float", "focalPlaneDistance", "2.0"},
-        {"string", "other", ""},
-        {"int", "frame", "0"},
-    },
-    {
-        {"CameraObject", "camera"},
-    },
-    {
-    },
-    {"FBX"},
-});
-
 struct CameraEval: zeno::INode {
 
     glm::quat to_quat(zeno::vec3f up, zeno::vec3f view){
diff --git a/ui/zenoedit/dock/docktabcontent.cpp b/ui/zenoedit/dock/docktabcontent.cpp
index f034cf41d9..8e679b65d0 100644
--- a/ui/zenoedit/dock/docktabcontent.cpp
+++ b/ui/zenoedit/dock/docktabcontent.cpp
@@ -904,6 +904,18 @@ void DockContent_View::initToolbar(QHBoxLayout* pToolLayout)
         pToolLayout->addWidget(m_camera_setting);
     }
 
+    {
+        pToolLayout->addWidget(new ZLineWidget(false, QColor("#121416")));
+        m_depth = new QCheckBox(tr("Depth"));
+        m_depth->setStyleSheet("color: white;");
+        m_depth->setCheckState(Qt::Checked);
+        pToolLayout->addWidget(m_depth);
+        m_FPN = new QCheckBox(tr("FPN"));
+        m_FPN->setStyleSheet("color: white;");
+        pToolLayout->addWidget(m_FPN);
+        m_Reset = new QPushButton(tr("Reset"));
+        pToolLayout->addWidget(m_Reset);
+    }
     pToolLayout->addWidget(new ZLineWidget(false, QColor("#121416")));
     pToolLayout->addWidget(m_screenshoot);
     pToolLayout->addWidget(m_recordVideo);
@@ -941,6 +953,16 @@ void DockContent_View::initConnections()
         });
     }
 
+    connect(m_depth, &QCheckBox::stateChanged, this, [=](int state) {
+        bool bChecked = (state == Qt::Checked);
+        zeno::getSession().userData().set2("viewport-depth-aware-navigation", bChecked);
+    });
+
+    connect(m_FPN, &QCheckBox::stateChanged, this, [=](int state) {
+        bool bChecked = (state == Qt::Checked);
+        zeno::getSession().userData().set2("viewport-FPN-navigation", bChecked);
+    });
+
     if (m_camera_setting) {
         connect(m_camera_setting, &QPushButton::clicked, this, [=](bool bToggled) {
             zenovis::ZOptixCameraSettingInfo info = m_pDisplay->getCamera();
@@ -953,6 +975,13 @@ void DockContent_View::initConnections()
             }
         });
     }
+    if (m_Reset) {
+        connect(m_Reset, &QPushButton::clicked, this, [=](bool bToggled) {
+            auto *scene = m_pDisplay->getZenoVis()->getSession()->get_scene();
+            scene->camera->reset();
+            m_pDisplay->updateFrame();
+        });
+    }
 
     connect(m_smooth_shading, &ZToolBarButton::toggled, this, [=](bool bToggled) {
         m_pDisplay->onCommandDispatched(ZenoMainWindow::ACTION_SMOOTH_SHADING, bToggled);
diff --git a/ui/zenoedit/dock/docktabcontent.h b/ui/zenoedit/dock/docktabcontent.h
index 5a59ae87c1..c173ca61fa 100644
--- a/ui/zenoedit/dock/docktabcontent.h
+++ b/ui/zenoedit/dock/docktabcontent.h
@@ -162,6 +162,9 @@ class DockContent_View : public DockToolbarWidget
     QPushButton *m_camera_setting = nullptr;
     QCheckBox *m_background;
     QCheckBox *m_uv_mode = nullptr;
+    QCheckBox *m_depth = nullptr;
+    QCheckBox *m_FPN = nullptr;
+    QPushButton *m_Reset = nullptr;
 
     QComboBox* m_cbRes;
     QAction* m_pFocus;
diff --git a/ui/zenoedit/nodesys/cameranode.cpp b/ui/zenoedit/nodesys/cameranode.cpp
index 7194cd518c..c4eb08d78a 100644
--- a/ui/zenoedit/nodesys/cameranode.cpp
+++ b/ui/zenoedit/nodesys/cameranode.cpp
@@ -86,21 +86,23 @@ void CameraNode::onEditClicked()
         auto camera = *(scene->camera.get());
 
         INPUT_SOCKET pos = inputs["pos"];
-        vec = {camera.m_lodcenter[0], camera.m_lodcenter[1], camera.m_lodcenter[2]};
+        vec = {camera.m_pos[0], camera.m_pos[1], camera.m_pos[2]};
         info.name = "pos";
         info.oldValue = pos.info.defaultValue;
         info.newValue = QVariant::fromValue(vec);
         pModel->updateSocketDefl(nodeid, info, this->subgIndex(), true);
 
+        auto m_lodup = camera.get_lodup();
+        auto m_lodfront = camera.get_lodfront();
         INPUT_SOCKET up = inputs["up"];
-        vec = {camera.m_lodup[0], camera.m_lodup[1], camera.m_lodup[2]};
+        vec = {m_lodup[0], m_lodup[1], m_lodup[2]};
         info.name = "up";
         info.oldValue = up.info.defaultValue;
         info.newValue = QVariant::fromValue(vec);
         pModel->updateSocketDefl(nodeid, info, this->subgIndex(), true);
 
         INPUT_SOCKET view = inputs["view"];
-        vec = {camera.m_lodfront[0], camera.m_lodfront[1], camera.m_lodfront[2]};
+        vec = {m_lodfront[0], m_lodfront[1], m_lodfront[2]};
         info.name = "view";
         info.oldValue = view.info.defaultValue;
         info.newValue = QVariant::fromValue(vec);
@@ -137,11 +139,11 @@ void CameraNode::onEditClicked()
 
             INPUT_SOCKET other = inputs["other"];
             std::string other_prop;
-            auto center = camera.m_center;
+            auto center = camera.m_pivot;
             other_prop += zeno::format("{},{},{},", center[0], center[1], center[2]);
-            other_prop += zeno::format("{},", camera.m_theta);
-            other_prop += zeno::format("{},", camera.m_phi);
-            other_prop += zeno::format("{},", camera.m_radius);
+            other_prop += zeno::format("{},", 0);
+            other_prop += zeno::format("{},", 0);
+            other_prop += zeno::format("{},", camera.get_radius());
             info.name = "other";
             info.oldValue = other.info.defaultValue;
             info.newValue = QVariant::fromValue(QString(other_prop.c_str()));
@@ -191,10 +193,10 @@ void LightNode::onEditClicked(){
     PARAM_UPDATE_INFO info;
 
     auto camera = *(scene->camera.get());
-    auto original_pos = glm::vec3(camera.m_lodcenter);
+    auto original_pos = glm::vec3(camera.m_pos);
 //    auto pos = glm::normalize(glm::vec3(camProp[0], camProp[1], camProp[2]));
-    auto view = -1.0f * glm::normalize(camera.m_lodfront);
-    auto up = glm::normalize(camera.m_lodup);
+    auto view = -1.0f * glm::normalize(camera.get_lodfront());
+    auto up = glm::normalize(camera.get_lodup());
     auto right = glm::normalize(glm::cross(up, view));
 
     glm::mat3 rotation(right, up, view);
diff --git a/ui/zenoedit/viewport/cameracontrol.cpp b/ui/zenoedit/viewport/cameracontrol.cpp
index 3ee954ef05..6919ba8126 100644
--- a/ui/zenoedit/viewport/cameracontrol.cpp
+++ b/ui/zenoedit/viewport/cameracontrol.cpp
@@ -7,6 +7,8 @@
 #include "nodesview/zenographseditor.h"
 #include <zeno/types/UserData.h>
 #include "settings/zenosettingsmanager.h"
+#include "glm/gtx/quaternion.hpp"
+#include "zeno/core/Session.h"
 #include <cmath>
 
 
@@ -32,38 +34,29 @@ void CameraControl::setRes(QVector2D res) {
     m_res = res;
 }
 
-float CameraControl::getRoll() const {
+glm::vec3 CameraControl::getPos() const {
     auto *scene = m_zenovis->getSession()->get_scene();
-    return scene->camera->m_roll;
+    return scene->camera->getPos();
 }
-void CameraControl::setRoll(float roll) {
+void CameraControl::setPos(glm::vec3 value) {
     auto *scene = m_zenovis->getSession()->get_scene();
-    scene->camera->m_roll = roll;
+    scene->camera->setPos(value);
 }
-
-float CameraControl::getTheta() const {
-    auto *scene = m_zenovis->getSession()->get_scene();
-    return scene->camera->m_theta;
-}
-void CameraControl::setTheta(float theta) {
-    auto *scene = m_zenovis->getSession()->get_scene();
-    scene->camera->m_theta = theta;
-}
-float CameraControl::getPhi() const {
+glm::vec3 CameraControl::getPivot() const {
     auto *scene = m_zenovis->getSession()->get_scene();
-    return scene->camera->m_phi;
+    return scene->camera->getPivot();
 }
-void CameraControl::setPhi(float phi) {
+void CameraControl::setPivot(glm::vec3 value) {
     auto *scene = m_zenovis->getSession()->get_scene();
-    scene->camera->m_phi = phi;
+    scene->camera->setPivot(value);
 }
-zeno::vec3f CameraControl::getCenter() const {
+glm::quat CameraControl::getRotation() {
     auto *scene = m_zenovis->getSession()->get_scene();
-    return scene->camera->m_center;
+    return scene->camera->m_rotation;
 }
-void CameraControl::setCenter(zeno::vec3f center) {
+void CameraControl::setRotation(glm::quat value) {
     auto *scene = m_zenovis->getSession()->get_scene();
-    scene->camera->m_center = center;
+    scene->camera->m_rotation = value;
 }
 bool CameraControl::getOrthoMode() const {
     auto *scene = m_zenovis->getSession()->get_scene();
@@ -75,11 +68,7 @@ void CameraControl::setOrthoMode(bool orthoMode) {
 }
 float CameraControl::getRadius() const {
     auto *scene = m_zenovis->getSession()->get_scene();
-    return scene->camera->m_radius;
-}
-void CameraControl::setRadius(float radius) {
-    auto *scene = m_zenovis->getSession()->get_scene();
-    scene->camera->m_radius = radius;
+    return scene->camera->get_radius();
 }
 
 float CameraControl::getFOV() const {
@@ -115,40 +104,26 @@ void CameraControl::fakeMousePressEvent(QMouseEvent *event)
     auto scene = m_zenovis->getSession()->get_scene();
     if (event->button() == Qt::MiddleButton) {
         middle_button_pressed = true;
+        if (zeno::getSession().userData().get2<bool>("viewport-depth-aware-navigation", true)) {
+            m_hit_posWS = scene->renderMan->getEngine()->getClickedPos(event->x(), event->y());
+            if (m_hit_posWS.has_value()) {
+                scene->camera->setPivot(m_hit_posWS.value());
+            }
+        }
     }
     auto m_picker = this->m_picker.lock();
     auto m_transformer = this->m_transformer.lock();
-    if (scene->camera->m_need_sync) {
-        scene->camera->m_need_sync = false;
-        if (bool(m_picker) && scene->camera->m_auto_radius) {
-            m_picker->set_picked_depth_callback([&] (float depth, int x, int y) {
-                if (depth < 0.001f) {
-                    return;
-                }
-                glm::vec4 ndc = {0, 0, depth, 1};
-                glm::vec4 posCS = glm::inverse(scene->camera->m_proj) * ndc;
-                glm::vec4 posVS = posCS / posCS.w;
-                glm::vec4 pWS = glm::inverse(scene->camera->m_view) * posVS;
-                glm::vec3 p3WS = glm::vec3(pWS.x, pWS.y, pWS.z);
-                setRadius(glm::length(scene->camera->m_lodcenter - p3WS));
-                setCenter({p3WS.x, p3WS.y, p3WS.z});
-            });
-            int mid_x = int(this->res().x() * 0.5);
-            int mid_y = int(this->res().y() * 0.5);
-            m_picker->pick_depth(mid_x, mid_y);
-        }
-    }
     int button = Qt::NoButton;
     ZenoSettingsManager& settings = ZenoSettingsManager::GetInstance();
     settings.getViewShortCut(ShortCut_MovingView, button);
     settings.getViewShortCut(ShortCut_RotatingView, button);
     bool bTransform = false;
-    auto front = scene->camera->m_lodfront;
-    auto dir = screenToWorldRay(event->x() / res().x(), event->y() / res().y());
+    auto front = scene->camera->get_lodfront();
+    auto dir = screenPosToRayWS(event->x() / res().x(), event->y() / res().y());
     if (m_transformer)
     {
         if (event->buttons() & Qt::LeftButton && !scene->selected.empty() && m_transformer->isTransformMode() &&
-            m_transformer->clickedAnyHandler(realPos(), dir, front))
+            m_transformer->clickedAnyHandler(getPos(), dir, front))
         {
             bTransform = true;
         }
@@ -165,62 +140,24 @@ void CameraControl::fakeMousePressEvent(QMouseEvent *event)
     }
 }
 
-void CameraControl::lookTo(int dir) {
-    if (dir < 0 || dir > 6)
-        return;
-    auto x_axis = QVector3D(1, 0, 0);
-    auto y_axis = QVector3D(0, 1, 0);
-    auto z_axis = QVector3D(0, 0, 1);
-
+void CameraControl::lookTo(zenovis::CameraLookToDir dir) {
     ZASSERT_EXIT(m_zenovis);
-    auto c = getCenter();
-    QVector3D center = {c[0], c[1], c[2]};
-    auto radius = getRadius();
 
     switch (dir) {
-    case 0:
-        // front view
-        setTheta(0);
-        setPhi(0);
-        m_zenovis->updateCameraFront(center + z_axis * radius, -z_axis, y_axis);
+    case zenovis::CameraLookToDir::front_view:
         break;
-    case 1:
-        // right view
-        setTheta(0);
-        setPhi(-glm::pi<float>() / 2);
-        m_zenovis->updateCameraFront(center + x_axis * radius, -x_axis, y_axis);
+    case zenovis::CameraLookToDir::right_view:
         break;
-    case 2:
-        // top view
-        setTheta(-glm::pi<float>() / 2);
-        setPhi(0);
-        m_zenovis->updateCameraFront(center + y_axis * radius, -z_axis, y_axis);
+    case zenovis::CameraLookToDir::top_view:
         break;
-    case 3:
-        // back view
-        setTheta(0);
-        setPhi(glm::pi<float>());
-        m_zenovis->updateCameraFront(center - z_axis * radius, z_axis, y_axis);
+    case zenovis::CameraLookToDir::back_view:
         break;
-    case 4:
-        // left view
-        setTheta(0);
-        setPhi(glm::pi<float>() / 2);
-        m_zenovis->updateCameraFront(center - x_axis * radius, x_axis, y_axis);
+    case zenovis::CameraLookToDir::left_view:
         break;
-    case 5:
-        // bottom view
-        setTheta(glm::pi<float>() / 2);
-        setPhi(0);
-        m_zenovis->updateCameraFront(center - y_axis * radius, y_axis, z_axis);
+    case zenovis::CameraLookToDir::bottom_view:
+        break;
+    case zenovis::CameraLookToDir::back_to_origin:
         break;
-    case 6:
-        // back to origin
-        setCenter({0, 0, 0});
-        setRadius(5);
-        setTheta(0);
-        setPhi(0);
-        m_zenovis->updateCameraFront(center, -z_axis, y_axis);
     default: break;
     }
     setOrthoMode(true);
@@ -311,37 +248,37 @@ void CameraControl::fakeMouseMoveEvent(QMouseEvent *event)
     if (m_transformer) {
         bTransform = m_transformer->isTransforming();
         // check if hover a handler
-        auto front = scene->camera->m_lodfront;
-        auto dir = screenToWorldRay(event->x() / res().x(), event->y() / res().y());
+        auto front = scene->camera->get_lodfront();
+        auto dir = screenPosToRayWS(event->x() / res().x(), event->y() / res().y());
         if (!scene->selected.empty() && !(event->buttons() & Qt::LeftButton)) {
-            m_transformer->hoveredAnyHandler(realPos(), dir, front);
+            m_transformer->hoveredAnyHandler(getPos(), dir, front);
         }
     }
 
-    if (!bTransform && ctrl_pressed && (event->buttons() & Qt::MiddleButton)) {
-        float ratio = QApplication::desktop()->devicePixelRatio();
-        float dx = xpos - m_lastMidButtonPos.x(), dy = ypos - m_lastMidButtonPos.y();
-        dx *= ratio / m_res[0];
-        dy *= ratio / m_res[1];
-        float cos_t = cos(getTheta());
-        float sin_t = sin(getTheta());
-        float cos_p = cos(getPhi());
-        float sin_p = sin(getPhi());
-        QVector3D back(cos_t * sin_p, sin_t, -cos_t * cos_p);
-        QVector3D delta = -back * dy;
-        auto c = getCenter();
-        QVector3D center = {c[0], c[1], c[2]};
-        center += delta * getRadius();
-        setCenter({float(center.x()), float(center.y()), float(center.z())});
+    if (!bTransform && alt_pressed && (event->buttons() & Qt::MiddleButton)) {
+        // zoom
+        if (zeno::getSession().userData().get2<bool>("viewport-FPN-navigation", false) == false) {
+            float dy = ypos - m_lastMidButtonPos.y();
+            auto step = 0.99f;
+            float scale = glm::pow(step, -dy);
+            auto pos = getPos();
+            auto pivot = getPivot();
+            auto new_pos = (pos - pivot) * scale + pivot;
+            setPos(new_pos);
+        }
         m_lastMidButtonPos = QPointF(xpos, ypos);
     }
-    else if (!bTransform && alt_pressed && (event->buttons() & Qt::MiddleButton)) {
+    else if (!bTransform && ctrl_pressed && (event->buttons() & Qt::MiddleButton)) {
+        // rot roll
+        float step = 1.0f;
         float ratio = QApplication::desktop()->devicePixelRatio();
         float dy = ypos - m_lastMidButtonPos.y();
-        dy *= ratio / m_res[1];
-        float roll = getRoll();
-        roll += dy;
-        setRoll(roll);
+        dy *= ratio / m_res[1] * step;
+        {
+            auto rot = getRotation();
+            rot = rot * glm::angleAxis(dy, glm::vec3(0, 0, 1));
+            setRotation(rot);
+        }
         m_lastMidButtonPos = QPointF(xpos, ypos);
     }
     else if (!bTransform && (event->buttons() & (rotateButton | moveButton))) {
@@ -352,41 +289,61 @@ void CameraControl::fakeMouseMoveEvent(QMouseEvent *event)
         //bool shift_pressed = event->modifiers() & Qt::ShiftModifier;
         Qt::KeyboardModifiers modifiers = event->modifiers();
         if ((moveKey == modifiers) && (event->buttons() & moveButton)) {
-            float cos_t = cos(getTheta());
-            float sin_t = sin(getTheta());
-            float cos_p = cos(getPhi());
-            float sin_p = sin(getPhi());
-            QVector3D back(cos_t * sin_p, sin_t, -cos_t * cos_p);
-            QVector3D up(-sin_t * sin_p, cos_t, sin_t * cos_p);
-            QVector3D right = QVector3D::crossProduct(up, back);
-            up = QVector3D::crossProduct(back, right);
-            right.normalize();
-            up.normalize();
-            QVector3D delta = right * dx + up * dy;
-            auto c = getCenter();
-            QVector3D center = {c[0], c[1], c[2]};
-            if (getOrthoMode()) {
-                delta = (right * dx * m_res[0] / m_res[1] + up * dy) * 2;
-            }
-            center += delta * getRadius();
-            setCenter({float(center.x()), float(center.y()), float(center.z())});
-        } else if ((rotateKey == modifiers) && (event->buttons() & rotateButton)) {
-            setOrthoMode(false);
-            setTheta(getTheta() - dy * M_PI);
-            if (int(abs(getTheta()) / M_PI) % 2 == 0) {
-                if (glm::fract(abs(getTheta()) / M_PI) < 0.5) {
-                    setPhi(getPhi() + dx * M_PI);
-                }
-                else {
-                    setPhi(getPhi() - dx * M_PI);
+            // translate
+            if (m_hit_posWS.has_value()) {
+                auto ray = screenPosToRayWS(event->x() / res().x(), event->y() / res().y());
+                auto new_pos = intersectRayPlane(m_hit_posWS.value(), ray * (-1.0f), getPos(), getViewDir());
+                if (new_pos.has_value()) {
+                    auto diff = new_pos.value() - getPos();
+                    setPivot(getPivot() + diff);
+                    setPos(new_pos.value());
                 }
             }
             else {
-                if (glm::fract(abs(getTheta()) / M_PI) < 0.5) {
-                    setPhi(getPhi() - dx * M_PI);
+                auto left = getRightDir() * -1.0f;
+                auto up = getUpDir();
+                auto delta = left * dx + up * dy;
+                if (getOrthoMode()) {
+                    delta = (left * dx * float(m_res[0]) / float(m_res[1]) + up * dy) * 2.0f;
+                }
+                auto diff = delta * getRadius();
+                setPivot(getPivot() + diff);
+                auto new_pos = getPos() + diff;
+                setPos(new_pos);
+            }
+        } else if ((rotateKey == modifiers) && (event->buttons() & rotateButton)) {
+            float step = 4.0f;
+            dx *= step;
+            if (getUpDir().y < 0) {
+                dx *= -1;
+            }
+            dy *= step;
+            // rot yaw pitch
+            setOrthoMode(false);
+            {
+                auto rot = getRotation();
+                auto beforeMat = glm::toMat3(rot);
+                rot = glm::angleAxis(-dx, glm::vec3(0, 1, 0)) * rot;
+                rot = rot * glm::angleAxis(-dy, glm::vec3(1, 0, 0));
+                setRotation(rot);
+                auto afterMat = glm::toMat3(rot);
+                if (zeno::getSession().userData().get2<bool>("viewport-FPN-navigation", false)) {
+                    if (glm::abs(glm::dot(getRightDir(), {0, 1, 0})) > 0.01) {
+                        auto right_dir = glm::cross(getViewDir(), {0, 1, 0});
+                        auto up_dir = glm::cross(right_dir, getViewDir());
+                        glm::mat3 rotation;
+                        rotation[0] = right_dir;
+                        rotation[1] = up_dir;
+                        rotation[2] = -getViewDir();
+                        setRotation(glm::quat_cast(rotation));
+                    };
+                    setPivot(getPos());
                 }
                 else {
-                    setPhi(getPhi() + dx * M_PI);
+                    auto pos = getPos();
+                    auto pivot = getPivot();
+                    auto new_pos = afterMat * glm::inverse(beforeMat) * (pos - pivot) + pivot;
+                    setPos(new_pos);
                 }
             }
         }
@@ -395,8 +352,7 @@ void CameraControl::fakeMouseMoveEvent(QMouseEvent *event)
         if (m_transformer)
         {
             if (m_transformer->isTransforming()) {
-                auto dir = screenToWorldRay(event->pos().x() / res().x(), event->pos().y() / res().y());
-                auto camera_pos = realPos();
+                auto dir = screenPosToRayWS(event->pos().x() / res().x(), event->pos().y() / res().y());
 
                 // mouse pos
                 auto mouse_pos = glm::vec2(xpos, ypos);
@@ -407,8 +363,8 @@ void CameraControl::fakeMouseMoveEvent(QMouseEvent *event)
                 mouse_start[0] = (2 * mouse_start[0] / res().x()) - 1;
                 mouse_start[1] = 1 - (2 * mouse_start[1] / res().y());
 
-                auto vp = scene->camera->m_proj * scene->camera->m_view;
-                m_transformer->transform(camera_pos, dir, mouse_start, mouse_pos, scene->camera->m_lodfront, vp);
+                auto vp = scene->camera->get_proj_matrix() * scene->camera->get_view_matrix();
+                m_transformer->transform(getPos(), dir, mouse_start, mouse_pos, scene->camera->get_lodfront(), vp);
                 zenoApp->getMainWindow()->updateViewport();
             } else {
                 float min_x = std::min((float)m_boundRectStartPos.x(), (float)event->x()) / m_res.x();
@@ -462,7 +418,53 @@ void CameraControl::fakeWheelEvent(QWheelEvent *event) {
         float temp = getDisPlane() + delta * 0.05;
         setDisPlane(temp >= 0.05 ? temp : 0.05);
     } else if (scaleKey == 0 || event->modifiers() & scaleKey){
-        setRadius(getRadius() * scale);
+        if (zeno::getSession().userData().get2<bool>("viewport-FPN-navigation", false)) {
+            auto FPN_move_speed = zeno::getSession().userData().get2<int>("viewport-FPN-move-speed", 0);
+            FPN_move_speed += dy > 0? 1: -1;
+            zeno::getSession().userData().set2("viewport-FPN-move-speed", FPN_move_speed);
+            auto pMainWindow = zenoApp->getMainWindow();
+            if (pMainWindow) {
+                pMainWindow->statusbarShowMessage(zeno::format("First Person Navigation: movement speed level: {}", FPN_move_speed), 10000);
+            }
+        }
+        else {
+            auto pos = getPos();
+            if (zeno::getSession().userData().get2<bool>("viewport-depth-aware-navigation", true)) {
+                auto session = m_zenovis->getSession();
+                auto scene = session->get_scene();
+                auto hit_posWS = scene->renderMan->getEngine()->getClickedPos(event->x(), event->y());
+                if (hit_posWS.has_value()) {
+                    auto pivot = hit_posWS.value();
+                    auto new_pos = (pos - pivot) * scale + pivot;
+                    setPos(new_pos);
+                }
+                else {
+                    auto posOnFloorWS = screenHitOnFloorWS(event->x() / res().x(), event->y() / res().y());
+                    auto pivot = posOnFloorWS;
+                    if (dot((pivot - pos), getViewDir()) > 0) {
+                        auto translate = (pivot - pos) * (1 - scale);
+                        if (glm::length(translate) < 0.01) {
+                            translate = glm::normalize(translate) * 0.01f;
+                        }
+                        auto new_pos = translate + pos;
+                        setPos(new_pos);
+                    }
+                    else {
+                        auto translate = screenPosToRayWS(event->x() / res().x(), event->y() / res().y()) * getPos().y * (1 - scale);
+                        if (getPos().y < 0) {
+                            translate *= -1;
+                        }
+                        auto new_pos = translate + pos;
+                        setPos(new_pos);
+                    }
+                }
+            }
+            else {
+                auto pivot = getPivot();
+                auto new_pos = (pos - pivot) * scale + pivot;
+                setPos(new_pos);
+            }
+        }
     }
     updatePerspective();
 
@@ -521,67 +523,69 @@ void CameraControl::fakeMouseDoubleClickEvent(QMouseEvent *event)
         }
     }
 }
-//void CameraControl::fakeMouseDoubleClickEvent(QMouseEvent* event) {
-void CameraControl::setKeyFrame() {
-    //todo
-}
 
 void CameraControl::focus(QVector3D center, float radius) {
-    setCenter({float(center.x()), float(center.y()), float(center.z())});
+    setPivot({float(center.x()), float(center.y()), float(center.z())});
     if (getFOV() >= 1e-6)
         radius /= (getFOV() / 45.0f);
-    setRadius(radius);
+    auto dir = getRotation() * glm::vec3(0, 0, 1) * radius;
+    setPos(getPivot() + dir);
     updatePerspective();
 }
 
 QVector3D CameraControl::realPos() const {
-    float cos_t = cos(getTheta());
-    float sin_t = sin(getTheta());
-    float cos_p = cos(getPhi());
-    float sin_p = sin(getPhi());
-    QVector3D back(cos_t * sin_p, sin_t, -cos_t * cos_p);
-    auto c = getCenter();
-    QVector3D center = {c[0], c[1], c[2]};
-    return center - back * getRadius();
+    auto p = getPos();
+    return {p[0], p[1], p[2]};
+}
+
+// Intersect a ray with a plane; returns std::nullopt when the ray is parallel to the plane or the hit lies behind the ray origin.
+std::optional<glm::vec3> CameraControl::intersectRayPlane(
+        glm::vec3 ray_origin
+        , glm::vec3 ray_direction
+        , glm::vec3 plane_point
+        , glm::vec3 plane_normal
+) {
+    // Dot product of the plane normal and the ray direction.
+    float denominator = glm::dot(plane_normal, ray_direction);
+
+    // A near-zero dot product means the ray is parallel to (or lies in) the plane.
+    if (glm::abs(denominator) < 1e-6f) {
+        return std::nullopt; // no intersection to report
+    }
+
+    // Vector from the ray origin to the given point on the plane.
+    glm::vec3 diff = plane_point - ray_origin;
+
+    // Ray parameter t at which the ray meets the plane.
+    float t = glm::dot(diff, plane_normal) / denominator;
+
+    // t < 0 puts the intersection behind the ray origin: report no hit.
+
+    if (t < 0) {
+        return std::nullopt;
+    }
+
+    // Evaluate the ray at t to obtain the intersection point.
+    glm::vec3 intersection = ray_origin + t * ray_direction;
+
+    return intersection;
 }
 
 // x, y from [0, 1]
-QVector3D CameraControl::screenToWorldRay(float x, float y) const {
-    float cos_t = cos(getTheta());
-    float sin_t = sin(getTheta());
-    float cos_p = cos(getPhi());
-    float sin_p = sin(getPhi());
-    QVector3D back(cos_t * sin_p, sin_t, -cos_t * cos_p);
-    QVector3D up(-sin_t * sin_p, cos_t, sin_t * cos_p);
-    QVector3D right = QVector3D::crossProduct(up, back);
-    up = QVector3D::crossProduct(back, right);
-    right.normalize();
-    up.normalize();
-    QMatrix4x4 view;
-    view.setToIdentity();
-    auto c = getCenter();
-    QVector3D center = {c[0], c[1], c[2]};
-    view.lookAt(realPos(), center, up);
+glm::vec3 CameraControl::screenPosToRayWS(float x, float y)  {
     x = (x - 0.5) * 2;
     y = (y - 0.5) * (-2);
     float v = std::tan(glm::radians(getFOV()) * 0.5f);
     float aspect = res().x() / res().y();
-    auto dir = QVector3D(v * x * aspect, v * y, -1);
-    dir = dir.normalized();
-    dir = view.inverted().mapVector(dir);
-    return dir;
-}
-
-QVariant CameraControl::hitOnFloor(float x, float y) const {
-    auto dir = screenToWorldRay(x, y);
-    auto pos = realPos();
-    float t = (0 - pos.y()) / dir.y();
-    if (t > 0) {
-        auto p = pos + dir * t;
-        return p;
-    } else {
-        return {};
-    }
+    auto dir = glm::normalize(glm::vec3(v * x * aspect, v * y, -1));
+    return getRotation() * dir;
+}
+
+glm::vec3 CameraControl::screenHitOnFloorWS(float x, float y) { // x, y are forwarded unchanged to screenPosToRayWS
+    auto dir = screenPosToRayWS(x, y);
+    auto pos = getPos();
+    float t = (0 - pos.y) / dir.y; // ray parameter where pos + dir * t reaches y == 0; NOTE(review): unguarded when dir.y == 0
+    return pos + dir * t; // NOTE(review): t may be <= 0 here; the removed hitOnFloor() returned an empty QVariant unless t > 0 - confirm callers cope
+}
 
 void CameraControl::fakeMouseReleaseEvent(QMouseEvent *event) {
@@ -770,48 +774,37 @@ bool CameraControl::fakeKeyPressEvent(int uKey) {
     if (!middle_button_pressed) {
         return false;
     }
-    float cos_t = cos(getTheta());
-    float sin_t = sin(getTheta());
-    float cos_p = cos(getPhi());
-    float sin_p = sin(getPhi());
-    zeno::vec3f back(cos_t * sin_p, sin_t, -cos_t * cos_p);
-    zeno::vec3f up(-sin_t * sin_p, cos_t, sin_t * cos_p);
-    zeno::vec3f left = zeno::cross(up, back);
-    auto center = getCenter();
-    float step = 1.0f;
+    float step = glm::pow(1.2f, float(zeno::getSession().userData().get2<int>("viewport-FPN-move-speed", 0)));
 
     bool processed = false;
     if (uKey == Qt::Key_Q) {
-        setCenter(center + zeno::vec3f(0, -1, 0) * step);
+        setPos(getPos() - getUpDir() * step);
         processed = true;
     }
     else if (uKey == Qt::Key_E) {
-        setCenter(center + zeno::vec3f(0, 1, 0) * step);
+        setPos(getPos() + getUpDir() * step);
         processed = true;
     }
     else if (uKey == Qt::Key_W) {
-        setCenter(center + back * step);
+        setPos(getPos() + getViewDir() * step);
         processed = true;
     }
     else if (uKey == Qt::Key_S) {
-        setCenter(center - back * step);
+        setPos(getPos() - getViewDir() * step);
         processed = true;
     }
     else if (uKey == Qt::Key_A) {
-        setCenter(center + left * step);
+        setPos(getPos() - getRightDir() * step);
         processed = true;
     }
     else if (uKey == Qt::Key_D) {
-        setCenter(center - left * step);
+        setPos(getPos() + getRightDir() * step);
         processed = true;
     }
     if (processed) {
         updatePerspective();
-        return true;
-    }
-    else {
-        return false;
     }
+    return processed;
 }
 
 bool CameraControl::fakeKeyReleaseEvent(int uKey) {
diff --git a/ui/zenoedit/viewport/cameracontrol.h b/ui/zenoedit/viewport/cameracontrol.h
index 61cfc6c75b..cf01ee4b6f 100644
--- a/ui/zenoedit/viewport/cameracontrol.h
+++ b/ui/zenoedit/viewport/cameracontrol.h
@@ -5,6 +5,7 @@
 #include <QtWidgets>
 #include <viewportinteraction/picker.h>
 #include <viewportinteraction/transform.h>
+#include <zenovis/Camera.h>
 
 class Zenovis;
 
@@ -19,18 +20,15 @@ class CameraControl : public QObject
     void setRes(QVector2D res);
     QVector2D res() const { return m_res; }
 
-    float getRoll() const;
-    void setRoll(float roll);
-    float getTheta() const;
-    void setTheta(float theta);
-    float getPhi() const;
-    void setPhi(float phi);
-    zeno::vec3f getCenter() const;
-    void setCenter(zeno::vec3f center);
+    glm::vec3 getPos() const;
+    void setPos(glm::vec3 value);
+    glm::vec3 getPivot() const;
+    void setPivot(glm::vec3 value);
+    glm::quat getRotation();
+    void setRotation(glm::quat value);
     bool getOrthoMode() const;
     void setOrthoMode(bool OrthoMode);
     float getRadius() const;
-    void setRadius(float radius);
     float getFOV() const;
     void setFOV(float fov);
     float getAperture() const;
@@ -38,7 +36,6 @@ class CameraControl : public QObject
     float getDisPlane() const;
     void setDisPlane(float disPlane);
     void updatePerspective();
-    void setKeyFrame();
 
     bool fakeKeyPressEvent(int uKey);
     bool fakeKeyReleaseEvent(int uKey);
@@ -48,21 +45,35 @@ class CameraControl : public QObject
     void fakeWheelEvent(QWheelEvent* event);
     void fakeMouseDoubleClickEvent(QMouseEvent* event);
     void focus(QVector3D center, float radius);
+    [[deprecated]]
     QVector3D realPos() const;
-    QVector3D screenToWorldRay(float x, float y) const;
-    QVariant hitOnFloor(float x, float y) const;
-    void lookTo(int dir);
+    glm::vec3 screenPosToRayWS(float x, float y);
+    glm::vec3 screenHitOnFloorWS(float x, float y);
+    glm::vec3 getViewDir() {
+        return getRotation() * glm::vec3(0, 0, -1); // current camera rotation applied to the -Z axis
+    };
+    glm::vec3 getUpDir() {
+        return getRotation() * glm::vec3(0, 1, 0); // current camera rotation applied to the +Y axis
+    };
+    glm::vec3 getRightDir() {
+        return getRotation() * glm::vec3(1, 0, 0); // current camera rotation applied to the +X axis
+    };
+    void lookTo(zenovis::CameraLookToDir dir);
     void clearTransformer();
     void changeTransformOperation(const QString& node);
     void changeTransformOperation(int mode);
     void changeTransformCoordSys();
     void resizeTransformHandler(int dir);
+    std::optional<glm::vec3> intersectRayPlane(
+            glm::vec3 ray_origin, glm::vec3 ray_direction,
+            glm::vec3 plane_point, glm::vec3 plane_normal);
 
 private:
     QPointF m_lastMidButtonPos;
     QPoint m_boundRectStartPos;
     QVector2D m_res;
     QSet<int> m_pressedKeys;
+    std::optional<glm::vec3> m_hit_posWS;
 
     std::weak_ptr<zeno::Picker> m_picker;
     std::weak_ptr<zeno::FakeTransformer> m_transformer;
diff --git a/ui/zenoedit/viewport/displaywidget.cpp b/ui/zenoedit/viewport/displaywidget.cpp
index 46fcc826e1..bfd4a1cc7a 100644
--- a/ui/zenoedit/viewport/displaywidget.cpp
+++ b/ui/zenoedit/viewport/displaywidget.cpp
@@ -327,7 +327,7 @@ std::tuple<int, int, bool> DisplayWidget::getOriginWindowSizeInfo()
     return originWindowSizeInfo;
 }
 
-void DisplayWidget::cameraLookTo(int dir)
+void DisplayWidget::cameraLookTo(zenovis::CameraLookToDir dir)
 {
     if (m_bGLView)
         m_glView->cameraLookTo(dir);
@@ -794,30 +794,30 @@ void DisplayWidget::onDockViewAction(bool triggered)
     switch (viewType)
     {
         case ACTION_ORIGIN_VIEW:
-            cameraLookTo(viewType);
+            cameraLookTo(zenovis::CameraLookToDir::back_to_origin);
             break;
         case ACTION_FRONT_VIEW: {
-            cameraLookTo(viewType);
+            cameraLookTo(zenovis::CameraLookToDir::front_view);
             break;
         }
         case ACTION_BACK_VIEW: {
-            cameraLookTo(viewType);
+            cameraLookTo(zenovis::CameraLookToDir::back_view);
             break;
         }
         case ACTION_RIGHT_VIEW: {
-            cameraLookTo(viewType);
+            cameraLookTo(zenovis::CameraLookToDir::right_view);
             break;
         }
         case ACTION_LEFT_VIEW: {
-            cameraLookTo(viewType);
+            cameraLookTo(zenovis::CameraLookToDir::left_view);
             break;
         }
         case ACTION_TOP_VIEW: {
-            cameraLookTo(viewType);
+            cameraLookTo(zenovis::CameraLookToDir::top_view);
             break;
         }
         case ACTION_BOTTOM_VIEW: {
-            cameraLookTo(viewType);
+            cameraLookTo(zenovis::CameraLookToDir::bottom_view);
             break;
         }
     }
@@ -1384,10 +1384,11 @@ void DisplayWidget::onNodeSelected(const QModelIndex &subgIdx, const QModelIndex
                 ZASSERT_EXIT(pZenovis && pZenovis->getSession());
                 auto scene = pZenovis->getSession()->get_scene();
                 auto fov = scene->camera->m_fov;
-                auto cz = glm::length(scene->camera->m_lodcenter);
+                auto cz = glm::length(scene->camera->m_pos);
                 if (depth != 0) {
                     cz = scene->camera->inf_z_near / depth;
                 }
+                zeno::log_info("click depth {}", depth);
                 auto w = scene->camera->m_nx;
                 auto h = scene->camera->m_ny;
                 // zeno::log_info("fov: {}", fov);
@@ -1399,11 +1400,11 @@ void DisplayWidget::onNodeSelected(const QModelIndex &subgIdx, const QModelIndex
                 auto cx = u * tan(glm::radians(fov) / 2) * w / h * cz;
                 // zeno::log_info("cx: {}, cy: {}, cz: {}", cx, cy, -cz);
                 glm::vec4 cc = {cx, cy, -cz, 1};
-                auto wc = glm::inverse(scene->camera->m_view) * cc;
+                auto wc = glm::inverse(scene->camera->get_view_matrix()) * cc;
                 wc /= wc.w;
                 // zeno::log_info("wx: {}, wy: {}, wz: {}", word_coord.x, word_coord.y, word_coord.z);
                 auto points = zeno::NodeSyncMgr::GetInstance().getInputValString(nodes[0], "points");
-                zeno::log_info("fetch {}", points.c_str());
+                zeno::log_info("fetch {}", wc);
                 points += std::to_string(wc.x) + " " + std::to_string(wc.y) + " " + std::to_string(wc.z) + " ";
                 zeno::NodeSyncMgr::GetInstance().updateNodeInputString(node_location, "points", points);
             };
diff --git a/ui/zenoedit/viewport/displaywidget.h b/ui/zenoedit/viewport/displaywidget.h
index 076a483a2f..01a5330a49 100644
--- a/ui/zenoedit/viewport/displaywidget.h
+++ b/ui/zenoedit/viewport/displaywidget.h
@@ -60,7 +60,7 @@ class DisplayWidget : public QWidget
     bool isCurrent();
     void setLoopPlaying(bool enable);
     std::tuple<int, int, bool> getOriginWindowSizeInfo();
-    void cameraLookTo(int dir);
+    void cameraLookTo(zenovis::CameraLookToDir dir);
 protected:
     void mouseReleaseEvent(QMouseEvent* event) override;
 public slots:
diff --git a/ui/zenoedit/viewport/optixviewport.cpp b/ui/zenoedit/viewport/optixviewport.cpp
index d26c5416c7..4a443a7379 100644
--- a/ui/zenoedit/viewport/optixviewport.cpp
+++ b/ui/zenoedit/viewport/optixviewport.cpp
@@ -474,7 +474,7 @@ void ZOptixViewport::setRenderSeparately(bool updateLightCameraOnly, bool update
     emit sig_setRenderSeparately(updateLightCameraOnly, updateMatlOnly);
 }
 
-void ZOptixViewport::cameraLookTo(int dir)
+void ZOptixViewport::cameraLookTo(zenovis::CameraLookToDir dir)
 {
     m_camera->lookTo(dir);
 }
@@ -692,26 +692,26 @@ void ZOptixViewport::keyPressEvent(QKeyEvent* event)
 
     key = settings.getShortCut(ShortCut_FrontView);
     if (uKey == key)
-        this->cameraLookTo(0);
+        this->cameraLookTo(zenovis::CameraLookToDir::front_view);
     key = settings.getShortCut(ShortCut_RightView);
     if (uKey == key)
-        this->cameraLookTo(1);
+        this->cameraLookTo(zenovis::CameraLookToDir::right_view);
     key = settings.getShortCut(ShortCut_VerticalView);
     if (uKey == key)
-        this->cameraLookTo(2);
+        this->cameraLookTo(zenovis::CameraLookToDir::top_view);
     key = settings.getShortCut(ShortCut_InitViewPos);
     if (uKey == key)
-        this->cameraLookTo(6);
+        this->cameraLookTo(zenovis::CameraLookToDir::back_to_origin);
 
     key = settings.getShortCut(ShortCut_BackView);
     if (uKey == key)
-        this->cameraLookTo(3);
+        this->cameraLookTo(zenovis::CameraLookToDir::back_view);
     key = settings.getShortCut(ShortCut_LeftView);
     if (uKey == key)
-        this->cameraLookTo(4);
+        this->cameraLookTo(zenovis::CameraLookToDir::left_view);
     key = settings.getShortCut(ShortCut_UpwardView);
     if (uKey == key)
-        this->cameraLookTo(5);
+        this->cameraLookTo(zenovis::CameraLookToDir::bottom_view);
 
     key = settings.getShortCut(ShortCut_InitHandler);
     if (uKey == key)
diff --git a/ui/zenoedit/viewport/optixviewport.h b/ui/zenoedit/viewport/optixviewport.h
index e9864b38bb..b512a41fce 100644
--- a/ui/zenoedit/viewport/optixviewport.h
+++ b/ui/zenoedit/viewport/optixviewport.h
@@ -66,7 +66,7 @@ class ZOptixViewport : public QWidget
     ~ZOptixViewport();
     void setSimpleRenderOption();
     void setRenderSeparately(bool updateLightCameraOnly, bool updateMatlOnly);
-    void cameraLookTo(int dir);
+    void cameraLookTo(zenovis::CameraLookToDir dir);
     void updateCameraProp(float aperture, float disPlane, UI_VECTYPE skipParam = UI_VECTYPE());
     void updatePerspective();
     void setCameraRes(const QVector2D& res);
diff --git a/ui/zenoedit/viewport/viewportwidget.cpp b/ui/zenoedit/viewport/viewportwidget.cpp
index 07b5e47f2e..c74098f937 100644
--- a/ui/zenoedit/viewport/viewportwidget.cpp
+++ b/ui/zenoedit/viewport/viewportwidget.cpp
@@ -300,7 +300,7 @@ void ViewportWidget::mouseDoubleClickEvent(QMouseEvent* event) {
     update();
 }
 //void ViewportWidget::mouseDoubleClickEvent(QMouseEvent* event) {
-void ViewportWidget::cameraLookTo(int dir) {
+void ViewportWidget::cameraLookTo(zenovis::CameraLookToDir dir) {
      m_camera->lookTo(dir);
 }
 
@@ -378,26 +378,26 @@ void ViewportWidget::keyPressEvent(QKeyEvent *event)
 
     key = settings.getShortCut(ShortCut_FrontView);
     if (uKey == key)
-        this->cameraLookTo(0);
+        this->cameraLookTo(zenovis::CameraLookToDir::front_view);
     key = settings.getShortCut(ShortCut_RightView);
     if (uKey == key)
-        this->cameraLookTo(1);
+        this->cameraLookTo(zenovis::CameraLookToDir::right_view);
     key = settings.getShortCut(ShortCut_VerticalView);
     if (uKey == key)
-        this->cameraLookTo(2);
+        this->cameraLookTo(zenovis::CameraLookToDir::top_view);
     key = settings.getShortCut(ShortCut_InitViewPos);
     if (uKey == key)
-        this->cameraLookTo(6);
+        this->cameraLookTo(zenovis::CameraLookToDir::back_to_origin);
 
     key = settings.getShortCut(ShortCut_BackView);
     if (uKey == key)
-        this->cameraLookTo(3);
+        this->cameraLookTo(zenovis::CameraLookToDir::back_view);
     key = settings.getShortCut(ShortCut_LeftView);
     if (uKey == key)
-        this->cameraLookTo(4);
+        this->cameraLookTo(zenovis::CameraLookToDir::left_view);
     key = settings.getShortCut(ShortCut_UpwardView);
     if (uKey == key)
-        this->cameraLookTo(5);
+        this->cameraLookTo(zenovis::CameraLookToDir::bottom_view);
 
     key = settings.getShortCut(ShortCut_InitHandler);
     if (uKey == key)
diff --git a/ui/zenoedit/viewport/viewportwidget.h b/ui/zenoedit/viewport/viewportwidget.h
index af8e7a55af..4734470ebc 100644
--- a/ui/zenoedit/viewport/viewportwidget.h
+++ b/ui/zenoedit/viewport/viewportwidget.h
@@ -9,6 +9,7 @@
 
 #include <viewportinteraction/transform.h>
 #include <viewportinteraction/picker.h>
+#include "zenovis/Camera.h"
 
 class ZTimeline;
 class ZenoMainWindow;
@@ -40,7 +41,7 @@ class ViewportWidget : public QGLWidget
     void setSafeFrames(bool bLock, int nx, int ny);
     void updatePerspective();
     void updateCameraProp(float aperture, float disPlane);
-    void cameraLookTo(int dir);
+    void cameraLookTo(zenovis::CameraLookToDir dir);
     void clearTransformer();
     void changeTransformOperation(const QString& node);
     void changeTransformOperation(int mode);
diff --git a/ui/zenoedit/viewport/zenovis.cpp b/ui/zenoedit/viewport/zenovis.cpp
index 423e87da16..e0b7b7eea5 100644
--- a/ui/zenoedit/viewport/zenovis.cpp
+++ b/ui/zenoedit/viewport/zenovis.cpp
@@ -174,7 +174,6 @@ int Zenovis::setCurrentFrameId(int frameid)
         if (m_camera_keyframe && m_camera_control) {
             PerspectiveInfo r;
             if (m_camera_keyframe->queryFrame(frameid, r)) {
-                m_camera_control->setKeyFrame();
                 m_camera_control->updatePerspective();
             }
         }
diff --git a/ui/zenoedit/viewport/zoptixviewport.cpp b/ui/zenoedit/viewport/zoptixviewport.cpp
index 5429d43c96..be5188d963 100644
--- a/ui/zenoedit/viewport/zoptixviewport.cpp
+++ b/ui/zenoedit/viewport/zoptixviewport.cpp
@@ -78,7 +78,7 @@ void ZOptixProcViewport::setRenderSeparately(bool updateLightCameraOnly, bool up
     scene->drawOptions->updateMatlOnly = updateMatlOnly;
 }
 
-void ZOptixProcViewport::cameraLookTo(int dir)
+void ZOptixProcViewport::cameraLookTo(zenovis::CameraLookToDir dir)
 {
     m_camera->lookTo(dir);
 }
@@ -283,26 +283,26 @@ void ZOptixProcViewport::keyPressEvent(QKeyEvent* event)
 
     key = settings.getShortCut(ShortCut_FrontView);
     if (uKey == key)
-        this->cameraLookTo(0);
+        this->cameraLookTo(zenovis::CameraLookToDir::front_view);
     key = settings.getShortCut(ShortCut_RightView);
     if (uKey == key)
-        this->cameraLookTo(1);
+        this->cameraLookTo(zenovis::CameraLookToDir::right_view);
     key = settings.getShortCut(ShortCut_VerticalView);
     if (uKey == key)
-        this->cameraLookTo(2);
+        this->cameraLookTo(zenovis::CameraLookToDir::top_view);
     key = settings.getShortCut(ShortCut_InitViewPos);
     if (uKey == key)
-        this->cameraLookTo(6);
+        this->cameraLookTo(zenovis::CameraLookToDir::back_to_origin);
 
     key = settings.getShortCut(ShortCut_BackView);
     if (uKey == key)
-        this->cameraLookTo(3);
+        this->cameraLookTo(zenovis::CameraLookToDir::back_view);
     key = settings.getShortCut(ShortCut_LeftView);
     if (uKey == key)
-        this->cameraLookTo(4);
+        this->cameraLookTo(zenovis::CameraLookToDir::left_view);
     key = settings.getShortCut(ShortCut_UpwardView);
     if (uKey == key)
-        this->cameraLookTo(5);
+        this->cameraLookTo(zenovis::CameraLookToDir::bottom_view);
 
     key = settings.getShortCut(ShortCut_InitHandler);
     if (uKey == key)
diff --git a/ui/zenoedit/viewport/zoptixviewport.h b/ui/zenoedit/viewport/zoptixviewport.h
index cbe8b7ca7d..2785bf6f6f 100644
--- a/ui/zenoedit/viewport/zoptixviewport.h
+++ b/ui/zenoedit/viewport/zoptixviewport.h
@@ -3,6 +3,7 @@
 
 #include <QtWidgets>
 #include "optixviewport.h"
+#include "zenovis/Camera.h"
 
 class Zenovis;
 class CameraControl;
@@ -16,7 +17,7 @@ class ZOptixProcViewport : public QWidget
     ~ZOptixProcViewport();
     void setSimpleRenderOption();
     void setRenderSeparately(bool updateLightCameraOnly, bool updateMatlOnly);
-    void cameraLookTo(int dir);
+    void cameraLookTo(zenovis::CameraLookToDir dir);
     void updateViewport();
     void updateCameraProp(float aperture, float disPlane);
     void updatePerspective();
diff --git a/ui/zenoedit/viewportinteraction/transform.cpp b/ui/zenoedit/viewportinteraction/transform.cpp
index cac59fb4b4..6e2a1d1d11 100644
--- a/ui/zenoedit/viewportinteraction/transform.cpp
+++ b/ui/zenoedit/viewportinteraction/transform.cpp
@@ -150,26 +150,22 @@ bool FakeTransformer::calcTransformStart(glm::vec3 ori, glm::vec3 dir, glm::vec3
     return true;
 }
 
-bool FakeTransformer::clickedAnyHandler(QVector3D ori, QVector3D dir, glm::vec3 front) {
+bool FakeTransformer::clickedAnyHandler(glm::vec3 ori, glm::vec3 dir, glm::vec3 front) {
     if (!m_handler) return false;
-    auto ray_ori = QVec3ToGLMVec3(ori);
-    auto ray_dir = QVec3ToGLMVec3(dir);
-    m_operation_mode = m_handler->handleClick(ray_ori, ray_dir);
-    if (!calcTransformStart(ray_ori, ray_dir, front)) return false;
+    m_operation_mode = m_handler->handleClick(ori, dir);
+    if (!calcTransformStart(ori, dir, front)) return false;
     return m_operation_mode != zenovis::INTERACT_NONE;
 }
 
-bool FakeTransformer::hoveredAnyHandler(QVector3D ori, QVector3D dir, glm::vec3 front)
+bool FakeTransformer::hoveredAnyHandler(glm::vec3 ori, glm::vec3 dir, glm::vec3 front)
 {
     if (!m_handler) return false;
-    auto ray_ori = QVec3ToGLMVec3(ori);
-    auto ray_dir = QVec3ToGLMVec3(dir);
-    int mode = m_handler->handleHover(ray_ori, ray_dir);
-    if (!calcTransformStart(ray_ori, ray_dir, front)) return false;
+    int mode = m_handler->handleHover(ori, dir);
+    if (!calcTransformStart(ori, dir, front)) return false;
     return mode != zenovis::INTERACT_NONE;
 }
 
-void FakeTransformer::transform(QVector3D camera_pos, QVector3D ray_dir, glm::vec2 mouse_start, glm::vec2 mouse_pos, glm::vec3 front, glm::mat4 vp) {
+void FakeTransformer::transform(glm::vec3 camera_pos, glm::vec3 ray_dir, glm::vec2 mouse_start, glm::vec2 mouse_pos, glm::vec3 front, glm::mat4 vp) {
     if (m_operation == NONE) return;
 
     auto pZenovis = m_viewport->getZenoVis();
@@ -179,8 +175,8 @@ void FakeTransformer::transform(QVector3D camera_pos, QVector3D ray_dir, glm::ve
     auto scene = sess->get_scene();
     ZASSERT_EXIT(scene);
 
-    auto ori = QVec3ToGLMVec3(camera_pos);
-    auto dir = QVec3ToGLMVec3(ray_dir);
+    auto ori = camera_pos;
+    auto dir = ray_dir;
 
     auto x_axis = glm::vec3(1, 0, 0);
     auto y_axis = glm::vec3(0, 1, 0);
diff --git a/ui/zenoedit/viewportinteraction/transform.h b/ui/zenoedit/viewportinteraction/transform.h
index 04d37b715f..7532e2eb7b 100644
--- a/ui/zenoedit/viewportinteraction/transform.h
+++ b/ui/zenoedit/viewportinteraction/transform.h
@@ -31,9 +31,9 @@ class FakeTransformer {
     void removeObject(const std::string& name);
     void removeObject(const std::unordered_set<std::string>& names);
     bool calcTransformStart(glm::vec3 ori, glm::vec3 dir, glm::vec3 front);
-    bool clickedAnyHandler(QVector3D ori, QVector3D dir, glm::vec3 front);
-    bool hoveredAnyHandler(QVector3D ori, QVector3D dir, glm::vec3 front);
-    void transform(QVector3D camera_pos, QVector3D ray_dir, glm::vec2 mouse_start, glm::vec2 mouse_pos, glm::vec3 front, glm::mat4 vp);
+    bool clickedAnyHandler(glm::vec3 ori, glm::vec3 dir, glm::vec3 front);
+    bool hoveredAnyHandler(glm::vec3 ori, glm::vec3 dir, glm::vec3 front);
+    void transform(glm::vec3 camera_pos, glm::vec3 ray_dir, glm::vec2 mouse_start, glm::vec2 mouse_pos, glm::vec3 front, glm::mat4 vp);
     void startTransform();
     void endTransform(bool moved);
     bool isTransforming() const;
@@ -67,10 +67,6 @@ class FakeTransformer {
 
     // 把FakeTransform上的SRT应用到primitive上
     void doTransform();
-
-    static glm::vec3 QVec3ToGLMVec3(QVector3D QVec3) {
-        return {QVec3.x(), QVec3.y(), QVec3.z()};
-    }
     void markObjectInteractive(const std::string& obj_name);
     void unmarkObjectInteractive(const std::string& obj_name);
     void markObjectsInteractive();
diff --git a/ui/zenoedit/zenomainwindow.cpp b/ui/zenoedit/zenomainwindow.cpp
index 1f087c95e3..f2005aa311 100644
--- a/ui/zenoedit/zenomainwindow.cpp
+++ b/ui/zenoedit/zenomainwindow.cpp
@@ -2285,6 +2285,10 @@ void ZenoMainWindow::doFrameUpdate(int frame) {
     }
 }
 
+void ZenoMainWindow::statusbarShowMessage(const std::string& text, int timeout) const {
+    m_ui->statusbar->showMessage(QString(text.c_str()), timeout); // forwards text to the status bar; spells out the const char* -> QString conversion
+}
+
 static bool openFileAndExportAsZsl(const char *inPath, const char *outPath) {
     auto pGraphs = zenoApp->graphsManagment();
     IGraphsModel* pModel = pGraphs->openZsgFile(inPath);
diff --git a/ui/zenoedit/zenomainwindow.h b/ui/zenoedit/zenomainwindow.h
index 2ccae1f137..2a0bc06689 100644
--- a/ui/zenoedit/zenomainwindow.h
+++ b/ui/zenoedit/zenomainwindow.h
@@ -56,6 +56,7 @@ class ZenoMainWindow : public QMainWindow
     bool isOnlyOptixWindow() const;
     bool isRecordByCommandLine() const;
     void openFileAndUpdateParam(const QString& path, const QString& paramJson);
+    void statusbarShowMessage(const std::string& text, int timeout = 0) const;
 
     QLineEdit* selected = nullptr;
     ZenoLights* lightPanel = nullptr;
diff --git a/zeno/include/zeno/types/CameraObject.h b/zeno/include/zeno/types/CameraObject.h
index f30b5eb9bc..0e9e9eee4e 100644
--- a/zeno/include/zeno/types/CameraObject.h
+++ b/zeno/include/zeno/types/CameraObject.h
@@ -2,8 +2,24 @@
 
 #include <zeno/core/IObject.h>
 #include <zeno/utils/vec.h>
+#include <glm/mat4x4.hpp>
+#include <glm/vec3.hpp>
+#include <glm/gtx/quaternion.hpp>
+#include <optional>
 
 namespace zeno {
+// Orientation quaternion for a camera aimed by the angles theta and phi (radians, as consumed by glm::cos/glm::sin).
+// `inline` rather than `static`: this lives in a header, and `static` would emit a private copy in every including file.
+inline glm::quat from_theta_phi(float theta, float phi) {
+    const float cos_t = glm::cos(theta), sin_t = glm::sin(theta);
+    const float cos_p = glm::cos(phi), sin_p = glm::sin(phi);
+    const glm::vec3 front(cos_t * sin_p, sin_t, -cos_t * cos_p);
+    const glm::vec3 up(-sin_t * sin_p, cos_t, sin_t * cos_p);
+    const glm::vec3 right = glm::cross(front, up);
+    // Matrix columns are right, up and -front, so the local -Z axis maps onto `front`.
+    // The three-vector glm::mat3 constructor fills the columns in that order.
+    return glm::quat_cast(glm::mat3(right, up, -front));
+}
 
 struct CameraData {
     vec3f pos{0, 0, 1};
@@ -16,11 +32,7 @@ struct CameraData {
     float aperture{0.0f};
     float focalPlaneDistance{2.0f};
 
-    bool isSet = false;
-    vec3f center{0, 0, 0};
-    float radius{1};
-    float theta{};
-    float phi{};
+    std::optional<vec3f> pivot = std::nullopt;
 };
 
 struct CameraObject : IObjectClone<CameraObject>, CameraData {
diff --git a/zeno/src/nodes/CameraNodes.cpp b/zeno/src/nodes/CameraNodes.cpp
index d53973bf4a..97563d4871 100644
--- a/zeno/src/nodes/CameraNodes.cpp
+++ b/zeno/src/nodes/CameraNodes.cpp
@@ -10,8 +10,56 @@
 #include <glm/glm.hpp>
 #include <glm/gtx/quaternion.hpp>
 #include "zeno/extra/TempNode.h"
+#include <regex>
 
 namespace zeno {
+// Builds a CameraObject from the node inputs; `other` may hold six comma-separated floats.
+struct CameraNode: zeno::INode{
+    virtual void apply() override {
+        auto camera = std::make_unique<zeno::CameraObject>();
+        camera->pos = get_input2<zeno::vec3f>("pos");
+        camera->up = get_input2<zeno::vec3f>("up");
+        camera->view = get_input2<zeno::vec3f>("view");
+        camera->fov = get_input2<float>("fov");
+        camera->aperture = get_input2<float>("aperture");
+        camera->focalPlaneDistance = get_input2<float>("focalPlaneDistance");
+        camera->userData().set2("frame", get_input2<float>("frame"));
+
+        auto other_props = get_input2<std::string>("other");
+        std::vector<float> prop_vals;
+        // Splitting an empty string still yields one empty token and std::stof("") throws
+        // std::invalid_argument, so the default (empty) `other` must bypass the parser.
+        if (!other_props.empty()) {
+            std::regex reg(",");
+            std::sregex_token_iterator end;
+            for (std::sregex_token_iterator p(other_props.begin(), other_props.end(), reg, -1); p != end; ++p) {
+                prop_vals.push_back(std::stof(*p));
+            }
+        }
+        // Only the first three values are consumed (as the pivot); the remaining three are ignored here.
+        if (prop_vals.size() == 6) camera->pivot = {prop_vals[0], prop_vals[1], prop_vals[2]};
+        set_output("camera", std::move(camera));
+    }
+};
+
+ZENO_DEFNODE(CameraNode)({ // descriptor table registered for CameraNode
+     { // first group: the names CameraNode::apply() reads with get_input2
+         {"vec3f", "pos", "0,0,5"},
+         {"vec3f", "up", "0,1,0"},
+         {"vec3f", "view", "0,0,-1"},
+         {"float", "fov", "45"},
+         {"float", "aperture", "11"},
+         {"float", "focalPlaneDistance", "2.0"},
+         {"string", "other", ""}, // split on ',' and parsed as floats by apply()
+         {"int", "frame", "0"}, // NOTE(review): declared int but apply() reads it with get_input2<float> - confirm this is intended
+     },
+     { // second group: the name apply() fills via set_output
+         {"CameraObject", "camera"},
+     },
+     { // third group: left empty
+     },
+     {"FBX"}, // NOTE(review): presumably the node category - confirm
+ });
 
 struct MakeCamera : INode {
     virtual void apply() override {
diff --git a/zenovis/include/zenovis/Camera.h b/zenovis/include/zenovis/Camera.h
index c6b1983006..be015a2f66 100644
--- a/zenovis/include/zenovis/Camera.h
+++ b/zenovis/include/zenovis/Camera.h
@@ -7,6 +7,15 @@
 #include <zeno/types/CameraObject.h>
 
 namespace zenovis {
+enum class CameraLookToDir { // typed replacement for the integer codes formerly passed to cameraLookTo(int)
+    front_view,     // formerly 0 (ShortCut_FrontView)
+    right_view,     // formerly 1 (ShortCut_RightView)
+    top_view,       // formerly 2 (ShortCut_VerticalView)
+    back_view,      // formerly 3 (ShortCut_BackView)
+    left_view,      // formerly 4 (ShortCut_LeftView)
+    bottom_view,    // formerly 5 (ShortCut_UpwardView)
+    back_to_origin, // formerly 6 (ShortCut_InitViewPos)
+};
 
 namespace opengl {
     class Program;
@@ -22,7 +31,6 @@ struct ZOptixCameraSettingInfo {
 struct Camera {
     float inf_z_near = 0.001f;
     int m_nx{512}, m_ny{512};
-    glm::mat4x4 m_view{1}, m_proj{1};
 
     float m_near = 0.01f;
     float m_far = 20000.0f;
@@ -33,39 +41,44 @@ struct Camera {
     float m_dof = -1.f;
     float m_safe_frames = 0;
 
-    glm::vec3 m_lodcenter{0, 0, -1};
-    glm::vec3 m_lodfront{0, 0, 1};
-    glm::vec3 m_lodup{0, 1, 0};
+    glm::vec3 m_pos{0, 0, 5};
+    glm::vec3 m_pivot = {};
+    glm::quat m_rotation = {1, 0, 0, 0};
 
-    bool m_need_sync = false;
     bool m_block_window = false;
-    bool m_auto_radius = false;
-
-    float m_theta = 0;
-    float m_phi = 0;
-    float m_roll = 0;
-    zeno::vec3f m_center = {};
+public:
+    void reset() {
+        m_pos = {0, 0, 5};
+        m_pivot = {};
+        m_rotation = {1, 0, 0, 0};
+        updateMatrix();
+    }
+    glm::vec3 get_lodfront() {
+        return m_rotation * glm::vec3(0, 0, -1);
+    }
+    glm::vec3 get_lodup() {
+        return m_rotation * glm::vec3(0, 1, 0);
+    }
     bool m_ortho_mode = false;
-    float m_radius = 5;
+    float get_radius() {
+        return glm::distance(m_pos, m_pivot);
+    }
+    glm::vec3 getPos() {
+        return m_pos;
+    }
+    void setPos(glm::vec3 value) {
+        m_pos = value;
+    }
+    glm::vec3 getPivot() {
+        return m_pivot;
+    }
+    void setPivot(glm::vec3 value) {
+        m_pivot = value;
+    }
 
     zeno::vec2i viewport_offset = {};
     ZOptixCameraSettingInfo zOptixCameraSettingInfo = {};
 
-    // only used in real-shader
-    struct ZxxHappyLookParam {
-        float cx = 0;
-        float cy = 0;
-        float cz = 0;
-        float theta = 0;
-        float phi = 0;
-        float radius = 0;
-        float fov = 0;
-        bool ortho_mode = false;
-        float aperture = 0;
-        float focalPlaneDistance = 0;
-    };
-    struct ZxxHappyLookParam m_zxx;
-
     float getAspect() const {
         return (float)m_nx / (float)m_ny;
     }
@@ -77,11 +90,31 @@ struct Camera {
     bool is_locked_window() const;
     void setCamera(zeno::CameraData const &cam);
     void setPhysicalCamera(float aperture, float shutter_speed, float iso, bool aces, bool exposure);
-    void placeCamera(glm::vec3 pos, glm::vec3 front, glm::vec3 up);
-    void lookCamera(float cx, float cy, float cz, float theta, float phi, float radius, bool ortho_mode, float fov, float aperture, float focalPlaneDistance);
+    void placeCamera(glm::vec3 pos, glm::vec3 view, glm::vec3 up);
+    void placeCamera(glm::vec3 pos, glm::quat rotation);
     void focusCamera(float cx, float cy, float cz, float radius);
     void set_program_uniforms(opengl::Program *pro);
     void updateMatrix();
+    glm::mat4x4 get_view_matrix() {
+        return glm::lookAt(m_pos, m_pos + get_lodfront(), get_lodup());
+    }
+    // Right-handed perspective with no far plane and reversed depth: a point at view distance d gets depth zNear / d.
+    static glm::mat4 MakeInfReversedZProjRH(float fovY_radians, float aspectWbyH, float zNear) {
+        const float focal = 1.0f / tan(fovY_radians / 2.0f);
+        const glm::vec4 col0(focal / aspectWbyH, 0.0f, 0.0f, 0.0f);
+        const glm::vec4 col1(0.0f, focal, 0.0f, 0.0f);
+        const glm::vec4 col2(0.0f, 0.0f, 0.0f, -1.0f), col3(0.0f, 0.0f, zNear, 0.0f);
+        return glm::mat4(col0, col1, col2, col3);
+    }
+    glm::mat4x4 get_proj_matrix() {
+        if (m_ortho_mode) {
+            auto radius = get_radius();
+            return glm::orthoZO(-radius * getAspect(), radius * getAspect(), -radius,
+                radius, m_far, m_near);
+        } else {
+            return MakeInfReversedZProjRH(glm::radians(m_fov), getAspect(), inf_z_near);
+        }
+    }
 };
 
 } // namespace zenovis
diff --git a/zenovis/include/zenovis/RenderEngine.h b/zenovis/include/zenovis/RenderEngine.h
index 84c45a5c12..32e65c6e0f 100644
--- a/zenovis/include/zenovis/RenderEngine.h
+++ b/zenovis/include/zenovis/RenderEngine.h
@@ -19,6 +19,7 @@ struct RenderEngine {
     virtual void cleanupWhenExit() = 0;
 
     virtual ~RenderEngine() = default;
+    virtual std::optional<glm::vec3> getClickedPos(int x, int y) { return {}; }
 };
 
 class RenderManager {
diff --git a/zenovis/include/zenovis/bate/FrameBufferRender.h b/zenovis/include/zenovis/bate/FrameBufferRender.h
index 36b99ba256..3a8ce31e8c 100644
--- a/zenovis/include/zenovis/bate/FrameBufferRender.h
+++ b/zenovis/include/zenovis/bate/FrameBufferRender.h
@@ -71,9 +71,10 @@ struct FrameBufferRender {
 
     unique_ptr<FBO> fbo;
     unique_ptr<Texture> picking_texture;
-    unique_ptr<RenderObject> depth_rbo;
+    unique_ptr<Texture> depth_texture;
 
     unique_ptr<FBO> intermediate_fbo;
+    unique_ptr<Texture> screen_depth_tex;
     unique_ptr<Texture> screen_tex;
     
     unique_ptr<VAO> quad_vao;
@@ -124,14 +125,15 @@ struct FrameBufferRender {
         CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_MULTISAMPLE, picking_texture->tex, 0));
 
         // generate depth texture
-        depth_rbo = make_unique<RenderObject>();
-        CHECK_GL(glBindRenderbuffer(GL_RENDERBUFFER, depth_rbo->rbo));
-        CHECK_GL(glRenderbufferStorageMultisample(GL_RENDERBUFFER, samples, GL_DEPTH_COMPONENT32F, w, h));
-        CHECK_GL(glBindRenderbuffer(GL_RENDERBUFFER, 0));
-        CHECK_GL(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth_rbo->rbo));
+        depth_texture = make_unique<Texture>();
+        depth_texture->target = GL_TEXTURE_2D_MULTISAMPLE;
+        CHECK_GL(glBindTexture(depth_texture->target, depth_texture->tex));
+        CHECK_GL(glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, samples, GL_DEPTH_COMPONENT, w, h, GL_TRUE));
+        CHECK_GL(glBindTexture(depth_texture->target, 0));
+        CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D_MULTISAMPLE, depth_texture->tex, 0));
 
         // check fbo
-        if(!fbo->complete()) printf("fbo error\n");
+        if(!fbo->complete()) zeno::log_error("fbo error");
 
         // unbind fbo & texture
         CHECK_GL(glBindTexture(GL_TEXTURE_2D, 0));
@@ -139,13 +141,20 @@ struct FrameBufferRender {
 
         intermediate_fbo = make_unique<FBO>();
         screen_tex = make_unique<Texture>();
+        screen_depth_tex = make_unique<Texture>();
         intermediate_fbo->bind();
         CHECK_GL(glBindTexture(GL_TEXTURE_2D, screen_tex->tex));
         CHECK_GL(glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, w, h, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL));
         CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST));
         CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST));
-        CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, screen_tex->tex, 0));	// we only need a color buffer
-        if(!intermediate_fbo->complete()) printf("fbo error\n");
+        CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, screen_tex->tex, 0));
+
+        CHECK_GL(glBindTexture(GL_TEXTURE_2D, screen_depth_tex->tex));
+        CHECK_GL(glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, w, h, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL));
+        CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST));
+        CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST));
+        CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, screen_depth_tex->tex, 0));
+        if(!intermediate_fbo->complete()) zeno::log_error("fbo error");
         CHECK_GL(glBindTexture(GL_TEXTURE_2D, 0));
         intermediate_fbo->unbind();
     }
@@ -153,9 +162,10 @@ struct FrameBufferRender {
     void destroy_buffers() {
         fbo.reset();
         picking_texture.reset();
-        depth_rbo.reset();
+        depth_texture.reset();
         intermediate_fbo.reset();
         screen_tex.reset();
+        screen_depth_tex.reset();
     }
     void bind() {
         // enable framebuffer writing
@@ -170,7 +180,7 @@ struct FrameBufferRender {
         // 2. now blit multisampled buffer(s) to normal colorbuffer of intermediate FBO. Image is stored in screenTexture
         CHECK_GL(glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo->fbo));
         CHECK_GL(glBindFramebuffer(GL_DRAW_FRAMEBUFFER, intermediate_fbo->fbo));
-        CHECK_GL(glBlitFramebuffer(0, 0, w, h, 0, 0, w, h, GL_COLOR_BUFFER_BIT, GL_NEAREST));
+        CHECK_GL(glBlitFramebuffer(0, 0, w, h, 0, 0, w, h, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST));
 
         // 3. now render quad with scene's visuals as its texture image
         CHECK_GL(glBindFramebuffer(GL_FRAMEBUFFER, 0));
@@ -187,5 +197,16 @@ struct FrameBufferRender {
         CHECK_GL(glDrawArrays(GL_TRIANGLES, 0, 6));
         glEnable(GL_MULTISAMPLE);
     }
+    // Reads the resolved depth under pixel (x, y), with y counted from the top row; returns 0 when no value is available.
+    float getDepth(int x, int y) {
+        if (!intermediate_fbo || !intermediate_fbo->complete()) return 0; // buffers may have been reset by destroy_buffers()
+        if (x < 0 || y < 0 || x >= w || y >= h) return 0; // outside the buffer glReadPixels yields undefined values
+        intermediate_fbo->bind();
+        CHECK_GL(glBindFramebuffer(GL_READ_FRAMEBUFFER, intermediate_fbo->fbo));
+        float depth = 0; // stays 0 if the read writes nothing, instead of returning an uninitialized value
+        CHECK_GL(glReadPixels(x, h - y - 1, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &depth)); // flip y: GL rows start at the bottom
+        intermediate_fbo->unbind();
+        return depth;
+    }
 };
 }
\ No newline at end of file
diff --git a/zenovis/src/Camera.cpp b/zenovis/src/Camera.cpp
index 232d820946..7706ac558a 100644
--- a/zenovis/src/Camera.cpp
+++ b/zenovis/src/Camera.cpp
@@ -14,42 +14,15 @@ void Camera::setCamera(zeno::CameraData const &cam) {
             glm::vec3(cam.pos[0], cam.pos[1], cam.pos[2]),
             glm::vec3(cam.view[0], cam.view[1], cam.view[2]),
             glm::vec3(cam.up[0], cam.up[1], cam.up[2]));
-    //this->m_dof = cam.dof;
     this->m_aperture = cam.aperture;
     this->focalPlaneDistance = cam.focalPlaneDistance;
 
-//    zeno::log_info("radius {}", m_zxx.radius);
-
-    if (cam.isSet) {
-        m_center = cam.center;
-        m_theta = cam.theta;
-        m_phi = cam.phi;
-        m_radius = cam.radius;
+    if (cam.pivot.has_value()) {
+        this->m_pivot = zeno::vec_to_other<glm::vec3>(cam.pivot.value());
     }
     else {
-        auto view = zeno::normalize(cam.view);
-        zeno::vec3f center = cam.pos + m_radius * zeno::normalize(cam.view);
-        float theta = M_PI_2 - glm::acos(zeno::dot(view, zeno::vec3f(0, 1, 0)));
-        float phi = M_PI_2 + std::atan2(view[2], view[0]);
-//        zeno::log_info("theta: {}", theta);
-//        zeno::log_info("phi: {}", phi);
-
-        m_center = center;
-        m_theta = theta;
-        m_phi = phi;
-
-        float cos_t = glm::cos(m_theta), sin_t = glm::sin(m_theta);
-        float cos_p = glm::cos(m_phi), sin_p = glm::sin(m_phi);
-        glm::vec3 front(cos_t * sin_p, sin_t, -cos_t * cos_p);
-        glm::vec3 up(-sin_t * sin_p, cos_t, sin_t * cos_p);
-        glm::vec3 left = glm::cross(up, front);
-        float map_to_up = glm::dot(up, zeno::vec_to_other<glm::vec3>(cam.up));
-        float map_to_left = glm::dot(left, zeno::vec_to_other<glm::vec3>(cam.up));
-        m_roll = glm::atan(map_to_left, map_to_up);
+        this->m_pivot = zeno::vec_to_other<glm::vec3>(cam.pos);
     }
-
-    this->m_auto_radius = !cam.isSet;
-    this->m_need_sync = true;
 }
 
 void Camera::setPhysicalCamera(float aperture, float shutter_speed, float iso, bool aces, bool exposure) {
@@ -60,54 +33,36 @@ void Camera::setPhysicalCamera(float aperture, float shutter_speed, float iso, b
     this->zOptixCameraSettingInfo.exposure = exposure;
 }
 
-static glm::mat4 MakeInfReversedZProjRH(float fovY_radians, float aspectWbyH, float zNear) {
-    float f = 1.0f / tan(fovY_radians / 2.0f);
-    return glm::mat4(
-            f / aspectWbyH, 0.0f,  0.0f,  0.0f,
-            0.0f,    f,  0.0f,  0.0f,
-            0.0f, 0.0f,  0.0f, -1.0f,
-            0.0f, 0.0f, zNear,  0.0f);
+void Camera::placeCamera(glm::vec3 pos, glm::vec3 view, glm::vec3 up) {
+    // Places the camera at `pos` looking along `view`. quat_cast needs an orthonormal basis, so `view` is
+    // normalized and `up` is re-derived from it rather than storing the raw inputs as matrix columns.
+    const glm::vec3 front = glm::normalize(view);
+    const glm::vec3 right = glm::normalize(glm::cross(front, up));
+    const glm::mat3 rotation(right, glm::cross(right, front), -front);
+
+    Camera::placeCamera(pos, glm::quat_cast(rotation));
 }
-void Camera::placeCamera(glm::vec3 pos, glm::vec3 front, glm::vec3 up) {
-    front = glm::normalize(front);
-    up = glm::normalize(up);
-
-    m_lodcenter = pos;
-    m_lodfront = front;
-    m_lodup = up;
-
-    m_view = glm::lookAt(m_lodcenter, m_lodcenter + m_lodfront, m_lodup);
-    if (m_ortho_mode) {
-        auto radius = m_radius;
-        m_proj = glm::orthoZO(-radius * getAspect(), radius * getAspect(), -radius,
-                radius, m_far, m_near);
-    } else {
-        m_proj = MakeInfReversedZProjRH(glm::radians(m_fov), getAspect(), inf_z_near);
-    }
+
+void Camera::placeCamera(glm::vec3 pos, glm::quat rotation) {
+    m_pos = pos;
+    m_rotation = rotation;
 }
 
 void Camera::updateMatrix() {
-    auto center = zeno::vec_to_other<glm::vec3>(m_center) ;
-    float cos_t = glm::cos(m_theta), sin_t = glm::sin(m_theta);
-    float cos_p = glm::cos(m_phi), sin_p = glm::sin(m_phi);
-    glm::vec3 front(cos_t * sin_p, sin_t, -cos_t * cos_p);
-    glm::vec3 up(-sin_t * sin_p, cos_t, sin_t * cos_p);
-    glm::vec3 left = glm::cross(up, front);
-    up = glm::cos(m_roll) * up + glm::sin(m_roll) * left;
+    auto center = zeno::vec_to_other<glm::vec3>(m_pivot) ;
 
     if (!m_ortho_mode) {
         m_near = 0.05f;
-        m_far = 20000.0f * std::max(1.0f, (float)m_radius / 10000.f);
-        placeCamera(center - front * m_radius, front, up);
+        m_far = 20000.0f * std::max(1.0f, get_radius() / 10000.f);
+        placeCamera(getPos(), m_rotation);
     } else {
-        placeCamera(center - front * m_radius * 0.4f, front, up);
+        placeCamera(getPos(), m_rotation);
     }
 }
 
 void Camera::setResolution(int nx, int ny) {
     m_nx = nx;
     m_ny = ny;
-    m_proj = MakeInfReversedZProjRH(glm::radians(m_fov), getAspect(), inf_z_near);
 }
 void Camera::setResolutionInfo(bool block, int nx, int ny)
 {
@@ -129,30 +84,13 @@ bool Camera::is_locked_window() const {
 
 void Camera::focusCamera(float cx, float cy, float cz, float radius) {
     auto center = glm::vec3(cx, cy, cz);
-    placeCamera(center - m_lodfront * radius, m_lodfront, m_lodup);
-}
-void Camera::lookCamera(float cx, float cy, float cz, float theta, float phi, float radius, bool ortho_mode, float fov, float aperture, float focalPlaneDistance) {
-    m_zxx.cx = cx;
-    m_zxx.cy = cy;
-    m_zxx.cz = cz;
-    m_zxx.theta = theta;
-    m_zxx.phi = phi;
-    m_zxx.radius = radius;
-    m_zxx.fov = fov;
-    m_zxx.ortho_mode = ortho_mode;
-    m_zxx.aperture = aperture;
-    m_zxx.focalPlaneDistance = focalPlaneDistance;
-
-    m_ortho_mode = ortho_mode;
-    m_aperture = aperture;
-    this->focalPlaneDistance = focalPlaneDistance;
-
-    updateMatrix();
+    placeCamera(center - get_lodfront() * radius, m_rotation);
 }
 
 void Camera::set_program_uniforms(opengl::Program *pro) {
     pro->use();
-
+    auto m_view = get_view_matrix();
+    auto m_proj = get_proj_matrix();
     auto vp = m_proj * m_view;
     pro->set_uniform("mVP", vp);
     pro->set_uniform("mInvVP", glm::inverse(vp));
diff --git a/zenovis/src/bate/GraphicPrimitive.cpp b/zenovis/src/bate/GraphicPrimitive.cpp
index 2e6c376df5..8c35b8cb69 100644
--- a/zenovis/src/bate/GraphicPrimitive.cpp
+++ b/zenovis/src/bate/GraphicPrimitive.cpp
@@ -711,7 +711,7 @@ struct ZhxxGraphicPrimitive final : IGraphicDraw {
             triObj.prog->set_uniformi("mRenderWireframe", false);
             triObj.prog->set_uniformi("mCustomColor", custom_color);
             {
-                auto camera_center = scene->camera->m_lodcenter;
+                auto camera_center = scene->camera->m_pos;
                 triObj.prog->set_uniform("mCameraCenter", camera_center);
             }
 
diff --git a/zenovis/src/bate/GraphicRotateHandler.cpp b/zenovis/src/bate/GraphicRotateHandler.cpp
index 6216739bed..33bd4ab7a6 100644
--- a/zenovis/src/bate/GraphicRotateHandler.cpp
+++ b/zenovis/src/bate/GraphicRotateHandler.cpp
@@ -83,7 +83,7 @@ struct RotateHandler final : IGraphicHandler {
     }
 
     void draw() override {
-        auto dist = glm::distance(scene->camera->m_lodcenter, glm::vec3(center[0], center[1], center[2]));
+        auto dist = glm::distance(scene->camera->m_pos, glm::vec3(center[0], center[1], center[2]));
 
         bound = dist / 5.0f * scale;
 
diff --git a/zenovis/src/bate/GraphicScaleHandler.cpp b/zenovis/src/bate/GraphicScaleHandler.cpp
index c7f49a6e07..256cdd023c 100644
--- a/zenovis/src/bate/GraphicScaleHandler.cpp
+++ b/zenovis/src/bate/GraphicScaleHandler.cpp
@@ -86,7 +86,7 @@ struct ScaleHandler final : IGraphicHandler {
     }
 
     void draw() override {
-        auto dist = glm::distance(scene->camera->m_lodcenter, glm::vec3(center[0], center[1], center[2]));
+        auto dist = glm::distance(scene->camera->m_pos, glm::vec3(center[0], center[1], center[2]));
 
         bound = dist / 5.0f * scale;
 
@@ -142,7 +142,7 @@ struct ScaleHandler final : IGraphicHandler {
         }
         // xyz
         if (mode == INTERACT_NONE || mode == INTERACT_XYZ) {
-            const auto& view = scene->camera->m_view;
+            const auto& view = scene->camera->get_view_matrix();
             // http://www.opengl-tutorial.org/cn/intermediate-tutorials/billboards-particles/billboards/
             // always face camera
             // This is equivalent to mlutiplying (1,0,0) and (0,1,0) by inverse(ViewMatrix).
@@ -162,7 +162,7 @@ struct ScaleHandler final : IGraphicHandler {
         auto z_axis = glm::vec3(0, 0, 1);
 
         auto model_matrix = glm::translate(zeno::vec_to_other<glm::vec3>(center));
-        const auto& view = scene->camera->m_view;
+        const auto& view = scene->camera->get_view_matrix();
         
         float t;
 
diff --git a/zenovis/src/bate/GraphicTransHandler.cpp b/zenovis/src/bate/GraphicTransHandler.cpp
index 9315234c45..4e5a3712b8 100644
--- a/zenovis/src/bate/GraphicTransHandler.cpp
+++ b/zenovis/src/bate/GraphicTransHandler.cpp
@@ -79,7 +79,7 @@ struct TransHandler final : IGraphicHandler {
     }
 
     void draw() override {
-        auto dist = glm::distance(scene->camera->m_lodcenter, glm::vec3(center[0], center[1], center[2]));
+        auto dist = glm::distance(scene->camera->m_pos, glm::vec3(center[0], center[1], center[2]));
 
         bound = dist / 5.0f * scale;
 
diff --git a/zenovis/src/bate/HudGraphicGrid.cpp b/zenovis/src/bate/HudGraphicGrid.cpp
index 78215214c9..23d1e4e0f8 100644
--- a/zenovis/src/bate/HudGraphicGrid.cpp
+++ b/zenovis/src/bate/HudGraphicGrid.cpp
@@ -120,9 +120,9 @@ struct GraphicGrid final : IGraphicDraw {
         scene->camera->set_program_uniforms(prog);
 
         {
-            auto camera_radius = glm::length(scene->camera->m_lodcenter);
-            auto camera_center = scene->camera->m_lodcenter
-                + scene->camera->m_lodfront * camera_radius;
+            auto camera_radius = glm::length(scene->camera->m_pos);
+            auto camera_center = scene->camera->m_pos
+                + scene->camera->get_lodfront() * camera_radius;
             camera_radius *= scene->camera->m_fov / 45.f;
             float level = std::max(std::log(camera_radius) / std::log(5.0f) - 1.0f, -1.0f);
             auto grid_scale = std::pow(5.f, std::floor(level));
diff --git a/zenovis/src/bate/RenderEngineBate.cpp b/zenovis/src/bate/RenderEngineBate.cpp
index 0bda2c7c4d..8e01892518 100644
--- a/zenovis/src/bate/RenderEngineBate.cpp
+++ b/zenovis/src/bate/RenderEngineBate.cpp
@@ -67,6 +67,7 @@ struct RenderEngineBate : RenderEngine {
 //        }
         primHighlight->draw();
         if (scene->drawOptions->show_grid) {
+            glDepthMask(GL_FALSE);
             for (auto const &hudgra : hudGraphics) {
                 hudgra->draw();
             }
@@ -84,6 +85,7 @@ struct RenderEngineBate : RenderEngine {
                     *scene->camera = backup;
                 }
             }
+            glDepthMask(GL_TRUE);
         }
         if (!scene->selected.empty() && scene->drawOptions->handler) {
             CHECK_GL(glClear(GL_DEPTH_BUFFER_BIT));
@@ -92,7 +94,6 @@ struct RenderEngineBate : RenderEngine {
         if (!record) {
             fbr->unbind();
             fbr->draw_to_screen();
-            fbr->destroy_buffers();
         }
     }
 
@@ -110,6 +111,30 @@ struct RenderEngineBate : RenderEngine {
         primHighlight = nullptr;
         fbr = nullptr;
     }
+    std::optional<glm::vec3> getClickedPos(int x, int y) override {
+        auto depth = fbr->getDepth(x, y);
+        if (depth == 0) {
+            return {};
+        }
+        // Unprojects pixel (x, y) to a world-space point; the zero-depth case above yields an empty optional.
+
+        auto fov = scene->camera->m_fov;
+        float cz = scene->camera->inf_z_near / depth;
+        auto w = scene->camera->m_nx;
+        auto h = scene->camera->m_ny;
+        // cz is the view-space distance along the viewing axis, recovered as inf_z_near / depth.
+        // NOTE(review): that formula and the fov-based scaling below look perspective-only; confirm ortho mode.
+        // u, v: pixel coordinates remapped to [-1, 1], with v flipped so that y = 0 gives v = +1.
+        auto u = (2.0 * x / w) - 1;
+        auto v = 1 - (2.0 * y / h);
+        // fov is treated as degrees (converted by glm::radians); w / h scales the horizontal extent.
+        auto cy = v * tan(glm::radians(fov) / 2) * cz;
+        auto cx = u * tan(glm::radians(fov) / 2) * w / h * cz;
+        glm::vec4 cc = {cx, cy, -cz, 1};
+        auto wc = glm::inverse(scene->camera->get_view_matrix()) * cc;
+        wc /= wc.w;
+        return glm::vec3(wc);
+    }
 };
 
 static auto definer = RenderManager::registerRenderEngine<RenderEngineBate>("bate");
diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp
index cbd63ed2c3..bfae7ae3af 100644
--- a/zenovis/src/optx/RenderEngineOptx.cpp
+++ b/zenovis/src/optx/RenderEngineOptx.cpp
@@ -931,6 +931,15 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy {
     bool meshNeedUpdate = true;
     bool matNeedUpdate = true;
     bool staticNeedUpdate = true;
+    // Maps a viewport click to a world-space point via the path tracer's per-pixel hit buffer.
+    std::optional<glm::vec3> getClickedPos(int x, int y) override {
+        glm::vec3 const hitRelative = xinxinoptix::get_click_pos(x, y);
+        if (hitRelative == glm::vec3()) {
+            return std::nullopt; // an all-zero sample is treated as "nothing hit"
+        }
+        // The stored value gets the camera position added back before it is returned.
+        return hitRelative + scene->camera->m_pos;
+    }
 
     auto setupState() {
         return std::tuple{
@@ -987,7 +996,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy {
         graphicsMan->load_shader_uniforms(scene->objectsMan->pairs());
     }
 
-#define MY_CAM_ID(cam) cam.m_nx, cam.m_ny, cam.m_lodup, cam.m_lodfront, cam.m_lodcenter, cam.m_fov, cam.focalPlaneDistance, cam.m_aperture
+#define MY_CAM_ID(cam) cam.m_nx, cam.m_ny, cam.m_rotation, cam.m_pos, cam.m_fov, cam.focalPlaneDistance, cam.m_aperture
 #define MY_SIZE_ID(cam) cam.m_nx, cam.m_ny
     std::optional<decltype(std::tuple{MY_CAM_ID(std::declval<Camera>())})> oldcamid;
     std::optional<decltype(std::tuple{MY_SIZE_ID(std::declval<Camera>())})> oldsizeid;
@@ -1120,9 +1129,9 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy {
 
         if (sizeNeedUpdate || camNeedUpdate) {
             zeno::log_debug("[zeno-optix] updating camera");
-
-            auto lodright = glm::normalize(glm::cross(cam.m_lodfront, cam.m_lodup));
-            auto lodup = glm::normalize(glm::cross(lodright, cam.m_lodfront));
+            auto lodright = cam.m_rotation * glm::vec3(1, 0, 0);
+            auto lodup = cam.m_rotation * glm::vec3(0, 1, 0);
+            auto lodfront = cam.m_rotation * glm::vec3(0, 0, -1);
 
             std::random_device rd;
             std::mt19937 gen(rd());
@@ -1131,7 +1140,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy {
             xinxinoptix::set_outside_random_number(dis(gen));
         
             xinxinoptix::set_perspective(glm::value_ptr(lodright), glm::value_ptr(lodup),
-                                        glm::value_ptr(cam.m_lodfront), glm::value_ptr(cam.m_lodcenter),
+                                        glm::value_ptr(lodfront), glm::value_ptr(cam.m_pos),
                                         cam.getAspect(), cam.m_fov, cam.focalPlaneDistance, cam.m_aperture);
             xinxinoptix::set_physical_camera_param(
                 cam.zOptixCameraSettingInfo.aperture,
diff --git a/zenovis/src/zhxx/RenderEngineZhxx.cpp b/zenovis/src/zhxx/RenderEngineZhxx.cpp
index f95e8f0649..df49189ed5 100644
--- a/zenovis/src/zhxx/RenderEngineZhxx.cpp
+++ b/zenovis/src/zhxx/RenderEngineZhxx.cpp
@@ -85,7 +85,6 @@ struct RenderEngineZhxx : RenderEngine, zeno::disable_copy {
         auto guard = setupState();
         auto const &cam = *scene->camera;
         auto const &opt = *scene->drawOptions;
-        auto const &zxx = cam.m_zxx;
 
         if (!giWasEnable && opt.enable_gi) {
             giNeedUpdate = true;
@@ -106,8 +105,8 @@ struct RenderEngineZhxx : RenderEngine, zeno::disable_copy {
         zenvis::setDOF(cam.m_dof);
         zenvis::setAperature(cam.m_aperture);
         zenvis::set_window_size(cam.m_nx, cam.m_ny);
-        zenvis::look_perspective(zxx.cx, zxx.cy, zxx.cz, zxx.theta,
-                zxx.phi, zxx.radius, zxx.fov, zxx.ortho_mode);
+//        zenvis::look_perspective(zxx.cx, zxx.cy, zxx.cz, zxx.theta,
+//                zxx.phi, zxx.radius, zxx.fov, zxx.ortho_mode);
         int targetFBO = 0;
         CHECK_GL(glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &targetFBO));
         CHECK_GL(glClearColor(scene->drawOptions->bgcolor.r, scene->drawOptions->bgcolor.g,
diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu
index b79153f4f0..4750b192ca 100644
--- a/zenovis/xinxinoptix/DeflMatShader.cu
+++ b/zenovis/xinxinoptix/DeflMatShader.cu
@@ -441,6 +441,7 @@ extern "C" __global__ void __closesthit__radiance()
     //MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs);
     MatOutput mats = optixDirectCall<MatOutput, cudaTextureObject_t[], float4*, const MatInput&>( rt_data->dc_index, rt_data->textures, rt_data->uniforms, attrs );
     prd->mask_value = mats.mask_value;
+    prd->click_pos = P;
 
     if (prd->test_distance) {
     
diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu
index 843ce5bf47..fdcbd92ca6 100644
--- a/zenovis/xinxinoptix/PTKernel.cu
+++ b/zenovis/xinxinoptix/PTKernel.cu
@@ -178,6 +178,7 @@ extern "C" __global__ void __raygen__rg()
     float3 tmp_normal{};
     unsigned int sobolseed = subframe_index;
     float3 mask_value = make_float3( 0.0f );
+    float3 click_pos = make_float3( 0.0f );
 
     do{
         // The center of each pixel is at fraction (0.5,0.5)
@@ -262,6 +263,7 @@ extern "C" __global__ void __raygen__rg()
         prd.direction = ray_direction;
         prd.samplePdf = 1.0f;
         prd.mask_value = make_float3( 0.0f );
+        prd.click_pos = make_float3( 0.0f );
 
         prd.depth = 0;
         prd.diffDepth = 0;
@@ -288,6 +290,7 @@ extern "C" __global__ void __raygen__rg()
         prd.alphaHit = false;
 
         traceRadiance(params.handle, ray_origin, ray_direction, _tmin_, prd.maxDistance, &prd, _mask_);
+        click_pos = prd.click_pos;
         float3 m = prd.mask_value;
         mask_value = mask_value + m;
 
@@ -430,9 +433,9 @@ extern "C" __global__ void __raygen__rg()
     params.accum_buffer_S[ image_index ] = make_float3( accum_color_s.x,accum_color_s.y, accum_color_s.z);
     params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z);
     params.accum_buffer_B[ image_index ] = float_to_half(accum_color_b.x);
-
     params.frame_buffer[ image_index ] = make_color ( accum_color );
     params.frame_buffer_M[ image_index ] = float3_to_half3(accum_mask);
+    params.frame_buffer_P[ image_index ] = float3_to_half3(click_pos);
 
     if (params.denoise) {
         params.albedo_buffer[ image_index ] = tmp_albedo;
diff --git a/zenovis/xinxinoptix/TraceStuff.h b/zenovis/xinxinoptix/TraceStuff.h
index 18808a7dec..794a91b3ea 100644
--- a/zenovis/xinxinoptix/TraceStuff.h
+++ b/zenovis/xinxinoptix/TraceStuff.h
@@ -105,6 +105,7 @@ struct RadiancePRD
     unsigned char adepth;
     bool         alphaHit;
     vec3         mask_value;
+    vec3         click_pos;
     unsigned char max_depth;
 
     uint16_t lightmask = EverythingMask;
diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp
index 6575430c19..f4ccbd84c2 100644
--- a/zenovis/xinxinoptix/optixPathTracer.cpp
+++ b/zenovis/xinxinoptix/optixPathTracer.cpp
@@ -299,6 +299,7 @@ struct PathTracerState
     raii<CUdeviceptr> accum_buffer_s;
     raii<CUdeviceptr> accum_buffer_t;
     raii<CUdeviceptr> accum_buffer_b;
+    raii<CUdeviceptr> frame_buffer_p;
     raii<CUdeviceptr> accum_buffer_m;
 
     raii<CUdeviceptr> finite_lights_ptr;
@@ -635,6 +636,10 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
         reinterpret_cast<void**>( &state.accum_buffer_m .reset()),
         params.width * params.height * sizeof( ushort3 )
             ) );
+    CUDA_CHECK( cudaMalloc(
+        reinterpret_cast<void**>( &state.frame_buffer_p .reset()),
+        params.width * params.height * sizeof( ushort3 )
+            ) );
     CUDA_CHECK( cudaMalloc(
         reinterpret_cast<void**>( &state.accum_buffer_b .reset()),
         params.width * params.height * sizeof( ushort1 )
@@ -657,6 +662,7 @@ static void handleResize( sutil::CUDAOutputBuffer<uchar4>& output_buffer, Params
     state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s;
     state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t;
     state.params.frame_buffer_M = (ushort3*)(CUdeviceptr)state.accum_buffer_m;
+    state.params.frame_buffer_P = (ushort3*)(CUdeviceptr)state.frame_buffer_p;
     state.params.accum_buffer_B = (ushort1*)(CUdeviceptr)state.accum_buffer_b;
     state.params.subframe_index = 0;
 }
@@ -3629,7 +3635,6 @@ void set_window_size(int nx, int ny) {
     camera_changed = true;
     resize_dirty = true;
 }
-
 void set_physical_camera_param(float aperture, float shutter_speed, float iso, bool aces, bool exposure) {
     state.params.physical_camera_aperture = aperture;
     state.params.physical_camera_shutter_speed = shutter_speed;
@@ -3732,6 +3737,16 @@ std::vector<float> optixgetimg_extra2(std::string name, int w, int h) {
             tex_data[i * 3 + 2] = v.z;
         }
     }
+    else if (name == "pos") {
+        std::vector<ushort3> temp_buffer(w * h);
+        cudaMemcpy(temp_buffer.data(), (void*)state.frame_buffer_p.handle, sizeof(ushort3) * temp_buffer.size(), cudaMemcpyDeviceToHost);
+        for (auto i = 0; i < temp_buffer.size(); i++) {
+            float3 v = toFloat(temp_buffer[i]);
+            tex_data[i * 3 + 0] = v.x;
+            tex_data[i * 3 + 1] = v.y;
+            tex_data[i * 3 + 2] = v.z;
+        }
+    }
     else if (name == "color") {
         cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_p.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost);
     }
@@ -3776,6 +3791,9 @@ std::vector<half> optixgetimg_extra3(std::string name, int w, int h) {
     else if (name == "mask") {
         cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_m.handle, sizeof(half) * tex_data.size(), cudaMemcpyDeviceToHost);
     }
+    else if (name == "pos") {
+        cudaMemcpy(tex_data.data(), (void*)state.frame_buffer_p.handle, sizeof(half) * tex_data.size(), cudaMemcpyDeviceToHost);
+    }
     else if (name == "color") {
         std::vector<float> temp_buffer(w * h * 3);
         cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_p.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost);
@@ -3789,6 +3807,16 @@ std::vector<half> optixgetimg_extra3(std::string name, int w, int h) {
     zeno::image_flip_vertical((ushort3*)tex_data.data(), w, h);
     return tex_data;
 }
+
+// Returns the value stored in the "pos" buffer for pixel (x, y); zero vector if (x, y) is off-image.
+glm::vec3 get_click_pos(int x, int y) {
+    const int w = state.params.width, h = state.params.height;
+    if (x < 0 || y < 0 || x >= w || y >= h) return glm::vec3(0.0f); // callers read all-zero as "no hit"
+    auto frame_buffer_pos = optixgetimg_extra2("pos", w, h); // host copy: x,y,z floats per pixel
+    auto index = x + (h - 1 - y) * w; // row is counted from the opposite edge (h - 1 - y)
+    return ((glm::vec3*)frame_buffer_pos.data())[index];
+}
+
 static void save_exr(float3* ptr, int w, int h, std::string path) {
     std::vector<float3> data(w * h);
     std::copy_n(ptr, w * h, data.data());
diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h
index 636366da22..a5c4e83d75 100644
--- a/zenovis/xinxinoptix/optixPathTracer.h
+++ b/zenovis/xinxinoptix/optixPathTracer.h
@@ -161,6 +161,7 @@ struct Params
     ushort1*     accum_buffer_B;
     uchar4*      frame_buffer;
     ushort3*     frame_buffer_M;
+    ushort3*     frame_buffer_P;
 
     float3*      debug_buffer;
     float3*      albedo_buffer;
diff --git a/zenovis/xinxinoptix/xinxinoptixapi.h b/zenovis/xinxinoptix/xinxinoptixapi.h
index a75af3a313..51c54320ca 100644
--- a/zenovis/xinxinoptix/xinxinoptixapi.h
+++ b/zenovis/xinxinoptix/xinxinoptixapi.h
@@ -6,6 +6,7 @@
 #include <map>
 #include <set>
 
+#include <glm/glm.hpp>
 #include "optixSphere.h"
 #include "zeno/utils/vec.h"
 #include "zeno/types/LightObject.h"
@@ -66,6 +67,7 @@ void load_object(std::string const &key, std::string const &mtlid, const std::st
 void unload_object(std::string const &key);
 void load_inst(const std::string &key, const std::string &instID, const std::string &onbType, std::size_t numInsts, const float *pos, const float *nrm, const float *uv, const float *clr, const float *tang);
 void unload_inst(const std::string &key);
+glm::vec3 get_click_pos(int x, int y);
 
 struct LightDat {
     std::vector<float> v0;

From 3874253c6992bc9e06f026e460d1acbe1668d230 Mon Sep 17 00:00:00 2001
From: iaomw <iaomw@live.com>
Date: Tue, 2 Jul 2024 15:57:55 +0800
Subject: [PATCH 22/24] Texture BlockCompression (#1951)

* update stbi

* bc3 + bc4

* BCX
---
 zeno/include/zeno/types/TextureObject.h |     9 +
 zeno/src/nodes/mtl/MakeTexture.cpp      |     2 +
 zeno/src/nodes/mtl/ShaderTexture.cpp    |     4 +-
 zenovis/src/optx/RenderEngineOptx.cpp   |     8 +-
 zenovis/stbi/include/stb_dxt.h          |   719 ++
 zenovis/stbi/include/stb_image.h        |  3163 +++--
 zenovis/stbi/include/tinyexr.h          | 13315 ----------------------
 zenovis/stbi/src/stbi.c                 |     3 +
 zenovis/stbi/src/tinyexr.cpp            |     2 -
 zenovis/xinxinoptix/BCX.h               |    80 +
 zenovis/xinxinoptix/CMakeLists.txt      |     2 +
 zenovis/xinxinoptix/OptiXStuff.h        |   142 +-
 12 files changed, 3121 insertions(+), 14328 deletions(-)
 create mode 100644 zenovis/stbi/include/stb_dxt.h
 delete mode 100644 zenovis/stbi/include/tinyexr.h
 delete mode 100644 zenovis/stbi/src/tinyexr.cpp
 create mode 100644 zenovis/xinxinoptix/BCX.h

diff --git a/zeno/include/zeno/types/TextureObject.h b/zeno/include/zeno/types/TextureObject.h
index 6131e4db18..229d587707 100644
--- a/zeno/include/zeno/types/TextureObject.h
+++ b/zeno/include/zeno/types/TextureObject.h
@@ -35,6 +35,8 @@ namespace zeno
         TexFilterEnum minFilter;
         TexFilterEnum magFilter;
 
+        bool blockCompression = false; // defaulted so paths that never assign it do not serialize an indeterminate value
+
         size_t serializeSize()
         {
             size_t size{0};
@@ -49,6 +51,7 @@ namespace zeno
             size += sizeof(minFilter);
             size += sizeof(magFilter);
 
+            size += sizeof(blockCompression);
             return size;
         }
 
@@ -75,6 +78,9 @@ namespace zeno
             memcpy(str.data() + i, &magFilter, sizeof(magFilter));
             i += sizeof(magFilter);
 
+            memcpy(str.data() + i, &blockCompression, sizeof(blockCompression));
+            i += sizeof(blockCompression);
+
             return str;
         }
         
@@ -103,6 +109,9 @@ namespace zeno
             memcpy(&(tex.magFilter), str.data() + i, sizeof(magFilter));
             i += sizeof(magFilter);
 
+            memcpy(&(tex.blockCompression), str.data() + i, sizeof(blockCompression));
+            i += sizeof(blockCompression);
+
             return tex;
         }
 
diff --git a/zeno/src/nodes/mtl/MakeTexture.cpp b/zeno/src/nodes/mtl/MakeTexture.cpp
index d5dd75e2c5..322f575bdd 100644
--- a/zeno/src/nodes/mtl/MakeTexture.cpp
+++ b/zeno/src/nodes/mtl/MakeTexture.cpp
@@ -88,6 +88,7 @@ namespace zeno
 
 #undef SET_TEX_FILTER
 
+			tex->blockCompression = get_input2<bool>("blockCompression");
 			set_output("tex", std::move(tex));
 		}
 	};
@@ -102,6 +103,7 @@ namespace zeno
 				{(std::string) "enum " + texWrapping, "wrapT", "REPEAT"},
 				{(std::string) "enum " + texFiltering, "minFilter", "LINEAR"},
 				{(std::string) "enum " + texFiltering, "magFilter", "LINEAR"},
+				{"bool", "blockCompression", "false"}
 			},
 			{
 				{"texture", "tex"},
diff --git a/zeno/src/nodes/mtl/ShaderTexture.cpp b/zeno/src/nodes/mtl/ShaderTexture.cpp
index 59e3692122..4ec10b750a 100644
--- a/zeno/src/nodes/mtl/ShaderTexture.cpp
+++ b/zeno/src/nodes/mtl/ShaderTexture.cpp
@@ -262,6 +262,7 @@ struct SmartTexture2D : ShaderNodeClone<SmartTexture2D>
             stbi_flip_vertically_on_write(false);
             stbi_write_png(tex->path.c_str(), width, height, 3, col.data(), 0);
         }
+        tex->blockCompression = get_input2<bool>("blockCompression");
 
     #define SET_TEX_WRAP(TEX, WRAP)                                    \
         if (WRAP == "REPEAT")                                          \
@@ -351,7 +352,8 @@ ZENDEFNODE(SmartTexture2D, {
         {"vec2f", "uvtiling", "1,1"},
         {"vec4f", "value", "0,0,0,0"},
         {"enum float vec2 vec3 vec4 R G B A", "type", "vec3"},
-        {"enum raw srgb normal_map", "post_process", "raw"}
+        {"enum raw srgb normal_map", "post_process", "raw"},
+        {"bool", "blockCompression", "false"}
     },
     {
         {"shader", "out"},
diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp
index bfae7ae3af..3b808a3401 100644
--- a/zenovis/src/optx/RenderEngineOptx.cpp
+++ b/zenovis/src/optx/RenderEngineOptx.cpp
@@ -1227,14 +1227,14 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy {
 
             // Auto unload unused texure
             {
-                std::set<std::string> realNeedTexPaths;
+                std::map<std::string, bool> realNeedTexPaths;
                 for(auto const &[matkey, mtldet] : matMap) {
                     if (mtldet->parameters.find("vol") != std::string::npos
                         || cachedMeshesMaterials.count(mtldet->mtlidkey) > 0
                         || cachedSphereMaterials.count(mtldet->mtlidkey) > 0) 
                     {
                         for(auto& tex: mtldet->tex2Ds) {
-                            realNeedTexPaths.insert(tex->path);
+                            realNeedTexPaths.insert( {tex->path, tex->blockCompression} );
                         }
                     }
                     
@@ -1245,7 +1245,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy {
                     //     realNeedTexPaths.emplace_back(ld.profileKey);
                     // }
                     if (ld.textureKey.size()) {
-                        realNeedTexPaths.insert(ld.textureKey);
+                        realNeedTexPaths.insert( {ld.textureKey, false});
                     }
                 }
                 std::vector<std::string> needToRemoveTexPaths;
@@ -1265,7 +1265,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy {
                     OptixUtil::removeTexture(need_remove_tex);
                 }
                 for (const auto& realNeedTexPath: realNeedTexPaths) {
-                    OptixUtil::addTexture(realNeedTexPath);
+                    OptixUtil::addTexture(realNeedTexPath.first, realNeedTexPath.second);
                 }
             }
             for(auto const &[matkey, mtldet] : matMap)
diff --git a/zenovis/stbi/include/stb_dxt.h b/zenovis/stbi/include/stb_dxt.h
new file mode 100644
index 0000000000..6150a87f08
--- /dev/null
+++ b/zenovis/stbi/include/stb_dxt.h
@@ -0,0 +1,719 @@
+// stb_dxt.h - v1.12 - DXT1/DXT5 compressor - public domain
+// original by fabian "ryg" giesen - ported to C by stb
+// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
+//
+// USAGE:
+//   call stb_compress_dxt_block() for every block (you must pad)
+//     source should be a 4x4 block of RGBA data in row-major order;
+//     Alpha channel is not stored if you specify alpha=0 (but you
+//     must supply some constant alpha in the alpha channel).
+//     You can turn on dithering and "high quality" using mode.
+//
+// version history:
+//   v1.12  - (ryg) fix bug in single-color table generator
+//   v1.11  - (ryg) avoid racy global init, better single-color tables, remove dither
+//   v1.10  - (i.c) various small quality improvements
+//   v1.09  - (stb) update documentation re: surprising alpha channel requirement
+//   v1.08  - (stb) fix bug in dxt-with-alpha block
+//   v1.07  - (stb) bc4; allow not using libc; add STB_DXT_STATIC
+//   v1.06  - (stb) fix to known-broken 1.05
+//   v1.05  - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski)
+//   v1.04  - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
+//            single color match fix (allow for inexact color interpolation);
+//            optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
+//   v1.03  - (stb) endianness support
+//   v1.02  - (stb) fix alpha encoding bug
+//   v1.01  - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
+//   v1.00  - (stb) first release
+//
+// contributors:
+//   Rich Geldreich (more accurate index selection)
+//   Kevin Schmidt (#defines for "freestanding" compilation)
+//   github:ppiastucki (BC4 support)
+//   Ignacio Castano - improve DXT endpoint quantization
+//   Alan Hickman - static table initialization
+//
+// LICENSE
+//
+//   See end of file for license information.
+
+#ifndef STB_INCLUDE_STB_DXT_H
+#define STB_INCLUDE_STB_DXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef STB_DXT_STATIC
+#define STBDDEF static
+#else
+#define STBDDEF extern
+#endif
+
+// compression mode (bitflags)
+#define STB_DXT_NORMAL    0
+#define STB_DXT_DITHER    1   // use dithering. was always dubious, now deprecated. does nothing!
+#define STB_DXT_HIGHQUAL  2   // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
+
+STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode); // writes 8 bytes to dest, or 16 when alpha is nonzero
+STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel); // writes 8 bytes to dest
+STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel); // writes 16 bytes to dest
+
+#define STB_COMPRESS_DXT_BLOCK
+
+#ifdef __cplusplus
+}
+#endif
+#endif // STB_INCLUDE_STB_DXT_H
+
+#ifdef STB_DXT_IMPLEMENTATION
+
+// configuration options for DXT encoder. set them in the project/makefile or just define
+// them at the top.
+
+// STB_DXT_USE_ROUNDING_BIAS
+//     use a rounding bias during color interpolation. this is closer to what "ideal"
+//     interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
+//     implicitly had this turned on.
+//
+//     in case you're targeting a specific type of hardware (e.g. console programmers):
+//     NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
+//     to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
+//     you also see "(a*5 + b*3) / 8" on some old GPU designs.
+// #define STB_DXT_USE_ROUNDING_BIAS
+
+#include <stdlib.h>
+
+#if !defined(STBD_FABS)
+#include <math.h>
+#endif
+
+#ifndef STBD_FABS
+#define STBD_FABS(x)          fabs(x)
+#endif
+
+static const unsigned char stb__OMatch5[256][2] = {
+   {  0,  0 }, {  0,  0 }, {  0,  1 }, {  0,  1 }, {  1,  0 }, {  1,  0 }, {  1,  0 }, {  1,  1 },
+   {  1,  1 }, {  1,  1 }, {  1,  2 }, {  0,  4 }, {  2,  1 }, {  2,  1 }, {  2,  1 }, {  2,  2 },
+   {  2,  2 }, {  2,  2 }, {  2,  3 }, {  1,  5 }, {  3,  2 }, {  3,  2 }, {  4,  0 }, {  3,  3 },
+   {  3,  3 }, {  3,  3 }, {  3,  4 }, {  3,  4 }, {  3,  4 }, {  3,  5 }, {  4,  3 }, {  4,  3 },
+   {  5,  2 }, {  4,  4 }, {  4,  4 }, {  4,  5 }, {  4,  5 }, {  5,  4 }, {  5,  4 }, {  5,  4 },
+   {  6,  3 }, {  5,  5 }, {  5,  5 }, {  5,  6 }, {  4,  8 }, {  6,  5 }, {  6,  5 }, {  6,  5 },
+   {  6,  6 }, {  6,  6 }, {  6,  6 }, {  6,  7 }, {  5,  9 }, {  7,  6 }, {  7,  6 }, {  8,  4 },
+   {  7,  7 }, {  7,  7 }, {  7,  7 }, {  7,  8 }, {  7,  8 }, {  7,  8 }, {  7,  9 }, {  8,  7 },
+   {  8,  7 }, {  9,  6 }, {  8,  8 }, {  8,  8 }, {  8,  9 }, {  8,  9 }, {  9,  8 }, {  9,  8 },
+   {  9,  8 }, { 10,  7 }, {  9,  9 }, {  9,  9 }, {  9, 10 }, {  8, 12 }, { 10,  9 }, { 10,  9 },
+   { 10,  9 }, { 10, 10 }, { 10, 10 }, { 10, 10 }, { 10, 11 }, {  9, 13 }, { 11, 10 }, { 11, 10 },
+   { 12,  8 }, { 11, 11 }, { 11, 11 }, { 11, 11 }, { 11, 12 }, { 11, 12 }, { 11, 12 }, { 11, 13 },
+   { 12, 11 }, { 12, 11 }, { 13, 10 }, { 12, 12 }, { 12, 12 }, { 12, 13 }, { 12, 13 }, { 13, 12 },
+   { 13, 12 }, { 13, 12 }, { 14, 11 }, { 13, 13 }, { 13, 13 }, { 13, 14 }, { 12, 16 }, { 14, 13 },
+   { 14, 13 }, { 14, 13 }, { 14, 14 }, { 14, 14 }, { 14, 14 }, { 14, 15 }, { 13, 17 }, { 15, 14 },
+   { 15, 14 }, { 16, 12 }, { 15, 15 }, { 15, 15 }, { 15, 15 }, { 15, 16 }, { 15, 16 }, { 15, 16 },
+   { 15, 17 }, { 16, 15 }, { 16, 15 }, { 17, 14 }, { 16, 16 }, { 16, 16 }, { 16, 17 }, { 16, 17 },
+   { 17, 16 }, { 17, 16 }, { 17, 16 }, { 18, 15 }, { 17, 17 }, { 17, 17 }, { 17, 18 }, { 16, 20 },
+   { 18, 17 }, { 18, 17 }, { 18, 17 }, { 18, 18 }, { 18, 18 }, { 18, 18 }, { 18, 19 }, { 17, 21 },
+   { 19, 18 }, { 19, 18 }, { 20, 16 }, { 19, 19 }, { 19, 19 }, { 19, 19 }, { 19, 20 }, { 19, 20 },
+   { 19, 20 }, { 19, 21 }, { 20, 19 }, { 20, 19 }, { 21, 18 }, { 20, 20 }, { 20, 20 }, { 20, 21 },
+   { 20, 21 }, { 21, 20 }, { 21, 20 }, { 21, 20 }, { 22, 19 }, { 21, 21 }, { 21, 21 }, { 21, 22 },
+   { 20, 24 }, { 22, 21 }, { 22, 21 }, { 22, 21 }, { 22, 22 }, { 22, 22 }, { 22, 22 }, { 22, 23 },
+   { 21, 25 }, { 23, 22 }, { 23, 22 }, { 24, 20 }, { 23, 23 }, { 23, 23 }, { 23, 23 }, { 23, 24 },
+   { 23, 24 }, { 23, 24 }, { 23, 25 }, { 24, 23 }, { 24, 23 }, { 25, 22 }, { 24, 24 }, { 24, 24 },
+   { 24, 25 }, { 24, 25 }, { 25, 24 }, { 25, 24 }, { 25, 24 }, { 26, 23 }, { 25, 25 }, { 25, 25 },
+   { 25, 26 }, { 24, 28 }, { 26, 25 }, { 26, 25 }, { 26, 25 }, { 26, 26 }, { 26, 26 }, { 26, 26 },
+   { 26, 27 }, { 25, 29 }, { 27, 26 }, { 27, 26 }, { 28, 24 }, { 27, 27 }, { 27, 27 }, { 27, 27 },
+   { 27, 28 }, { 27, 28 }, { 27, 28 }, { 27, 29 }, { 28, 27 }, { 28, 27 }, { 29, 26 }, { 28, 28 },
+   { 28, 28 }, { 28, 29 }, { 28, 29 }, { 29, 28 }, { 29, 28 }, { 29, 28 }, { 30, 27 }, { 29, 29 },
+   { 29, 29 }, { 29, 30 }, { 29, 30 }, { 30, 29 }, { 30, 29 }, { 30, 29 }, { 30, 30 }, { 30, 30 },
+   { 30, 30 }, { 30, 31 }, { 30, 31 }, { 31, 30 }, { 31, 30 }, { 31, 30 }, { 31, 31 }, { 31, 31 },
+};
+static const unsigned char stb__OMatch6[256][2] = {
+   {  0,  0 }, {  0,  1 }, {  1,  0 }, {  1,  1 }, {  1,  1 }, {  1,  2 }, {  2,  1 }, {  2,  2 },
+   {  2,  2 }, {  2,  3 }, {  3,  2 }, {  3,  3 }, {  3,  3 }, {  3,  4 }, {  4,  3 }, {  4,  4 },
+   {  4,  4 }, {  4,  5 }, {  5,  4 }, {  5,  5 }, {  5,  5 }, {  5,  6 }, {  6,  5 }, {  6,  6 },
+   {  6,  6 }, {  6,  7 }, {  7,  6 }, {  7,  7 }, {  7,  7 }, {  7,  8 }, {  8,  7 }, {  8,  8 },
+   {  8,  8 }, {  8,  9 }, {  9,  8 }, {  9,  9 }, {  9,  9 }, {  9, 10 }, { 10,  9 }, { 10, 10 },
+   { 10, 10 }, { 10, 11 }, { 11, 10 }, {  8, 16 }, { 11, 11 }, { 11, 12 }, { 12, 11 }, {  9, 17 },
+   { 12, 12 }, { 12, 13 }, { 13, 12 }, { 11, 16 }, { 13, 13 }, { 13, 14 }, { 14, 13 }, { 12, 17 },
+   { 14, 14 }, { 14, 15 }, { 15, 14 }, { 14, 16 }, { 15, 15 }, { 15, 16 }, { 16, 14 }, { 16, 15 },
+   { 17, 14 }, { 16, 16 }, { 16, 17 }, { 17, 16 }, { 18, 15 }, { 17, 17 }, { 17, 18 }, { 18, 17 },
+   { 20, 14 }, { 18, 18 }, { 18, 19 }, { 19, 18 }, { 21, 15 }, { 19, 19 }, { 19, 20 }, { 20, 19 },
+   { 20, 20 }, { 20, 20 }, { 20, 21 }, { 21, 20 }, { 21, 21 }, { 21, 21 }, { 21, 22 }, { 22, 21 },
+   { 22, 22 }, { 22, 22 }, { 22, 23 }, { 23, 22 }, { 23, 23 }, { 23, 23 }, { 23, 24 }, { 24, 23 },
+   { 24, 24 }, { 24, 24 }, { 24, 25 }, { 25, 24 }, { 25, 25 }, { 25, 25 }, { 25, 26 }, { 26, 25 },
+   { 26, 26 }, { 26, 26 }, { 26, 27 }, { 27, 26 }, { 24, 32 }, { 27, 27 }, { 27, 28 }, { 28, 27 },
+   { 25, 33 }, { 28, 28 }, { 28, 29 }, { 29, 28 }, { 27, 32 }, { 29, 29 }, { 29, 30 }, { 30, 29 },
+   { 28, 33 }, { 30, 30 }, { 30, 31 }, { 31, 30 }, { 30, 32 }, { 31, 31 }, { 31, 32 }, { 32, 30 },
+   { 32, 31 }, { 33, 30 }, { 32, 32 }, { 32, 33 }, { 33, 32 }, { 34, 31 }, { 33, 33 }, { 33, 34 },
+   { 34, 33 }, { 36, 30 }, { 34, 34 }, { 34, 35 }, { 35, 34 }, { 37, 31 }, { 35, 35 }, { 35, 36 },
+   { 36, 35 }, { 36, 36 }, { 36, 36 }, { 36, 37 }, { 37, 36 }, { 37, 37 }, { 37, 37 }, { 37, 38 },
+   { 38, 37 }, { 38, 38 }, { 38, 38 }, { 38, 39 }, { 39, 38 }, { 39, 39 }, { 39, 39 }, { 39, 40 },
+   { 40, 39 }, { 40, 40 }, { 40, 40 }, { 40, 41 }, { 41, 40 }, { 41, 41 }, { 41, 41 }, { 41, 42 },
+   { 42, 41 }, { 42, 42 }, { 42, 42 }, { 42, 43 }, { 43, 42 }, { 40, 48 }, { 43, 43 }, { 43, 44 },
+   { 44, 43 }, { 41, 49 }, { 44, 44 }, { 44, 45 }, { 45, 44 }, { 43, 48 }, { 45, 45 }, { 45, 46 },
+   { 46, 45 }, { 44, 49 }, { 46, 46 }, { 46, 47 }, { 47, 46 }, { 46, 48 }, { 47, 47 }, { 47, 48 },
+   { 48, 46 }, { 48, 47 }, { 49, 46 }, { 48, 48 }, { 48, 49 }, { 49, 48 }, { 50, 47 }, { 49, 49 },
+   { 49, 50 }, { 50, 49 }, { 52, 46 }, { 50, 50 }, { 50, 51 }, { 51, 50 }, { 53, 47 }, { 51, 51 },
+   { 51, 52 }, { 52, 51 }, { 52, 52 }, { 52, 52 }, { 52, 53 }, { 53, 52 }, { 53, 53 }, { 53, 53 },
+   { 53, 54 }, { 54, 53 }, { 54, 54 }, { 54, 54 }, { 54, 55 }, { 55, 54 }, { 55, 55 }, { 55, 55 },
+   { 55, 56 }, { 56, 55 }, { 56, 56 }, { 56, 56 }, { 56, 57 }, { 57, 56 }, { 57, 57 }, { 57, 57 },
+   { 57, 58 }, { 58, 57 }, { 58, 58 }, { 58, 58 }, { 58, 59 }, { 59, 58 }, { 59, 59 }, { 59, 59 },
+   { 59, 60 }, { 60, 59 }, { 60, 60 }, { 60, 60 }, { 60, 61 }, { 61, 60 }, { 61, 61 }, { 61, 61 },
+   { 61, 62 }, { 62, 61 }, { 62, 62 }, { 62, 62 }, { 62, 63 }, { 63, 62 }, { 63, 63 }, { 63, 63 },
+};
+
+static int stb__Mul8Bit(int a, int b) // a*b/255 with rounding, computed with shifts only
+{
+  int biased = 128 + a*b;                // 128 is the rounding bias
+  return ((biased >> 8) + biased) >> 8;  // adding biased>>8 corrects the /256 into a /255
+}
+
+static void stb__From16Bit(unsigned char *out, unsigned short v) // unpack a 5:6:5 color into out[0..3]
+{
+   int red5   = (v >> 11) & 0x1f;
+   int green6 = (v >>  5) & 0x3f;
+   int blue5  =  v        & 0x1f;
+
+   // widen to 8 bits: x*33>>2 == (x<<3)|(x>>2) for 5-bit x, x*65>>4 == (x<<2)|(x>>4) for 6-bit x
+   out[0] = (unsigned char)((red5   * 33) >> 2);
+   out[1] = (unsigned char)((green6 * 65) >> 4);
+   out[2] = (unsigned char)((blue5  * 33) >> 2);
+   out[3] = 0;  // fourth byte is always cleared
+}
+
+static unsigned short stb__As16Bit(int r, int g, int b) // scale r,g,b by 31/63/31 over 255 and pack as 5:6:5
+{
+   return (unsigned short)(stb__Mul8Bit(b,31) + (stb__Mul8Bit(g,63) << 5) + (stb__Mul8Bit(r,31) << 11));
+}
+
+// Returns the point one third of the way from a towards b; rounding is selected by STB_DXT_USE_ROUNDING_BIAS.
+static int stb__Lerp13(int a, int b)
+{
+#ifndef STB_DXT_USE_ROUNDING_BIAS
+   // no rounding bias: plain truncating integer division
+   // ("/ 3" may be swapped for "* 0xaaab) >> 17" should the division ever prove too slow)
+   return (a + a + b) / 3;
+#else
+   // with rounding bias: 0x55/255 == 1/3, applied to the signed difference
+   return stb__Mul8Bit(b-a, 0x55) + a;
+#endif
+}
+
+// Writes the 1/3 blend from p1 towards p2 into out for the first three channels; out[3] is not written.
+static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
+{
+   int ch;
+   for (ch = 0; ch < 3; ++ch)
+      out[ch] = (unsigned char)stb__Lerp13(p1[ch], p2[ch]);
+}
+
+/****************************************************************************/
+
+static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1) // builds four 4-byte palette entries in color
+{
+   stb__From16Bit(&color[0], c0);                       // entry 0: c0 expanded to 8 bits per channel
+   stb__From16Bit(&color[4], c1);                       // entry 1: c1 expanded to 8 bits per channel
+   stb__Lerp13RGB(&color[8],  &color[0], &color[4]);    // entry 2: one third of the way from entry 0 to entry 1
+   stb__Lerp13RGB(&color[12], &color[4], &color[0]);    // entry 3: one third of the way from entry 1 to entry 0
+}
+
+// The color matching function: picks one of the 4 palette entries in color for each of the 16 pixels in block (4 bytes per pixel, first 3 used); returns 2 bits per pixel, pixel 0 in the lowest bits.
+static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color)
+{
+   unsigned int mask = 0;
+   int dirr = color[0*4+0] - color[1*4+0]; // axis pointing from palette entry 1 towards palette entry 0
+   int dirg = color[0*4+1] - color[1*4+1];
+   int dirb = color[0*4+2] - color[1*4+2];
+   int dots[16];  // projection of each pixel onto the axis
+   int stops[4];  // projection of each palette entry onto the axis
+   int i;
+   int c0Point, halfPoint, c3Point;
+
+   for(i=0;i<16;i++)
+      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;
+
+   for(i=0;i<4;i++)
+      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;
+
+   // think of the colors as arranged on a line; project point onto that line, then choose
+   // next color out of available ones. we compute the crossover points for "best color in top
+   // half"/"best in bottom half" and then the same inside that subinterval.
+   //
+   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
+   // but it's very close and a lot faster.
+   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
+
+   c0Point   = (stops[1] + stops[3]); // sums, not averages: they are compared against 2*dot below
+   halfPoint = (stops[3] + stops[2]);
+   c3Point   = (stops[2] + stops[0]);
+
+   for (i=15;i>=0;i--) { // last pixel first, so that pixel 0 ends up in bits 0-1 after the shifts
+      int dot = dots[i]*2;
+      mask <<= 2;
+
+      if(dot < halfPoint)
+         mask |= (dot < c0Point) ? 1 : 3; // entry 1 or entry 3
+      else
+         mask |= (dot < c3Point) ? 2 : 0; // entry 2 or entry 0
+   }
+
+   return mask;
+}
+
+// The color optimization function. (Clever code, part 1) Chooses the two pixels of block that lie furthest apart along an estimated principal color axis and stores them, as 16-bit colors, in *pmax16 and *pmin16.
+static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
+{
+  int mind,maxd;
+  unsigned char *minp, *maxp;
+  double magn;
+  int v_r,v_g,v_b;
+  static const int nIterPower = 4; // number of power-iteration steps
+  float covf[6],vfr,vfg,vfb;
+
+  // determine color distribution
+  int cov[6];
+  int mu[3],min[3],max[3];
+  int ch,i,iter;
+
+  for(ch=0;ch<3;ch++)
+  {
+    const unsigned char *bp = ((const unsigned char *) block) + ch;
+    int muv,minv,maxv;
+
+    muv = minv = maxv = bp[0];
+    for(i=4;i<64;i+=4) // remaining 15 pixels of this channel, 4 bytes apart
+    {
+      muv += bp[i];
+      if (bp[i] < minv) minv = bp[i];
+      else if (bp[i] > maxv) maxv = bp[i];
+    }
+
+    mu[ch] = (muv + 8) >> 4; // rounded mean of the 16 samples
+    min[ch] = minv;
+    max[ch] = maxv;
+  }
+
+  // determine covariance matrix
+  for (i=0;i<6;i++)
+     cov[i] = 0;
+
+  for (i=0;i<16;i++)
+  {
+    int r = block[i*4+0] - mu[0];
+    int g = block[i*4+1] - mu[1];
+    int b = block[i*4+2] - mu[2];
+
+    cov[0] += r*r; // symmetric 3x3 matrix stored as its 6 distinct entries: rr, rg, rb, gg, gb, bb
+    cov[1] += r*g;
+    cov[2] += r*b;
+    cov[3] += g*g;
+    cov[4] += g*b;
+    cov[5] += b*b;
+  }
+
+  // convert covariance matrix to float, find principal axis via power iter
+  for(i=0;i<6;i++)
+    covf[i] = cov[i] / 255.0f;
+
+  vfr = (float) (max[0] - min[0]); // starting vector: per-channel value range
+  vfg = (float) (max[1] - min[1]);
+  vfb = (float) (max[2] - min[2]);
+
+  for(iter=0;iter<nIterPower;iter++)
+  {
+    float r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2]; // one matrix-vector product per iteration
+    float g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
+    float b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];
+
+    vfr = r;
+    vfg = g;
+    vfb = b;
+  }
+
+  magn = STBD_FABS(vfr); // largest absolute component of the resulting vector
+  if (STBD_FABS(vfg) > magn) magn = STBD_FABS(vfg);
+  if (STBD_FABS(vfb) > magn) magn = STBD_FABS(vfb);
+
+   if(magn < 4.0f) { // too small, default to luminance
+      v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
+      v_g = 587;
+      v_b = 114;
+   } else {
+      magn = 512.0 / magn; // rescale so the largest component has magnitude 512 before truncating to int
+      v_r = (int) (vfr * magn);
+      v_g = (int) (vfg * magn);
+      v_b = (int) (vfb * magn);
+   }
+
+   minp = maxp = block;
+   mind = maxd = block[0]*v_r + block[1]*v_g + block[2]*v_b;
+   // Pick colors at extreme points
+   for(i=1;i<16;i++)
+   {
+      int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b;
+
+      if (dot < mind) {
+         mind = dot;
+         minp = block+i*4;
+      }
+
+      if (dot > maxd) {
+         maxd = dot;
+         maxp = block+i*4;
+      }
+   }
+
+   *pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]);
+   *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
+}
+
+static const float stb__midpoints5[32] = {
+   0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f, 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f,
+   0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f, 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f
+};
+
+static const float stb__midpoints6[64] = {
+   0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f, 0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f,
+   0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f, 0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f,
+   0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f, 0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f,
+   0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f, 0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f
+};
+
+static unsigned short stb__Quantize5(float x) // maps x, clamped to [0,1], to a level in 0..31
+{
+   unsigned short level;
+   if (x < 0) x = 0; else if (x > 1) x = 1;  // saturate
+   level = (unsigned short)(x * 31);          // truncation picks the lower candidate level
+   if (x > stb__midpoints5[level]) ++level;   // step up once x is past that level's tabulated midpoint
+   return level;
+}
+
+static unsigned short stb__Quantize6(float x) // maps x, clamped to [0,1], to a level in 0..63
+{
+   unsigned short level;
+   if (x < 0) x = 0; else if (x > 1) x = 1;  // saturate
+   level = (unsigned short)(x * 63);          // truncation picks the lower candidate level
+   if (x > stb__midpoints6[level]) ++level;   // step up once x is past that level's tabulated midpoint
+   return level;
+}
+
+// The refinement function. (Clever code, part 2)
+// Tries to optimize colors to suit block contents better, given the per-pixel indices in mask.
+// (By solving a least squares system via normal equations+Cramer's rule) Returns nonzero if *pmax16 or *pmin16 changed.
+static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
+{
+   static const int w1Tab[4] = { 3,0,2,1 }; // weight w1 of the max endpoint (in thirds) for each 2-bit index
+   static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 }; // (w1*w1)<<16 | (w2*w2)<<8 | w1*w2 with w2 = 3-w1
+   // ^some magic to save a lot of multiplies in the accumulating loop...
+   // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
+
+   float f;
+   unsigned short oldMin, oldMax, min16, max16;
+   int i, akku = 0, xx,xy,yy;
+   int At1_r,At1_g,At1_b;
+   int At2_r,At2_g,At2_b;
+   unsigned int cm = mask;
+
+   oldMin = *pmin16;
+   oldMax = *pmax16;
+
+   if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
+   {
+      // yes, linear system would be singular; solve using optimal
+      // single-color match on average color
+      int r = 8, g = 8, b = 8; // start at 8 so that the >>4 below rounds the 16-pixel average
+      for (i=0;i<16;++i) {
+         r += block[i*4+0];
+         g += block[i*4+1];
+         b += block[i*4+2];
+      }
+
+      r >>= 4; g >>= 4; b >>= 4;
+
+      max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
+      min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
+   } else {
+      At1_r = At1_g = At1_b = 0;
+      At2_r = At2_g = At2_b = 0;
+      for (i=0;i<16;++i,cm>>=2) { // cm is consumed two bits at a time, pixel 0 first
+         int step = cm&3;
+         int w1 = w1Tab[step];
+         int r = block[i*4+0];
+         int g = block[i*4+1];
+         int b = block[i*4+2];
+
+         akku    += prods[step];
+         At1_r   += w1*r;
+         At1_g   += w1*g;
+         At1_b   += w1*b;
+         At2_r   += r; // plain sums for now; turned into the w2-weighted sums after the loop
+         At2_g   += g;
+         At2_b   += b;
+      }
+
+      At2_r = 3*At2_r - At1_r; // sum((3-w1)*c) == 3*sum(c) - sum(w1*c)
+      At2_g = 3*At2_g - At1_g;
+      At2_b = 3*At2_b - At1_b;
+
+      // extract solutions and decide solvability
+      xx = akku >> 16;
+      yy = (akku >> 8) & 0xff;
+      xy = (akku >> 0) & 0xff;
+
+      f = 3.0f / 255.0f / (xx*yy - xy*xy); // scale factor over the determinant of the 2x2 system
+
+      max16 =  stb__Quantize5((At1_r*yy - At2_r * xy) * f) << 11;
+      max16 |= stb__Quantize6((At1_g*yy - At2_g * xy) * f) << 5;
+      max16 |= stb__Quantize5((At1_b*yy - At2_b * xy) * f) << 0;
+
+      min16 =  stb__Quantize5((At2_r*xx - At1_r * xy) * f) << 11;
+      min16 |= stb__Quantize6((At2_g*xx - At1_g * xy) * f) << 5;
+      min16 |= stb__Quantize5((At2_b*xx - At1_b * xy) * f) << 0;
+   }
+
+   *pmin16 = min16;
+   *pmax16 = max16;
+   return oldMin != min16 || oldMax != max16;
+}
+
+// Color block compression: encodes the 16 pixels in block (4 bytes each) into 8 bytes at dest - two 16-bit endpoints, then 32 bits of 2-bit indices.
+static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
+{
+   unsigned int mask;
+   int i;
+   int refinecount;
+   unsigned short max16, min16;
+   unsigned char color[4*4];
+
+   refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
+
+   // check if block is constant (compared bytewise: block need not be aligned for unsigned int access)
+   for (i=1;i<16;i++)
+      if (block[i*4+0] != block[0] || block[i*4+1] != block[1] || block[i*4+2] != block[2] || block[i*4+3] != block[3])
+         break;
+
+   if(i == 16) { // constant color
+      int r = block[0], g = block[1], b = block[2];
+      mask  = 0xaaaaaaaa; // every pixel uses index 2
+      max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
+      min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
+   } else {
+      // first step: PCA+map along principal axis
+      stb__OptimizeColorsBlock(block,&max16,&min16);
+      if (max16 != min16) {
+         stb__EvalColors(color,max16,min16);
+         mask = stb__MatchColorsBlock(block,color);
+      } else
+         mask = 0;
+
+      // third step: refine (multiple times if requested)
+      for (i=0;i<refinecount;i++) {
+         unsigned int lastmask = mask;
+
+         if (stb__RefineBlock(block,&max16,&min16,mask)) {
+            if (max16 != min16) {
+               stb__EvalColors(color,max16,min16);
+               mask = stb__MatchColorsBlock(block,color);
+            } else {
+               mask = 0;
+               break;
+            }
+         }
+
+         if(mask == lastmask) // indices did not change, further refinement would not either
+            break;
+      }
+  }
+
+  // write the color block
+  if(max16 < min16) // the larger 16-bit value is stored first
+  {
+     unsigned short t = min16;
+     min16 = max16;
+     max16 = t;
+     mask ^= 0x55555555; // endpoints swapped, so flip the low bit of every index (0<->1, 2<->3)
+  }
+
+  dest[0] = (unsigned char) (max16); // everything is emitted low byte first
+  dest[1] = (unsigned char) (max16 >> 8);
+  dest[2] = (unsigned char) (min16);
+  dest[3] = (unsigned char) (min16 >> 8);
+  dest[4] = (unsigned char) (mask);
+  dest[5] = (unsigned char) (mask >> 8);
+  dest[6] = (unsigned char) (mask >> 16);
+  dest[7] = (unsigned char) (mask >> 24);
+}
+
+// Alpha block compression (this is easy for a change): reads 16 values at src[0], src[stride], ... and writes 8 bytes to dest - max, min, then sixteen 3-bit indices.
+static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src, int stride)
+{
+   int i,dist,bias,dist4,dist2,bits,mask;
+
+   // find min/max color
+   int mn,mx;
+   mn = mx = src[0];
+
+   for (i=1;i<16;i++)
+   {
+      if (src[i*stride] < mn) mn = src[i*stride];
+      else if (src[i*stride] > mx) mx = src[i*stride];
+   }
+
+   // encode them
+   dest[0] = (unsigned char)mx;
+   dest[1] = (unsigned char)mn;
+   dest += 2;
+
+   // determine bias and emit color indices
+   // given the choice of mx/mn, these indices are optimal:
+   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
+   dist = mx-mn;
+   dist4 = dist*4;
+   dist2 = dist*2;
+   bias = (dist < 8) ? (dist - 1) : (dist/2 + 2);
+   bias -= mn * 7;
+   bits = 0,mask=0; // bits counts how many pending index bits are held in mask
+
+   for (i=0;i<16;i++) {
+      int a = src[i*stride]*7 + bias;
+      int ind,t;
+
+      // select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max).
+      t = (a >= dist4) ? -1 : 0; ind =  t & 4; a -= dist4 & t; // t is an all-ones or all-zeros mask
+      t = (a >= dist2) ? -1 : 0; ind += t & 2; a -= dist2 & t;
+      ind += (a >= dist);
+
+      // turn linear scale into DXT index (0/1 are extremal pts)
+      ind = -ind & 7;
+      ind ^= (2 > ind);
+
+      // write index
+      mask |= ind << bits;
+      if((bits += 3) >= 8) { // flush every completed byte, keep the leftover bits
+         *dest++ = (unsigned char)mask;
+         mask >>= 8;
+         bits -= 8;
+      }
+   }
+}
+
+void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
+{  // Encodes one 16-pixel RGBA block: 8 bytes of color, preceded by 8 bytes of alpha when alpha is nonzero.
+   unsigned char data[16][4];
+   if (alpha) {
+      int i;
+      stb__CompressAlphaBlock(dest,(unsigned char*) src+3, 4);
+      dest += 8;
+      // make a new copy of the data in which alpha is opaque, because code uses a fast test for
+      // color constancy (copied by hand: no header included by this implementation declares memcpy)
+      for (i=0; i < 16; ++i) {
+         data[i][0] = src[i*4+0]; data[i][1] = src[i*4+1]; data[i][2] = src[i*4+2]; data[i][3] = 255;
+      }
+      src = &data[0][0];
+   }
+
+   stb__CompressColorBlock(dest,(unsigned char*) src,mode);
+}
+
+void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src) // single channel: reads 16 bytes from src, writes 8 bytes to dest
+{
+   stb__CompressAlphaBlock(dest,(unsigned char*) src, 1); // stride 1: consecutive bytes are consecutive pixels
+}
+
+void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src) // two interleaved channels in, two 8-byte blocks out
+{
+   int ch;  // channel ch is read from src+ch with stride 2 and encoded at dest+8*ch
+   for (ch = 0; ch < 2; ++ch) stb__CompressAlphaBlock(dest + 8*ch, (unsigned char *) src + ch, 2);
+}
+#endif // STB_DXT_IMPLEMENTATION
+
+// Compile with STB_DXT_IMPLEMENTATION and STB_DXT_GENERATE_TABLES
+// defined to generate the tables above.
+#ifdef STB_DXT_GENERATE_TABLES
+#include <stdio.h>
+
+int main() // table generator: prints the stb__OMatch5 / stb__OMatch6 definitions to stdout
+{
+   int i, j;
+   const char *omatch_names[] = { "stb__OMatch5", "stb__OMatch6" };
+   int dequant_mults[2] = { 33*4, 65 }; // .4 fixed-point dequant multipliers
+
+   // optimal endpoint tables
+   for (i = 0; i < 2; ++i) {
+      int dequant = dequant_mults[i];
+      int size = i ? 64 : 32; // number of endpoint levels: 32 for the first table, 64 for the second
+      printf("static const unsigned char %s[256][2] = {\n", omatch_names[i]);
+      for (j = 0; j < 256; ++j) { // j is the 8-bit target value; exhaustive search over all endpoint pairs
+         int mn, mx;
+         int best_mn = 0, best_mx = 0;
+         int best_err = 256 * 100;
+         for (mn=0;mn<size;mn++) {
+            for (mx=0;mx<size;mx++) {
+               int mine = (mn * dequant) >> 4;
+               int maxe = (mx * dequant) >> 4;
+               int err = abs(stb__Lerp13(maxe, mine) - j) * 100;
+
+               // DX10 spec says that interpolation must be within 3% of "correct" result,
+               // add this as error term. Normally we'd expect a random distribution of
+               // +-1.5% error, but nowhere in the spec does it say that the error has to be
+               // unbiased - better safe than sorry.
+               err += abs(maxe - mine) * 3;
+
+               if(err < best_err) {
+                  best_mn = mn;
+                  best_mx = mx;
+                  best_err = err;
+               }
+            }
+         }
+         if ((j % 8) == 0) printf("  "); // 2 spaces, third is done below
+         printf(" { %2d, %2d },", best_mx, best_mn);
+         if ((j % 8) == 7) printf("\n");
+      }
+      printf("};\n");
+   }
+
+   return 0;
+}
+#endif
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/zenovis/stbi/include/stb_image.h b/zenovis/stbi/include/stb_image.h
index c891d775bb..9eedabedc4 100644
--- a/zenovis/stbi/include/stb_image.h
+++ b/zenovis/stbi/include/stb_image.h
@@ -1,5 +1,5 @@
-/* stb_image - v2.12 - public domain image loader - http://nothings.org/stb_image.h
-                                     no warranty implied; use at your own risk
+/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb
+                                  no warranty implied; use at your own risk
 
    Do this:
       #define STB_IMAGE_IMPLEMENTATION
@@ -21,7 +21,7 @@
           avoid problematic images and only need the trivial interface
 
       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
-      PNG 1/2/4/8-bit-per-channel (16 bpc not supported)
+      PNG 1/2/4/8/16-bit-per-channel
 
       TGA (not sure what subset, if a subset)
       BMP non-1bpp, non-RLE
@@ -42,136 +42,37 @@
    Full documentation under "DOCUMENTATION" below.
 
 
-   Revision 2.00 release notes:
-
-      - Progressive JPEG is now supported.
-
-      - PPM and PGM binary formats are now supported, thanks to Ken Miller.
-
-      - x86 platforms now make use of SSE2 SIMD instructions for
-        JPEG decoding, and ARM platforms can use NEON SIMD if requested.
-        This work was done by Fabian "ryg" Giesen. SSE2 is used by
-        default, but NEON must be enabled explicitly; see docs.
-
-        With other JPEG optimizations included in this version, we see
-        2x speedup on a JPEG on an x86 machine, and a 1.5x speedup
-        on a JPEG on an ARM machine, relative to previous versions of this
-        library. The same results will not obtain for all JPGs and for all
-        x86/ARM machines. (Note that progressive JPEGs are significantly
-        slower to decode than regular JPEGs.) This doesn't mean that this
-        is the fastest JPEG decoder in the land; rather, it brings it
-        closer to parity with standard libraries. If you want the fastest
-        decode, look elsewhere. (See "Philosophy" section of docs below.)
-
-        See final bullet items below for more info on SIMD.
-
-      - Added STBI_MALLOC, STBI_REALLOC, and STBI_FREE macros for replacing
-        the memory allocator. Unlike other STBI libraries, these macros don't
-        support a context parameter, so if you need to pass a context in to
-        the allocator, you'll have to store it in a global or a thread-local
-        variable.
-
-      - Split existing STBI_NO_HDR flag into two flags, STBI_NO_HDR and
-        STBI_NO_LINEAR.
-            STBI_NO_HDR:     suppress implementation of .hdr reader format
-            STBI_NO_LINEAR:  suppress high-dynamic-range light-linear float API
-
-      - You can suppress implementation of any of the decoders to reduce
-        your code footprint by #defining one or more of the following
-        symbols before creating the implementation.
-
-            STBI_NO_JPEG
-            STBI_NO_PNG
-            STBI_NO_BMP
-            STBI_NO_PSD
-            STBI_NO_TGA
-            STBI_NO_GIF
-            STBI_NO_HDR
-            STBI_NO_PIC
-            STBI_NO_PNM   (.ppm and .pgm)
-
-      - You can request *only* certain decoders and suppress all other ones
-        (this will be more forward-compatible, as addition of new decoders
-        doesn't require you to disable them explicitly):
-
-            STBI_ONLY_JPEG
-            STBI_ONLY_PNG
-            STBI_ONLY_BMP
-            STBI_ONLY_PSD
-            STBI_ONLY_TGA
-            STBI_ONLY_GIF
-            STBI_ONLY_HDR
-            STBI_ONLY_PIC
-            STBI_ONLY_PNM   (.ppm and .pgm)
-
-         Note that you can define multiples of these, and you will get all
-         of them ("only x" and "only y" is interpreted to mean "only x&y").
-
-       - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
-         want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
-
-      - Compilation of all SIMD code can be suppressed with
-            #define STBI_NO_SIMD
-        It should not be necessary to disable SIMD unless you have issues
-        compiling (e.g. using an x86 compiler which doesn't support SSE
-        intrinsics or that doesn't support the method used to detect
-        SSE2 support at run-time), and even those can be reported as
-        bugs so I can refine the built-in compile-time checking to be
-        smarter.
-
-      - The old STBI_SIMD system which allowed installing a user-defined
-        IDCT etc. has been removed. If you need this, don't upgrade. My
-        assumption is that almost nobody was doing this, and those who
-        were will find the built-in SIMD more satisfactory anyway.
-
-      - RGB values computed for JPEG images are slightly different from
-        previous versions of stb_image. (This is due to using less
-        integer precision in SIMD.) The C code has been adjusted so
-        that the same RGB values will be computed regardless of whether
-        SIMD support is available, so your app should always produce
-        consistent results. But these results are slightly different from
-        previous versions. (Specifically, about 3% of available YCbCr values
-        will compute different RGB results from pre-1.49 versions by +-1;
-        most of the deviating values are one smaller in the G channel.)
-
-      - If you must produce consistent results with previous versions of
-        stb_image, #define STBI_JPEG_OLD and you will get the same results
-        you used to; however, you will not get the SIMD speedups for
-        the YCbCr-to-RGB conversion step (although you should still see
-        significant JPEG speedup from the other changes).
-
-        Please note that STBI_JPEG_OLD is a temporary feature; it will be
-        removed in future versions of the library. It is only intended for
-        near-term back-compatibility use.
-
-
-   Latest revision history:
+LICENSE
+
+  See end of file for license information.
+
+RECENT REVISION HISTORY:
+
+      2.30  (2024-05-31) avoid erroneous gcc warning
+      2.29  (2023-05-xx) optimizations
+      2.28  (2023-01-29) many error fixes, security errors, just tons of stuff
+      2.27  (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
+      2.26  (2020-07-13) many minor fixes
+      2.25  (2020-02-02) fix warnings
+      2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
+      2.23  (2019-08-11) fix clang static analysis warning
+      2.22  (2019-03-04) gif fixes, fix warnings
+      2.21  (2019-02-25) fix typo in comment
+      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+      2.19  (2018-02-11) fix warning
+      2.18  (2018-01-30) fix warnings
+      2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
+      2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
+      2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
+      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
                          RGB-format JPEG; remove white matting in PSD;
-                         allocate large structures on the stack; 
+                         allocate large structures on the stack;
                          correct channel count for PNG & BMP
       2.10  (2016-01-22) avoid warning introduced in 2.09
       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
-      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
-      2.07  (2015-09-13) partial animated GIF support
-                         limited 16-bit PSD support
-                         minor bugs, code cleanup, and compiler warnings
-      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
-      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
-      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
-      2.03  (2015-04-12) additional corruption checking
-                         stbi_set_flip_vertically_on_load
-                         fix NEON support; fix mingw support
-      2.02  (2015-01-19) fix incorrect assert, fix warning
-      2.01  (2015-01-17) fix various warnings
-      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
-      2.00  (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD
-                         progressive JPEG
-                         PGM/PPM support
-                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
-                         STBI_NO_*, STBI_ONLY_*
-                         GIF bugfix
 
    See end of file for full revision history.
 
@@ -186,34 +87,43 @@
     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
-    urraka@github (animated gif)           Junggon Kim (PNM comments)
-                                           Daniel Gibson (16-bit TGA)
-
- Optimizations & bugfixes
-    Fabian "ryg" Giesen
-    Arseny Kapoulkine
+    github:urraka (animated gif)           Junggon Kim (PNM comments)
+    Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
+                                           socks-the-fox (16-bit PNG)
+                                           Jeremy Sawicki (handle all ImageNet JPGs)
+ Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
+    Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
+    Arseny Kapoulkine                      Simon Breuss (16-bit PNM)
+    John-Mark Allen
+    Carmelo J Fdez-Aguera
 
  Bug & warning fixes
-    Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
-    Christpher Lloyd        Martin Golini      Jerry Jansson      Joseph Thomson
-    Dave Moore              Roy Eltham         Hayaki Saito       Phil Jordan
-    Won Chun                Luke Graham        Johan Duparc       Nathan Reed
-    the Horde3D community   Thomas Ruf         Ronny Chevalier    Nick Verigakis
-    Janez Zemva             John Bartholomew   Michal Cichon      svdijk@github
-    Jonathan Blow           Ken Hamada         Tero Hanninen      Baldur Karlsson
-    Laurent Gomila          Cort Stratton      Sergio Gonzalez    romigrou@github
-    Aruelien Pocheville     Thibault Reuille   Cass Everitt       Matthew Gregan
-    Ryamond Barbiero        Paul Du Bois       Engin Manap        snagar@github
-    Michaelangel007@github  Oriol Ferrer Mesia socks-the-fox
-    Blazej Dariusz Roszkowski
-
-
-LICENSE
-
-This software is dual-licensed to the public domain and under the following
-license: you are granted a perpetual, irrevocable license to copy, modify,
-publish, and distribute this file as you see fit.
-
+    Marc LeBlanc            David Woo          Guillaume George     Martins Mozeiko
+    Christpher Lloyd        Jerry Jansson      Joseph Thomson       Blazej Dariusz Roszkowski
+    Phil Jordan                                Dave Moore           Roy Eltham
+    Hayaki Saito            Nathan Reed        Won Chun
+    Luke Graham             Johan Duparc       Nick Verigakis       the Horde3D community
+    Thomas Ruf              Ronny Chevalier                         github:rlyeh
+    Janez Zemva             John Bartholomew   Michal Cichon        github:romigrou
+    Jonathan Blow           Ken Hamada         Tero Hanninen        github:svdijk
+    Eugene Golushkov        Laurent Gomila     Cort Stratton        github:snagar
+    Aruelien Pocheville     Sergio Gonzalez    Thibault Reuille     github:Zelex
+    Cass Everitt            Ryamond Barbiero                        github:grim210
+    Paul Du Bois            Engin Manap        Aldo Culquicondor    github:sammyhw
+    Philipp Wiesemann       Dale Weiler        Oriol Ferrer Mesia   github:phprus
+    Josh Tobin              Neil Bickford      Matthew Gregan       github:poppolopoppo
+    Julian Raschke          Gregory Mullen     Christian Floisand   github:darealshinji
+    Baldur Karlsson         Kevin Schmidt      JR Smith             github:Michaelangel007
+                            Brad Weinberger    Matvey Cherevko      github:mosra
+    Luca Sas                Alexander Veselov  Zack Middleton       [reserved]
+    Ryan C. Gordon          [reserved]                              [reserved]
+                     DO NOT ADD YOUR NAME HERE
+
+                     Jacko Dirks
+
+  To add your name to the credits, pick a random blank space in the middle and fill it.
+  80% of merge conflicts on stb PRs are due to people adding their name at the end
+  of the credits.
 */
 
 #ifndef STBI_INCLUDE_STB_IMAGE_H
@@ -222,10 +132,8 @@ publish, and distribute this file as you see fit.
 // DOCUMENTATION
 //
 // Limitations:
-//    - no 16-bit-per-channel PNG
 //    - no 12-bit-per-channel JPEG
 //    - no JPEGs with arithmetic coding
-//    - no 1-bit BMP
 //    - GIF always returns *comp=4
 //
 // Basic usage (see HDR discussion below for HDR usage):
@@ -235,13 +143,13 @@ publish, and distribute this file as you see fit.
 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
 //    // ... replace '0' with '1'..'4' to force that many components per pixel
 //    // ... but 'n' will always be the number that it would have been if you said 0
-//    stbi_image_free(data)
+//    stbi_image_free(data);
 //
 // Standard parameters:
-//    int *x       -- outputs image width in pixels
-//    int *y       -- outputs image height in pixels
-//    int *comp    -- outputs # of image components in image file
-//    int req_comp -- if non-zero, # of image components requested in result
+//    int *x                 -- outputs image width in pixels
+//    int *y                 -- outputs image height in pixels
+//    int *channels_in_file  -- outputs # of image components in image file
+//    int desired_channels   -- if non-zero, # of image components requested in result
 //
 // The return value from an image loader is an 'unsigned char *' which points
 // to the pixel data, or NULL on an allocation failure or if the image is
@@ -249,11 +157,12 @@ publish, and distribute this file as you see fit.
 // with each pixel consisting of N interleaved 8-bit components; the first
 // pixel pointed to is top-left-most in the image. There is no padding between
 // image scanlines or between pixels, regardless of format. The number of
-// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
-// If req_comp is non-zero, *comp has the number of components that _would_
-// have been output otherwise. E.g. if you set req_comp to 4, you will always
-// get RGBA output, but you can check *comp to see if it's trivially opaque
-// because e.g. there were only 3 channels in the source image.
+// components N is 'desired_channels' if desired_channels is non-zero, or
+// *channels_in_file otherwise. If desired_channels is non-zero,
+// *channels_in_file has the number of components that _would_ have been
+// output otherwise. E.g. if you set desired_channels to 4, you will always
+// get RGBA output, but you can check *channels_in_file to see if it's trivially
+// opaque because e.g. there were only 3 channels in the source image.
 //
 // An output image with N components has the following components interleaved
 // in this order in each pixel:
@@ -265,14 +174,50 @@ publish, and distribute this file as you see fit.
 //       4           red, green, blue, alpha
 //
 // If image loading fails for any reason, the return value will be NULL,
-// and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
-// can be queried for an extremely brief, end-user unfriendly explanation
-// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
-// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
+// and *x, *y, *channels_in_file will be unchanged. The function
+// stbi_failure_reason() can be queried for an extremely brief, end-user
+// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
+// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
 // more user-friendly ones.
 //
 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
 //
+// To query the width, height and component count of an image without having to
+// decode the full file, you can use the stbi_info family of functions:
+//
+//   int x,y,n,ok;
+//   ok = stbi_info(filename, &x, &y, &n);
+//   // returns ok=1 and sets x, y, n if image is a supported format,
+//   // 0 otherwise.
+//
+// Note that stb_image pervasively uses ints in its public API for sizes,
+// including sizes of memory buffers. This is now part of the API and thus
+// hard to change without causing breakage. As a result, the various image
+// loaders all have certain limits on image size; these differ somewhat
+// by format but generally boil down to either just under 2GB or just under
+// 1GB. When the decoded image would be larger than this, stb_image decoding
+// will fail.
+//
+// Additionally, stb_image will reject image files that have any of their
+// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
+// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
+// the only way to have an image with such dimensions load correctly
+// is for it to have a rather extreme aspect ratio. Either way, the
+// assumption here is that such larger images are likely to be malformed
+// or malicious. If you do need to load an image with individual dimensions
+// larger than that, and it still fits in the overall size limit, you can
+// #define STBI_MAX_DIMENSIONS on your own to be something larger.
+//
+// ===========================================================================
+//
+// UNICODE:
+//
+//   If compiling for Windows and you wish to use Unicode filenames, compile
+//   with
+//       #define STBI_WINDOWS_UTF8
+//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
+//   Windows wchar_t filenames to utf8.
+//
 // ===========================================================================
 //
 // Philosophy
@@ -285,15 +230,15 @@ publish, and distribute this file as you see fit.
 //
 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
 // and for best performance I may provide less-easy-to-use APIs that give higher
-// performance, in addition to the easy to use ones. Nevertheless, it's important
+// performance, in addition to the easy-to-use ones. Nevertheless, it's important
 // to keep in mind that from the standpoint of you, a client of this library,
-// all you care about is #1 and #3, and stb libraries do not emphasize #3 above all.
+// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
 //
 // Some secondary priorities arise directly from the first two, some of which
-// make more explicit reasons why performance can't be emphasized.
+// provide more explicit reasons why performance can't be emphasized.
 //
 //    - Portable ("ease of use")
-//    - Small footprint ("easy to maintain")
+//    - Small source code footprint ("easy to maintain")
 //    - No dependencies ("ease of use")
 //
 // ===========================================================================
@@ -325,13 +270,6 @@ publish, and distribute this file as you see fit.
 // (at least this is true for iOS and Android). Therefore, the NEON support is
 // toggled by a build flag: define STBI_NEON to get NEON loops.
 //
-// The output of the JPEG decoder is slightly different from versions where
-// SIMD support was introduced (that is, for versions before 1.49). The
-// difference is only +-1 in the 8-bit RGB channels, and only on a small
-// fraction of pixels. You can force the pre-1.49 behavior by defining
-// STBI_JPEG_OLD, but this will disable some of the SIMD decoding path
-// and hence cost some performance.
-//
 // If for some reason you do not want to use any of SIMD code, or if
 // you have issues compiling it, you can disable it entirely by
 // defining STBI_NO_SIMD.
@@ -340,11 +278,10 @@ publish, and distribute this file as you see fit.
 //
 // HDR image support   (disable by defining STBI_NO_HDR)
 //
-// stb_image now supports loading HDR images in general, and currently
-// the Radiance .HDR file format, although the support is provided
-// generically. You can still load any file through the existing interface;
-// if you attempt to load an HDR file, it will be automatically remapped to
-// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
+// stb_image supports loading HDR images in general, and currently the Radiance
+// .HDR file format specifically. You can still load any file through the existing
+// interface; if you attempt to load an HDR file, it will be automatically remapped
+// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
 // both of these constants can be reconfigured through this interface:
 //
 //     stbi_hdr_to_ldr_gamma(2.2f);
@@ -376,18 +313,59 @@ publish, and distribute this file as you see fit.
 //
 // iPhone PNG support:
 //
-// By default we convert iphone-formatted PNGs back to RGB, even though
-// they are internally encoded differently. You can disable this conversion
-// by by calling stbi_convert_iphone_png_to_rgb(0), in which case
-// you will always just get the native iphone "format" through (which
-// is BGR stored in RGB).
+// We optionally support converting iPhone-formatted PNGs (which store
+// premultiplied BGRA) back to RGB, even though they're internally encoded
+// differently. To enable this conversion, call
+// stbi_convert_iphone_png_to_rgb(1).
 //
 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
 // pixel to remove any premultiplied alpha *only* if the image file explicitly
 // says there's premultiplied data (currently only happens in iPhone images,
 // and only if iPhone convert-to-rgb processing is on).
 //
-
+// ===========================================================================
+//
+// ADDITIONAL CONFIGURATION
+//
+//  - You can suppress implementation of any of the decoders to reduce
+//    your code footprint by #defining one or more of the following
+//    symbols before creating the implementation.
+//
+//        STBI_NO_JPEG
+//        STBI_NO_PNG
+//        STBI_NO_BMP
+//        STBI_NO_PSD
+//        STBI_NO_TGA
+//        STBI_NO_GIF
+//        STBI_NO_HDR
+//        STBI_NO_PIC
+//        STBI_NO_PNM   (.ppm and .pgm)
+//
+//  - You can request *only* certain decoders and suppress all other ones
+//    (this will be more forward-compatible, as addition of new decoders
+//    doesn't require you to disable them explicitly):
+//
+//        STBI_ONLY_JPEG
+//        STBI_ONLY_PNG
+//        STBI_ONLY_BMP
+//        STBI_ONLY_PSD
+//        STBI_ONLY_TGA
+//        STBI_ONLY_GIF
+//        STBI_ONLY_HDR
+//        STBI_ONLY_PIC
+//        STBI_ONLY_PNM   (.ppm and .pgm)
+//
+//  - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
+//    want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
+//
+//  - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater
+//    than that size (in either width or height) without further processing.
+//    This is to let programs in the wild set an upper bound to prevent
+//    denial-of-service attacks on untrusted data, as one could generate a
+//    valid image of gigantic dimensions and force stb_image to allocate a
+//    huge block of memory and spend disproportionate time decoding it. By
+//    default this is set to (1 << 24), which is 16777216, but that's still
+//    very big.
 
 #ifndef STBI_NO_STDIO
 #include <stdio.h>
@@ -397,7 +375,7 @@ publish, and distribute this file as you see fit.
 
 enum
 {
-   STBI_default = 0, // only used for req_comp
+   STBI_default = 0, // only used for desired_channels
 
    STBI_grey       = 1,
    STBI_grey_alpha = 2,
@@ -405,17 +383,21 @@ enum
    STBI_rgb_alpha  = 4
 };
 
+#include <stdlib.h>
 typedef unsigned char stbi_uc;
+typedef unsigned short stbi_us;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#ifndef STBIDEF
 #ifdef STB_IMAGE_STATIC
 #define STBIDEF static
 #else
 #define STBIDEF extern
 #endif
+#endif
 
 //////////////////////////////////////////////////////////////////////////////
 //
@@ -433,22 +415,52 @@ typedef struct
    int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
 } stbi_io_callbacks;
 
-STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *comp, int req_comp);
-STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *comp, int req_comp);
-STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *comp, int req_comp);
+////////////////////////////////////
+//
+// 8-bits-per-channel interface
+//
+
+STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
 
 #ifndef STBI_NO_STDIO
-STBIDEF stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
+STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
 // for stbi_load_from_file, file pointer is left pointing immediately after image
 #endif
 
+#ifndef STBI_NO_GIF
+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
+#endif
+
+#ifdef STBI_WINDOWS_UTF8
+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
+#endif
+
+////////////////////////////////////
+//
+// 16-bits-per-channel interface
+//
+
+STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+#endif
+
+////////////////////////////////////
+//
+// float-per-channel interface
+//
 #ifndef STBI_NO_LINEAR
-   STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *comp, int req_comp);
-   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
-   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
+   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
 
    #ifndef STBI_NO_STDIO
-   STBIDEF float *stbi_loadf_from_file  (FILE *f,                int *x, int *y, int *comp, int req_comp);
+   STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
    #endif
 #endif
 
@@ -472,7 +484,7 @@ STBIDEF int      stbi_is_hdr_from_file(FILE *f);
 
 
 // get a VERY brief reason for failure
-// NOT THREADSAFE
+// on most compilers (and ALL modern mainstream compilers) this is threadsafe
 STBIDEF const char *stbi_failure_reason  (void);
 
 // free the loaded image -- this is just free()
@@ -481,11 +493,14 @@ STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
 // get image dimensions & components without fully decoding
 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
+STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
+STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
 
 #ifndef STBI_NO_STDIO
-STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
-STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
-
+STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp);
+STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp);
+STBIDEF int      stbi_is_16_bit          (char const *filename);
+STBIDEF int      stbi_is_16_bit_from_file(FILE *f);
 #endif
 
 
@@ -502,6 +517,13 @@ STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
 // flip the image vertically, so the first pixel in the output array is the bottom left
 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
 
+// as above, but each setting only applies to images loaded on the calling thread.
+// these functions are only available if your compiler supports thread-local
+// variables; calling them will fail to link if your compiler doesn't
+STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
+
 // ZLIB client - used by PNG, available for other purposes
 
 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
@@ -566,9 +588,10 @@ STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
 #include <stddef.h> // ptrdiff_t on osx
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 
 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
-#include <math.h>  // ldexp
+#include <math.h>  // ldexp, pow
 #endif
 
 #ifndef STBI_NO_STDIO
@@ -580,6 +603,12 @@ STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
 #define STBI_ASSERT(x) assert(x)
 #endif
 
+#ifdef __cplusplus
+#define STBI_EXTERN extern "C"
+#else
+#define STBI_EXTERN extern
+#endif
+
 
 #ifndef _MSC_VER
    #ifdef __cplusplus
@@ -591,8 +620,25 @@ STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
    #define stbi_inline __forceinline
 #endif
 
+#ifndef STBI_NO_THREAD_LOCALS
+   #if defined(__cplusplus) &&  __cplusplus >= 201103L
+      #define STBI_THREAD_LOCAL       thread_local
+   #elif defined(__GNUC__) && __GNUC__ < 5
+      #define STBI_THREAD_LOCAL       __thread
+   #elif defined(_MSC_VER)
+      #define STBI_THREAD_LOCAL       __declspec(thread)
+   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
+      #define STBI_THREAD_LOCAL       _Thread_local
+   #endif
 
-#ifdef _MSC_VER
+   #ifndef STBI_THREAD_LOCAL
+      #if defined(__GNUC__)
+        #define STBI_THREAD_LOCAL       __thread
+      #endif
+   #endif
+#endif
+
+#if defined(_MSC_VER) || defined(__SYMBIAN32__)
 typedef unsigned short stbi__uint16;
 typedef   signed short stbi__int16;
 typedef unsigned int   stbi__uint32;
@@ -621,7 +667,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
 #ifdef STBI_HAS_LROTL
    #define stbi_lrot(x,y)  _lrotl(x,y)
 #else
-   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
+   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
 #endif
 
 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
@@ -649,12 +695,14 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
 #define STBI__X86_TARGET
 #endif
 
-#if defined(__GNUC__) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
-// NOTE: not clear do we actually need this for the 64-bit path?
+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
-// (but compiling with -msse2 allows the compiler to use SSE2 everywhere;
-// this is just broken and gcc are jerks for not fixing it properly
-// http://www.virtualdub.org/blog/pivot/entry.php?id=363 )
+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
+// but previous attempts to provide the SSE2 functions with runtime
+// detection caused numerous issues. The way architecture extensions are
+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
+// New behavior: if compiled with -msse2, we use SSE2 without any
+// detection; if not, we don't use it at all.
 #define STBI_NO_SIMD
 #endif
 
@@ -702,25 +750,27 @@ static int stbi__cpuid3(void)
 
 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
 
-static int stbi__sse2_available()
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
 {
    int info3 = stbi__cpuid3();
    return ((info3 >> 26) & 1) != 0;
 }
+#endif
+
 #else // assume GCC-style if not VC++
 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 
-static int stbi__sse2_available()
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
 {
-#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later
-   // GCC 4.8+ has a nice way to do this
-   return __builtin_cpu_supports("sse2");
-#else
-   // portable way to do this, preferably without using GCC inline ASM?
-   // just bail for now.
-   return 0;
-#endif
+   // If we're even attempting to compile this on GCC/Clang, that means
+   // -msse2 is on, which means the compiler is allowed to use SSE2
+   // instructions at will, and so are we.
+   return 1;
 }
+#endif
+
 #endif
 #endif
 
@@ -731,14 +781,21 @@ static int stbi__sse2_available()
 
 #ifdef STBI_NEON
 #include <arm_neon.h>
-// assume GCC or Clang on ARM targets
+#ifdef _MSC_VER
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+#else
 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 #endif
+#endif
 
 #ifndef STBI_SIMD_ALIGN
 #define STBI_SIMD_ALIGN(type, name) type name
 #endif
 
+#ifndef STBI_MAX_DIMENSIONS
+#define STBI_MAX_DIMENSIONS (1 << 24)
+#endif
+
 ///////////////////////////////////////////////
 //
 //  stbi__context struct and start_xxx functions
@@ -756,6 +813,7 @@ typedef struct
    int read_from_callbacks;
    int buflen;
    stbi_uc buffer_start[128];
+   int callback_already_read;
 
    stbi_uc *img_buffer, *img_buffer_end;
    stbi_uc *img_buffer_original, *img_buffer_original_end;
@@ -769,6 +827,7 @@ static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
 {
    s->io.read = NULL;
    s->read_from_callbacks = 0;
+   s->callback_already_read = 0;
    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
 }
@@ -780,7 +839,8 @@ static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *
    s->io_user_data = user;
    s->buflen = sizeof(s->buffer_start);
    s->read_from_callbacks = 1;
-   s->img_buffer_original = s->buffer_start;
+   s->callback_already_read = 0;
+   s->img_buffer = s->img_buffer_original = s->buffer_start;
    stbi__refill_buffer(s);
    s->img_buffer_original_end = s->img_buffer_end;
 }
@@ -794,12 +854,17 @@ static int stbi__stdio_read(void *user, char *data, int size)
 
 static void stbi__stdio_skip(void *user, int n)
 {
+   int ch;
    fseek((FILE*) user, n, SEEK_CUR);
+   ch = fgetc((FILE*) user);  /* have to read a byte to reset feof()'s flag */
+   if (ch != EOF) {
+      ungetc(ch, (FILE *) user);  /* push byte back onto stream if valid. */
+   }
 }
 
 static int stbi__stdio_eof(void *user)
 {
-   return feof((FILE*) user);
+   return feof((FILE*) user) || ferror((FILE *) user);
 }
 
 static stbi_io_callbacks stbi__stdio_callbacks =
@@ -827,79 +892,197 @@ static void stbi__rewind(stbi__context *s)
    s->img_buffer_end = s->img_buffer_original_end;
 }
 
+enum
+{
+   STBI_ORDER_RGB,
+   STBI_ORDER_BGR
+};
+
+typedef struct
+{
+   int bits_per_channel;
+   int num_channels;
+   int channel_order;
+} stbi__result_info;
+
 #ifndef STBI_NO_JPEG
 static int      stbi__jpeg_test(stbi__context *s);
-static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PNG
 static int      stbi__png_test(stbi__context *s);
-static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
+static int      stbi__png_is16(stbi__context *s);
 #endif
 
 #ifndef STBI_NO_BMP
 static int      stbi__bmp_test(stbi__context *s);
-static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_TGA
 static int      stbi__tga_test(stbi__context *s);
-static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PSD
 static int      stbi__psd_test(stbi__context *s);
-static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
 static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
+static int      stbi__psd_is16(stbi__context *s);
 #endif
 
 #ifndef STBI_NO_HDR
 static int      stbi__hdr_test(stbi__context *s);
-static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PIC
 static int      stbi__pic_test(stbi__context *s);
-static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_GIF
 static int      stbi__gif_test(stbi__context *s);
-static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
 static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PNM
 static int      stbi__pnm_test(stbi__context *s);
-static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
+static int      stbi__pnm_is16(stbi__context *s);
 #endif
 
-// this is not threadsafe
-static const char *stbi__g_failure_reason;
+static
+#ifdef STBI_THREAD_LOCAL
+STBI_THREAD_LOCAL
+#endif
+const char *stbi__g_failure_reason;
 
 STBIDEF const char *stbi_failure_reason(void)
 {
    return stbi__g_failure_reason;
 }
 
+#ifndef STBI_NO_FAILURE_STRINGS
 static int stbi__err(const char *str)
 {
    stbi__g_failure_reason = str;
    return 0;
 }
+#endif
 
 static void *stbi__malloc(size_t size)
 {
     return STBI_MALLOC(size);
 }
 
+// stb_image uses ints pervasively, including for offset calculations.
+// therefore the largest decoded image size we can support with the
+// current code, even on 64-bit targets, is INT_MAX. this is not a
+// significant limitation for the intended use case.
+//
+// we do, however, need to make sure our size calculations don't
+// overflow. hence a few helper functions for size calculations that
+// multiply integers together, making sure that they're non-negative
+// and no overflow occurs.
+
+// returns 1 if a + b fits in an int, 0 if it would exceed INT_MAX.
+// a negative b is rejected; a is not sign-checked here.
+static int stbi__addsizes_valid(int a, int b)
+{
+   if (b < 0) return 0;
+   // now 0 <= b <= INT_MAX, hence also
+   // 0 <= INT_MAX - b <= INT_MAX.
+   // And "a + b <= INT_MAX" (which might overflow) is the
+   // same as "a <= INT_MAX - b" (which cannot overflow).
+   return a <= INT_MAX - b;
+}
+
+// returns 1 if the product is valid, 0 on overflow.
+// negative factors are considered invalid.
+static int stbi__mul2sizes_valid(int a, int b)
+{
+   if (a < 0 || b < 0) return 0;
+   if (b == 0) return 1; // mul-by-0 is always safe
+   // portable way to check for no overflows in a*b
+   return a <= INT_MAX/b;
+}
+
+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
+// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad2sizes_valid(int a, int b, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
+}
+#endif
+
+// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad3sizes_valid(int a, int b, int c, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
+      stbi__addsizes_valid(a*b*c, add);
+}
+
+// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
+static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
+      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
+}
+#endif
+
+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
+// mallocs with size overflow checking
+static void *stbi__malloc_mad2(int a, int b, int add)
+{
+   if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
+   return stbi__malloc(a*b + add);
+}
+#endif
+
+static void *stbi__malloc_mad3(int a, int b, int c, int add)
+{
+   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
+   return stbi__malloc(a*b*c + add);
+}
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
+static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
+{
+   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
+   return stbi__malloc(a*b*c*d + add);
+}
+#endif
+
+// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow.
+static int stbi__addints_valid(int a, int b)
+{
+   if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow
+   if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0.
+   return a <= INT_MAX - b;
+}
+
+// returns 1 if the product a*b fits in a signed short (SHRT_MIN..SHRT_MAX), 0 otherwise; a*b is never evaluated.
+static int stbi__mul2shorts_valid(int a, int b)
+{
+   if (a == 0 || b == 0) return 1; // multiplication by 0 is always 0
+   if (b == -1) return a >= -SHRT_MAX && a <= -SHRT_MIN; // a*b == -a, so bound a directly instead of dividing by -1
+   if ((a > 0) == (b > 0)) return b > 0 ? a <= SHRT_MAX/b : a >= SHRT_MAX/b; // positive product; a negative divisor flips the comparison
+   return b > 0 ? a >= SHRT_MIN/b : a <= SHRT_MIN/b; // negative product, same as a * b >= SHRT_MIN
+}
+
 // stbi__err - error
 // stbi__errpf - error returning pointer to float
 // stbi__errpuc - error returning pointer to unsigned char
@@ -928,40 +1111,69 @@ static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
 #endif
 
-static int stbi__vertically_flip_on_load = 0;
+static int stbi__vertically_flip_on_load_global = 0;
 
 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
 {
-    stbi__vertically_flip_on_load = flag_true_if_should_flip;
+   stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
 }
 
-static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+#ifndef STBI_THREAD_LOCAL
+#define stbi__vertically_flip_on_load  stbi__vertically_flip_on_load_global
+#else
+static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;
+
+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
 {
-   #ifndef STBI_NO_JPEG
-   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp);
-   #endif
+   stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
+   stbi__vertically_flip_on_load_set = 1;
+}
+
+#define stbi__vertically_flip_on_load  (stbi__vertically_flip_on_load_set       \
+                                         ? stbi__vertically_flip_on_load_local  \
+                                         : stbi__vertically_flip_on_load_global)
+#endif // STBI_THREAD_LOCAL
+
+static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
+{
+   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
+   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
+   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
+   ri->num_channels = 0;
+
+   // test the formats with a very explicit header first (at least a FOURCC
+   // or distinctive magic number first)
    #ifndef STBI_NO_PNG
-   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp);
+   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_BMP
-   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp);
+   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_GIF
-   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp);
+   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_PSD
-   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp);
+   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
+   #else
+   STBI_NOTUSED(bpc);
    #endif
    #ifndef STBI_NO_PIC
-   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp);
+   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
+   #endif
+
+   // then the formats that can end up attempting to load with just 1 or 2
+   // bytes matching expectations; these are prone to false positives, so
+   // try them later
+   #ifndef STBI_NO_JPEG
+   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_PNM
-   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp);
+   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
    #endif
 
    #ifndef STBI_NO_HDR
    if (stbi__hdr_test(s)) {
-      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp);
+      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
       return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
    }
    #endif
@@ -969,66 +1181,179 @@ static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *com
    #ifndef STBI_NO_TGA
    // test tga last because it's a crappy test!
    if (stbi__tga_test(s))
-      return stbi__tga_load(s,x,y,comp,req_comp);
+      return stbi__tga_load(s,x,y,comp,req_comp, ri);
    #endif
 
    return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
 }
 
-static unsigned char *stbi__load_flip(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+// narrows a 16-bit-per-channel image to 8 bits per channel; orig is freed on success and on failure
+static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
 {
-   unsigned char *result = stbi__load_main(s, x, y, comp, req_comp);
+   int i;
+   int img_len = w * h * channels;
+   stbi_uc *reduced;
 
-   if (stbi__vertically_flip_on_load && result != NULL) {
-      int w = *x, h = *y;
-      int depth = req_comp ? req_comp : *comp;
-      int row,col,z;
-      stbi_uc temp;
-
-      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
-      for (row = 0; row < (h>>1); row++) {
-         for (col = 0; col < w; col++) {
-            for (z = 0; z < depth; z++) {
-               temp = result[(row * w + col) * depth + z];
-               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
-               result[((h - row - 1) * w + col) * depth + z] = temp;
-            }
-         }
+   reduced = (stbi_uc *) stbi__malloc(img_len);
+   // the caller overwrites its only pointer to orig with our return value, so release orig before failing
+   if (reduced == NULL) { STBI_FREE(orig); return stbi__errpuc("outofmem", "Out of memory"); }
+   for (i = 0; i < img_len; ++i)
+      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // keep the high byte of each 16-bit sample as the 8-bit value
+   STBI_FREE(orig);
+   return reduced;
+}
+
+// widens an 8-bit-per-channel image to 16 bits per channel; orig is freed on success and on failure
+static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
+{
+   int i;
+   int img_len = w * h * channels;
+   stbi__uint16 *enlarged;
+   // byte count is computed in size_t: img_len*2 evaluated as int overflows once img_len > INT_MAX/2
+   enlarged = (stbi__uint16 *) stbi__malloc((size_t) img_len * 2);
+   // the caller overwrites its only pointer to orig with our return value, so release orig before failing
+   if (enlarged == NULL) { STBI_FREE(orig); return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); }
+   for (i = 0; i < img_len; ++i)
+      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
+   STBI_FREE(orig);
+   return enlarged;
+}
+
+static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
+{
+   int row;
+   size_t bytes_per_row = (size_t)w * bytes_per_pixel;
+   stbi_uc temp[2048];
+   stbi_uc *bytes = (stbi_uc *)image;
+
+   for (row = 0; row < (h>>1); row++) {
+      stbi_uc *row0 = bytes + row*bytes_per_row;
+      stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
+      // swap row0 with row1
+      size_t bytes_left = bytes_per_row;
+      while (bytes_left) {
+         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
+         memcpy(temp, row0, bytes_copy);
+         memcpy(row0, row1, bytes_copy);
+         memcpy(row1, temp, bytes_copy);
+         row0 += bytes_copy;
+         row1 += bytes_copy;
+         bytes_left -= bytes_copy;
       }
    }
+}
+
+#ifndef STBI_NO_GIF
+static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
+{
+   int slice;
+   int slice_size = w * h * bytes_per_pixel;
+
+   stbi_uc *bytes = (stbi_uc *)image;
+   for (slice = 0; slice < z; ++slice) {
+      stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
+      bytes += slice_size;
+   }
+}
+#endif
 
-   return result;
+// loads via stbi__load_main, reduces a 16-bit result to 8 bits per channel, then applies the flip-on-load setting; NULL on failure
+static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
+
+   if (result == NULL)
+      return NULL;
+
+   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
+   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
+
+   if (ri.bits_per_channel != 8) {
+      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
+      if (result == NULL) return NULL; // the conversion allocates and can fail; do not hand NULL to the flip below
+      ri.bits_per_channel = 8;
+   }
+
+   // @TODO: move stbi__convert_format to here
+   if (stbi__vertically_flip_on_load) {
+      int channels = req_comp ? req_comp : *comp;
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
+   }
+   return (unsigned char *) result;
 }
 
-#ifndef STBI_NO_HDR
+// loads via stbi__load_main, widens an 8-bit result to 16 bits per channel, then applies the flip-on-load setting; NULL on failure
+static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
+
+   if (result == NULL)
+      return NULL;
+
+   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
+   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
+
+   if (ri.bits_per_channel != 16) {
+      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
+      if (result == NULL) return NULL; // the conversion allocates and can fail; do not hand NULL to the flip below
+      ri.bits_per_channel = 16;
+   }
+
+   // @TODO: move stbi__convert_format16 to here
+   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
+   if (stbi__vertically_flip_on_load) {
+      int channels = req_comp ? req_comp : *comp;
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
+   }
+   return (stbi__uint16 *) result;
+}
+
+#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
 {
    if (stbi__vertically_flip_on_load && result != NULL) {
-      int w = *x, h = *y;
-      int depth = req_comp ? req_comp : *comp;
-      int row,col,z;
-      float temp;
-
-      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
-      for (row = 0; row < (h>>1); row++) {
-         for (col = 0; col < w; col++) {
-            for (z = 0; z < depth; z++) {
-               temp = result[(row * w + col) * depth + z];
-               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
-               result[((h - row - 1) * w + col) * depth + z] = temp;
-            }
-         }
-      }
+      int channels = req_comp ? req_comp : *comp;
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
    }
 }
 #endif
 
 #ifndef STBI_NO_STDIO
 
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
+STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
+#endif
+
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
+{
+	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
+}
+#endif
+
 static FILE *stbi__fopen(char const *filename, char const *mode)
 {
    FILE *f;
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+   wchar_t wMode[64];
+   wchar_t wFilename[1024];
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
+      return 0;
+
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
+      return 0;
+
 #if defined(_MSC_VER) && _MSC_VER >= 1400
+	if (0 != _wfopen_s(&f, wFilename, wMode))
+		f = 0;
+#else
+   f = _wfopen(wFilename, wMode);
+#endif
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400
    if (0 != fopen_s(&f, filename, mode))
       f=0;
 #else
@@ -1053,42 +1378,98 @@ STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req
    unsigned char *result;
    stbi__context s;
    stbi__start_file(&s,f);
-   result = stbi__load_flip(&s,x,y,comp,req_comp);
+   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
    if (result) {
       // need to 'unget' all the characters in the IO buffer
       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
    }
    return result;
 }
+
+STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__uint16 *result;
+   stbi__context s;
+   stbi__start_file(&s,f);
+   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
+   if (result) {
+      // need to 'unget' all the characters in the IO buffer
+      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
+   }
+   return result;
+}
+
+STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
+{
+   FILE *f = stbi__fopen(filename, "rb");
+   stbi__uint16 *result;
+   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
+   result = stbi_load_from_file_16(f,x,y,comp,req_comp);
+   fclose(f);
+   return result;
+}
+
+
 #endif //!STBI_NO_STDIO
 
+STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
+}
+
+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
+   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
+}
+
 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
 {
    stbi__context s;
    stbi__start_mem(&s,buffer,len);
-   return stbi__load_flip(&s,x,y,comp,req_comp);
+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
 }
 
 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
 {
    stbi__context s;
    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
-   return stbi__load_flip(&s,x,y,comp,req_comp);
+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
 }
 
+#ifndef STBI_NO_GIF
+// loads every frame of an in-memory GIF via stbi__load_gif_main (*z receives the frame count); returns NULL on failure
+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
+{
+   unsigned char *result;
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
+   // on failure result is NULL and *x/*y/*z may not have been written, so only flip a real image;
+   if (result != NULL && stbi__vertically_flip_on_load) { // pixel size follows req_comp when one was requested, as in the other load paths
+      stbi__vertical_flip_slices( result, *x, *y, *z, req_comp ? req_comp : *comp );
+   }
+   return result;
+}
+#endif
+
 #ifndef STBI_NO_LINEAR
 static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
 {
    unsigned char *data;
    #ifndef STBI_NO_HDR
    if (stbi__hdr_test(s)) {
-      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp);
+      stbi__result_info ri;
+      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
       if (hdr_data)
          stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
       return hdr_data;
    }
    #endif
-   data = stbi__load_flip(s, x, y, comp, req_comp);
+   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
    if (data)
       return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
    return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
@@ -1158,12 +1539,16 @@ STBIDEF int      stbi_is_hdr          (char const *filename)
    return result;
 }
 
-STBIDEF int      stbi_is_hdr_from_file(FILE *f)
+STBIDEF int stbi_is_hdr_from_file(FILE *f)
 {
    #ifndef STBI_NO_HDR
+   long pos = ftell(f);
+   int res;
    stbi__context s;
    stbi__start_file(&s,f);
-   return stbi__hdr_test(&s);
+   res = stbi__hdr_test(&s);
+   fseek(f, pos, SEEK_SET);
+   return res;
    #else
    STBI_NOTUSED(f);
    return 0;
@@ -1212,6 +1597,7 @@ enum
 static void stbi__refill_buffer(stbi__context *s)
 {
    int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
+   s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original);
    if (n == 0) {
       // at end of file, treat same as if from memory, but need to handle case
       // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
@@ -1236,6 +1622,9 @@ stbi_inline static stbi_uc stbi__get8(stbi__context *s)
    return 0;
 }
 
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+// nothing
+#else
 stbi_inline static int stbi__at_eof(stbi__context *s)
 {
    if (s->io.read) {
@@ -1247,9 +1636,14 @@ stbi_inline static int stbi__at_eof(stbi__context *s)
 
    return s->img_buffer >= s->img_buffer_end;
 }
+#endif
 
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
+// nothing
+#else
 static void stbi__skip(stbi__context *s, int n)
 {
+   if (n == 0) return;  // already there!
    if (n < 0) {
       s->img_buffer = s->img_buffer_end;
       return;
@@ -1264,7 +1658,11 @@ static void stbi__skip(stbi__context *s, int n)
    }
    s->img_buffer += n;
 }
+#endif
 
+#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
+// nothing
+#else
 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
 {
    if (s->io.read) {
@@ -1288,18 +1686,27 @@ static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
    } else
       return 0;
 }
+#endif
 
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
+// nothing
+#else
 static int stbi__get16be(stbi__context *s)
 {
    int z = stbi__get8(s);
    return (z << 8) + stbi__get8(s);
 }
+#endif
 
+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
+// nothing
+#else
 static stbi__uint32 stbi__get32be(stbi__context *s)
 {
    stbi__uint32 z = stbi__get16be(s);
    return (z << 16) + stbi__get16be(s);
 }
+#endif
 
 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
 // nothing
@@ -1315,13 +1722,16 @@ static int stbi__get16le(stbi__context *s)
 static stbi__uint32 stbi__get32le(stbi__context *s)
 {
    stbi__uint32 z = stbi__get16le(s);
-   return z + (stbi__get16le(s) << 16);
+   z += (stbi__uint32)stbi__get16le(s) << 16;
+   return z;
 }
 #endif
 
 #define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
 
-
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+// nothing
+#else
 //////////////////////////////////////////////////////////////////////////////
 //
 //  generic converter from built-in img_n to req_comp
@@ -1337,7 +1747,11 @@ static stbi_uc stbi__compute_y(int r, int g, int b)
 {
    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
 }
+#endif
 
+#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+// nothing
+#else
 static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
 {
    int i,j;
@@ -1346,7 +1760,7 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
    if (req_comp == img_n) return data;
    STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
 
-   good = (unsigned char *) stbi__malloc(req_comp * x * y);
+   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
    if (good == NULL) {
       STBI_FREE(data);
       return stbi__errpuc("outofmem", "Out of memory");
@@ -1356,37 +1770,97 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
       unsigned char *src  = data + j * x * img_n   ;
       unsigned char *dest = good + j * x * req_comp;
 
-      #define COMBO(a,b)  ((a)*8+(b))
-      #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      #define STBI__COMBO(a,b)  ((a)*8+(b))
+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
       // convert source image with img_n components to one with req_comp components;
       // avoid switch per pixel, so use switch per scanline and massive macros
-      switch (COMBO(img_n, req_comp)) {
-         CASE(1,2) dest[0]=src[0], dest[1]=255; break;
-         CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
-         CASE(2,1) dest[0]=src[0]; break;
-         CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
-         CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
-         CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
-         CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
-         CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
-         default: STBI_ASSERT(0);
+      switch (STBI__COMBO(img_n, req_comp)) {
+         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
+         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
+         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
+         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
+         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
+         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
+         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion");
       }
-      #undef CASE
+      #undef STBI__CASE
    }
 
    STBI_FREE(data);
    return good;
 }
+#endif
+
+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
+// nothing
+#else
+static stbi__uint16 stbi__compute_y_16(int r, int g, int b) // Y (gray) value from r,g,b with integer weights 77/150/29, which sum to 256
+{
+   return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8); // weighted sum, then >>8 divides by the weight total of 256
+}
+#endif
+
+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
+// nothing
+#else
+static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
+{
+   int i,j;
+   stbi__uint16 *good;
+   // Convert a 16-bit-per-channel x*y image from img_n to req_comp channels; frees data and returns a new buffer (NULL on error).
+   if (req_comp == img_n) return data; // already in the requested layout: hand the input back untouched
+   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+   // overflow-checked size (2 bytes per sample), like the 8-bit converter; a plain req_comp*x*y*2 can wrap in 32-bit arithmetic
+   good = (stbi__uint16 *) stbi__malloc_mad3(req_comp * 2, x, y, 0);
+   if (good == NULL) {
+      STBI_FREE(data);
+      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   for (j=0; j < (int) y; ++j) {
+      stbi__uint16 *src  = data + j * x * img_n   ;
+      stbi__uint16 *dest = good + j * x * req_comp;
+
+      #define STBI__COMBO(a,b)  ((a)*8+(b))
+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      // convert source image with img_n components to one with req_comp components;
+      // avoid switch per pixel, so use switch per scanline and massive macros
+      switch (STBI__COMBO(img_n, req_comp)) {
+         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff;                                     } break;
+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff;                     } break;
+         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                     } break;
+         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff;        } break;
+         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
+         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
+         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                       } break;
+         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion");
+      }
+      #undef STBI__CASE
+   }
+
+   STBI_FREE(data); // the input buffer is consumed on success as well as on failure
+   return good;
+}
+#endif
 
 #ifndef STBI_NO_LINEAR
 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
 {
    int i,k,n;
-   float *output = (float *) stbi__malloc(x * y * comp * sizeof(float));
+   float *output;
+   if (!data) return NULL;
+   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
    if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
    // compute number of non-alpha components
    if (comp & 1) n = comp; else n = comp-1;
@@ -1394,7 +1868,11 @@ static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
       for (k=0; k < n; ++k) {
          output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
       }
-      if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
+   }
+   if (n < comp) {
+      for (i=0; i < x*y; ++i) {
+         output[i*comp + n] = data[i*comp + n]/255.0f;
+      }
    }
    STBI_FREE(data);
    return output;
@@ -1406,7 +1884,9 @@ static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
 {
    int i,k,n;
-   stbi_uc *output = (stbi_uc *) stbi__malloc(x * y * comp);
+   stbi_uc *output;
+   if (!data) return NULL;
+   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
    // compute number of non-alpha components
    if (comp & 1) n = comp; else n = comp-1;
@@ -1471,7 +1951,7 @@ typedef struct
    stbi__context *s;
    stbi__huffman huff_dc[4];
    stbi__huffman huff_ac[4];
-   stbi_uc dequant[4][64];
+   stbi__uint16 dequant[4][64];
    stbi__int16 fast_ac[4][1 << FAST_BITS];
 
 // sizes for components, interleaved MCUs
@@ -1507,6 +1987,8 @@ typedef struct
    int            succ_high;
    int            succ_low;
    int            eob_run;
+   int            jfif;
+   int            app14_color_transform; // Adobe APP14 tag
    int            rgb;
 
    int scan_n, order[4];
@@ -1520,11 +2002,15 @@ typedef struct
 
 static int stbi__build_huffman(stbi__huffman *h, int *count)
 {
-   int i,j,k=0,code;
+   int i,j,k=0;
+   unsigned int code;
    // build size list for each symbol (from JPEG spec)
-   for (i=0; i < 16; ++i)
-      for (j=0; j < count[i]; ++j)
+   for (i=0; i < 16; ++i) {
+      for (j=0; j < count[i]; ++j) {
          h->size[k++] = (stbi_uc) (i+1);
+         if(k >= 257) return stbi__err("bad size list","Corrupt JPEG");
+      }
+   }
    h->size[k] = 0;
 
    // compute actual symbols (from jpeg spec)
@@ -1536,7 +2022,7 @@ static int stbi__build_huffman(stbi__huffman *h, int *count)
       if (h->size[k] == j) {
          while (h->size[k] == j)
             h->code[k++] = (stbi__uint16) (code++);
-         if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
+         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
       }
       // compute largest code + 1 for this size, preshifted as needed later
       h->maxcode[j] = code << (16-j);
@@ -1577,10 +2063,10 @@ static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
             // magnitude code followed by receive_extend code
             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
             int m = 1 << (magbits - 1);
-            if (k < m) k += (-1 << magbits) + 1;
+            if (k < m) k += (~0U << magbits) + 1;
             // if the result is small enough, we can fit it in fast_ac table
             if (k >= -128 && k <= 127)
-               fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
+               fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
          }
       }
    }
@@ -1589,9 +2075,10 @@ static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
 {
    do {
-      int b = j->nomore ? 0 : stbi__get8(j->s);
+      unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
       if (b == 0xff) {
          int c = stbi__get8(j->s);
+         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
          if (c != 0) {
             j->marker = (unsigned char) c;
             j->nomore = 1;
@@ -1604,7 +2091,7 @@ static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
 }
 
 // (1 << n) - 1
-static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
+static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
 
 // decode a jpeg huffman value from the bitstream
 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
@@ -1648,6 +2135,8 @@ stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
 
    // convert the huffman code to the symbol id
    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
+   if(c < 0 || c >= 256) // symbol id out of bounds!
+       return -1;
    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
 
    // convert the id to a symbol
@@ -1657,7 +2146,7 @@ stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
 }
 
 // bias[n] = (-1<<n) + 1
-static int const stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
+static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
 
 // combined JPEG 'receive' and JPEG 'extend', since baseline
 // always extends everything it receives.
@@ -1666,14 +2155,14 @@ stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
    unsigned int k;
    int sgn;
    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s instead of continuing
 
-   sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
+   sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
    k = stbi_lrot(j->code_buffer, n);
-   STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
    j->code_buffer = k & ~stbi__bmask[n];
    k &= stbi__bmask[n];
    j->code_bits -= n;
-   return k + (stbi__jbias[n] & ~sgn);
+   return k + (stbi__jbias[n] & (sgn - 1));
 }
 
 // get some unsigned bits
@@ -1681,6 +2170,7 @@ stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
 {
    unsigned int k;
    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s instead of continuing
    k = stbi_lrot(j->code_buffer, n);
    j->code_buffer = k & ~stbi__bmask[n];
    k &= stbi__bmask[n];
@@ -1692,6 +2182,7 @@ stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
 {
    unsigned int k;
    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
+   if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s instead of continuing
    k = j->code_buffer;
    j->code_buffer <<= 1;
    --j->code_bits;
@@ -1700,7 +2191,7 @@ stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
 
 // given a value that's at position X in the zigzag stream,
 // where does it appear in the 8x8 matrix coded as row-major?
-static stbi_uc stbi__jpeg_dezigzag[64+15] =
+static const stbi_uc stbi__jpeg_dezigzag[64+15] =
 {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
@@ -1716,21 +2207,23 @@ static stbi_uc stbi__jpeg_dezigzag[64+15] =
 };
 
 // decode one 64-entry block--
-static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
+static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
 {
    int diff,dc,k;
    int t;
 
    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
    t = stbi__jpeg_huff_decode(j, hdc);
-   if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
 
    // 0 all the ac values now so we can do it 32-bits at a time
    memset(data,0,64*sizeof(data[0]));
 
    diff = t ? stbi__extend_receive(j, t) : 0;
+   if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG");
    dc = j->img_comp[b].dc_pred + diff;
    j->img_comp[b].dc_pred = dc;
+   if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
    data[0] = (short) (dc * dequant[0]);
 
    // decode AC components, see JPEG spec
@@ -1744,6 +2237,7 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman
       if (r) { // fast-AC path
          k += (r >> 4) & 15; // run
          s = r & 15; // combined length
+         if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
          j->code_buffer <<= s;
          j->code_bits -= s;
          // decode into unzigzag'd location
@@ -1780,11 +2274,14 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__
       // first scan for DC coefficient, must be first
       memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
       t = stbi__jpeg_huff_decode(j, hdc);
+      if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
       diff = t ? stbi__extend_receive(j, t) : 0;
 
+      if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG");
       dc = j->img_comp[b].dc_pred + diff;
       j->img_comp[b].dc_pred = dc;
-      data[0] = (short) (dc << j->succ_low);
+      if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+      data[0] = (short) (dc * (1 << j->succ_low));
    } else {
       // refinement scan for DC coefficient
       if (stbi__jpeg_get_bit(j))
@@ -1818,10 +2315,11 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
          if (r) { // fast-AC path
             k += (r >> 4) & 15; // run
             s = r & 15; // combined length
+            if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
             j->code_buffer <<= s;
             j->code_bits -= s;
             zig = stbi__jpeg_dezigzag[k++];
-            data[zig] = (short) ((r >> 8) << shift);
+            data[zig] = (short) ((r >> 8) * (1 << shift));
          } else {
             int rs = stbi__jpeg_huff_decode(j, hac);
             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
@@ -1839,7 +2337,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
             } else {
                k += r;
                zig = stbi__jpeg_dezigzag[k++];
-               data[zig] = (short) (stbi__extend_receive(j,s) << shift);
+               data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
             }
          }
       } while (k <= j->spec_end);
@@ -1926,7 +2424,7 @@ stbi_inline static stbi_uc stbi__clamp(int x)
 }
 
 #define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
-#define stbi__fsh(x)  ((x) << 12)
+#define stbi__fsh(x)  ((x) * 4096)
 
 // derived from jidctint -- DCT_ISLOW
 #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
@@ -1981,7 +2479,7 @@ static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
          //    (1|2|3|4|5|6|7)==0          0     seconds
          //    all separate               -0.047 seconds
          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
-         int dcterm = d[0] << 2;
+         int dcterm = d[0]*4;
          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
       } else {
          STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
@@ -2425,7 +2923,7 @@ static stbi_uc stbi__get_marker(stbi__jpeg *j)
    x = stbi__get8(j->s);
    if (x != 0xff) return STBI__MARKER_none;
    while (x == 0xff)
-      x = stbi__get8(j->s);
+      x = stbi__get8(j->s); // consume repeated 0xff fill bytes
    return x;
 }
 
@@ -2440,7 +2938,7 @@ static void stbi__jpeg_reset(stbi__jpeg *j)
    j->code_bits = 0;
    j->code_buffer = 0;
    j->nomore = 0;
-   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
+   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
    j->marker = STBI__MARKER_none;
    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
    j->eob_run = 0;
@@ -2572,7 +3070,7 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
    }
 }
 
-static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant)
+static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
 {
    int i;
    for (i=0; i < 64; ++i)
@@ -2614,13 +3112,14 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
          L = stbi__get16be(z->s)-2;
          while (L > 0) {
             int q = stbi__get8(z->s);
-            int p = q >> 4;
+            int p = q >> 4, sixteen = (p != 0);
             int t = q & 15,i;
-            if (p != 0) return stbi__err("bad DQT type","Corrupt JPEG");
+            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
             if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
+
             for (i=0; i < 64; ++i)
-               z->dequant[t][stbi__jpeg_dezigzag[i]] = stbi__get8(z->s);
-            L -= 65;
+               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
+            L -= (sixteen ? 129 : 65);
          }
          return L==0;
 
@@ -2637,6 +3136,7 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
                sizes[i] = stbi__get8(z->s);
                n += sizes[i];
             }
+            if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values!
             L -= 17;
             if (tc == 0) {
                if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
@@ -2653,12 +3153,50 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
          }
          return L==0;
    }
+
    // check for comment block or APP blocks
    if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
-      stbi__skip(z->s, stbi__get16be(z->s)-2);
+      L = stbi__get16be(z->s);
+      if (L < 2) {
+         if (m == 0xFE)
+            return stbi__err("bad COM len","Corrupt JPEG");
+         else
+            return stbi__err("bad APP len","Corrupt JPEG");
+      }
+      L -= 2;
+
+      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
+         static const unsigned char tag[5] = {'J','F','I','F','\0'};
+         int ok = 1;
+         int i;
+         for (i=0; i < 5; ++i)
+            if (stbi__get8(z->s) != tag[i])
+               ok = 0;
+         L -= 5;
+         if (ok)
+            z->jfif = 1;
+      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
+         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
+         int ok = 1;
+         int i;
+         for (i=0; i < 6; ++i)
+            if (stbi__get8(z->s) != tag[i])
+               ok = 0;
+         L -= 6;
+         if (ok) {
+            stbi__get8(z->s); // version
+            stbi__get16be(z->s); // flags0
+            stbi__get16be(z->s); // flags1
+            z->app14_color_transform = stbi__get8(z->s); // color transform
+            L -= 6;
+         }
+      }
+
+      stbi__skip(z->s, L);
       return 1;
    }
-   return 0;
+
+   return stbi__err("unknown marker","Corrupt JPEG");
 }
 
 // after we see SOS
@@ -2701,6 +3239,28 @@ static int stbi__process_scan_header(stbi__jpeg *z)
    return 1;
 }
 
+static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) // free the buffers of the first ncomp components of z; returns why unchanged
+{
+   int i;
+   for (i=0; i < ncomp; ++i) {
+      if (z->img_comp[i].raw_data) {
+         STBI_FREE(z->img_comp[i].raw_data);
+         z->img_comp[i].raw_data = NULL;
+         z->img_comp[i].data = NULL; // data is the 16-byte-aligned pointer derived from raw_data, so it is cleared with it
+      }
+      if (z->img_comp[i].raw_coeff) {
+         STBI_FREE(z->img_comp[i].raw_coeff);
+         z->img_comp[i].raw_coeff = 0;
+         z->img_comp[i].coeff = 0; // coeff is the aligned pointer derived from raw_coeff, so it is cleared with it
+      }
+      if (z->img_comp[i].linebuf) {
+         STBI_FREE(z->img_comp[i].linebuf);
+         z->img_comp[i].linebuf = NULL;
+      }
+   }
+   return why; // pass the caller's status through so callers can write: return stbi__free_jpeg_components(z, n, stbi__err(...))
+}
+
 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 {
    stbi__context *s = z->s;
@@ -2709,8 +3269,10 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
    p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
    s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
    s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
    c = stbi__get8(s);
-   if (c != 3 && c != 1) return stbi__err("bad component count","Corrupt JPEG");    // JFIF requires
+   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
    s->img_n = c;
    for (i=0; i < c; ++i) {
       z->img_comp[i].data = NULL;
@@ -2721,15 +3283,10 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 
    z->rgb = 0;
    for (i=0; i < s->img_n; ++i) {
-      static unsigned char rgb[3] = { 'R', 'G', 'B' };
+      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
       z->img_comp[i].id = stbi__get8(s);
-      if (z->img_comp[i].id != i+1)   // JFIF requires
-         if (z->img_comp[i].id != i) {  // some version of jpegtran outputs non-JFIF-compliant files!
-            // somethings output this (see http://fileformats.archiveteam.org/wiki/JPEG#Color_format)
-            if (z->img_comp[i].id != rgb[i])
-               return stbi__err("bad component ID","Corrupt JPEG");
-            ++z->rgb;
-         }
+      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
+         ++z->rgb;
       q = stbi__get8(s);
       z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
       z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
@@ -2738,18 +3295,26 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 
    if (scan != STBI__SCAN_load) return 1;
 
-   if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
+   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
 
    for (i=0; i < s->img_n; ++i) {
       if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
       if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
    }
 
+   // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
+   // and I've never seen a non-corrupted JPEG file actually use them
+   for (i=0; i < s->img_n; ++i) {
+      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
+      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
+   }
+
    // compute interleaved mcu info
    z->img_h_max = h_max;
    z->img_v_max = v_max;
    z->img_mcu_w = h_max * 8;
    z->img_mcu_h = v_max * 8;
+   // these sizes can't be more than 17 bits
    z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
    z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
 
@@ -2761,28 +3326,27 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
       // the bogus oversized data from using interleaved MCUs and their
       // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
       // discard the extra data until colorspace conversion
+      //
+      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
+      // so these muls can't overflow with 32-bit ints (which we require)
       z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
       z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
-      z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
-
-      if (z->img_comp[i].raw_data == NULL) {
-         for(--i; i >= 0; --i) {
-            STBI_FREE(z->img_comp[i].raw_data);
-            z->img_comp[i].raw_data = NULL;
-         }
-         return stbi__err("outofmem", "Out of memory");
-      }
+      z->img_comp[i].coeff = 0;
+      z->img_comp[i].raw_coeff = 0;
+      z->img_comp[i].linebuf = NULL;
+      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
+      if (z->img_comp[i].raw_data == NULL)
+         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
       // align blocks for idct using mmx/sse
       z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
-      z->img_comp[i].linebuf = NULL;
       if (z->progressive) {
-         z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3;
-         z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3;
-         z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15);
+         // w2, h2 are multiples of 8 (see above)
+         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
+         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
+         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
+         if (z->img_comp[i].raw_coeff == NULL)
+            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
          z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
-      } else {
-         z->img_comp[i].coeff = 0;
-         z->img_comp[i].raw_coeff = 0;
       }
    }
 
@@ -2801,6 +3365,8 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
 {
    int m;
+   z->jfif = 0;
+   z->app14_color_transform = -1; // valid values are 0,1,2
    z->marker = STBI__MARKER_none; // initialize cached marker to empty
    m = stbi__get_marker(z);
    if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
@@ -2820,6 +3386,28 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
    return 1;
 }
 
+static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) // returns the marker byte found after 0xff, or STBI__MARKER_none if the stream ends first
+{
+   // some JPEGs have junk at end, skip over it but if we find what looks
+   // like a valid marker, resume there
+   while (!stbi__at_eof(j->s)) {
+      stbi_uc x = stbi__get8(j->s);
+      while (x == 0xff) { // might be a marker
+         if (stbi__at_eof(j->s)) return STBI__MARKER_none; // 0xff was the last byte available, so there is no marker code to read
+         x = stbi__get8(j->s);
+         if (x != 0x00 && x != 0xff) {
+            // not a stuffed zero or lead-in to another marker, looks
+            // like an actual marker, return it
+            return x;
+         }
+         // stuffed zero has x=0 now which ends the loop, meaning we go
+         // back to regular scan loop.
+         // repeated 0xff keeps trying to read the next byte of the marker.
+      }
+   }
+   return STBI__MARKER_none; // reached end of stream without seeing anything that looks like a marker
+}
+
 // decode image to YCbCr format
 static int stbi__decode_jpeg_image(stbi__jpeg *j)
 {
@@ -2836,22 +3424,22 @@ static int stbi__decode_jpeg_image(stbi__jpeg *j)
          if (!stbi__process_scan_header(j)) return 0;
          if (!stbi__parse_entropy_coded_data(j)) return 0;
          if (j->marker == STBI__MARKER_none ) {
-            // handle 0s at the end of image data from IP Kamera 9060
-            while (!stbi__at_eof(j->s)) {
-               int x = stbi__get8(j->s);
-               if (x == 255) {
-                  j->marker = stbi__get8(j->s);
-                  break;
-               } else if (x != 0) {
-                  return stbi__err("junk before marker", "Corrupt JPEG");
-               }
-            }
+         j->marker = stbi__skip_jpeg_junk_at_end(j);
             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
          }
+         m = stbi__get_marker(j);
+         if (STBI__RESTART(m))
+            m = stbi__get_marker(j);
+      } else if (stbi__DNL(m)) {
+         int Ld = stbi__get16be(j->s);
+         stbi__uint32 NL = stbi__get16be(j->s);
+         if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
+         if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
+         m = stbi__get_marker(j);
       } else {
-         if (!stbi__process_marker(j, m)) return 0;
+         if (!stbi__process_marker(j, m)) return 1;
+         m = stbi__get_marker(j);
       }
-      m = stbi__get_marker(j);
    }
    if (j->progressive)
       stbi__jpeg_finish(j);
@@ -3066,38 +3654,9 @@ static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_
    return out;
 }
 
-#ifdef STBI_JPEG_OLD
-// this is the same YCbCr-to-RGB calculation that stb_image has used
-// historically before the algorithm changes in 1.49
-#define float2fixed(x)  ((int) ((x) * 65536 + 0.5))
-static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
-{
-   int i;
-   for (i=0; i < count; ++i) {
-      int y_fixed = (y[i] << 16) + 32768; // rounding
-      int r,g,b;
-      int cr = pcr[i] - 128;
-      int cb = pcb[i] - 128;
-      r = y_fixed + cr*float2fixed(1.40200f);
-      g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
-      b = y_fixed                            + cb*float2fixed(1.77200f);
-      r >>= 16;
-      g >>= 16;
-      b >>= 16;
-      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
-      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
-      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
-      out[0] = (stbi_uc)r;
-      out[1] = (stbi_uc)g;
-      out[2] = (stbi_uc)b;
-      out[3] = 255;
-      out += step;
-   }
-}
-#else
 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
 // to make sure the code produces the same results in both SIMD and scalar
-#define float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
+#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
 {
    int i;
@@ -3106,9 +3665,9 @@ static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc
       int r,g,b;
       int cr = pcr[i] - 128;
       int cb = pcb[i] - 128;
-      r = y_fixed +  cr* float2fixed(1.40200f);
-      g = y_fixed + (cr*-float2fixed(0.71414f)) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
-      b = y_fixed                               +   cb* float2fixed(1.77200f);
+      r = y_fixed +  cr* stbi__float2fixed(1.40200f);
+      g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
+      b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
       r >>= 20;
       g >>= 20;
       b >>= 20;
@@ -3122,7 +3681,6 @@ static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc
       out += step;
    }
 }
-#endif
 
 #if defined(STBI_SSE2) || defined(STBI_NEON)
 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
@@ -3241,9 +3799,9 @@ static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc cons
       int r,g,b;
       int cr = pcr[i] - 128;
       int cb = pcb[i] - 128;
-      r = y_fixed + cr* float2fixed(1.40200f);
-      g = y_fixed + cr*-float2fixed(0.71414f) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
-      b = y_fixed                             +   cb* float2fixed(1.77200f);
+      r = y_fixed + cr* stbi__float2fixed(1.40200f);
+      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
+      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
       r >>= 20;
       g >>= 20;
       b >>= 20;
@@ -3269,18 +3827,14 @@ static void stbi__setup_jpeg(stbi__jpeg *j)
 #ifdef STBI_SSE2
    if (stbi__sse2_available()) {
       j->idct_block_kernel = stbi__idct_simd;
-      #ifndef STBI_JPEG_OLD
       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-      #endif
       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
    }
 #endif
 
 #ifdef STBI_NEON
    j->idct_block_kernel = stbi__idct_simd;
-   #ifndef STBI_JPEG_OLD
    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-   #endif
    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
 #endif
 }
@@ -3288,23 +3842,7 @@ static void stbi__setup_jpeg(stbi__jpeg *j)
 // clean up the temporary component buffers
 static void stbi__cleanup_jpeg(stbi__jpeg *j)
 {
-   int i;
-   for (i=0; i < j->s->img_n; ++i) {
-      if (j->img_comp[i].raw_data) {
-         STBI_FREE(j->img_comp[i].raw_data);
-         j->img_comp[i].raw_data = NULL;
-         j->img_comp[i].data = NULL;
-      }
-      if (j->img_comp[i].raw_coeff) {
-         STBI_FREE(j->img_comp[i].raw_coeff);
-         j->img_comp[i].raw_coeff = 0;
-         j->img_comp[i].coeff = 0;
-      }
-      if (j->img_comp[i].linebuf) {
-         STBI_FREE(j->img_comp[i].linebuf);
-         j->img_comp[i].linebuf = NULL;
-      }
-   }
+   stbi__free_jpeg_components(j, j->s->img_n, 0);
 }
 
 typedef struct
@@ -3317,9 +3855,16 @@ typedef struct
    int ypos;    // which pre-expansion row we're on
 } stbi__resample;
 
+// fast 0..255 * 0..255 => 0..255 rounded multiplication
+static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
+{
+   unsigned int t = x*y + 128;
+   return (stbi_uc) ((t + (t >>8)) >> 8);
+}
+
 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
 {
-   int n, decode_n;
+   int n, decode_n, is_rgb;
    z->s->img_n = 0; // make stbi__cleanup_jpeg safe
 
    // validate req_comp
@@ -3329,19 +3874,25 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
    if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
 
    // determine actual number of components to generate
-   n = req_comp ? req_comp : z->s->img_n;
+   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
+
+   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
 
-   if (z->s->img_n == 3 && n < 3)
+   if (z->s->img_n == 3 && n < 3 && !is_rgb)
       decode_n = 1;
    else
       decode_n = z->s->img_n;
 
+   // nothing to do if no components requested; check this now to avoid
+   // accessing uninitialized coutput[0] later
+   if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
+
    // resample and color-convert
    {
       int k;
       unsigned int i,j;
       stbi_uc *output;
-      stbi_uc *coutput[4];
+      stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
 
       stbi__resample res_comp[4];
 
@@ -3368,7 +3919,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
       }
 
       // can't error after this so, this is safe
-      output = (stbi_uc *) stbi__malloc(n * z->s->img_x * z->s->img_y + 1);
+      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
       if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
 
       // now go ahead and resample
@@ -3391,7 +3942,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
          if (n >= 3) {
             stbi_uc *y = coutput[0];
             if (z->s->img_n == 3) {
-               if (z->rgb == 3) {
+               if (is_rgb) {
                   for (i=0; i < z->s->img_x; ++i) {
                      out[0] = y[i];
                      out[1] = coutput[1][i];
@@ -3402,6 +3953,28 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
                } else {
                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                }
+            } else if (z->s->img_n == 4) {
+               if (z->app14_color_transform == 0) { // CMYK
+                  for (i=0; i < z->s->img_x; ++i) {
+                     stbi_uc m = coutput[3][i];
+                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
+                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
+                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
+                     out[3] = 255;
+                     out += n;
+                  }
+               } else if (z->app14_color_transform == 2) { // YCCK
+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+                  for (i=0; i < z->s->img_x; ++i) {
+                     stbi_uc m = coutput[3][i];
+                     out[0] = stbi__blinn_8x8(255 - out[0], m);
+                     out[1] = stbi__blinn_8x8(255 - out[1], m);
+                     out[2] = stbi__blinn_8x8(255 - out[2], m);
+                     out += n;
+                  }
+               } else { // YCbCr + alpha?  Ignore the fourth channel for now
+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+               }
             } else
                for (i=0; i < z->s->img_x; ++i) {
                   out[0] = out[1] = out[2] = y[i];
@@ -3409,25 +3982,56 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
                   out += n;
                }
          } else {
-            stbi_uc *y = coutput[0];
-            if (n == 1)
-               for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
-            else
-               for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
+            if (is_rgb) {
+               if (n == 1)
+                  for (i=0; i < z->s->img_x; ++i)
+                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+               else {
+                  for (i=0; i < z->s->img_x; ++i, out += 2) {
+                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+                     out[1] = 255;
+                  }
+               }
+            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
+               for (i=0; i < z->s->img_x; ++i) {
+                  stbi_uc m = coutput[3][i];
+                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
+                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
+                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
+                  out[0] = stbi__compute_y(r, g, b);
+                  out[1] = 255;
+                  out += n;
+               }
+            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
+               for (i=0; i < z->s->img_x; ++i) {
+                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
+                  out[1] = 255;
+                  out += n;
+               }
+            } else {
+               stbi_uc *y = coutput[0];
+               if (n == 1)
+                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
+               else
+                  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
+            }
          }
       }
       stbi__cleanup_jpeg(z);
       *out_x = z->s->img_x;
       *out_y = z->s->img_y;
-      if (comp) *comp  = z->s->img_n; // report original components, not output
+      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report 3 if the file has >= 3 components, else 1 (not the output count)
       return output;
    }
 }
 
-static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    unsigned char* result;
    stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
+   if (!j) return stbi__errpuc("outofmem", "Out of memory");
+   memset(j, 0, sizeof(stbi__jpeg));
+   STBI_NOTUSED(ri);
    j->s = s;
    stbi__setup_jpeg(j);
    result = load_jpeg_image(j, x,y,comp,req_comp);
@@ -3438,11 +4042,14 @@ static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *com
 static int stbi__jpeg_test(stbi__context *s)
 {
    int r;
-   stbi__jpeg j;
-   j.s = s;
-   stbi__setup_jpeg(&j);
-   r = stbi__decode_jpeg_header(&j, STBI__SCAN_type);
+   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
+   if (!j) return stbi__err("outofmem", "Out of memory");
+   memset(j, 0, sizeof(stbi__jpeg));
+   j->s = s;
+   stbi__setup_jpeg(j);
+   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
    stbi__rewind(s);
+   STBI_FREE(j);
    return r;
 }
 
@@ -3454,7 +4061,7 @@ static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
    }
    if (x) *x = j->s->img_x;
    if (y) *y = j->s->img_y;
-   if (comp) *comp = j->s->img_n;
+   if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
    return 1;
 }
 
@@ -3462,6 +4069,8 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
 {
    int result;
    stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
+   if (!j) return stbi__err("outofmem", "Out of memory");
+   memset(j, 0, sizeof(stbi__jpeg));
    j->s = s;
    result = stbi__jpeg_info_raw(j, x, y, comp);
    STBI_FREE(j);
@@ -3481,6 +4090,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
 // fast-way is faster to check than jpeg huffman, but slow way is slower
 #define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
 #define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
+#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
 
 // zlib-style huffman encoding
 // (jpegs packs from left, zlib from right, so can't share code)
@@ -3490,8 +4100,8 @@ typedef struct
    stbi__uint16 firstcode[16];
    int maxcode[17];
    stbi__uint16 firstsymbol[16];
-   stbi_uc  size[288];
-   stbi__uint16 value[288];
+   stbi_uc  size[STBI__ZNSYMS];
+   stbi__uint16 value[STBI__ZNSYMS];
 } stbi__zhuffman;
 
 stbi_inline static int stbi__bitreverse16(int n)
@@ -3511,7 +4121,7 @@ stbi_inline static int stbi__bit_reverse(int v, int bits)
    return stbi__bitreverse16(v) >> (16-bits);
 }
 
-static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
+static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
 {
    int i,k=0;
    int code, next_code[16], sizes[17];
@@ -3568,6 +4178,7 @@ typedef struct
 {
    stbi_uc *zbuffer, *zbuffer_end;
    int num_bits;
+   int hit_zeof_once;
    stbi__uint32 code_buffer;
 
    char *zout;
@@ -3578,16 +4189,23 @@ typedef struct
    stbi__zhuffman z_length, z_distance;
 } stbi__zbuf;
 
+stbi_inline static int stbi__zeof(stbi__zbuf *z)
+{
+   return (z->zbuffer >= z->zbuffer_end);
+}
+
 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
 {
-   if (z->zbuffer >= z->zbuffer_end) return 0;
-   return *z->zbuffer++;
+   return stbi__zeof(z) ? 0 : *z->zbuffer++;
 }
 
 static void stbi__fill_bits(stbi__zbuf *z)
 {
    do {
-      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
+      if (z->code_buffer >= (1U << z->num_bits)) {
+        z->zbuffer = z->zbuffer_end;  /* treat this as EOF so we fail. */
+        return;
+      }
       z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
       z->num_bits += 8;
    } while (z->num_bits <= 24);
@@ -3612,10 +4230,11 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
    for (s=STBI__ZFAST_BITS+1; ; ++s)
       if (k < z->maxcode[s])
          break;
-   if (s == 16) return -1; // invalid code!
+   if (s >= 16) return -1; // invalid code!
    // code size is s, so:
    b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
-   STBI_ASSERT(z->size[b] == s);
+   if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
+   if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead.
    a->code_buffer >>= s;
    a->num_bits -= s;
    return z->value[b];
@@ -3624,7 +4243,23 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
 {
    int b,s;
-   if (a->num_bits < 16) stbi__fill_bits(a);
+   if (a->num_bits < 16) {
+      if (stbi__zeof(a)) {
+         if (!a->hit_zeof_once) {
+            // This is the first time we hit eof, insert 16 extra padding bits
+            // to allow us to keep going; if we actually consume any of them
+            // though, that is invalid data. This is caught later.
+            a->hit_zeof_once = 1;
+            a->num_bits += 16; // add 16 implicit zero bits
+         } else {
+            // We already inserted our extra 16 padding bits and are again
+            // out, this stream is actually prematurely terminated.
+            return -1;
+         }
+      } else {
+         stbi__fill_bits(a);
+      }
+   }
    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
    if (b) {
       s = b >> 9;
@@ -3638,13 +4273,16 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
 static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
 {
    char *q;
-   int cur, limit, old_limit;
+   unsigned int cur, limit, old_limit;
    z->zout = zout;
    if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
-   cur   = (int) (z->zout     - z->zout_start);
-   limit = old_limit = (int) (z->zout_end - z->zout_start);
-   while (cur + n > limit)
+   cur   = (unsigned int) (z->zout - z->zout_start);
+   limit = old_limit = (unsigned) (z->zout_end - z->zout_start);
+   if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory");
+   while (cur + n > limit) {
+      if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory");
       limit *= 2;
+   }
    q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
    STBI_NOTUSED(old_limit);
    if (q == NULL) return stbi__err("outofmem", "Out of memory");
@@ -3654,18 +4292,18 @@ static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room
    return 1;
 }
 
-static int stbi__zlength_base[31] = {
+static const int stbi__zlength_base[31] = {
    3,4,5,6,7,8,9,10,11,13,
    15,17,19,23,27,31,35,43,51,59,
    67,83,99,115,131,163,195,227,258,0,0 };
 
-static int stbi__zlength_extra[31]=
+static const int stbi__zlength_extra[31]=
 { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
 
-static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
+static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
 
-static int stbi__zdist_extra[32] =
+static const int stbi__zdist_extra[32] =
 { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
 
 static int stbi__parse_huffman_block(stbi__zbuf *a)
@@ -3685,17 +4323,25 @@ static int stbi__parse_huffman_block(stbi__zbuf *a)
          int len,dist;
          if (z == 256) {
             a->zout = zout;
+            if (a->hit_zeof_once && a->num_bits < 16) {
+               // The first time we hit zeof, we inserted 16 extra zero bits into our bit
+               // buffer so the decoder can just do its speculative decoding. But if we
+               // actually consumed any of those bits (which is the case when num_bits < 16),
+               // the stream actually read past the end so it is malformed.
+               return stbi__err("unexpected end","Corrupt PNG");
+            }
             return 1;
          }
+         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
          z -= 257;
          len = stbi__zlength_base[z];
          if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
          z = stbi__zhuffman_decode(a, &a->z_distance);
-         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
+         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
          dist = stbi__zdist_base[z];
          if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
          if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
-         if (zout + len > a->zout_end) {
+         if (len > a->zout_end - zout) {
             if (!stbi__zexpand(a, zout, len)) return 0;
             zout = a->zout;
          }
@@ -3712,7 +4358,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a)
 
 static int stbi__compute_huffman_codes(stbi__zbuf *a)
 {
-   static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
+   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
    stbi__zhuffman z_codelength;
    stbi_uc lencodes[286+32+137];//padding for maximum single op
    stbi_uc codelength_sizes[19];
@@ -3721,6 +4367,7 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a)
    int hlit  = stbi__zreceive(a,5) + 257;
    int hdist = stbi__zreceive(a,5) + 1;
    int hclen = stbi__zreceive(a,4) + 4;
+   int ntot  = hlit + hdist;
 
    memset(codelength_sizes, 0, sizeof(codelength_sizes));
    for (i=0; i < hclen; ++i) {
@@ -3730,27 +4377,30 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a)
    if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
 
    n = 0;
-   while (n < hlit + hdist) {
+   while (n < ntot) {
       int c = stbi__zhuffman_decode(a, &z_codelength);
       if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
       if (c < 16)
          lencodes[n++] = (stbi_uc) c;
-      else if (c == 16) {
-         c = stbi__zreceive(a,2)+3;
-         memset(lencodes+n, lencodes[n-1], c);
-         n += c;
-      } else if (c == 17) {
-         c = stbi__zreceive(a,3)+3;
-         memset(lencodes+n, 0, c);
-         n += c;
-      } else {
-         STBI_ASSERT(c == 18);
-         c = stbi__zreceive(a,7)+11;
-         memset(lencodes+n, 0, c);
+      else {
+         stbi_uc fill = 0;
+         if (c == 16) {
+            c = stbi__zreceive(a,2)+3;
+            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
+            fill = lencodes[n-1];
+         } else if (c == 17) {
+            c = stbi__zreceive(a,3)+3;
+         } else if (c == 18) {
+            c = stbi__zreceive(a,7)+11;
+         } else {
+            return stbi__err("bad codelengths", "Corrupt PNG");
+         }
+         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
+         memset(lencodes+n, fill, c);
          n += c;
       }
    }
-   if (n != hlit+hdist) return stbi__err("bad codelengths","Corrupt PNG");
+   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
    if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
    if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
    return 1;
@@ -3769,7 +4419,7 @@ static int stbi__parse_uncompressed_block(stbi__zbuf *a)
       a->code_buffer >>= 8;
       a->num_bits -= 8;
    }
-   STBI_ASSERT(a->num_bits == 0);
+   if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
    // now fill header the normal way
    while (k < 4)
       header[k++] = stbi__zget8(a);
@@ -3791,6 +4441,7 @@ static int stbi__parse_zlib_header(stbi__zbuf *a)
    int cm    = cmf & 15;
    /* int cinfo = cmf >> 4; */
    int flg   = stbi__zget8(a);
+   if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
    if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
    if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
    if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
@@ -3798,9 +4449,24 @@ static int stbi__parse_zlib_header(stbi__zbuf *a)
    return 1;
 }
 
-// @TODO: should statically initialize these for optimal thread safety
-static stbi_uc stbi__zdefault_length[288], stbi__zdefault_distance[32];
-static void stbi__init_zdefaults(void)
+static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
+{
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
+};
+static const stbi_uc stbi__zdefault_distance[32] =
+{
+   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
+};
+/*
+Init algorithm:
 {
    int i;   // use <= to match clearly with spec
    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
@@ -3810,6 +4476,7 @@ static void stbi__init_zdefaults(void)
 
    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
 }
+*/
 
 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
 {
@@ -3818,6 +4485,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
       if (!stbi__parse_zlib_header(a)) return 0;
    a->num_bits = 0;
    a->code_buffer = 0;
+   a->hit_zeof_once = 0;
    do {
       final = stbi__zreceive(a,1);
       type = stbi__zreceive(a,2);
@@ -3828,8 +4496,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
       } else {
          if (type == 1) {
             // use fixed code lengths
-            if (!stbi__zdefault_distance[31]) stbi__init_zdefaults();
-            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
+            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , STBI__ZNSYMS)) return 0;
             if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
          } else {
             if (!stbi__compute_huffman_codes(a)) return 0;
@@ -3953,7 +4620,7 @@ static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
 
 static int stbi__check_png_header(stbi__context *s)
 {
-   static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
+   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
    int i;
    for (i=0; i < 8; ++i)
       if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
@@ -3974,9 +4641,8 @@ enum {
    STBI__F_up=2,
    STBI__F_avg=3,
    STBI__F_paeth=4,
-   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
-   STBI__F_avg_first,
-   STBI__F_paeth_first
+   // synthetic filter used for first scanline to avoid needing a dummy row of 0s
+   STBI__F_avg_first
 };
 
 static stbi_uc first_row_filter[5] =
@@ -3985,29 +4651,56 @@ static stbi_uc first_row_filter[5] =
    STBI__F_sub,
    STBI__F_none,
    STBI__F_avg_first,
-   STBI__F_paeth_first
+   STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub
 };
 
 static int stbi__paeth(int a, int b, int c)
 {
-   int p = a + b - c;
-   int pa = abs(p-a);
-   int pb = abs(p-b);
-   int pc = abs(p-c);
-   if (pa <= pb && pa <= pc) return a;
-   if (pb <= pc) return b;
-   return c;
+   // This formulation looks very different from the reference in the PNG spec, but is
+   // actually equivalent and has favorable data dependencies and admits straightforward
+   // generation of branch-free code, which helps performance significantly.
+   int thresh = c*3 - (a + b);
+   int lo = a < b ? a : b;
+   int hi = a < b ? b : a;
+   int t0 = (hi <= thresh) ? lo : c;
+   int t1 = (thresh <= lo) ? hi : t0;
+   return t1;
 }
 
-static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
+
+// adds an extra all-255 alpha channel
+// dest == src is legal
+// img_n must be 1 or 3
+static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n)
+{
+   int i;
+   // must process data backwards since we allow dest==src
+   if (img_n == 1) {
+      for (i=x-1; i >= 0; --i) {
+         dest[i*2+1] = 255;
+         dest[i*2+0] = src[i];
+      }
+   } else {
+      STBI_ASSERT(img_n == 3);
+      for (i=x-1; i >= 0; --i) {
+         dest[i*4+3] = 255;
+         dest[i*4+2] = src[i*3+2];
+         dest[i*4+1] = src[i*3+1];
+         dest[i*4+0] = src[i*3+0];
+      }
+   }
+}
 
 // create the png data from post-deflated data
 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
 {
-   int bytes = (depth == 16? 2 : 1);
+   int bytes = (depth == 16 ? 2 : 1);
    stbi__context *s = a->s;
    stbi__uint32 i,j,stride = x*out_n*bytes;
    stbi__uint32 img_len, img_width_bytes;
+   stbi_uc *filter_buf;
+   int all_ok = 1;
    int k;
    int img_n = s->img_n; // copy it into a local for later
 
@@ -4016,211 +4709,167 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
    int width = x;
 
    STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
-   a->out = (stbi_uc *) stbi__malloc(x * y * output_bytes); // extra bytes to write off the end into
+   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
    if (!a->out) return stbi__err("outofmem", "Out of memory");
 
+   // note: error exits here don't need to clean up a->out individually,
+   // stbi__do_png always does on error.
+   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
    img_width_bytes = (((img_n * x * depth) + 7) >> 3);
+   if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG");
    img_len = (img_width_bytes + 1) * y;
-   if (s->img_x == x && s->img_y == y) {
-      if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG");
-   } else { // interlaced:
-      if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
+
+   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
+   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
+   // so just check for raw_len < img_len always.
+   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
+
+   // Allocate two scan lines worth of filter workspace buffer.
+   filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0);
+   if (!filter_buf) return stbi__err("outofmem", "Out of memory");
+
+   // Filtering for low-bit-depth images
+   if (depth < 8) {
+      filter_bytes = 1;
+      width = img_width_bytes;
    }
 
    for (j=0; j < y; ++j) {
-      stbi_uc *cur = a->out + stride*j;
-      stbi_uc *prior = cur - stride;
+      // cur/prior filter buffers alternate
+      stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes;
+      stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes;
+      stbi_uc *dest = a->out + stride*j;
+      int nk = width * filter_bytes;
       int filter = *raw++;
 
-      if (filter > 4)
-         return stbi__err("invalid filter","Corrupt PNG");
-
-      if (depth < 8) {
-         STBI_ASSERT(img_width_bytes <= x);
-         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
-         filter_bytes = 1;
-         width = img_width_bytes;
+      // check filter type
+      if (filter > 4) {
+         all_ok = stbi__err("invalid filter","Corrupt PNG");
+         break;
       }
 
       // if first row, use special filter that doesn't sample previous row
       if (j == 0) filter = first_row_filter[filter];
 
-      // handle first byte explicitly
-      for (k=0; k < filter_bytes; ++k) {
-         switch (filter) {
-            case STBI__F_none       : cur[k] = raw[k]; break;
-            case STBI__F_sub        : cur[k] = raw[k]; break;
-            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
-            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
-            case STBI__F_avg_first  : cur[k] = raw[k]; break;
-            case STBI__F_paeth_first: cur[k] = raw[k]; break;
-         }
-      }
-
-      if (depth == 8) {
-         if (img_n != out_n)
-            cur[img_n] = 255; // first pixel
-         raw += img_n;
-         cur += out_n;
-         prior += out_n;
-      } else if (depth == 16) {
-         if (img_n != out_n) {
-            cur[filter_bytes]   = 255; // first pixel top byte
-            cur[filter_bytes+1] = 255; // first pixel bottom byte
-         }
-         raw += filter_bytes;
-         cur += output_bytes;
-         prior += output_bytes;
-      } else {
-         raw += 1;
-         cur += 1;
-         prior += 1;
+      // perform actual filtering
+      switch (filter) {
+      case STBI__F_none:
+         memcpy(cur, raw, nk);
+         break;
+      case STBI__F_sub:
+         memcpy(cur, raw, filter_bytes);
+         for (k = filter_bytes; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]);
+         break;
+      case STBI__F_up:
+         for (k = 0; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
+         break;
+      case STBI__F_avg:
+         for (k = 0; k < filter_bytes; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1));
+         for (k = filter_bytes; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1));
+         break;
+      case STBI__F_paeth:
+         for (k = 0; k < filter_bytes; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0)
+         for (k = filter_bytes; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes]));
+         break;
+      case STBI__F_avg_first:
+         memcpy(cur, raw, filter_bytes);
+         for (k = filter_bytes; k < nk; ++k)
+            cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1));
+         break;
       }
 
-      // this is a little gross, so that we don't switch per-pixel or per-component
-      if (depth < 8 || img_n == out_n) {
-         int nk = (width - 1)*filter_bytes;
-         #define CASE(f) \
-             case f:     \
-                for (k=0; k < nk; ++k)
-         switch (filter) {
-            // "none" filter turns into a memcpy here; make that explicit.
-            case STBI__F_none:         memcpy(cur, raw, nk); break;
-            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
-            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
-            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
-            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
-            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
-         }
-         #undef CASE
-         raw += nk;
-      } else {
-         STBI_ASSERT(img_n+1 == out_n);
-         #define CASE(f) \
-             case f:     \
-                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
-                   for (k=0; k < filter_bytes; ++k)
-         switch (filter) {
-            CASE(STBI__F_none)         cur[k] = raw[k]; break;
-            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); break;
-            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); break;
-            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); break;
-            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); break;
-            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); break;
-         }
-         #undef CASE
-
-         // the loop above sets the high byte of the pixels' alpha, but for
-         // 16 bit png files we also need the low byte set. we'll do that here.
-         if (depth == 16) {
-            cur = a->out + stride*j; // start at the beginning of the row again
-            for (i=0; i < x; ++i,cur+=output_bytes) {
-               cur[filter_bytes+1] = 255;
-            }
-         }
-      }
-   }
+      raw += nk;
 
-   // we make a separate pass to expand bits to pixels; for performance,
-   // this could run two scanlines behind the above code, so it won't
-   // intefere with filtering but will still be in the cache.
-   if (depth < 8) {
-      for (j=0; j < y; ++j) {
-         stbi_uc *cur = a->out + stride*j;
-         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
-         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
-         // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
+      // expand decoded bits in cur to dest, also adding an extra alpha channel if desired
+      if (depth < 8) {
          stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+         stbi_uc *in = cur;
+         stbi_uc *out = dest;
+         stbi_uc inb = 0;
+         stbi__uint32 nsmp = x*img_n;
 
-         // note that the final byte might overshoot and write more data than desired.
-         // we can allocate enough data that this never writes out of memory, but it
-         // could also overwrite the next scanline. can it overwrite non-empty data
-         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
-         // so we need to explicitly clamp the final ones
-
+         // expand bits to bytes first
          if (depth == 4) {
-            for (k=x*img_n; k >= 2; k-=2, ++in) {
-               *cur++ = scale * ((*in >> 4)       );
-               *cur++ = scale * ((*in     ) & 0x0f);
+            for (i=0; i < nsmp; ++i) {
+               if ((i & 1) == 0) inb = *in++;
+               *out++ = scale * (inb >> 4);
+               inb <<= 4;
             }
-            if (k > 0) *cur++ = scale * ((*in >> 4)       );
          } else if (depth == 2) {
-            for (k=x*img_n; k >= 4; k-=4, ++in) {
-               *cur++ = scale * ((*in >> 6)       );
-               *cur++ = scale * ((*in >> 4) & 0x03);
-               *cur++ = scale * ((*in >> 2) & 0x03);
-               *cur++ = scale * ((*in     ) & 0x03);
+            for (i=0; i < nsmp; ++i) {
+               if ((i & 3) == 0) inb = *in++;
+               *out++ = scale * (inb >> 6);
+               inb <<= 2;
             }
-            if (k > 0) *cur++ = scale * ((*in >> 6)       );
-            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
-            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
-         } else if (depth == 1) {
-            for (k=x*img_n; k >= 8; k-=8, ++in) {
-               *cur++ = scale * ((*in >> 7)       );
-               *cur++ = scale * ((*in >> 6) & 0x01);
-               *cur++ = scale * ((*in >> 5) & 0x01);
-               *cur++ = scale * ((*in >> 4) & 0x01);
-               *cur++ = scale * ((*in >> 3) & 0x01);
-               *cur++ = scale * ((*in >> 2) & 0x01);
-               *cur++ = scale * ((*in >> 1) & 0x01);
-               *cur++ = scale * ((*in     ) & 0x01);
+         } else {
+            STBI_ASSERT(depth == 1);
+            for (i=0; i < nsmp; ++i) {
+               if ((i & 7) == 0) inb = *in++;
+               *out++ = scale * (inb >> 7);
+               inb <<= 1;
             }
-            if (k > 0) *cur++ = scale * ((*in >> 7)       );
-            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
-            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
-            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
-            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
-            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
-            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
          }
-         if (img_n != out_n) {
-            int q;
-            // insert alpha = 255
-            cur = a->out + stride*j;
+
+         // insert alpha=255 values if desired
+         if (img_n != out_n)
+            stbi__create_png_alpha_expand8(dest, dest, x, img_n);
+      } else if (depth == 8) {
+         if (img_n == out_n)
+            memcpy(dest, cur, x*img_n);
+         else
+            stbi__create_png_alpha_expand8(dest, cur, x, img_n);
+      } else if (depth == 16) {
+         // convert the image data from big-endian to platform-native
+         stbi__uint16 *dest16 = (stbi__uint16*)dest;
+         stbi__uint32 nsmp = x*img_n;
+
+         if (img_n == out_n) {
+            for (i = 0; i < nsmp; ++i, ++dest16, cur += 2)
+               *dest16 = (cur[0] << 8) | cur[1];
+         } else {
+            STBI_ASSERT(img_n+1 == out_n);
             if (img_n == 1) {
-               for (q=x-1; q >= 0; --q) {
-                  cur[q*2+1] = 255;
-                  cur[q*2+0] = cur[q];
+               for (i = 0; i < x; ++i, dest16 += 2, cur += 2) {
+                  dest16[0] = (cur[0] << 8) | cur[1];
+                  dest16[1] = 0xffff;
                }
             } else {
                STBI_ASSERT(img_n == 3);
-               for (q=x-1; q >= 0; --q) {
-                  cur[q*4+3] = 255;
-                  cur[q*4+2] = cur[q*3+2];
-                  cur[q*4+1] = cur[q*3+1];
-                  cur[q*4+0] = cur[q*3+0];
+               for (i = 0; i < x; ++i, dest16 += 4, cur += 6) {
+                  dest16[0] = (cur[0] << 8) | cur[1];
+                  dest16[1] = (cur[2] << 8) | cur[3];
+                  dest16[2] = (cur[4] << 8) | cur[5];
+                  dest16[3] = 0xffff;
                }
             }
          }
       }
-   } else if (depth == 16) {
-      // force the image data from big-endian to platform-native.
-      // this is done in a separate pass due to the decoding relying
-      // on the data being untouched, but could probably be done
-      // per-line during decode if care is taken.
-      stbi_uc *cur = a->out;
-      stbi__uint16 *cur16 = (stbi__uint16*)cur;
-
-      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
-         *cur16 = (cur[0] << 8) | cur[1];
-      }
    }
 
+   STBI_FREE(filter_buf);
+   if (!all_ok) return 0;
+
    return 1;
 }
 
 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
 {
+   int bytes = (depth == 16 ? 2 : 1);
+   int out_bytes = out_n * bytes;
    stbi_uc *final;
    int p;
    if (!interlaced)
       return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
 
    // de-interlacing
-   final = (stbi_uc *) stbi__malloc(a->s->img_x * a->s->img_y * out_n);
+   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
+   if (!final) return stbi__err("outofmem", "Out of memory");
    for (p=0; p < 7; ++p) {
       int xorig[] = { 0,4,0,2,0,1,0 };
       int yorig[] = { 0,0,4,0,2,0,1 };
@@ -4240,8 +4889,8 @@ static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint3
             for (i=0; i < x; ++i) {
                int out_y = j*yspc[p]+yorig[p];
                int out_x = i*xspc[p]+xorig[p];
-               memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n,
-                      a->out + (j*x+i)*out_n, out_n);
+               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
+                      a->out + (j*x+i)*out_bytes, out_bytes);
             }
          }
          STBI_FREE(a->out);
@@ -4309,7 +4958,7 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
    stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
    stbi_uc *p, *temp_out, *orig = a->out;
 
-   p = (stbi_uc *) stbi__malloc(pixel_count * pal_img_n);
+   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
    if (p == NULL) return stbi__err("outofmem", "Out of memory");
 
    // between here and free(out) below, exitting would leak
@@ -4341,39 +4990,46 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
    return 1;
 }
 
-static int stbi__reduce_png(stbi__png *p)
-{
-   int i;
-   int img_len = p->s->img_x * p->s->img_y * p->s->img_out_n;
-   stbi_uc *reduced;
-   stbi__uint16 *orig = (stbi__uint16*)p->out;
-
-   if (p->depth != 16) return 1; // don't need to do anything if not 16-bit data
-
-   reduced = (stbi_uc *)stbi__malloc(img_len);
-   if (p == NULL) return stbi__err("outofmem", "Out of memory");
-
-   for (i = 0; i < img_len; ++i) reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is a decent approx of 16->8 bit scaling
+static int stbi__unpremultiply_on_load_global = 0;
+static int stbi__de_iphone_flag_global = 0;
 
-   p->out = reduced;
-   STBI_FREE(orig);
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
+{
+   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
+}
 
-   return 1;
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
+{
+   stbi__de_iphone_flag_global = flag_true_if_should_convert;
 }
 
-static int stbi__unpremultiply_on_load = 0;
-static int stbi__de_iphone_flag = 0;
+#ifndef STBI_THREAD_LOCAL
+#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global
+#define stbi__de_iphone_flag  stbi__de_iphone_flag_global
+#else
+static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
+static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
 
-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
+STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
 {
-   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
+   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
+   stbi__unpremultiply_on_load_set = 1;
 }
 
-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
 {
-   stbi__de_iphone_flag = flag_true_if_should_convert;
+   stbi__de_iphone_flag_local = flag_true_if_should_convert;
+   stbi__de_iphone_flag_set = 1;
 }
 
+#define stbi__unpremultiply_on_load  (stbi__unpremultiply_on_load_set           \
+                                       ? stbi__unpremultiply_on_load_local      \
+                                       : stbi__unpremultiply_on_load_global)
+#define stbi__de_iphone_flag  (stbi__de_iphone_flag_set                         \
+                                ? stbi__de_iphone_flag_local                    \
+                                : stbi__de_iphone_flag_global)
+#endif // STBI_THREAD_LOCAL
+
 static void stbi__de_iphone(stbi__png *z)
 {
    stbi__context *s = z->s;
@@ -4395,9 +5051,10 @@ static void stbi__de_iphone(stbi__png *z)
             stbi_uc a = p[3];
             stbi_uc t = p[0];
             if (a) {
-               p[0] = p[2] * 255 / a;
-               p[1] = p[1] * 255 / a;
-               p[2] =  t   * 255 / a;
+               stbi_uc half = a / 2;
+               p[0] = (p[2] * 255 + half) / a;
+               p[1] = (p[1] * 255 + half) / a;
+               p[2] = ( t   * 255 + half) / a;
             } else {
                p[0] = p[2];
                p[2] = t;
@@ -4416,12 +5073,12 @@ static void stbi__de_iphone(stbi__png *z)
    }
 }
 
-#define STBI__PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
+#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
 
 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
 {
    stbi_uc palette[1024], pal_img_n=0;
-   stbi_uc has_trans=0, tc[3];
+   stbi_uc has_trans=0, tc[3]={0};
    stbi__uint16 tc16[3];
    stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
    int first=1,k,interlace=0, color=0, is_iphone=0;
@@ -4447,11 +5104,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
             if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
             first = 0;
             if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
-            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
-            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
+            s->img_x = stbi__get32be(s);
+            s->img_y = stbi__get32be(s);
+            if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+            if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
             z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
             color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
-			if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
+            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
             if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
             comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
             filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
@@ -4460,14 +5119,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
             if (!pal_img_n) {
                s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
                if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
-               if (scan == STBI__SCAN_header) return 1;
             } else {
                // if paletted, then pal_n is our final components, and
                // img_n is # components to decompress/filter.
                s->img_n = 1;
                if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
-               // if SCAN_header, have to scan to see if we have a tRNS
             }
+            // even with SCAN_header, have to scan to see if we have a tRNS
             break;
          }
 
@@ -4499,10 +5157,14 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
                if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
                if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
                has_trans = 1;
+               // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
+               if (scan == STBI__SCAN_header) { ++s->img_n; return 1; }
                if (z->depth == 16) {
-                  for (k = 0; k < s->img_n; ++k) tc16[k] = stbi__get16be(s); // copy the values as-is
+                  for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning
+                     tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
                } else {
-                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
+                  for (k = 0; k < s->img_n && k < 3; ++k)
+                     tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
                }
             }
             break;
@@ -4511,7 +5173,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
          case STBI__PNG_TYPE('I','D','A','T'): {
             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
             if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
-            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
+            if (scan == STBI__SCAN_header) {
+               // header scan definitely stops at first IDAT
+               if (pal_img_n)
+                  s->img_n = pal_img_n;
+               return 1;
+            }
+            if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes");
             if ((int)(ioff + c.length) < (int)ioff) return 0;
             if (ioff + c.length > idata_limit) {
                stbi__uint32 idata_limit_old = idata_limit;
@@ -4560,8 +5228,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
                if (req_comp >= 3) s->img_out_n = req_comp;
                if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                   return 0;
+            } else if (has_trans) {
+               // non-paletted image with tRNS -> source image has (constant) alpha
+               ++s->img_n;
             }
             STBI_FREE(z->expanded); z->expanded = NULL;
+            // end of PNG chunk, read and skip CRC
+            stbi__get32be(s);
             return 1;
          }
 
@@ -4587,20 +5260,24 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
    }
 }
 
-static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp)
+static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
 {
-   unsigned char *result=NULL;
+   void *result=NULL;
    if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
    if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
-      if (p->depth == 16) {
-         if (!stbi__reduce_png(p)) {
-            return result;
-         }
-      }
+      if (p->depth <= 8)
+         ri->bits_per_channel = 8;
+      else if (p->depth == 16)
+         ri->bits_per_channel = 16;
+      else
+         return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
       result = p->out;
       p->out = NULL;
       if (req_comp && req_comp != p->s->img_out_n) {
-         result = stbi__convert_format(result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+         if (ri->bits_per_channel == 8)
+            result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+         else
+            result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
          p->s->img_out_n = req_comp;
          if (result == NULL) return result;
       }
@@ -4615,11 +5292,11 @@ static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req
    return result;
 }
 
-static unsigned char *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi__png p;
    p.s = s;
-   return stbi__do_png(&p, x,y,comp,req_comp);
+   return stbi__do_png(&p, x,y,comp,req_comp, ri);
 }
 
 static int stbi__png_test(stbi__context *s)
@@ -4642,11 +5319,24 @@ static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
    return 1;
 }
 
-static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
+static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   stbi__png p;
+   p.s = s;
+   return stbi__png_info_raw(&p, x, y, comp);
+}
+
+static int stbi__png_is16(stbi__context *s)
 {
    stbi__png p;
    p.s = s;
-   return stbi__png_info_raw(&p, x, y, comp);
+   if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
+	   return 0;
+   if (p.depth != 16) {
+      stbi__rewind(p.s);
+      return 0;
+   }
+   return 1;
 }
 #endif
 
@@ -4681,11 +5371,11 @@ static int stbi__high_bit(unsigned int z)
 {
    int n=0;
    if (z == 0) return -1;
-   if (z >= 0x10000) n += 16, z >>= 16;
-   if (z >= 0x00100) n +=  8, z >>=  8;
-   if (z >= 0x00010) n +=  4, z >>=  4;
-   if (z >= 0x00004) n +=  2, z >>=  2;
-   if (z >= 0x00002) n +=  1, z >>=  1;
+   if (z >= 0x10000) { n += 16; z >>= 16; }
+   if (z >= 0x00100) { n +=  8; z >>=  8; }
+   if (z >= 0x00010) { n +=  4; z >>=  4; }
+   if (z >= 0x00004) { n +=  2; z >>=  2; }
+   if (z >= 0x00002) { n +=  1;/* >>=  1;*/ }
    return n;
 }
 
@@ -4699,29 +5389,62 @@ static int stbi__bitcount(unsigned int a)
    return a & 0xff;
 }
 
-static int stbi__shiftsigned(int v, int shift, int bits)
-{
-   int result;
-   int z=0;
-
-   if (shift < 0) v <<= -shift;
-   else v >>= shift;
-   result = v;
-
-   z = bits;
-   while (z < 8) {
-      result += v >> z;
-      z += bits;
-   }
-   return result;
+// extract an arbitrarily-aligned N-bit value (N=bits)
+// from v, and then make it 8-bits long and fractionally
+// extend it to the full range.
+static int stbi__shiftsigned(unsigned int v, int shift, int bits)
+{
+   static unsigned int mul_table[9] = {
+      0,
+      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
+      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
+   };
+   static unsigned int shift_table[9] = {
+      0, 0,0,1,0,2,4,6,0,
+   };
+   if (shift < 0)
+      v <<= -shift;
+   else
+      v >>= shift;
+   STBI_ASSERT(v < 256);
+   v >>= (8-bits);
+   STBI_ASSERT(bits >= 0 && bits <= 8);
+   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
 }
 
 typedef struct
 {
    int bpp, offset, hsz;
    unsigned int mr,mg,mb,ma, all_a;
+   int extra_read;
 } stbi__bmp_data;
 
+static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
+{
+   // BI_BITFIELDS specifies masks explicitly, don't override
+   if (compress == 3)
+      return 1;
+
+   if (compress == 0) {
+      if (info->bpp == 16) {
+         info->mr = 31u << 10;
+         info->mg = 31u <<  5;
+         info->mb = 31u <<  0;
+      } else if (info->bpp == 32) {
+         info->mr = 0xffu << 16;
+         info->mg = 0xffu <<  8;
+         info->mb = 0xffu <<  0;
+         info->ma = 0xffu << 24;
+         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
+      } else {
+         // otherwise, use defaults, which are all-0
+         info->mr = info->mg = info->mb = info->ma = 0;
+      }
+      return 1;
+   }
+   return 0; // error
+}
+
 static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
 {
    int hsz;
@@ -4732,7 +5455,10 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
    info->offset = stbi__get32le(s);
    info->hsz = hsz = stbi__get32le(s);
    info->mr = info->mg = info->mb = info->ma = 0;
-   
+   info->extra_read = 14;
+
+   if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP");
+
    if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
    if (hsz == 12) {
       s->img_x = stbi__get16le(s);
@@ -4743,10 +5469,11 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
    }
    if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
    info->bpp = stbi__get16le(s);
-   if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
    if (hsz != 12) {
       int compress = stbi__get32le(s);
       if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+      if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
+      if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
       stbi__get32le(s); // discard sizeof
       stbi__get32le(s); // discard hres
       stbi__get32le(s); // discard vres
@@ -4761,21 +5488,12 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
          }
          if (info->bpp == 16 || info->bpp == 32) {
             if (compress == 0) {
-               if (info->bpp == 32) {
-                  info->mr = 0xffu << 16;
-                  info->mg = 0xffu <<  8;
-                  info->mb = 0xffu <<  0;
-                  info->ma = 0xffu << 24;
-                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
-               } else {
-                  info->mr = 31u << 10;
-                  info->mg = 31u <<  5;
-                  info->mb = 31u <<  0;
-               }
+               stbi__bmp_set_mask_defaults(info, compress);
             } else if (compress == 3) {
                info->mr = stbi__get32le(s);
                info->mg = stbi__get32le(s);
                info->mb = stbi__get32le(s);
+               info->extra_read += 12;
                // not documented, but generated by photoshop and handled by mspaint
                if (info->mr == info->mg && info->mg == info->mb) {
                   // ?!?!?
@@ -4785,6 +5503,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
                return stbi__errpuc("bad BMP", "bad BMP");
          }
       } else {
+         // V4/V5 header
          int i;
          if (hsz != 108 && hsz != 124)
             return stbi__errpuc("bad BMP", "bad BMP");
@@ -4792,6 +5511,8 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
          info->mg = stbi__get32le(s);
          info->mb = stbi__get32le(s);
          info->ma = stbi__get32le(s);
+         if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
+            stbi__bmp_set_mask_defaults(info, compress);
          stbi__get32le(s); // discard color space
          for (i=0; i < 12; ++i)
             stbi__get32le(s); // discard color space parameters
@@ -4807,7 +5528,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
 }
 
 
-static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi_uc *out;
    unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
@@ -4815,14 +5536,18 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
    int psize=0,i,j,width;
    int flip_vertically, pad, target;
    stbi__bmp_data info;
+   STBI_NOTUSED(ri);
 
-   info.all_a = 255;   
+   info.all_a = 255;
    if (stbi__bmp_parse_header(s, &info) == NULL)
       return NULL; // error code already set
 
    flip_vertically = ((int) s->img_y) > 0;
    s->img_y = abs((int) s->img_y);
 
+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
    mr = info.mr;
    mg = info.mg;
    mb = info.mb;
@@ -4831,19 +5556,45 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
 
    if (info.hsz == 12) {
       if (info.bpp < 24)
-         psize = (info.offset - 14 - 24) / 3;
+         psize = (info.offset - info.extra_read - 24) / 3;
    } else {
       if (info.bpp < 16)
-         psize = (info.offset - 14 - info.hsz) >> 2;
+         psize = (info.offset - info.extra_read - info.hsz) >> 2;
+   }
+   if (psize == 0) {
+      // accept some number of extra bytes after the header, but if the offset points either to before
+      // the header ends or implies a large amount of extra data, reject the file as malformed
+      int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original);
+      int header_limit = 1024; // max we actually read is below 256 bytes currently.
+      int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size.
+      if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) {
+         return stbi__errpuc("bad header", "Corrupt BMP");
+      }
+      // we established that bytes_read_so_far is positive and sensible.
+      // the first half of this test rejects offsets that are either too small positives, or
+      // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn
+      // ensures the number computed in the second half of the test can't overflow.
+      if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) {
+         return stbi__errpuc("bad offset", "Corrupt BMP");
+      } else {
+         stbi__skip(s, info.offset - bytes_read_so_far);
+      }
    }
 
-   s->img_n = ma ? 4 : 3;
+   if (info.bpp == 24 && ma == 0xff000000)
+      s->img_n = 3;
+   else
+      s->img_n = ma ? 4 : 3;
    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
       target = req_comp;
    else
       target = s->img_n; // if they want monochrome, we'll post-convert
 
-   out = (stbi_uc *) stbi__malloc(target * s->img_x * s->img_y);
+   // sanity-check size
+   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
+      return stbi__errpuc("too large", "Corrupt BMP");
+
+   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
    if (info.bpp < 16) {
       int z=0;
@@ -4855,36 +5606,56 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
          if (info.hsz != 12) stbi__get8(s);
          pal[i][3] = 255;
       }
-      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
-      if (info.bpp == 4) width = (s->img_x + 1) >> 1;
+      stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
+      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
+      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
       else if (info.bpp == 8) width = s->img_x;
       else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
       pad = (-width)&3;
-      for (j=0; j < (int) s->img_y; ++j) {
-         for (i=0; i < (int) s->img_x; i += 2) {
-            int v=stbi__get8(s),v2=0;
-            if (info.bpp == 4) {
-               v2 = v & 15;
-               v >>= 4;
+      if (info.bpp == 1) {
+         for (j=0; j < (int) s->img_y; ++j) {
+            int bit_offset = 7, v = stbi__get8(s);
+            for (i=0; i < (int) s->img_x; ++i) {
+               int color = (v>>bit_offset)&0x1;
+               out[z++] = pal[color][0];
+               out[z++] = pal[color][1];
+               out[z++] = pal[color][2];
+               if (target == 4) out[z++] = 255;
+               if (i+1 == (int) s->img_x) break;
+               if((--bit_offset) < 0) {
+                  bit_offset = 7;
+                  v = stbi__get8(s);
+               }
             }
-            out[z++] = pal[v][0];
-            out[z++] = pal[v][1];
-            out[z++] = pal[v][2];
-            if (target == 4) out[z++] = 255;
-            if (i+1 == (int) s->img_x) break;
-            v = (info.bpp == 8) ? stbi__get8(s) : v2;
-            out[z++] = pal[v][0];
-            out[z++] = pal[v][1];
-            out[z++] = pal[v][2];
-            if (target == 4) out[z++] = 255;
+            stbi__skip(s, pad);
+         }
+      } else {
+         for (j=0; j < (int) s->img_y; ++j) {
+            for (i=0; i < (int) s->img_x; i += 2) {
+               int v=stbi__get8(s),v2=0;
+               if (info.bpp == 4) {
+                  v2 = v & 15;
+                  v >>= 4;
+               }
+               out[z++] = pal[v][0];
+               out[z++] = pal[v][1];
+               out[z++] = pal[v][2];
+               if (target == 4) out[z++] = 255;
+               if (i+1 == (int) s->img_x) break;
+               v = (info.bpp == 8) ? stbi__get8(s) : v2;
+               out[z++] = pal[v][0];
+               out[z++] = pal[v][1];
+               out[z++] = pal[v][2];
+               if (target == 4) out[z++] = 255;
+            }
+            stbi__skip(s, pad);
          }
-         stbi__skip(s, pad);
       }
    } else {
       int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
       int z = 0;
       int easy=0;
-      stbi__skip(s, info.offset - 14 - info.hsz);
+      stbi__skip(s, info.offset - info.extra_read - info.hsz);
       if (info.bpp == 24) width = 3 * s->img_x;
       else if (info.bpp == 16) width = 2*s->img_x;
       else /* bpp = 32 and pad = 0 */ width=0;
@@ -4902,6 +5673,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
          gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
          bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
          ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
+         if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
       }
       for (j=0; j < (int) s->img_y; ++j) {
          if (easy) {
@@ -4919,7 +5691,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
             int bpp = info.bpp;
             for (i=0; i < (int) s->img_x; ++i) {
                stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
-               int a;
+               unsigned int a;
                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
@@ -4931,7 +5703,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
          stbi__skip(s, pad);
       }
    }
-   
+
    // if alpha channel is all 0s, replace with all 255s
    if (target == 4 && all_a == 0)
       for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
@@ -4943,7 +5715,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
          stbi_uc *p1 = out +      j     *s->img_x*target;
          stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
          for (i=0; i < (int) s->img_x*target; ++i) {
-            t = p1[i], p1[i] = p2[i], p2[i] = t;
+            t = p1[i]; p1[i] = p2[i]; p2[i] = t;
          }
       }
    }
@@ -4967,14 +5739,14 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
 {
    // only RGB or RGBA (incl. 16bit) or grey allowed
-   if(is_rgb16) *is_rgb16 = 0;
+   if (is_rgb16) *is_rgb16 = 0;
    switch(bits_per_pixel) {
       case 8:  return STBI_grey;
       case 16: if(is_grey) return STBI_grey_alpha;
-            // else: fall-through
+               // fallthrough
       case 15: if(is_rgb16) *is_rgb16 = 1;
-            return STBI_rgb;
-      case 24: // fall-through
+               return STBI_rgb;
+      case 24: // fallthrough
       case 32: return bits_per_pixel/8;
       default: return 0;
    }
@@ -5077,18 +5849,18 @@ static int stbi__tga_test(stbi__context *s)
 }
 
 // read 16bit value and convert to 24bit RGB
-void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
 {
-   stbi__uint16 px = stbi__get16le(s);
+   stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
    stbi__uint16 fiveBitMask = 31;
    // we have 3 channels with 5bits each
    int r = (px >> 10) & fiveBitMask;
    int g = (px >> 5) & fiveBitMask;
    int b = px & fiveBitMask;
    // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
-   out[0] = (r * 255)/31;
-   out[1] = (g * 255)/31;
-   out[2] = (b * 255)/31;
+   out[0] = (stbi_uc)((r * 255)/31);
+   out[1] = (stbi_uc)((g * 255)/31);
+   out[2] = (stbi_uc)((b * 255)/31);
 
    // some people claim that the most significant bit might be used for alpha
    // (possibly if an alpha-bit is set in the "image descriptor byte")
@@ -5096,7 +5868,7 @@ void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
    // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
 }
 
-static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    //   read in the TGA header stuff
    int tga_offset = stbi__get8(s);
@@ -5118,10 +5890,16 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
    unsigned char *tga_data;
    unsigned char *tga_palette = NULL;
    int i, j;
-   unsigned char raw_data[4];
+   unsigned char raw_data[4] = {0};
    int RLE_count = 0;
    int RLE_repeating = 0;
    int read_next_pixel = 1;
+   STBI_NOTUSED(ri);
+   STBI_NOTUSED(tga_x_origin); // @TODO
+   STBI_NOTUSED(tga_y_origin); // @TODO
+
+   if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
 
    //   do a tiny bit of precessing
    if ( tga_image_type >= 8 )
@@ -5143,7 +5921,10 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
    *y = tga_height;
    if (comp) *comp = tga_comp;
 
-   tga_data = (unsigned char*)stbi__malloc( (size_t)tga_width * tga_height * tga_comp );
+   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
+      return stbi__errpuc("too large", "Corrupt TGA");
+
+   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
    if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
 
    // skip to the data's starting position (offset usually = 0)
@@ -5159,10 +5940,15 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
       //   do I need to load a palette?
       if ( tga_indexed)
       {
+         if (tga_palette_len == 0) {  /* you have to have at least one entry! */
+            STBI_FREE(tga_data);
+            return stbi__errpuc("bad palette", "Corrupt TGA");
+         }
+
          //   any data to skip? (offset usually = 0)
          stbi__skip(s, tga_palette_start );
          //   load the palette
-         tga_palette = (unsigned char*)stbi__malloc( tga_palette_len * tga_comp );
+         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
          if (!tga_palette) {
             STBI_FREE(tga_data);
             return stbi__errpuc("outofmem", "Out of memory");
@@ -5282,6 +6068,7 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
    //   Microsoft's C compilers happy... [8^(
    tga_palette_start = tga_palette_len = tga_palette_bits =
          tga_x_origin = tga_y_origin = 0;
+   STBI_NOTUSED(tga_palette_start);
    //   OK, done
    return tga_data;
 }
@@ -5298,14 +6085,53 @@ static int stbi__psd_test(stbi__context *s)
    return r;
 }
 
-static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) // RLE-decode pixelCount values from s into p, writing every 4th byte; returns 1 on success, 0 on corrupt data
+{
+   int count, nleft, len;
+
+   count = 0; // values written to the destination so far
+   while ((nleft = pixelCount - count) > 0) { // nleft = values still missing from this channel
+      len = stbi__get8(s); // run header byte; selects one of the three cases below
+      if (len == 128) {
+         // Header value 128 is a no-op: nothing is written and count is unchanged.
+      } else if (len < 128) {
+         // Literal run: copy the next len+1 source bytes to the destination.
+         len++;
+         if (len > nleft) return 0; // corrupt data: run would write past pixelCount values
+         count += len;
+         while (len) {
+            *p = stbi__get8(s);
+            p += 4; // advance by 4 bytes per value written (stride of the destination)
+            len--;
+         }
+      } else if (len > 128) {
+         stbi_uc   val;
+         // Replicated run: the next source byte is repeated 257-len times in the destination.
+         // (257-len equals -len+1 when len is interpreted as a negative 8-bit int.)
+         len = 257 - len;
+         if (len > nleft) return 0; // corrupt data: run would write past pixelCount values
+         val = stbi__get8(s);
+         count += len;
+         while (len) {
+            *p = val;
+            p += 4; // same 4-byte destination stride as the literal case
+            len--;
+         }
+      }
+   }
+
+   return 1; // exactly pixelCount values were produced
+}
+
+static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
 {
-   int   pixelCount;
+   int pixelCount;
    int channelCount, compression;
-   int channel, i, count, len;
+   int channel, i;
    int bitdepth;
    int w,h;
    stbi_uc *out;
+   STBI_NOTUSED(ri);
 
    // Check identifier
    if (stbi__get32be(s) != 0x38425053)   // "8BPS"
@@ -5327,6 +6153,9 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
    h = stbi__get32be(s);
    w = stbi__get32be(s);
 
+   if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
    // Make sure the depth is 8 bits.
    bitdepth = stbi__get16be(s);
    if (bitdepth != 8 && bitdepth != 16)
@@ -5362,8 +6191,18 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
    if (compression > 1)
       return stbi__errpuc("bad compression", "PSD has an unknown compression format");
 
+   // Check size
+   if (!stbi__mad3sizes_valid(4, w, h, 0))
+      return stbi__errpuc("too large", "Corrupt PSD");
+
    // Create the destination image.
-   out = (stbi_uc *) stbi__malloc(4 * w*h);
+
+   if (!compression && bitdepth == 16 && bpc == 16) {
+      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
+      ri->bits_per_channel = 16;
+   } else
+      out = (stbi_uc *) stbi__malloc(4 * w*h);
+
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
    pixelCount = w*h;
 
@@ -5380,7 +6219,7 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
       //     Else if n is 128, noop.
       // Endloop
 
-      // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
+      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
       // which we're going to just skip.
       stbi__skip(s, h * channelCount * 2 );
 
@@ -5395,82 +6234,86 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
                *p = (channel == 3 ? 255 : 0);
          } else {
             // Read the RLE data.
-            count = 0;
-            while (count < pixelCount) {
-               len = stbi__get8(s);
-               if (len == 128) {
-                  // No-op.
-               } else if (len < 128) {
-                  // Copy next len+1 bytes literally.
-                  len++;
-                  count += len;
-                  while (len) {
-                     *p = stbi__get8(s);
-                     p += 4;
-                     len--;
-                  }
-               } else if (len > 128) {
-                  stbi_uc   val;
-                  // Next -len+1 bytes in the dest are replicated from next source byte.
-                  // (Interpret len as a negative 8-bit int.)
-                  len ^= 0x0FF;
-                  len += 2;
-                  val = stbi__get8(s);
-                  count += len;
-                  while (len) {
-                     *p = val;
-                     p += 4;
-                     len--;
-                  }
-               }
+            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
+               STBI_FREE(out);
+               return stbi__errpuc("corrupt", "bad RLE data");
             }
          }
       }
 
    } else {
       // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
-      // where each channel consists of an 8-bit value for each pixel in the image.
+      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
 
       // Read the data by channel.
       for (channel = 0; channel < 4; channel++) {
-         stbi_uc *p;
-
-         p = out + channel;
          if (channel >= channelCount) {
             // Fill this channel with default data.
-            stbi_uc val = channel == 3 ? 255 : 0;
-            for (i = 0; i < pixelCount; i++, p += 4)
-               *p = val;
-         } else {
-            // Read the data.
-            if (bitdepth == 16) {
-               for (i = 0; i < pixelCount; i++, p += 4)
-                  *p = (stbi_uc) (stbi__get16be(s) >> 8);
+            if (bitdepth == 16 && bpc == 16) {
+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+               stbi__uint16 val = channel == 3 ? 65535 : 0;
+               for (i = 0; i < pixelCount; i++, q += 4)
+                  *q = val;
             } else {
+               stbi_uc *p = out+channel;
+               stbi_uc val = channel == 3 ? 255 : 0;
                for (i = 0; i < pixelCount; i++, p += 4)
-                  *p = stbi__get8(s);
+                  *p = val;
+            }
+         } else {
+            if (ri->bits_per_channel == 16) {    // output bpc
+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+               for (i = 0; i < pixelCount; i++, q += 4)
+                  *q = (stbi__uint16) stbi__get16be(s);
+            } else {
+               stbi_uc *p = out+channel;
+               if (bitdepth == 16) {  // input bpc
+                  for (i = 0; i < pixelCount; i++, p += 4)
+                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
+               } else {
+                  for (i = 0; i < pixelCount; i++, p += 4)
+                     *p = stbi__get8(s);
+               }
             }
          }
       }
    }
 
+   // remove weird white matte from PSD
    if (channelCount >= 4) {
-      for (i=0; i < w*h; ++i) {
-         unsigned char *pixel = out + 4*i;
-         if (pixel[3] != 0 && pixel[3] != 255) {
-            // remove weird white matte from PSD
-            float a = pixel[3] / 255.0f;
-            float ra = 1.0f / a;
-            float inv_a = 255.0f * (1 - ra);
-            pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
-            pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
-            pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
+      if (ri->bits_per_channel == 16) {
+         for (i=0; i < w*h; ++i) {
+            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
+            if (pixel[3] != 0 && pixel[3] != 65535) {
+               float a = pixel[3] / 65535.0f;
+               float ra = 1.0f / a;
+               float inv_a = 65535.0f * (1 - ra);
+               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
+               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
+               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
+            }
+         }
+      } else {
+         for (i=0; i < w*h; ++i) {
+            unsigned char *pixel = out + 4*i;
+            if (pixel[3] != 0 && pixel[3] != 255) {
+               float a = pixel[3] / 255.0f;
+               float ra = 1.0f / a;
+               float inv_a = 255.0f * (1 - ra);
+               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
+               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
+               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
+            }
          }
       }
    }
 
+   // convert to desired output format
    if (req_comp && req_comp != 4) {
-      out = stbi__convert_format(out, 4, req_comp, w, h);
+      if (ri->bits_per_channel == 16)
+         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
+      else
+         out = stbi__convert_format(out, 4, req_comp, w, h);
       if (out == NULL) return out; // stbi__convert_format frees input on failure
    }
 
@@ -5654,25 +6497,33 @@ static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *c
    return result;
 }
 
-static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp)
+static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
 {
    stbi_uc *result;
-   int i, x,y;
+   int i, x,y, internal_comp;
+   STBI_NOTUSED(ri);
+
+   if (!comp) comp = &internal_comp;
 
    for (i=0; i<92; ++i)
       stbi__get8(s);
 
    x = stbi__get16be(s);
    y = stbi__get16be(s);
+
+   if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
    if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
-   if ((1 << 28) / x < y) return stbi__errpuc("too large", "Image too large to decode");
+   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
 
    stbi__get32be(s); //skip `ratio'
    stbi__get16be(s); //skip `fields'
    stbi__get16be(s); //skip `pad'
 
    // intermediate buffer is RGBA
-   result = (stbi_uc *) stbi__malloc(x*y*4);
+   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
+   if (!result) return stbi__errpuc("outofmem", "Out of memory");
    memset(result, 0xff, x*y*4);
 
    if (!stbi__pic_load_core(s,x,y,comp, result)) {
@@ -5709,11 +6560,13 @@ typedef struct
 typedef struct
 {
    int w,h;
-   stbi_uc *out, *old_out;             // output buffer (always 4 components)
-   int flags, bgindex, ratio, transparent, eflags, delay;
+   stbi_uc *out;                 // output buffer (always 4 components)
+   stbi_uc *background;          // The current "background" as far as a gif is concerned
+   stbi_uc *history;
+   int flags, bgindex, ratio, transparent, eflags;
    stbi_uc  pal[256][4];
    stbi_uc lpal[256][4];
-   stbi__gif_lzw codes[4096];
+   stbi__gif_lzw codes[8192];
    stbi_uc *color_table;
    int parse, step;
    int lflags;
@@ -5721,6 +6574,7 @@ typedef struct
    int max_x, max_y;
    int cur_x, cur_y;
    int line_size;
+   int delay;
 } stbi__gif;
 
 static int stbi__gif_test_raw(stbi__context *s)
@@ -5769,6 +6623,9 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in
    g->ratio = stbi__get8(s);
    g->transparent = -1;
 
+   if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+   if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+
    if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
 
    if (is_info) return 1;
@@ -5782,6 +6639,7 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in
 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
 {
    stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
+   if (!g) return stbi__err("outofmem", "Out of memory");
    if (!stbi__gif_header(s, g, comp, 1)) {
       STBI_FREE(g);
       stbi__rewind( s );
@@ -5796,6 +6654,7 @@ static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
 {
    stbi_uc *p, *c;
+   int idx;
 
    // recurse to decode the prefixes, since the linked-list is backwards,
    // and working backwards through an interleaved image would be nasty
@@ -5804,10 +6663,12 @@ static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
 
    if (g->cur_y >= g->max_y) return;
 
-   p = &g->out[g->cur_x + g->cur_y];
-   c = &g->color_table[g->codes[code].suffix * 4];
+   idx = g->cur_x + g->cur_y;
+   p = &g->out[idx];
+   g->history[idx / 4] = 1;
 
-   if (c[3] >= 128) {
+   c = &g->color_table[g->codes[code].suffix * 4];
+   if (c[3] > 128) { // don't render transparent pixels;
       p[0] = c[2];
       p[1] = c[1];
       p[2] = c[0];
@@ -5881,11 +6742,16 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
                stbi__skip(s,len);
             return g->out;
          } else if (code <= avail) {
-            if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
+            if (first) {
+               return stbi__errpuc("no clear code", "Corrupt GIF");
+            }
 
             if (oldcode >= 0) {
                p = &g->codes[avail++];
-               if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
+               if (avail > 8192) {
+                  return stbi__errpuc("too many codes", "Corrupt GIF");
+               }
+
                p->prefix = (stbi__int16) oldcode;
                p->first = g->codes[oldcode].first;
                p->suffix = (code == avail) ? p->first : g->codes[code].first;
@@ -5907,59 +6773,77 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
    }
 }
 
-static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
-{
-   int x, y;
-   stbi_uc *c = g->pal[g->bgindex];
-   for (y = y0; y < y1; y += 4 * g->w) {
-      for (x = x0; x < x1; x += 4) {
-         stbi_uc *p  = &g->out[y + x];
-         p[0] = c[2];
-         p[1] = c[1];
-         p[2] = c[0];
-         p[3] = 0;
-      }
-   }
-}
-
 // this function is designed to support animated gifs, although stb_image doesn't support it
-static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
+// two back is the image from two frames ago, used for a very specific disposal format
+static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
 {
-   int i;
-   stbi_uc *prev_out = 0;
+   int dispose;
+   int first_frame;
+   int pi;
+   int pcount;
+   STBI_NOTUSED(req_comp);
 
-   if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
-      return 0; // stbi__g_failure_reason set by stbi__gif_header
+   // on first frame, any non-written pixels get the background colour (non-transparent)
+   first_frame = 0;
+   if (g->out == 0) {
+      if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
+      if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
+         return stbi__errpuc("too large", "GIF image is too large");
+      pcount = g->w * g->h;
+      g->out = (stbi_uc *) stbi__malloc(4 * pcount);
+      g->background = (stbi_uc *) stbi__malloc(4 * pcount);
+      g->history = (stbi_uc *) stbi__malloc(pcount);
+      if (!g->out || !g->background || !g->history)
+         return stbi__errpuc("outofmem", "Out of memory");
+
+      // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
+      // background colour is only used for pixels that are not rendered first frame, after that "background"
+      // color refers to the color that was there the previous frame.
+      memset(g->out, 0x00, 4 * pcount);
+      memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
+      memset(g->history, 0x00, pcount);        // pixels that were affected previous frame
+      first_frame = 1;
+   } else {
+      // second frame - how do we dispose of the previous one?
+      dispose = (g->eflags & 0x1C) >> 2;
+      pcount = g->w * g->h;
 
-   prev_out = g->out;
-   g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
-   if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
+      if ((dispose == 3) && (two_back == 0)) {
+         dispose = 2; // if I don't have an image to revert back to, default to the old background
+      }
 
-   switch ((g->eflags & 0x1C) >> 2) {
-      case 0: // unspecified (also always used on 1st frame)
-         stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
-         break;
-      case 1: // do not dispose
-         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
-         g->old_out = prev_out;
-         break;
-      case 2: // dispose to background
-         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
-         stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
-         break;
-      case 3: // dispose to previous
-         if (g->old_out) {
-            for (i = g->start_y; i < g->max_y; i += 4 * g->w)
-               memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
+      if (dispose == 3) { // use previous graphic
+         for (pi = 0; pi < pcount; ++pi) {
+            if (g->history[pi]) {
+               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
+            }
          }
-         break;
+      } else if (dispose == 2) {
+         // restore what was changed last frame to background before that frame;
+         for (pi = 0; pi < pcount; ++pi) {
+            if (g->history[pi]) {
+               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
+            }
+         }
+      } else {
+         // This is a non-disposal case either way, so just
+         // leave the pixels as is, and they will become the new background
+         // 1: do not dispose
+         // 0:  not specified.
+      }
+
+      // background is what out is after the undoing of the previous frame;
+      memcpy( g->background, g->out, 4 * g->w * g->h );
    }
 
+   // clear my history;
+   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
+
    for (;;) {
-      switch (stbi__get8(s)) {
+      int tag = stbi__get8(s);
+      switch (tag) {
          case 0x2C: /* Image Descriptor */
          {
-            int prev_trans = -1;
             stbi__int32 x, y, w, h;
             stbi_uc *o;
 
@@ -5978,6 +6862,13 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
             g->cur_x   = g->start_x;
             g->cur_y   = g->start_y;
 
+            // if the width of the specified rectangle is 0, that means
+            // we may not see *any* pixels or the image is malformed;
+            // to make sure this is caught, move the current y down to
+            // max_y (which is what out_gif_code checks).
+            if (w == 0)
+               g->cur_y = g->max_y;
+
             g->lflags = stbi__get8(s);
 
             if (g->lflags & 0x40) {
@@ -5992,19 +6883,24 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
                stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
                g->color_table = (stbi_uc *) g->lpal;
             } else if (g->flags & 0x80) {
-               if (g->transparent >= 0 && (g->eflags & 0x01)) {
-                  prev_trans = g->pal[g->transparent][3];
-                  g->pal[g->transparent][3] = 0;
-               }
                g->color_table = (stbi_uc *) g->pal;
             } else
                return stbi__errpuc("missing color table", "Corrupt GIF");
 
             o = stbi__process_gif_raster(s, g);
-            if (o == NULL) return NULL;
-
-            if (prev_trans != -1)
-               g->pal[g->transparent][3] = (stbi_uc) prev_trans;
+            if (!o) return NULL;
+
+            // if this was the first frame,
+            pcount = g->w * g->h;
+            if (first_frame && (g->bgindex > 0)) {
+               // if first frame, any pixel not drawn to gets the background color
+               for (pi = 0; pi < pcount; ++pi) {
+                  if (g->history[pi] == 0) {
+                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
+                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
+                  }
+               }
+            }
 
             return o;
          }
@@ -6012,19 +6908,35 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
          case 0x21: // Comment Extension.
          {
             int len;
-            if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
+            int ext = stbi__get8(s);
+            if (ext == 0xF9) { // Graphic Control Extension.
                len = stbi__get8(s);
                if (len == 4) {
                   g->eflags = stbi__get8(s);
-                  g->delay = stbi__get16le(s);
-                  g->transparent = stbi__get8(s);
+                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
+
+                  // unset old transparent
+                  if (g->transparent >= 0) {
+                     g->pal[g->transparent][3] = 255;
+                  }
+                  if (g->eflags & 0x01) {
+                     g->transparent = stbi__get8(s);
+                     if (g->transparent >= 0) {
+                        g->pal[g->transparent][3] = 0;
+                     }
+                  } else {
+                     // don't need transparent
+                     stbi__skip(s, 1);
+                     g->transparent = -1;
+                  }
                } else {
                   stbi__skip(s, len);
                   break;
                }
             }
-            while ((len = stbi__get8(s)) != 0)
+            while ((len = stbi__get8(s)) != 0) {
                stbi__skip(s, len);
+            }
             break;
          }
 
@@ -6035,27 +6947,130 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
             return stbi__errpuc("unknown code", "Corrupt GIF");
       }
    }
+}
 
-   STBI_NOTUSED(req_comp);
+static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
+{
+   STBI_FREE(g->out);
+   STBI_FREE(g->history);
+   STBI_FREE(g->background);
+
+   if (out) STBI_FREE(out);
+   if (delays && *delays) STBI_FREE(*delays);
+   return stbi__errpuc("outofmem", "Out of memory");
 }
 
-static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
+{
+   if (stbi__gif_test(s)) {
+      int layers = 0;
+      stbi_uc *u = 0;
+      stbi_uc *out = 0;
+      stbi_uc *two_back = 0;
+      stbi__gif g;
+      int stride;
+      int out_size = 0;
+      int delays_size = 0;
+
+      STBI_NOTUSED(out_size);
+      STBI_NOTUSED(delays_size);
+
+      memset(&g, 0, sizeof(g));
+      if (delays) {
+         *delays = 0;
+      }
+
+      do {
+         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
+         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
+
+         if (u) {
+            *x = g.w;
+            *y = g.h;
+            ++layers;
+            stride = g.w * g.h * 4;
+
+            if (out) {
+               void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
+               if (!tmp)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
+               else {
+                   out = (stbi_uc*) tmp;
+                   out_size = layers * stride;
+               }
+
+               if (delays) {
+                  int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
+                  if (!new_delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
+                  *delays = new_delays;
+                  delays_size = layers * sizeof(int);
+               }
+            } else {
+               out = (stbi_uc*)stbi__malloc( layers * stride );
+               if (!out)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
+               out_size = layers * stride;
+               if (delays) {
+                  *delays = (int*) stbi__malloc( layers * sizeof(int) );
+                  if (!*delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
+                  delays_size = layers * sizeof(int);
+               }
+            }
+            memcpy( out + ((layers - 1) * stride), u, stride );
+            if (layers >= 2) {
+               two_back = out - 2 * stride;
+            }
+
+            if (delays) {
+               (*delays)[layers - 1U] = g.delay;
+            }
+         }
+      } while (u != 0);
+
+      // free temp buffer;
+      STBI_FREE(g.out);
+      STBI_FREE(g.history);
+      STBI_FREE(g.background);
+
+      // do the final conversion after loading everything;
+      if (req_comp && req_comp != 4)
+         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
+
+      *z = layers;
+      return out;
+   } else {
+      return stbi__errpuc("not GIF", "Image was not as a gif type.");
+   }
+}
+
+static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi_uc *u = 0;
-   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
-   memset(g, 0, sizeof(*g));
+   stbi__gif g;
+   memset(&g, 0, sizeof(g));
+   STBI_NOTUSED(ri);
 
-   u = stbi__gif_load_next(s, g, comp, req_comp);
+   u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
    if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
    if (u) {
-      *x = g->w;
-      *y = g->h;
+      *x = g.w;
+      *y = g.h;
+
+      // moved conversion to after successful load so that the same
+      // can be done for multiple frames.
       if (req_comp && req_comp != 4)
-         u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
+         u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
+   } else if (g.out) {
+      // if there was an error and we allocated an image buffer, free it!
+      STBI_FREE(g.out);
    }
-   else if (g->out)
-      STBI_FREE(g->out);
-   STBI_FREE(g);
+
+   // free buffers needed for multiple frame loading;
+   STBI_FREE(g.history);
+   STBI_FREE(g.background);
+
    return u;
 }
 
@@ -6069,20 +7084,24 @@ static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
 // Radiance RGBE HDR loader
 // originally by Nicolas Schulz
 #ifndef STBI_NO_HDR
-static int stbi__hdr_test_core(stbi__context *s)
+static int stbi__hdr_test_core(stbi__context *s, const char *signature)
 {
-   const char *signature = "#?RADIANCE\n";
    int i;
    for (i=0; signature[i]; ++i)
       if (stbi__get8(s) != signature[i])
-         return 0;
+          return 0;
+   stbi__rewind(s);
    return 1;
 }
 
 static int stbi__hdr_test(stbi__context* s)
 {
-   int r = stbi__hdr_test_core(s);
+   int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
    stbi__rewind(s);
+   if(!r) {
+       r = stbi__hdr_test_core(s, "#?RGBE\n");
+       stbi__rewind(s);
+   }
    return r;
 }
 
@@ -6136,7 +7155,7 @@ static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
    }
 }
 
-static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    char buffer[STBI__HDR_BUFLEN];
    char *token;
@@ -6147,10 +7166,12 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
    int len;
    unsigned char count, value;
    int i, j, k, c1,c2, z;
-
+   const char *headerToken;
+   STBI_NOTUSED(ri);
 
    // Check identifier
-   if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
+   headerToken = stbi__hdr_gettoken(s,buffer);
+   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
       return stbi__errpf("not HDR", "Corrupt HDR image");
 
    // Parse header
@@ -6173,14 +7194,22 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
    token += 3;
    width = (int) strtol(token, NULL, 10);
 
+   if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+   if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+
    *x = width;
    *y = height;
 
    if (comp) *comp = 3;
    if (req_comp == 0) req_comp = 3;
 
+   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+      return stbi__errpf("too large", "HDR image is too large");
+
    // Read data
-   hdr_data = (float *) stbi__malloc(height * width * req_comp * sizeof(float));
+   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+   if (!hdr_data)
+      return stbi__errpf("outofmem", "Out of memory");
 
    // Load image data
    // image data is stored as some number of sca
@@ -6219,20 +7248,29 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
          len <<= 8;
          len |= stbi__get8(s);
          if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
-         if (scanline == NULL) scanline = (stbi_uc *) stbi__malloc(width * 4);
+         if (scanline == NULL) {
+            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
+            if (!scanline) {
+               STBI_FREE(hdr_data);
+               return stbi__errpf("outofmem", "Out of memory");
+            }
+         }
 
          for (k = 0; k < 4; ++k) {
+            int nleft;
             i = 0;
-            while (i < width) {
+            while ((nleft = width - i) > 0) {
                count = stbi__get8(s);
                if (count > 128) {
                   // Run
                   value = stbi__get8(s);
                   count -= 128;
+                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                   for (z = 0; z < count; ++z)
                      scanline[i++ * 4 + k] = value;
                } else {
                   // Dump
+                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                   for (z = 0; z < count; ++z)
                      scanline[i++ * 4 + k] = stbi__get8(s);
                }
@@ -6241,7 +7279,8 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
          for (i=0; i < width; ++i)
             stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
       }
-      STBI_FREE(scanline);
+      if (scanline)
+         STBI_FREE(scanline);
    }
 
    return hdr_data;
@@ -6252,6 +7291,11 @@ static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
    char buffer[STBI__HDR_BUFLEN];
    char *token;
    int valid = 0;
+   int dummy;
+
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
 
    if (stbi__hdr_test(s) == 0) {
        stbi__rewind( s );
@@ -6293,14 +7337,20 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
    void *p;
    stbi__bmp_data info;
 
-   info.all_a = 255;   
+   info.all_a = 255;
    p = stbi__bmp_parse_header(s, &info);
-   stbi__rewind( s );
-   if (p == NULL)
+   if (p == NULL) {
+      stbi__rewind( s );
       return 0;
-   *x = s->img_x;
-   *y = s->img_y;
-   *comp = info.ma ? 4 : 3;
+   }
+   if (x) *x = s->img_x;
+   if (y) *y = s->img_y;
+   if (comp) {
+      if (info.bpp == 24 && info.ma == 0xff000000)
+         *comp = 3;
+      else
+         *comp = info.ma ? 4 : 3;
+   }
    return 1;
 }
 #endif
@@ -6308,7 +7358,10 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
 #ifndef STBI_NO_PSD
 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
 {
-   int channelCount;
+   int channelCount, dummy, depth;
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
    if (stbi__get32be(s) != 0x38425053) {
        stbi__rewind( s );
        return 0;
@@ -6325,7 +7378,8 @@ static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
    }
    *y = stbi__get32be(s);
    *x = stbi__get32be(s);
-   if (stbi__get16be(s) != 8) {
+   depth = stbi__get16be(s);
+   if (depth != 8 && depth != 16) {
        stbi__rewind( s );
        return 0;
    }
@@ -6336,14 +7390,45 @@ static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
    *comp = 4;
    return 1;
 }
+
+static int stbi__psd_is16(stbi__context *s)
+{
+   int channelCount, depth;
+   if (stbi__get32be(s) != 0x38425053) {
+       stbi__rewind( s );
+       return 0;
+   }
+   if (stbi__get16be(s) != 1) {
+       stbi__rewind( s );
+       return 0;
+   }
+   stbi__skip(s, 6);
+   channelCount = stbi__get16be(s);
+   if (channelCount < 0 || channelCount > 16) {
+       stbi__rewind( s );
+       return 0;
+   }
+   STBI_NOTUSED(stbi__get32be(s));
+   STBI_NOTUSED(stbi__get32be(s));
+   depth = stbi__get16be(s);
+   if (depth != 16) {
+       stbi__rewind( s );
+       return 0;
+   }
+   return 1;
+}
 #endif
 
 #ifndef STBI_NO_PIC
 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
 {
-   int act_comp=0,num_packets=0,chained;
+   int act_comp=0,num_packets=0,chained,dummy;
    stbi__pic_packet packets[10];
 
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
    if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
       stbi__rewind(s);
       return 0;
@@ -6403,7 +7488,6 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
 // Known limitations:
 //    Does not support comments in the header section
 //    Does not support ASCII image data (formats P2 and P3)
-//    Does not support 16-bit-per-channel
 
 #ifndef STBI_NO_PNM
 
@@ -6419,21 +7503,38 @@ static int      stbi__pnm_test(stbi__context *s)
    return 1;
 }
 
-static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi_uc *out;
-   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
+   STBI_NOTUSED(ri);
+
+   ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
+   if (ri->bits_per_channel == 0)
       return 0;
+
+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
    *x = s->img_x;
    *y = s->img_y;
-   *comp = s->img_n;
+   if (comp) *comp = s->img_n;
+
+   if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
+      return stbi__errpuc("too large", "PNM too large");
 
-   out = (stbi_uc *) stbi__malloc(s->img_n * s->img_x * s->img_y);
+   out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
-   stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
+   if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) {
+      STBI_FREE(out);
+      return stbi__errpuc("bad PNM", "PNM file truncated");
+   }
 
    if (req_comp && req_comp != s->img_n) {
-      out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
+      if (ri->bits_per_channel == 16) {
+         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y);
+      } else {
+         out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
+      }
       if (out == NULL) return out; // stbi__convert_format frees input on failure
    }
    return out;
@@ -6470,6 +7571,8 @@ static int      stbi__pnm_getinteger(stbi__context *s, char *c)
    while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
       value = value*10 + (*c - '0');
       *c = (char) stbi__get8(s);
+      if((value > 214748364) || (value == 214748364 && *c > '7'))
+          return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
    }
 
    return value;
@@ -6477,16 +7580,20 @@ static int      stbi__pnm_getinteger(stbi__context *s, char *c)
 
 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
 {
-   int maxv;
+   int maxv, dummy;
    char c, p, t;
 
-   stbi__rewind( s );
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
+   stbi__rewind(s);
 
    // Get identifier
    p = (char) stbi__get8(s);
    t = (char) stbi__get8(s);
    if (p != 'P' || (t != '5' && t != '6')) {
-       stbi__rewind( s );
+       stbi__rewind(s);
        return 0;
    }
 
@@ -6496,17 +7603,29 @@ static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
    stbi__pnm_skip_whitespace(s, &c);
 
    *x = stbi__pnm_getinteger(s, &c); // read width
+   if(*x == 0)
+       return stbi__err("invalid width", "PPM image header had zero or overflowing width");
    stbi__pnm_skip_whitespace(s, &c);
 
    *y = stbi__pnm_getinteger(s, &c); // read height
+   if (*y == 0)
+       return stbi__err("invalid width", "PPM image header had zero or overflowing width");
    stbi__pnm_skip_whitespace(s, &c);
 
    maxv = stbi__pnm_getinteger(s, &c);  // read max value
-
-   if (maxv > 255)
-      return stbi__err("max value > 255", "PPM image not 8-bit");
+   if (maxv > 65535)
+      return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
+   else if (maxv > 255)
+      return 16;
    else
-      return 1;
+      return 8;
+}
+
+static int stbi__pnm_is16(stbi__context *s)
+{
+   if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
+	   return 1;
+   return 0;
 }
 #endif
 
@@ -6552,6 +7671,22 @@ static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
    return stbi__err("unknown image type", "Image not of any known type, or corrupt");
 }
 
+static int stbi__is_16_main(stbi__context *s)
+{
+   #ifndef STBI_NO_PNG
+   if (stbi__png_is16(s))  return 1;
+   #endif
+
+   #ifndef STBI_NO_PSD
+   if (stbi__psd_is16(s))  return 1;
+   #endif
+
+   #ifndef STBI_NO_PNM
+   if (stbi__pnm_is16(s))  return 1;
+   #endif
+   return 0;
+}
+
 #ifndef STBI_NO_STDIO
 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
 {
@@ -6573,6 +7708,27 @@ STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
    fseek(f,pos,SEEK_SET);
    return r;
 }
+
+STBIDEF int stbi_is_16_bit(char const *filename)
+{
+    FILE *f = stbi__fopen(filename, "rb");
+    int result;
+    if (!f) return stbi__err("can't fopen", "Unable to open file");
+    result = stbi_is_16_bit_from_file(f);
+    fclose(f);
+    return result;
+}
+
+STBIDEF int stbi_is_16_bit_from_file(FILE *f)
+{
+   int r;
+   stbi__context s;
+   long pos = ftell(f);
+   stbi__start_file(&s, f);
+   r = stbi__is_16_main(&s);
+   fseek(f,pos,SEEK_SET);
+   return r;
+}
 #endif // !STBI_NO_STDIO
 
 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
@@ -6589,10 +7745,44 @@ STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int
    return stbi__info_main(&s,x,y,comp);
 }
 
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__is_16_main(&s);
+}
+
+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
+   return stbi__is_16_main(&s);
+}
+
 #endif // STB_IMAGE_IMPLEMENTATION
 
 /*
    revision history:
+      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+      2.19  (2018-02-11) fix warning
+      2.18  (2018-01-30) fix warnings
+      2.17  (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
+                         1-bit BMP
+                         *_is_16_bit api
+                         avoid warnings
+      2.16  (2017-07-23) all functions have 16-bit variants;
+                         STBI_NO_STDIO works again;
+                         compilation fixes;
+                         fix rounding in unpremultiply;
+                         optimize vertical flip;
+                         disable raw_len validation;
+                         documentation fixes
+      2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
+                         warning fixes; disable run-time SSE detection on gcc;
+                         uniform handling of optional "return" values;
+                         thread-safe initialization of zlib tables
+      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+      2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
       2.11  (2016-04-02) allocate large structures on the stack
                          remove white matting for transparent PSD
@@ -6752,4 +7942,47 @@ STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int
               on 'test' only check type, not whether we support this variant
       0.50  (2006-11-19)
               first released version
-*/
\ No newline at end of file
+*/
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/zenovis/stbi/include/tinyexr.h b/zenovis/stbi/include/tinyexr.h
deleted file mode 100644
index 20adfeffbb..0000000000
--- a/zenovis/stbi/include/tinyexr.h
+++ /dev/null
@@ -1,13315 +0,0 @@
-/*
-Copyright (c) 2014 - 2019, Syoyo Fujita and many contributors.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Syoyo Fujita nor the
-      names of its contributors may be used to endorse or promote products
-      derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-// TinyEXR contains some OpenEXR code, which is licensed under ------------
-
-///////////////////////////////////////////////////////////////////////////
-//
-// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
-// Digital Ltd. LLC
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// *       Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// *       Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// *       Neither the name of Industrial Light & Magic nor the names of
-// its contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-///////////////////////////////////////////////////////////////////////////
-
-// End of OpenEXR license -------------------------------------------------
-
-#ifndef TINYEXR_H_
-#define TINYEXR_H_
-
-//
-//
-//   Do this:
-//    #define TINYEXR_IMPLEMENTATION
-//   before you include this file in *one* C or C++ file to create the
-//   implementation.
-//
-//   // i.e. it should look like this:
-//   #include ...
-//   #include ...
-//   #include ...
-//   #define TINYEXR_IMPLEMENTATION
-//   #include "tinyexr.h"
-//
-//
-
-#include <stddef.h>  // for size_t
-#include <stdint.h>  // guess stdint.h is available(C99)
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Use embedded miniz or not to decode ZIP format pixel. Linking with zlib
-// required if this flas is 0.
-#ifndef TINYEXR_USE_MINIZ
-#define TINYEXR_USE_MINIZ (1)
-#endif
-
-// Disable PIZ comporession when applying cpplint.
-#ifndef TINYEXR_USE_PIZ
-#define TINYEXR_USE_PIZ (1)
-#endif
-
-#ifndef TINYEXR_USE_ZFP
-#define TINYEXR_USE_ZFP (0)  // TinyEXR extension.
-// http://computation.llnl.gov/projects/floating-point-compression
-#endif
-
-#define TINYEXR_SUCCESS (0)
-#define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1)
-#define TINYEXR_ERROR_INVALID_EXR_VERSION (-2)
-#define TINYEXR_ERROR_INVALID_ARGUMENT (-3)
-#define TINYEXR_ERROR_INVALID_DATA (-4)
-#define TINYEXR_ERROR_INVALID_FILE (-5)
-#define TINYEXR_ERROR_INVALID_PARAMETER (-6)
-#define TINYEXR_ERROR_CANT_OPEN_FILE (-7)
-#define TINYEXR_ERROR_UNSUPPORTED_FORMAT (-8)
-#define TINYEXR_ERROR_INVALID_HEADER (-9)
-#define TINYEXR_ERROR_UNSUPPORTED_FEATURE (-10)
-#define TINYEXR_ERROR_CANT_WRITE_FILE (-11)
-#define TINYEXR_ERROR_SERIALZATION_FAILED (-12)
-
-// @note { OpenEXR file format: http://www.openexr.com/openexrfilelayout.pdf }
-
-// pixel type: possible values are: UINT = 0 HALF = 1 FLOAT = 2
-#define TINYEXR_PIXELTYPE_UINT (0)
-#define TINYEXR_PIXELTYPE_HALF (1)
-#define TINYEXR_PIXELTYPE_FLOAT (2)
-
-#define TINYEXR_MAX_HEADER_ATTRIBUTES (1024)
-#define TINYEXR_MAX_CUSTOM_ATTRIBUTES (128)
-
-#define TINYEXR_COMPRESSIONTYPE_NONE (0)
-#define TINYEXR_COMPRESSIONTYPE_RLE (1)
-#define TINYEXR_COMPRESSIONTYPE_ZIPS (2)
-#define TINYEXR_COMPRESSIONTYPE_ZIP (3)
-#define TINYEXR_COMPRESSIONTYPE_PIZ (4)
-#define TINYEXR_COMPRESSIONTYPE_ZFP (128)  // TinyEXR extension
-
-#define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0)
-#define TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION (1)
-#define TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY (2)
-
-#define TINYEXR_TILE_ONE_LEVEL (0)
-#define TINYEXR_TILE_MIPMAP_LEVELS (1)
-#define TINYEXR_TILE_RIPMAP_LEVELS (2)
-
-#define TINYEXR_TILE_ROUND_DOWN (0)
-#define TINYEXR_TILE_ROUND_UP (1)
-
-typedef struct _EXRVersion {
-  int version;    // this must be 2
-  int tiled;      // tile format image
-  int long_name;  // long name attribute
-  int non_image;  // deep image(EXR 2.0)
-  int multipart;  // multi-part(EXR 2.0)
-} EXRVersion;
-
-typedef struct _EXRAttribute {
-  char name[256];  // name and type are up to 255 chars long.
-  char type[256];
-  unsigned char *value;  // uint8_t*
-  int size;
-  int pad0;
-} EXRAttribute;
-
-typedef struct _EXRChannelInfo {
-  char name[256];  // less than 255 bytes long
-  int pixel_type;
-  int x_sampling;
-  int y_sampling;
-  unsigned char p_linear;
-  unsigned char pad[3];
-} EXRChannelInfo;
-
-typedef struct _EXRTile {
-  int offset_x;
-  int offset_y;
-  int level_x;
-  int level_y;
-
-  int width;   // actual width in a tile.
-  int height;  // actual height int a tile.
-
-  unsigned char **images;  // image[channels][pixels]
-} EXRTile;
-
-typedef struct _EXRHeader {
-  float pixel_aspect_ratio;
-  int line_order;
-  int data_window[4];
-  int display_window[4];
-  float screen_window_center[2];
-  float screen_window_width;
-
-  int chunk_count;
-
-  // Properties for tiled format(`tiledesc`).
-  int tiled;
-  int tile_size_x;
-  int tile_size_y;
-  int tile_level_mode;
-  int tile_rounding_mode;
-
-  int long_name;
-  int non_image;
-  int multipart;
-  unsigned int header_len;
-
-  // Custom attributes(exludes required attributes(e.g. `channels`,
-  // `compression`, etc)
-  int num_custom_attributes;
-  EXRAttribute *custom_attributes;  // array of EXRAttribute. size =
-                                    // `num_custom_attributes`.
-
-  EXRChannelInfo *channels;  // [num_channels]
-
-  int *pixel_types;  // Loaded pixel type(TINYEXR_PIXELTYPE_*) of `images` for
-  // each channel. This is overwritten with `requested_pixel_types` when
-  // loading.
-  int num_channels;
-
-  int compression_type;        // compression type(TINYEXR_COMPRESSIONTYPE_*)
-  int *requested_pixel_types;  // Filled initially by
-                               // ParseEXRHeaderFrom(Meomory|File), then users
-                               // can edit it(only valid for HALF pixel type
-                               // channel)
-
-} EXRHeader;
-
-typedef struct _EXRMultiPartHeader {
-  int num_headers;
-  EXRHeader *headers;
-
-} EXRMultiPartHeader;
-
-typedef struct _EXRImage {
-  EXRTile *tiles;  // Tiled pixel data. The application must reconstruct image
-                   // from tiles manually. NULL if scanline format.
-  unsigned char **images;  // image[channels][pixels]. NULL if tiled format.
-
-  int width;
-  int height;
-  int num_channels;
-
-  // Properties for tile format.
-  int num_tiles;
-
-} EXRImage;
-
-typedef struct _EXRMultiPartImage {
-  int num_images;
-  EXRImage *images;
-
-} EXRMultiPartImage;
-
-typedef struct _DeepImage {
-  const char **channel_names;
-  float ***image;      // image[channels][scanlines][samples]
-  int **offset_table;  // offset_table[scanline][offsets]
-  int num_channels;
-  int width;
-  int height;
-  int pad0;
-} DeepImage;
-
-// @deprecated { to be removed. }
-// Loads single-frame OpenEXR image. Assume EXR image contains A(single channel
-// alpha) or RGB(A) channels.
-// Application must free image data as returned by `out_rgba`
-// Result image format is: float x RGBA x width x hight
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int LoadEXR(float **out_rgba, int *width, int *height,
-                   const char *filename, const char **err);
-
-// @deprecated { to be removed. }
-// Simple wrapper API for ParseEXRHeaderFromFile.
-// checking given file is a EXR file(by just look up header)
-// @return TINYEXR_SUCCEES for EXR image, TINYEXR_ERROR_INVALID_HEADER for
-// others
-extern int IsEXR(const char *filename);
-
-// @deprecated { to be removed. }
-// Saves single-frame OpenEXR image. Assume EXR image contains RGB(A) channels.
-// components must be 1(Grayscale), 3(RGB) or 4(RGBA).
-// Input image format is: `float x width x height`, or `float x RGB(A) x width x
-// hight`
-// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero
-// value.
-// Save image as fp32(FLOAT) format when `save_as_fp16` is 0.
-// Use ZIP compression by default.
-// Returns negative value and may set error string in `err` when there's an
-// error
-extern int SaveEXR(const float *data, const int width, const int height,
-                   const int components, const int save_as_fp16,
-                   const char *filename, const char **err);
-
-// Initialize EXRHeader struct
-extern void InitEXRHeader(EXRHeader *exr_header);
-
-// Initialize EXRImage struct
-extern void InitEXRImage(EXRImage *exr_image);
-
-// Free's internal data of EXRHeader struct
-extern int FreeEXRHeader(EXRHeader *exr_header);
-
-// Free's internal data of EXRImage struct
-extern int FreeEXRImage(EXRImage *exr_image);
-
-// Free's error message
-extern void FreeEXRErrorMessage(const char *msg);
-
-// Parse EXR version header of a file.
-extern int ParseEXRVersionFromFile(EXRVersion *version, const char *filename);
-
-// Parse EXR version header from memory-mapped EXR data.
-extern int ParseEXRVersionFromMemory(EXRVersion *version,
-                                     const unsigned char *memory, size_t size);
-
-// Parse single-part OpenEXR header from a file and initialize `EXRHeader`.
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int ParseEXRHeaderFromFile(EXRHeader *header, const EXRVersion *version,
-                                  const char *filename, const char **err);
-
-// Parse single-part OpenEXR header from a memory and initialize `EXRHeader`.
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int ParseEXRHeaderFromMemory(EXRHeader *header,
-                                    const EXRVersion *version,
-                                    const unsigned char *memory, size_t size,
-                                    const char **err);
-
-// Parse multi-part OpenEXR headers from a file and initialize `EXRHeader*`
-// array.
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int ParseEXRMultipartHeaderFromFile(EXRHeader ***headers,
-                                           int *num_headers,
-                                           const EXRVersion *version,
-                                           const char *filename,
-                                           const char **err);
-
-// Parse multi-part OpenEXR headers from a memory and initialize `EXRHeader*`
-// array
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int ParseEXRMultipartHeaderFromMemory(EXRHeader ***headers,
-                                             int *num_headers,
-                                             const EXRVersion *version,
-                                             const unsigned char *memory,
-                                             size_t size, const char **err);
-
-// Loads single-part OpenEXR image from a file.
-// Application must setup `ParseEXRHeaderFromFile` before calling this function.
-// Application can free EXRImage using `FreeEXRImage`
-// Returns negative value and may set error string in `err` when there's an
-// error
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int LoadEXRImageFromFile(EXRImage *image, const EXRHeader *header,
-                                const char *filename, const char **err);
-
-// Loads single-part OpenEXR image from a memory.
-// Application must setup `EXRHeader` with
-// `ParseEXRHeaderFromMemory` before calling this function.
-// Application can free EXRImage using `FreeEXRImage`
-// Returns negative value and may set error string in `err` when there's an
-// error
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int LoadEXRImageFromMemory(EXRImage *image, const EXRHeader *header,
-                                  const unsigned char *memory,
-                                  const size_t size, const char **err);
-
-// Loads multi-part OpenEXR image from a file.
-// Application must setup `ParseEXRMultipartHeaderFromFile` before calling this
-// function.
-// Application can free EXRImage using `FreeEXRImage`
-// Returns negative value and may set error string in `err` when there's an
-// error
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int LoadEXRMultipartImageFromFile(EXRImage *images,
-                                         const EXRHeader **headers,
-                                         unsigned int num_parts,
-                                         const char *filename,
-                                         const char **err);
-
-// Loads multi-part OpenEXR image from a memory.
-// Application must setup `EXRHeader*` array with
-// `ParseEXRMultipartHeaderFromMemory` before calling this function.
-// Application can free EXRImage using `FreeEXRImage`
-// Returns negative value and may set error string in `err` when there's an
-// error
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int LoadEXRMultipartImageFromMemory(EXRImage *images,
-                                           const EXRHeader **headers,
-                                           unsigned int num_parts,
-                                           const unsigned char *memory,
-                                           const size_t size, const char **err);
-
-// Saves multi-channel, single-frame OpenEXR image to a file.
-// Returns negative value and may set error string in `err` when there's an
-// error
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int SaveEXRImageToFile(const EXRImage *image,
-                              const EXRHeader *exr_header, const char *filename,
-                              const char **err);
-
-// Saves multi-channel, single-frame OpenEXR image to a memory.
-// Image is compressed using EXRImage.compression value.
-// Return the number of bytes if success.
-// Return zero and will set error string in `err` when there's an
-// error.
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern size_t SaveEXRImageToMemory(const EXRImage *image,
-                                   const EXRHeader *exr_header,
-                                   unsigned char **memory, const char **err);
-
-// Loads single-frame OpenEXR deep image.
-// Application must free memory of variables in DeepImage(image, offset_table)
-// Returns negative value and may set error string in `err` when there's an
-// error
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int LoadDeepEXR(DeepImage *out_image, const char *filename,
-                       const char **err);
-
-// NOT YET IMPLEMENTED:
-// Saves single-frame OpenEXR deep image.
-// Returns negative value and may set error string in `err` when there's an
-// error
-// extern int SaveDeepEXR(const DeepImage *in_image, const char *filename,
-//                       const char **err);
-
-// NOT YET IMPLEMENTED:
-// Loads multi-part OpenEXR deep image.
-// Application must free memory of variables in DeepImage(image, offset_table)
-// extern int LoadMultiPartDeepEXR(DeepImage **out_image, int num_parts, const
-// char *filename,
-//                       const char **err);
-
-// For emscripten.
-// Loads single-frame OpenEXR image from memory. Assume EXR image contains
-// RGB(A) channels.
-// Returns negative value and may set error string in `err` when there's an
-// error
-// When there was an error message, Application must free `err` with
-// FreeEXRErrorMessage()
-extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
-                             const unsigned char *memory, size_t size,
-                             const char **err);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // TINYEXR_H_
-
-#ifdef TINYEXR_IMPLEMENTATION
-#ifndef TINYEXR_IMPLEMENTATION_DEIFNED
-#define TINYEXR_IMPLEMENTATION_DEIFNED
-
-#include <algorithm>
-#include <cassert>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <sstream>
-
-//#include <iostream> // debug
-
-#include <limits>
-#include <string>
-#include <vector>
-
-#if __cplusplus > 199711L
-// C++11
-#include <cstdint>
-#endif  // __cplusplus > 199711L
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#if TINYEXR_USE_MINIZ
-#else
-//  Issue #46. Please include your own zlib-compatible API header before
-//  including `tinyexr.h`
-//#include "zlib.h"
-#endif
-
-#if TINYEXR_USE_ZFP
-#include "zfp.h"
-#endif
-
-namespace tinyexr {
-
-#if __cplusplus > 199711L
-// C++11
-typedef uint64_t tinyexr_uint64;
-typedef int64_t tinyexr_int64;
-#else
-// Although `long long` is not a standard type pre C++11, assume it is defined
-// as a compiler's extension.
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wc++11-long-long"
-#endif
-typedef unsigned long long tinyexr_uint64;
-typedef long long tinyexr_int64;
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-#endif
-
-#if TINYEXR_USE_MINIZ
-
-namespace miniz {
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wc++11-long-long"
-#pragma clang diagnostic ignored "-Wold-style-cast"
-#pragma clang diagnostic ignored "-Wpadded"
-#pragma clang diagnostic ignored "-Wsign-conversion"
-#pragma clang diagnostic ignored "-Wc++11-extensions"
-#pragma clang diagnostic ignored "-Wconversion"
-#pragma clang diagnostic ignored "-Wunused-function"
-#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
-#pragma clang diagnostic ignored "-Wundef"
-
-#if __has_warning("-Wcomma")
-#pragma clang diagnostic ignored "-Wcomma"
-#endif
-
-#if __has_warning("-Wmacro-redefined")
-#pragma clang diagnostic ignored "-Wmacro-redefined"
-#endif
-
-#if __has_warning("-Wcast-qual")
-#pragma clang diagnostic ignored "-Wcast-qual"
-#endif
-
-#if __has_warning("-Wzero-as-null-pointer-constant")
-#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
-#endif
-
-#if __has_warning("-Wtautological-constant-compare")
-#pragma clang diagnostic ignored "-Wtautological-constant-compare"
-#endif
-
-#endif
-
-/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP
-   reading/writing/appending, PNG writing
-   See "unlicense" statement at the end of this file.
-   Rich Geldreich <richgel99@gmail.com>, last updated Oct. 13, 2013
-   Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951:
-   http://www.ietf.org/rfc/rfc1951.txt
-
-   Most API's defined in miniz.c are optional. For example, to disable the
-   archive related functions just define
-   MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO
-   (see the list below for more macros).
-
-   * Change History
-     10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major
-   release with Zip64 support (almost there!):
-       - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug
-   (thanks kahmyong.moon@hp.com) which could cause locate files to not find
-   files. This bug
-        would only have occured in earlier versions if you explicitly used this
-   flag, OR if you used mz_zip_extract_archive_file_to_heap() or
-   mz_zip_add_mem_to_archive_file_in_place()
-        (which used this flag). If you can't switch to v1.15 but want to fix
-   this bug, just remove the uses of this flag from both helper funcs (and of
-   course don't use the flag).
-       - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when
-   pUser_read_buf is not NULL and compressed size is > uncompressed size
-       - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract
-   compressed data from directory entries, to account for weird zipfiles which
-   contain zero-size compressed data on dir entries.
-         Hopefully this fix won't cause any issues on weird zip archives,
-   because it assumes the low 16-bits of zip external attributes are DOS
-   attributes (which I believe they always are in practice).
-       - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the
-   internal attributes, just the filename and external attributes
-       - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed
-       - Added cmake support for Linux builds which builds all the examples,
-   tested with clang v3.3 and gcc v4.6.
-       - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti
-       - Merged MZ_FORCEINLINE fix from hdeanclark
-       - Fix <time.h> include before config #ifdef, thanks emil.brink
-       - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping
-   (super useful for OpenGL apps), and explicit control over the compression
-   level (so you can
-        set it to 1 for real-time compression).
-       - Merged in some compiler fixes from paulharris's github repro.
-       - Retested this build under Windows (VS 2010, including static analysis),
-   tcc  0.9.26, gcc v4.6 and clang v3.3.
-       - Added example6.c, which dumps an image of the mandelbrot set to a PNG
-   file.
-       - Modified example2 to help test the
-   MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more.
-       - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix
-   possible src file fclose() leak if alignment bytes+local header file write
-   faiiled
-                 - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader():
-   Was pushing the wrong central dir header offset, appears harmless in this
-   release, but it became a problem in the zip64 branch
-     5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE,
-   #include <time.h> (thanks fermtect).
-     5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix
-   mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit.
-       - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and
-   re-ran a randomized regression test on ~500k files.
-       - Eliminated a bunch of warnings when compiling with GCC 32-bit/64.
-       - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze
-   (static analysis) option and fixed all warnings (except for the silly
-        "Use of the comma-operator in a tested expression.." analysis warning,
-   which I purposely use to work around a MSVC compiler warning).
-       - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and
-   tested Linux executables. The codeblocks workspace is compatible with
-   Linux+Win32/x64.
-       - Added miniz_tester solution/project, which is a useful little app
-   derived from LZHAM's tester app that I use as part of the regression test.
-       - Ran miniz.c and tinfl.c through another series of regression testing on
-   ~500,000 files and archives.
-       - Modified example5.c so it purposely disables a bunch of high-level
-   functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the
-   MINIZ_NO_STDIO bug report.)
-       - Fix ftell() usage in examples so they exit with an error on files which
-   are too large (a limitation of the examples, not miniz itself).
-     4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple
-   minor level_and_flags issues in the archive API's.
-      level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce
-   Dawson <bruced@valvesoftware.com> for the feedback/bug report.
-     5/28/11 v1.11 - Added statement from unlicense.org
-     5/27/11 v1.10 - Substantial compressor optimizations:
-      - Level 1 is now ~4x faster than before. The L1 compressor's throughput
-   now varies between 70-110MB/sec. on a
-      - Core i7 (actual throughput varies depending on the type of data, and x64
-   vs. x86).
-      - Improved baseline L2-L9 compression perf. Also, greatly improved
-   compression perf. issues on some file types.
-      - Refactored the compression code for better readability and
-   maintainability.
-      - Added level 10 compression level (L10 has slightly better ratio than
-   level 9, but could have a potentially large
-       drop in throughput on some files).
-     5/15/11 v1.09 - Initial stable release.
-
-   * Low-level Deflate/Inflate implementation notes:
-
-     Compression: Use the "tdefl" API's. The compressor supports raw, static,
-   and dynamic blocks, lazy or
-     greedy parsing, match length filtering, RLE-only, and Huffman-only streams.
-   It performs and compresses
-     approximately as well as zlib.
-
-     Decompression: Use the "tinfl" API's. The entire decompressor is
-   implemented as a single function
-     coroutine: see tinfl_decompress(). It supports decompression into a 32KB
-   (or larger power of 2) wrapping buffer, or into a memory
-     block large enough to hold the entire file.
-
-     The low-level tdefl/tinfl API's do not make any use of dynamic memory
-   allocation.
-
-   * zlib-style API notes:
-
-     miniz.c implements a fairly large subset of zlib. There's enough
-   functionality present for it to be a drop-in
-     zlib replacement in many apps:
-        The z_stream struct, optional memory allocation callbacks
-        deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound
-        inflateInit/inflateInit2/inflate/inflateEnd
-        compress, compress2, compressBound, uncompress
-        CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly
-   routines.
-        Supports raw deflate streams or standard zlib streams with adler-32
-   checking.
-
-     Limitations:
-      The callback API's are not implemented yet. No support for gzip headers or
-   zlib static dictionaries.
-      I've tried to closely emulate zlib's various flavors of stream flushing
-   and return status codes, but
-      there are no guarantees that miniz.c pulls this off perfectly.
-
-   * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function,
-   originally written by
-     Alex Evans. Supports 1-4 bytes/pixel images.
-
-   * ZIP archive API notes:
-
-     The ZIP archive API's where designed with simplicity and efficiency in
-   mind, with just enough abstraction to
-     get the job done with minimal fuss. There are simple API's to retrieve file
-   information, read files from
-     existing archives, create new archives, append new files to existing
-   archives, or clone archive data from
-     one archive to another. It supports archives located in memory or the heap,
-   on disk (using stdio.h),
-     or you can specify custom file read/write callbacks.
-
-     - Archive reading: Just call this function to read a single file from a
-   disk archive:
-
-      void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const
-   char *pArchive_name,
-        size_t *pSize, mz_uint zip_flags);
-
-     For more complex cases, use the "mz_zip_reader" functions. Upon opening an
-   archive, the entire central
-     directory is located and read as-is into memory, and subsequent file access
-   only occurs when reading individual files.
-
-     - Archives file scanning: The simple way is to use this function to scan a
-   loaded archive for a specific file:
-
-     int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName,
-   const char *pComment, mz_uint flags);
-
-     The locate operation can optionally check file comments too, which (as one
-   example) can be used to identify
-     multiple versions of the same file in an archive. This function uses a
-   simple linear search through the central
-     directory, so it's not very fast.
-
-     Alternately, you can iterate through all the files in an archive (using
-   mz_zip_reader_get_num_files()) and
-     retrieve detailed info on each file by calling mz_zip_reader_file_stat().
-
-     - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer
-   immediately writes compressed file data
-     to disk and builds an exact image of the central directory in memory. The
-   central directory image is written
-     all at once at the end of the archive file when the archive is finalized.
-
-     The archive writer can optionally align each file's local header and file
-   data to any power of 2 alignment,
-     which can be useful when the archive will be read from optical media. Also,
-   the writer supports placing
-     arbitrary data blobs at the very beginning of ZIP archives. Archives
-   written using either feature are still
-     readable by any ZIP tool.
-
-     - Archive appending: The simple way to add a single file to an archive is
-   to call this function:
-
-      mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename,
-   const char *pArchive_name,
-        const void *pBuf, size_t buf_size, const void *pComment, mz_uint16
-   comment_size, mz_uint level_and_flags);
-
-     The archive will be created if it doesn't already exist, otherwise it'll be
-   appended to.
-     Note the appending is done in-place and is not an atomic operation, so if
-   something goes wrong
-     during the operation it's possible the archive could be left without a
-   central directory (although the local
-     file headers and file data will be fine, so the archive will be
-   recoverable).
-
-     For more complex archive modification scenarios:
-     1. The safest way is to use a mz_zip_reader to read the existing archive,
-   cloning only those bits you want to
-     preserve into a new archive using using the
-   mz_zip_writer_add_from_zip_reader() function (which compiles the
-     compressed file data as-is). When you're done, delete the old archive and
-   rename the newly written archive, and
-     you're done. This is safe but requires a bunch of temporary disk space or
-   heap memory.
-
-     2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using
-   mz_zip_writer_init_from_reader(),
-     append new files as needed, then finalize the archive which will write an
-   updated central directory to the
-     original archive. (This is basically what
-   mz_zip_add_mem_to_archive_file_in_place() does.) There's a
-     possibility that the archive's central directory could be lost with this
-   method if anything goes wrong, though.
-
-     - ZIP archive support limitations:
-     No zip64 or spanning support. Extraction functions can only handle
-   unencrypted, stored or deflated files.
-     Requires streams capable of seeking.
-
-   * This is a header file library, like stb_image.c. To get only a header file,
-   either cut and paste the
-     below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then
-   include miniz.c from it.
-
-   * Important: For best perf. be sure to customize the below macros for your
-   target platform:
-     #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
-     #define MINIZ_LITTLE_ENDIAN 1
-     #define MINIZ_HAS_64BIT_REGISTERS 1
-
-   * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before
-   including miniz.c to ensure miniz
-     uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be
-   able to process large files
-     (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes).
-*/
-
-#ifndef MINIZ_HEADER_INCLUDED
-#define MINIZ_HEADER_INCLUDED
-
-//#include <stdlib.h>
-
-// Defines to completely disable specific portions of miniz.c:
-// If all macros here are defined the only functionality remaining will be
-// CRC-32, adler-32, tinfl, and tdefl.
-
-// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on
-// stdio for file I/O.
-//#define MINIZ_NO_STDIO
-
-// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able
-// to get the current time, or
-// get/set file times, and the C run-time funcs that get/set times won't be
-// called.
-// The current downside is the times written to your archives will be from 1979.
-#define MINIZ_NO_TIME
-
-// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's.
-#define MINIZ_NO_ARCHIVE_APIS
-
-// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive
-// API's.
-//#define MINIZ_NO_ARCHIVE_WRITING_APIS
-
-// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression
-// API's.
-//#define MINIZ_NO_ZLIB_APIS
-
-// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent
-// conflicts against stock zlib.
-//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
-
-// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc.
-// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom
-// user alloc/free/realloc
-// callbacks to the zlib and archive API's, and a few stand-alone helper API's
-// which don't provide custom user
-// functions (such as tdefl_compress_mem_to_heap() and
-// tinfl_decompress_mem_to_heap()) won't work.
-//#define MINIZ_NO_MALLOC
-
-#if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
-// TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc
-// on Linux
-#define MINIZ_NO_TIME
-#endif
-
-#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
-//#include <time.h>
-#endif
-
-#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
-    defined(__i386) || defined(__i486__) || defined(__i486) ||  \
-    defined(i386) || defined(__ia64__) || defined(__x86_64__)
-// MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
-#define MINIZ_X86_OR_X64_CPU 1
-#endif
-
-#if defined(__sparcv9)
-// Big endian
-#else
-#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU
-// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
-#define MINIZ_LITTLE_ENDIAN 1
-#endif
-#endif
-
-#if MINIZ_X86_OR_X64_CPU
-// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient
-// integer loads and stores from unaligned addresses.
-//#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
-#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES \
-  0  // disable to suppress compiler warnings
-#endif
-
-#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || \
-    defined(_LP64) || defined(__LP64__) || defined(__ia64__) ||   \
-    defined(__x86_64__)
-// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are
-// reasonably fast (and don't involve compiler generated calls to helper
-// functions).
-#define MINIZ_HAS_64BIT_REGISTERS 1
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// ------------------- zlib-style API Definitions.
-
-// For more compatibility with zlib, miniz.c uses unsigned long for some
-// parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits!
-typedef unsigned long mz_ulong;
-
-// mz_free() internally uses the MZ_FREE() macro (which by default calls free()
-// unless you've modified the MZ_MALLOC macro) to release a block allocated from
-// the heap.
-void mz_free(void *p);
-
-#define MZ_ADLER32_INIT (1)
-// mz_adler32() returns the initial adler-32 value to use when called with
-// ptr==NULL.
-mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);
-
-#define MZ_CRC32_INIT (0)
-// mz_crc32() returns the initial CRC-32 value to use when called with
-// ptr==NULL.
-mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);
-
-// Compression strategies.
-enum {
-  MZ_DEFAULT_STRATEGY = 0,
-  MZ_FILTERED = 1,
-  MZ_HUFFMAN_ONLY = 2,
-  MZ_RLE = 3,
-  MZ_FIXED = 4
-};
-
-// Method
-#define MZ_DEFLATED 8
-
-#ifndef MINIZ_NO_ZLIB_APIS
-
-// Heap allocation callbacks.
-// Note that mz_alloc_func parameter types purpsosely differ from zlib's:
-// items/size is size_t, not unsigned long.
-typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
-typedef void (*mz_free_func)(void *opaque, void *address);
-typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items,
-                                 size_t size);
-
-#define MZ_VERSION "9.1.15"
-#define MZ_VERNUM 0x91F0
-#define MZ_VER_MAJOR 9
-#define MZ_VER_MINOR 1
-#define MZ_VER_REVISION 15
-#define MZ_VER_SUBREVISION 0
-
-// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The
-// other values are for advanced use (refer to the zlib docs).
-enum {
-  MZ_NO_FLUSH = 0,
-  MZ_PARTIAL_FLUSH = 1,
-  MZ_SYNC_FLUSH = 2,
-  MZ_FULL_FLUSH = 3,
-  MZ_FINISH = 4,
-  MZ_BLOCK = 5
-};
-
-// Return status codes. MZ_PARAM_ERROR is non-standard.
-enum {
-  MZ_OK = 0,
-  MZ_STREAM_END = 1,
-  MZ_NEED_DICT = 2,
-  MZ_ERRNO = -1,
-  MZ_STREAM_ERROR = -2,
-  MZ_DATA_ERROR = -3,
-  MZ_MEM_ERROR = -4,
-  MZ_BUF_ERROR = -5,
-  MZ_VERSION_ERROR = -6,
-  MZ_PARAM_ERROR = -10000
-};
-
-// Compression levels: 0-9 are the standard zlib-style levels, 10 is best
-// possible compression (not zlib compatible, and may be very slow),
-// MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
-enum {
-  MZ_NO_COMPRESSION = 0,
-  MZ_BEST_SPEED = 1,
-  MZ_BEST_COMPRESSION = 9,
-  MZ_UBER_COMPRESSION = 10,
-  MZ_DEFAULT_LEVEL = 6,
-  MZ_DEFAULT_COMPRESSION = -1
-};
-
-// Window bits
-#define MZ_DEFAULT_WINDOW_BITS 15
-
-struct mz_internal_state;
-
-// Compression/decompression stream struct.
-typedef struct mz_stream_s {
-  const unsigned char *next_in;  // pointer to next byte to read
-  unsigned int avail_in;         // number of bytes available at next_in
-  mz_ulong total_in;             // total number of bytes consumed so far
-
-  unsigned char *next_out;  // pointer to next byte to write
-  unsigned int avail_out;   // number of bytes that can be written to next_out
-  mz_ulong total_out;       // total number of bytes produced so far
-
-  char *msg;                        // error msg (unused)
-  struct mz_internal_state *state;  // internal state, allocated by zalloc/zfree
-
-  mz_alloc_func
-      zalloc;          // optional heap allocation function (defaults to malloc)
-  mz_free_func zfree;  // optional heap free function (defaults to free)
-  void *opaque;        // heap alloc function user pointer
-
-  int data_type;      // data_type (unused)
-  mz_ulong adler;     // adler32 of the source or uncompressed data
-  mz_ulong reserved;  // not used
-} mz_stream;
-
-typedef mz_stream *mz_streamp;
-
-// Returns the version string of miniz.c.
-const char *mz_version(void);
-
-// mz_deflateInit() initializes a compressor with default options:
-// Parameters:
-//  pStream must point to an initialized mz_stream struct.
-//  level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
-//  level 1 enables a specially optimized compression function that's been
-//  optimized purely for performance, not ratio.
-//  (This special func. is currently only enabled when
-//  MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
-// Return values:
-//  MZ_OK on success.
-//  MZ_STREAM_ERROR if the stream is bogus.
-//  MZ_PARAM_ERROR if the input parameters are bogus.
-//  MZ_MEM_ERROR on out of memory.
-int mz_deflateInit(mz_streamp pStream, int level);
-
-// mz_deflateInit2() is like mz_deflate(), except with more control:
-// Additional parameters:
-//   method must be MZ_DEFLATED
-//   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with
-//   zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no
-//   header or footer)
-//   mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
-int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits,
-                    int mem_level, int strategy);
-
-// Quickly resets a compressor without having to reallocate anything. Same as
-// calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
-int mz_deflateReset(mz_streamp pStream);
-
-// mz_deflate() compresses the input to output, consuming as much of the input
-// and producing as much output as possible.
-// Parameters:
-//   pStream is the stream to read from and write to. You must initialize/update
-//   the next_in, avail_in, next_out, and avail_out members.
-//   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or
-//   MZ_FINISH.
-// Return values:
-//   MZ_OK on success (when flushing, or if more input is needed but not
-//   available, and/or there's more output to be written but the output buffer
-//   is full).
-//   MZ_STREAM_END if all input has been consumed and all output bytes have been
-//   written. Don't call mz_deflate() on the stream anymore.
-//   MZ_STREAM_ERROR if the stream is bogus.
-//   MZ_PARAM_ERROR if one of the parameters is invalid.
-//   MZ_BUF_ERROR if no forward progress is possible because the input and/or
-//   output buffers are empty. (Fill up the input buffer or free up some output
-//   space and try again.)
-int mz_deflate(mz_streamp pStream, int flush);
-
-// mz_deflateEnd() deinitializes a compressor:
-// Return values:
-//  MZ_OK on success.
-//  MZ_STREAM_ERROR if the stream is bogus.
-int mz_deflateEnd(mz_streamp pStream);
-
-// mz_deflateBound() returns a (very) conservative upper bound on the amount of
-// data that could be generated by deflate(), assuming flush is set to only
-// MZ_NO_FLUSH or MZ_FINISH.
-mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);
-
-// Single-call compression functions mz_compress() and mz_compress2():
-// Returns MZ_OK on success, or one of the error codes from mz_deflate() on
-// failure.
-int mz_compress(unsigned char *pDest, mz_ulong *pDest_len,
-                const unsigned char *pSource, mz_ulong source_len);
-int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len,
-                 const unsigned char *pSource, mz_ulong source_len, int level);
-
-// mz_compressBound() returns a (very) conservative upper bound on the amount of
-// data that could be generated by calling mz_compress().
-mz_ulong mz_compressBound(mz_ulong source_len);
-
-// Initializes a decompressor.
-int mz_inflateInit(mz_streamp pStream);
-
-// mz_inflateInit2() is like mz_inflateInit() with an additional option that
-// controls the window size and whether or not the stream has been wrapped with
-// a zlib header/footer:
-// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or
-// -MZ_DEFAULT_WINDOW_BITS (raw deflate).
-int mz_inflateInit2(mz_streamp pStream, int window_bits);
-
-// Decompresses the input stream to the output, consuming only as much of the
-// input as needed, and writing as much to the output as possible.
-// Parameters:
-//   pStream is the stream to read from and write to. You must initialize/update
-//   the next_in, avail_in, next_out, and avail_out members.
-//   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
-//   On the first call, if flush is MZ_FINISH it's assumed the input and output
-//   buffers are both sized large enough to decompress the entire stream in a
-//   single call (this is slightly faster).
-//   MZ_FINISH implies that there are no more source bytes available beside
-//   what's already in the input buffer, and that the output buffer is large
-//   enough to hold the rest of the decompressed data.
-// Return values:
-//   MZ_OK on success. Either more input is needed but not available, and/or
-//   there's more output to be written but the output buffer is full.
-//   MZ_STREAM_END if all needed input has been consumed and all output bytes
-//   have been written. For zlib streams, the adler-32 of the decompressed data
-//   has also been verified.
-//   MZ_STREAM_ERROR if the stream is bogus.
-//   MZ_DATA_ERROR if the deflate stream is invalid.
-//   MZ_PARAM_ERROR if one of the parameters is invalid.
-//   MZ_BUF_ERROR if no forward progress is possible because the input buffer is
-//   empty but the inflater needs more input to continue, or if the output
-//   buffer is not large enough. Call mz_inflate() again
-//   with more input data, or with more room in the output buffer (except when
-//   using single call decompression, described above).
-int mz_inflate(mz_streamp pStream, int flush);
-
-// Deinitializes a decompressor.
-int mz_inflateEnd(mz_streamp pStream);
-
-// Single-call decompression.
-// Returns MZ_OK on success, or one of the error codes from mz_inflate() on
-// failure.
-int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len,
-                  const unsigned char *pSource, mz_ulong source_len);
-
-// Returns a string description of the specified error code, or NULL if the
-// error code is invalid.
-const char *mz_error(int err);
-
-// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used
-// as a drop-in replacement for the subset of zlib that miniz.c supports.
-// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you
-// use zlib in the same project.
-#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
-typedef unsigned char Byte;
-typedef unsigned int uInt;
-typedef mz_ulong uLong;
-typedef Byte Bytef;
-typedef uInt uIntf;
-typedef char charf;
-typedef int intf;
-typedef void *voidpf;
-typedef uLong uLongf;
-typedef void *voidp;
-typedef void *const voidpc;
-#define Z_NULL 0
-#define Z_NO_FLUSH MZ_NO_FLUSH
-#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
-#define Z_SYNC_FLUSH MZ_SYNC_FLUSH
-#define Z_FULL_FLUSH MZ_FULL_FLUSH
-#define Z_FINISH MZ_FINISH
-#define Z_BLOCK MZ_BLOCK
-#define Z_OK MZ_OK
-#define Z_STREAM_END MZ_STREAM_END
-#define Z_NEED_DICT MZ_NEED_DICT
-#define Z_ERRNO MZ_ERRNO
-#define Z_STREAM_ERROR MZ_STREAM_ERROR
-#define Z_DATA_ERROR MZ_DATA_ERROR
-#define Z_MEM_ERROR MZ_MEM_ERROR
-#define Z_BUF_ERROR MZ_BUF_ERROR
-#define Z_VERSION_ERROR MZ_VERSION_ERROR
-#define Z_PARAM_ERROR MZ_PARAM_ERROR
-#define Z_NO_COMPRESSION MZ_NO_COMPRESSION
-#define Z_BEST_SPEED MZ_BEST_SPEED
-#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
-#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
-#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
-#define Z_FILTERED MZ_FILTERED
-#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
-#define Z_RLE MZ_RLE
-#define Z_FIXED MZ_FIXED
-#define Z_DEFLATED MZ_DEFLATED
-#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
-#define alloc_func mz_alloc_func
-#define free_func mz_free_func
-#define internal_state mz_internal_state
-#define z_stream mz_stream
-#define deflateInit mz_deflateInit
-#define deflateInit2 mz_deflateInit2
-#define deflateReset mz_deflateReset
-#define deflate mz_deflate
-#define deflateEnd mz_deflateEnd
-#define deflateBound mz_deflateBound
-#define compress mz_compress
-#define compress2 mz_compress2
-#define compressBound mz_compressBound
-#define inflateInit mz_inflateInit
-#define inflateInit2 mz_inflateInit2
-#define inflate mz_inflate
-#define inflateEnd mz_inflateEnd
-#define uncompress mz_uncompress
-#define crc32 mz_crc32
-#define adler32 mz_adler32
-#define MAX_WBITS 15
-#define MAX_MEM_LEVEL 9
-#define zError mz_error
-#define ZLIB_VERSION MZ_VERSION
-#define ZLIB_VERNUM MZ_VERNUM
-#define ZLIB_VER_MAJOR MZ_VER_MAJOR
-#define ZLIB_VER_MINOR MZ_VER_MINOR
-#define ZLIB_VER_REVISION MZ_VER_REVISION
-#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION
-#define zlibVersion mz_version
-#define zlib_version mz_version()
-#endif  // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
-
-#endif  // MINIZ_NO_ZLIB_APIS
-
-// ------------------- Types and macros
-
-typedef unsigned char mz_uint8;
-typedef signed short mz_int16;
-typedef unsigned short mz_uint16;
-typedef unsigned int mz_uint32;
-typedef unsigned int mz_uint;
-typedef long long mz_int64;
-typedef unsigned long long mz_uint64;
-typedef int mz_bool;
-
-#define MZ_FALSE (0)
-#define MZ_TRUE (1)
-
-// An attempt to work around MSVC's spammy "warning C4127: conditional
-// expression is constant" message.
-#ifdef _MSC_VER
-#define MZ_MACRO_END while (0, 0)
-#else
-#define MZ_MACRO_END while (0)
-#endif
-
-// ------------------- ZIP archive reading/writing
-
-#ifndef MINIZ_NO_ARCHIVE_APIS
-
-enum {
-  MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024,
-  MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260,
-  MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256
-};
-
-typedef struct {
-  mz_uint32 m_file_index;
-  mz_uint32 m_central_dir_ofs;
-  mz_uint16 m_version_made_by;
-  mz_uint16 m_version_needed;
-  mz_uint16 m_bit_flag;
-  mz_uint16 m_method;
-#ifndef MINIZ_NO_TIME
-  time_t m_time;
-#endif
-  mz_uint32 m_crc32;
-  mz_uint64 m_comp_size;
-  mz_uint64 m_uncomp_size;
-  mz_uint16 m_internal_attr;
-  mz_uint32 m_external_attr;
-  mz_uint64 m_local_header_ofs;
-  mz_uint32 m_comment_size;
-  char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];
-  char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];
-} mz_zip_archive_file_stat;
-
-typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs,
-                                    void *pBuf, size_t n);
-typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs,
-                                     const void *pBuf, size_t n);
-
-struct mz_zip_internal_state_tag;
-typedef struct mz_zip_internal_state_tag mz_zip_internal_state;
-
-typedef enum {
-  MZ_ZIP_MODE_INVALID = 0,
-  MZ_ZIP_MODE_READING = 1,
-  MZ_ZIP_MODE_WRITING = 2,
-  MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
-} mz_zip_mode;
-
-typedef struct mz_zip_archive_tag {
-  mz_uint64 m_archive_size;
-  mz_uint64 m_central_directory_file_ofs;
-  mz_uint m_total_files;
-  mz_zip_mode m_zip_mode;
-
-  mz_uint m_file_offset_alignment;
-
-  mz_alloc_func m_pAlloc;
-  mz_free_func m_pFree;
-  mz_realloc_func m_pRealloc;
-  void *m_pAlloc_opaque;
-
-  mz_file_read_func m_pRead;
-  mz_file_write_func m_pWrite;
-  void *m_pIO_opaque;
-
-  mz_zip_internal_state *m_pState;
-
-} mz_zip_archive;
-
-typedef enum {
-  MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100,
-  MZ_ZIP_FLAG_IGNORE_PATH = 0x0200,
-  MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400,
-  MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800
-} mz_zip_flags;
-
-// ZIP archive reading
-
-// Inits a ZIP archive reader.
-// These functions read and validate the archive's central directory.
-mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size,
-                           mz_uint32 flags);
-mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem,
-                               size_t size, mz_uint32 flags);
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename,
-                                mz_uint32 flags);
-#endif
-
-// Returns the total number of files in the archive.
-mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip);
-
-// Returns detailed information about an archive file entry.
-mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index,
-                                mz_zip_archive_file_stat *pStat);
-
-// Determines if an archive file entry is a directory entry.
-mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip,
-                                          mz_uint file_index);
-mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip,
-                                        mz_uint file_index);
-
-// Retrieves the filename of an archive file entry.
-// Returns the number of bytes written to pFilename, or if filename_buf_size is
-// 0 this function returns the number of bytes needed to fully store the
-// filename.
-mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index,
-                                   char *pFilename, mz_uint filename_buf_size);
-
-// Attempts to locates a file in the archive's central directory.
-// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH
-// Returns -1 if the file cannot be found.
-int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName,
-                              const char *pComment, mz_uint flags);
-
-// Extracts a archive file to a memory buffer using no memory allocation.
-mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip,
-                                              mz_uint file_index, void *pBuf,
-                                              size_t buf_size, mz_uint flags,
-                                              void *pUser_read_buf,
-                                              size_t user_read_buf_size);
-mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(
-    mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size,
-    mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
-
-// Extracts a archive file to a memory buffer.
-mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index,
-                                     void *pBuf, size_t buf_size,
-                                     mz_uint flags);
-mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip,
-                                          const char *pFilename, void *pBuf,
-                                          size_t buf_size, mz_uint flags);
-
-// Extracts a archive file to a dynamically allocated heap buffer.
-void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index,
-                                    size_t *pSize, mz_uint flags);
-void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip,
-                                         const char *pFilename, size_t *pSize,
-                                         mz_uint flags);
-
-// Extracts a archive file using a callback function to output the file's data.
-mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip,
-                                          mz_uint file_index,
-                                          mz_file_write_func pCallback,
-                                          void *pOpaque, mz_uint flags);
-mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip,
-                                               const char *pFilename,
-                                               mz_file_write_func pCallback,
-                                               void *pOpaque, mz_uint flags);
-
-#ifndef MINIZ_NO_STDIO
-// Extracts a archive file to a disk file and sets its last accessed and
-// modified times.
-// This function only extracts files, not archive directory records.
-mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index,
-                                      const char *pDst_filename, mz_uint flags);
-mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip,
-                                           const char *pArchive_filename,
-                                           const char *pDst_filename,
-                                           mz_uint flags);
-#endif
-
-// Ends archive reading, freeing all allocations, and closing the input archive
-// file if mz_zip_reader_init_file() was used.
-mz_bool mz_zip_reader_end(mz_zip_archive *pZip);
-
-// ZIP archive writing
-
-#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
-
-// Inits a ZIP archive writer.
-mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size);
-mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip,
-                                size_t size_to_reserve_at_beginning,
-                                size_t initial_allocation_size);
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename,
-                                mz_uint64 size_to_reserve_at_beginning);
-#endif
-
-// Converts a ZIP archive reader object into a writer object, to allow efficient
-// in-place file appends to occur on an existing archive.
-// For archives opened using mz_zip_reader_init_file, pFilename must be the
-// archive's filename so it can be reopened for writing. If the file can't be
-// reopened, mz_zip_reader_end() will be called.
-// For archives opened using mz_zip_reader_init_mem, the memory block must be
-// growable using the realloc callback (which defaults to realloc unless you've
-// overridden it).
-// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's
-// user provided m_pWrite function cannot be NULL.
-// Note: In-place archive modification is not recommended unless you know what
-// you're doing, because if execution stops or something goes wrong before
-// the archive is finalized the file's central directory will be hosed.
-mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip,
-                                       const char *pFilename);
-
-// Adds the contents of a memory buffer to an archive. These functions record
-// the current local time into the archive.
-// To add a directory entry, call this method with an archive name ending in a
-// forwardslash with empty buffer.
-// level_and_flags - compression level (0-10, see MZ_BEST_SPEED,
-// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or
-// just set to MZ_DEFAULT_COMPRESSION.
-mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name,
-                              const void *pBuf, size_t buf_size,
-                              mz_uint level_and_flags);
-mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip,
-                                 const char *pArchive_name, const void *pBuf,
-                                 size_t buf_size, const void *pComment,
-                                 mz_uint16 comment_size,
-                                 mz_uint level_and_flags, mz_uint64 uncomp_size,
-                                 mz_uint32 uncomp_crc32);
-
-#ifndef MINIZ_NO_STDIO
-// Adds the contents of a disk file to an archive. This function also records
-// the disk file's modified time into the archive.
-// level_and_flags - compression level (0-10, see MZ_BEST_SPEED,
-// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or
-// just set to MZ_DEFAULT_COMPRESSION.
-mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name,
-                               const char *pSrc_filename, const void *pComment,
-                               mz_uint16 comment_size, mz_uint level_and_flags);
-#endif
-
-// Adds a file to an archive by fully cloning the data from another archive.
-// This function fully clones the source file's compressed data (no
-// recompression), along with its full filename, extra data, and comment fields.
-mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip,
-                                          mz_zip_archive *pSource_zip,
-                                          mz_uint file_index);
-
-// Finalizes the archive by writing the central directory records followed by
-// the end of central directory record.
-// After an archive is finalized, the only valid call on the mz_zip_archive
-// struct is mz_zip_writer_end().
-// An archive must be manually finalized by calling this function for it to be
-// valid.
-mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip);
-mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf,
-                                            size_t *pSize);
-
-// Ends archive writing, freeing all allocations, and closing the output file if
-// mz_zip_writer_init_file() was used.
-// Note for the archive to be valid, it must have been finalized before ending.
-mz_bool mz_zip_writer_end(mz_zip_archive *pZip);
-
-// Misc. high-level helper functions:
-
-// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically)
-// appends a memory blob to a ZIP archive.
-// level_and_flags - compression level (0-10, see MZ_BEST_SPEED,
-// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or
-// just set to MZ_DEFAULT_COMPRESSION.
-mz_bool mz_zip_add_mem_to_archive_file_in_place(
-    const char *pZip_filename, const char *pArchive_name, const void *pBuf,
-    size_t buf_size, const void *pComment, mz_uint16 comment_size,
-    mz_uint level_and_flags);
-
-// Reads a single file from an archive into a heap block.
-// Returns NULL on failure.
-void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename,
-                                          const char *pArchive_name,
-                                          size_t *pSize, mz_uint zip_flags);
-
-#endif  // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
-
-#endif  // #ifndef MINIZ_NO_ARCHIVE_APIS
-
-// ------------------- Low-level Decompression API Definitions
-
-// Decompression flags used by tinfl_decompress().
-// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and
-// ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the
-// input is a raw deflate stream.
-// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available
-// beyond the end of the supplied input buffer. If clear, the input buffer
-// contains all remaining input.
-// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large
-// enough to hold the entire decompressed stream. If clear, the output buffer is
-// at least the size of the dictionary (typically 32KB).
-// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the
-// decompressed bytes.
-enum {
-  TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
-  TINFL_FLAG_HAS_MORE_INPUT = 2,
-  TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
-  TINFL_FLAG_COMPUTE_ADLER32 = 8
-};
-
-// High level decompression functions:
-// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block
-// allocated via malloc().
-// On entry:
-//  pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data
-//  to decompress.
-// On return:
-//  Function returns a pointer to the decompressed data, or NULL on failure.
-//  *pOut_len will be set to the decompressed data's size, which could be larger
-//  than src_buf_len on uncompressible data.
-//  The caller must call mz_free() on the returned block when it's no longer
-//  needed.
-void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len,
-                                   size_t *pOut_len, int flags);
-
-// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block
-// in memory.
-// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes
-// written on success.
-#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
-size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len,
-                                   const void *pSrc_buf, size_t src_buf_len,
-                                   int flags);
-
-// tinfl_decompress_mem_to_callback() decompresses a block in memory to an
-// internal 32KB buffer, and a user provided callback function will be called to
-// flush the buffer.
-// Returns 1 on success or 0 on failure.
-typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser);
-int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size,
-                                     tinfl_put_buf_func_ptr pPut_buf_func,
-                                     void *pPut_buf_user, int flags);
-
-struct tinfl_decompressor_tag;
-typedef struct tinfl_decompressor_tag tinfl_decompressor;
-
-// Max size of LZ dictionary.
-#define TINFL_LZ_DICT_SIZE 32768
-
-// Return status.
-typedef enum {
-  TINFL_STATUS_BAD_PARAM = -3,
-  TINFL_STATUS_ADLER32_MISMATCH = -2,
-  TINFL_STATUS_FAILED = -1,
-  TINFL_STATUS_DONE = 0,
-  TINFL_STATUS_NEEDS_MORE_INPUT = 1,
-  TINFL_STATUS_HAS_MORE_OUTPUT = 2
-} tinfl_status;
-
-// Initializes the decompressor to its initial state.
-#define tinfl_init(r) \
-  do {                \
-    (r)->m_state = 0; \
-  }                   \
-  MZ_MACRO_END
-#define tinfl_get_adler32(r) (r)->m_check_adler32
-
-// Main low-level decompressor coroutine function. This is the only function
-// actually needed for decompression. All the other functions are just
-// high-level helpers for improved usability.
-// This is a universal API, i.e. it can be used as a building block to build any
-// desired higher level decompression API. In the limit case, it can be called
-// once per every byte input or output.
-tinfl_status tinfl_decompress(tinfl_decompressor *r,
-                              const mz_uint8 *pIn_buf_next,
-                              size_t *pIn_buf_size, mz_uint8 *pOut_buf_start,
-                              mz_uint8 *pOut_buf_next, size_t *pOut_buf_size,
-                              const mz_uint32 decomp_flags);
-
-// Internal/private bits follow.
-enum {
-  TINFL_MAX_HUFF_TABLES = 3,
-  TINFL_MAX_HUFF_SYMBOLS_0 = 288,
-  TINFL_MAX_HUFF_SYMBOLS_1 = 32,
-  TINFL_MAX_HUFF_SYMBOLS_2 = 19,
-  TINFL_FAST_LOOKUP_BITS = 10,
-  TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
-};
-
-typedef struct {
-  mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];
-  mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE],
-      m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
-} tinfl_huff_table;
-
-#if MINIZ_HAS_64BIT_REGISTERS
-#define TINFL_USE_64BIT_BITBUF 1
-#endif
-
-#if TINFL_USE_64BIT_BITBUF
-typedef mz_uint64 tinfl_bit_buf_t;
-#define TINFL_BITBUF_SIZE (64)
-#else
-typedef mz_uint32 tinfl_bit_buf_t;
-#define TINFL_BITBUF_SIZE (32)
-#endif
-
-struct tinfl_decompressor_tag {
-  mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type,
-      m_check_adler32, m_dist, m_counter, m_num_extra,
-      m_table_sizes[TINFL_MAX_HUFF_TABLES];
-  tinfl_bit_buf_t m_bit_buf;
-  size_t m_dist_from_out_buf_start;
-  tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];
-  mz_uint8 m_raw_header[4],
-      m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
-};
-
-// ------------------- Low-level Compression API Definitions
-
-// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly
-// slower, and raw/dynamic blocks will be output more frequently).
-#define TDEFL_LESS_MEMORY 0
-
-// tdefl_init() compression flags logically OR'd together (low 12 bits contain
-// the max. number of probes per dictionary search):
-// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes
-// per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap
-// compression), 4095=Huffman+LZ (slowest/best compression).
-enum {
-  TDEFL_HUFFMAN_ONLY = 0,
-  TDEFL_DEFAULT_MAX_PROBES = 128,
-  TDEFL_MAX_PROBES_MASK = 0xFFF
-};
-
-// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before
-// the deflate data, and the Adler-32 of the source data at the end. Otherwise,
-// you'll get raw deflate data.
-// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even
-// when not writing zlib headers).
-// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more
-// efficient lazy parsing.
-// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's
-// initialization time to the minimum, but the output may vary from run to run
-// given the same input (depending on the contents of memory).
-// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1)
-// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled.
-// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables.
-// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks.
-// The low 12 bits are reserved to control the max # of hash probes per
-// dictionary lookup (see TDEFL_MAX_PROBES_MASK).
-enum {
-  TDEFL_WRITE_ZLIB_HEADER = 0x01000,
-  TDEFL_COMPUTE_ADLER32 = 0x02000,
-  TDEFL_GREEDY_PARSING_FLAG = 0x04000,
-  TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
-  TDEFL_RLE_MATCHES = 0x10000,
-  TDEFL_FILTER_MATCHES = 0x20000,
-  TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000,
-  TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000
-};
-
-// High level compression functions:
-// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block
-// allocated via malloc().
-// On entry:
-//  pSrc_buf, src_buf_len: Pointer and size of source block to compress.
-//  flags: The max match finder probes (default is 128) logically OR'd against
-//  the above flags. Higher probes are slower but improve compression.
-// On return:
-//  Function returns a pointer to the compressed data, or NULL on failure.
-//  *pOut_len will be set to the compressed data's size, which could be larger
-//  than src_buf_len on uncompressible data.
-//  The caller must free() the returned block when it's no longer needed.
-void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len,
-                                 size_t *pOut_len, int flags);
-
-// tdefl_compress_mem_to_mem() compresses a block in memory to another block in
-// memory.
-// Returns 0 on failure.
-size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len,
-                                 const void *pSrc_buf, size_t src_buf_len,
-                                 int flags);
-
-// Compresses an image to a compressed PNG file in memory.
-// On entry:
-//  pImage, w, h, and num_chans describe the image to compress. num_chans may be
-//  1, 2, 3, or 4.
-//  The image pitch in bytes per scanline will be w*num_chans. The leftmost
-//  pixel on the top scanline is stored first in memory.
-//  level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED,
-//  MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
-//  If flip is true, the image will be flipped on the Y axis (useful for OpenGL
-//  apps).
-// On return:
-//  Function returns a pointer to the compressed data, or NULL on failure.
-//  *pLen_out will be set to the size of the PNG image file.
-//  The caller must mz_free() the returned heap block (which will typically be
-//  larger than *pLen_out) when it's no longer needed.
-void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w,
-                                                 int h, int num_chans,
-                                                 size_t *pLen_out,
-                                                 mz_uint level, mz_bool flip);
-void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h,
-                                              int num_chans, size_t *pLen_out);
-
-// Output stream interface. The compressor uses this interface to write
-// compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time.
-typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len,
-                                          void *pUser);
-
-// tdefl_compress_mem_to_output() compresses a block to an output stream. The
-// above helpers use this function internally.
-mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len,
-                                     tdefl_put_buf_func_ptr pPut_buf_func,
-                                     void *pPut_buf_user, int flags);
-
-enum {
-  TDEFL_MAX_HUFF_TABLES = 3,
-  TDEFL_MAX_HUFF_SYMBOLS_0 = 288,
-  TDEFL_MAX_HUFF_SYMBOLS_1 = 32,
-  TDEFL_MAX_HUFF_SYMBOLS_2 = 19,
-  TDEFL_LZ_DICT_SIZE = 32768,
-  TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1,
-  TDEFL_MIN_MATCH_LEN = 3,
-  TDEFL_MAX_MATCH_LEN = 258
-};
-
-// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed
-// output block (using static/fixed Huffman codes).
-#if TDEFL_LESS_MEMORY
-enum {
-  TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024,
-  TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
-  TDEFL_MAX_HUFF_SYMBOLS = 288,
-  TDEFL_LZ_HASH_BITS = 12,
-  TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
-  TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
-  TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
-};
-#else
-enum {
-  TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024,
-  TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
-  TDEFL_MAX_HUFF_SYMBOLS = 288,
-  TDEFL_LZ_HASH_BITS = 15,
-  TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
-  TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
-  TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
-};
-#endif
-
-// The low-level tdefl functions below may be used directly if the above helper
-// functions aren't flexible enough. The low-level functions don't make any heap
-// allocations, unlike the above helper functions.
-typedef enum {
-  TDEFL_STATUS_BAD_PARAM = -2,
-  TDEFL_STATUS_PUT_BUF_FAILED = -1,
-  TDEFL_STATUS_OKAY = 0,
-  TDEFL_STATUS_DONE = 1
-} tdefl_status;
-
-// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
-typedef enum {
-  TDEFL_NO_FLUSH = 0,
-  TDEFL_SYNC_FLUSH = 2,
-  TDEFL_FULL_FLUSH = 3,
-  TDEFL_FINISH = 4
-} tdefl_flush;
-
-// tdefl's compression state structure.
-typedef struct {
-  tdefl_put_buf_func_ptr m_pPut_buf_func;
-  void *m_pPut_buf_user;
-  mz_uint m_flags, m_max_probes[2];
-  int m_greedy_parsing;
-  mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
-  mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
-  mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in,
-      m_bit_buffer;
-  mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit,
-      m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index,
-      m_wants_to_finish;
-  tdefl_status m_prev_return_status;
-  const void *m_pIn_buf;
-  void *m_pOut_buf;
-  size_t *m_pIn_buf_size, *m_pOut_buf_size;
-  tdefl_flush m_flush;
-  const mz_uint8 *m_pSrc;
-  size_t m_src_buf_left, m_out_buf_ofs;
-  mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
-  mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
-  mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
-  mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
-  mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
-  mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
-  mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
-  mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
-} tdefl_compressor;
-
-// Initializes the compressor.
-// There is no corresponding deinit() function because the tdefl API's do not
-// dynamically allocate memory.
-// pBut_buf_func: If NULL, output data will be supplied to the specified
-// callback. In this case, the user should call the tdefl_compress_buffer() API
-// for compression.
-// If pBut_buf_func is NULL the user should always call the tdefl_compress()
-// API.
-// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER,
-// etc.)
-tdefl_status tdefl_init(tdefl_compressor *d,
-                        tdefl_put_buf_func_ptr pPut_buf_func,
-                        void *pPut_buf_user, int flags);
-
-// Compresses a block of data, consuming as much of the specified input buffer
-// as possible, and writing as much compressed data to the specified output
-// buffer as possible.
-tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf,
-                            size_t *pIn_buf_size, void *pOut_buf,
-                            size_t *pOut_buf_size, tdefl_flush flush);
-
-// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a
-// non-NULL tdefl_put_buf_func_ptr.
-// tdefl_compress_buffer() always consumes the entire input buffer.
-tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf,
-                                   size_t in_buf_size, tdefl_flush flush);
-
-tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
-mz_uint32 tdefl_get_adler32(tdefl_compressor *d);
-
-// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't
-// defined, because it uses some of its macros.
-#ifndef MINIZ_NO_ZLIB_APIS
-// Create tdefl_compress() flags given zlib-style compression parameters.
-// level may range from [0,10] (where 10 is absolute max compression, but may be
-// much slower on some files)
-// window_bits may be -15 (raw deflate) or 15 (zlib)
-// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY,
-// MZ_RLE, or MZ_FIXED
-mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits,
-                                                int strategy);
-#endif  // #ifndef MINIZ_NO_ZLIB_APIS
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINIZ_HEADER_INCLUDED
-
-// ------------------- End of Header: Implementation follows. (If you only want
-// the header, define MINIZ_HEADER_FILE_ONLY.)
-
-#ifndef MINIZ_HEADER_FILE_ONLY
-
-typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1];
-typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1];
-typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 1 : -1];
-
-//#include <assert.h>
-//#include <string.h>
-
-#define MZ_ASSERT(x) assert(x)
-
-#ifdef MINIZ_NO_MALLOC
-#define MZ_MALLOC(x) NULL
-#define MZ_FREE(x) (void)x, ((void)0)
-#define MZ_REALLOC(p, x) NULL
-#else
-#define MZ_MALLOC(x) malloc(x)
-#define MZ_FREE(x) free(x)
-#define MZ_REALLOC(p, x) realloc(p, x)
-#endif
-
-#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b))
-#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b))
-#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-#define MZ_READ_LE16(p) *((const mz_uint16 *)(p))
-#define MZ_READ_LE32(p) *((const mz_uint32 *)(p))
-#else
-#define MZ_READ_LE16(p)                      \
-  ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \
-   ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U))
-#define MZ_READ_LE32(p)                               \
-  ((mz_uint32)(((const mz_uint8 *)(p))[0]) |          \
-   ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) |  \
-   ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | \
-   ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U))
-#endif
-
-#ifdef _MSC_VER
-#define MZ_FORCEINLINE __forceinline
-#elif defined(__GNUC__)
-#define MZ_FORCEINLINE inline __attribute__((__always_inline__))
-#else
-#define MZ_FORCEINLINE inline
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// ------------------- zlib-style API's
-
-mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) {
-  mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16);
-  size_t block_len = buf_len % 5552;
-  if (!ptr) return MZ_ADLER32_INIT;
-  while (buf_len) {
-    for (i = 0; i + 7 < block_len; i += 8, ptr += 8) {
-      s1 += ptr[0], s2 += s1;
-      s1 += ptr[1], s2 += s1;
-      s1 += ptr[2], s2 += s1;
-      s1 += ptr[3], s2 += s1;
-      s1 += ptr[4], s2 += s1;
-      s1 += ptr[5], s2 += s1;
-      s1 += ptr[6], s2 += s1;
-      s1 += ptr[7], s2 += s1;
-    }
-    for (; i < block_len; ++i) s1 += *ptr++, s2 += s1;
-    s1 %= 65521U, s2 %= 65521U;
-    buf_len -= block_len;
-    block_len = 5552;
-  }
-  return (s2 << 16) + s1;
-}
-
-// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C
-// implementation that balances processor cache usage against speed":
-// http://www.geocities.com/malbrain/
-mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) {
-  static const mz_uint32 s_crc32[16] = {
-      0,          0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4,
-      0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
-      0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c};
-  mz_uint32 crcu32 = (mz_uint32)crc;
-  if (!ptr) return MZ_CRC32_INIT;
-  crcu32 = ~crcu32;
-  while (buf_len--) {
-    mz_uint8 b = *ptr++;
-    crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)];
-    crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)];
-  }
-  return ~crcu32;
-}
-
-void mz_free(void *p) { MZ_FREE(p); }
-
-#ifndef MINIZ_NO_ZLIB_APIS
-
-static void *def_alloc_func(void *opaque, size_t items, size_t size) {
-  (void)opaque, (void)items, (void)size;
-  return MZ_MALLOC(items * size);
-}
-static void def_free_func(void *opaque, void *address) {
-  (void)opaque, (void)address;
-  MZ_FREE(address);
-}
-// static void *def_realloc_func(void *opaque, void *address, size_t items,
-//                              size_t size) {
-//  (void)opaque, (void)address, (void)items, (void)size;
-//  return MZ_REALLOC(address, items * size);
-//}
-
-const char *mz_version(void) { return MZ_VERSION; }
-
-int mz_deflateInit(mz_streamp pStream, int level) {
-  return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9,
-                         MZ_DEFAULT_STRATEGY);
-}
-
-int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits,
-                    int mem_level, int strategy) {
-  tdefl_compressor *pComp;
-  mz_uint comp_flags =
-      TDEFL_COMPUTE_ADLER32 |
-      tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy);
-
-  if (!pStream) return MZ_STREAM_ERROR;
-  if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) ||
-      ((window_bits != MZ_DEFAULT_WINDOW_BITS) &&
-       (-window_bits != MZ_DEFAULT_WINDOW_BITS)))
-    return MZ_PARAM_ERROR;
-
-  pStream->data_type = 0;
-  pStream->adler = MZ_ADLER32_INIT;
-  pStream->msg = NULL;
-  pStream->reserved = 0;
-  pStream->total_in = 0;
-  pStream->total_out = 0;
-  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
-  if (!pStream->zfree) pStream->zfree = def_free_func;
-
-  pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1,
-                                              sizeof(tdefl_compressor));
-  if (!pComp) return MZ_MEM_ERROR;
-
-  pStream->state = (struct mz_internal_state *)pComp;
-
-  if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) {
-    mz_deflateEnd(pStream);
-    return MZ_PARAM_ERROR;
-  }
-
-  return MZ_OK;
-}
-
-int mz_deflateReset(mz_streamp pStream) {
-  if ((!pStream) || (!pStream->state) || (!pStream->zalloc) ||
-      (!pStream->zfree))
-    return MZ_STREAM_ERROR;
-  pStream->total_in = pStream->total_out = 0;
-  tdefl_init((tdefl_compressor *)pStream->state, NULL, NULL,
-             ((tdefl_compressor *)pStream->state)->m_flags);
-  return MZ_OK;
-}
-
-int mz_deflate(mz_streamp pStream, int flush) {
-  size_t in_bytes, out_bytes;
-  mz_ulong orig_total_in, orig_total_out;
-  int mz_status = MZ_OK;
-
-  if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) ||
-      (!pStream->next_out))
-    return MZ_STREAM_ERROR;
-  if (!pStream->avail_out) return MZ_BUF_ERROR;
-
-  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;
-
-  if (((tdefl_compressor *)pStream->state)->m_prev_return_status ==
-      TDEFL_STATUS_DONE)
-    return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR;
-
-  orig_total_in = pStream->total_in;
-  orig_total_out = pStream->total_out;
-  for (;;) {
-    tdefl_status defl_status;
-    in_bytes = pStream->avail_in;
-    out_bytes = pStream->avail_out;
-
-    defl_status = tdefl_compress((tdefl_compressor *)pStream->state,
-                                 pStream->next_in, &in_bytes, pStream->next_out,
-                                 &out_bytes, (tdefl_flush)flush);
-    pStream->next_in += (mz_uint)in_bytes;
-    pStream->avail_in -= (mz_uint)in_bytes;
-    pStream->total_in += (mz_uint)in_bytes;
-    pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state);
-
-    pStream->next_out += (mz_uint)out_bytes;
-    pStream->avail_out -= (mz_uint)out_bytes;
-    pStream->total_out += (mz_uint)out_bytes;
-
-    if (defl_status < 0) {
-      mz_status = MZ_STREAM_ERROR;
-      break;
-    } else if (defl_status == TDEFL_STATUS_DONE) {
-      mz_status = MZ_STREAM_END;
-      break;
-    } else if (!pStream->avail_out)
-      break;
-    else if ((!pStream->avail_in) && (flush != MZ_FINISH)) {
-      if ((flush) || (pStream->total_in != orig_total_in) ||
-          (pStream->total_out != orig_total_out))
-        break;
-      return MZ_BUF_ERROR;  // Can't make forward progress without some input.
-    }
-  }
-  return mz_status;
-}
-
-int mz_deflateEnd(mz_streamp pStream) {
-  if (!pStream) return MZ_STREAM_ERROR;
-  if (pStream->state) {
-    pStream->zfree(pStream->opaque, pStream->state);
-    pStream->state = NULL;
-  }
-  return MZ_OK;
-}
-
-mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) {
-  (void)pStream;
-  // This is really over conservative. (And lame, but it's actually pretty
-  // tricky to compute a true upper bound given the way tdefl's blocking works.)
-  return MZ_MAX(128 + (source_len * 110) / 100,
-                128 + source_len + ((source_len / (31 * 1024)) + 1) * 5);
-}
-
-int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len,
-                 const unsigned char *pSource, mz_ulong source_len, int level) {
-  int status;
-  mz_stream stream;
-  memset(&stream, 0, sizeof(stream));
-
-  // In case mz_ulong is 64-bits (argh I hate longs).
-  if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR;
-
-  stream.next_in = pSource;
-  stream.avail_in = (mz_uint32)source_len;
-  stream.next_out = pDest;
-  stream.avail_out = (mz_uint32)*pDest_len;
-
-  status = mz_deflateInit(&stream, level);
-  if (status != MZ_OK) return status;
-
-  status = mz_deflate(&stream, MZ_FINISH);
-  if (status != MZ_STREAM_END) {
-    mz_deflateEnd(&stream);
-    return (status == MZ_OK) ? MZ_BUF_ERROR : status;
-  }
-
-  *pDest_len = stream.total_out;
-  return mz_deflateEnd(&stream);
-}
-
-int mz_compress(unsigned char *pDest, mz_ulong *pDest_len,
-                const unsigned char *pSource, mz_ulong source_len) {
-  return mz_compress2(pDest, pDest_len, pSource, source_len,
-                      MZ_DEFAULT_COMPRESSION);
-}
-
-mz_ulong mz_compressBound(mz_ulong source_len) {
-  return mz_deflateBound(NULL, source_len);
-}
-
-typedef struct {
-  tinfl_decompressor m_decomp;
-  mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed;
-  int m_window_bits;
-  mz_uint8 m_dict[TINFL_LZ_DICT_SIZE];
-  tinfl_status m_last_status;
-} inflate_state;
-
-int mz_inflateInit2(mz_streamp pStream, int window_bits) {
-  inflate_state *pDecomp;
-  if (!pStream) return MZ_STREAM_ERROR;
-  if ((window_bits != MZ_DEFAULT_WINDOW_BITS) &&
-      (-window_bits != MZ_DEFAULT_WINDOW_BITS))
-    return MZ_PARAM_ERROR;
-
-  pStream->data_type = 0;
-  pStream->adler = 0;
-  pStream->msg = NULL;
-  pStream->total_in = 0;
-  pStream->total_out = 0;
-  pStream->reserved = 0;
-  if (!pStream->zalloc) pStream->zalloc = def_alloc_func;
-  if (!pStream->zfree) pStream->zfree = def_free_func;
-
-  pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1,
-                                             sizeof(inflate_state));
-  if (!pDecomp) return MZ_MEM_ERROR;
-
-  pStream->state = (struct mz_internal_state *)pDecomp;
-
-  tinfl_init(&pDecomp->m_decomp);
-  pDecomp->m_dict_ofs = 0;
-  pDecomp->m_dict_avail = 0;
-  pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT;
-  pDecomp->m_first_call = 1;
-  pDecomp->m_has_flushed = 0;
-  pDecomp->m_window_bits = window_bits;
-
-  return MZ_OK;
-}
-
-int mz_inflateInit(mz_streamp pStream) {
-  return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS);
-}
-
-int mz_inflate(mz_streamp pStream, int flush) {
-  inflate_state *pState;
-  mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32;
-  size_t in_bytes, out_bytes, orig_avail_in;
-  tinfl_status status;
-
-  if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR;
-  if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH;
-  if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH))
-    return MZ_STREAM_ERROR;
-
-  pState = (inflate_state *)pStream->state;
-  if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER;
-  orig_avail_in = pStream->avail_in;
-
-  first_call = pState->m_first_call;
-  pState->m_first_call = 0;
-  if (pState->m_last_status < 0) return MZ_DATA_ERROR;
-
-  if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR;
-  pState->m_has_flushed |= (flush == MZ_FINISH);
-
-  if ((flush == MZ_FINISH) && (first_call)) {
-    // MZ_FINISH on the first call implies that the input and output buffers are
-    // large enough to hold the entire compressed/decompressed file.
-    decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
-    in_bytes = pStream->avail_in;
-    out_bytes = pStream->avail_out;
-    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes,
-                              pStream->next_out, pStream->next_out, &out_bytes,
-                              decomp_flags);
-    pState->m_last_status = status;
-    pStream->next_in += (mz_uint)in_bytes;
-    pStream->avail_in -= (mz_uint)in_bytes;
-    pStream->total_in += (mz_uint)in_bytes;
-    pStream->adler = tinfl_get_adler32(&pState->m_decomp);
-    pStream->next_out += (mz_uint)out_bytes;
-    pStream->avail_out -= (mz_uint)out_bytes;
-    pStream->total_out += (mz_uint)out_bytes;
-
-    if (status < 0)
-      return MZ_DATA_ERROR;
-    else if (status != TINFL_STATUS_DONE) {
-      pState->m_last_status = TINFL_STATUS_FAILED;
-      return MZ_BUF_ERROR;
-    }
-    return MZ_STREAM_END;
-  }
-  // flush != MZ_FINISH then we must assume there's more input.
-  if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT;
-
-  if (pState->m_dict_avail) {
-    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
-    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
-    pStream->next_out += n;
-    pStream->avail_out -= n;
-    pStream->total_out += n;
-    pState->m_dict_avail -= n;
-    pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
-    return ((pState->m_last_status == TINFL_STATUS_DONE) &&
-            (!pState->m_dict_avail))
-               ? MZ_STREAM_END
-               : MZ_OK;
-  }
-
-  for (;;) {
-    in_bytes = pStream->avail_in;
-    out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs;
-
-    status = tinfl_decompress(
-        &pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict,
-        pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags);
-    pState->m_last_status = status;
-
-    pStream->next_in += (mz_uint)in_bytes;
-    pStream->avail_in -= (mz_uint)in_bytes;
-    pStream->total_in += (mz_uint)in_bytes;
-    pStream->adler = tinfl_get_adler32(&pState->m_decomp);
-
-    pState->m_dict_avail = (mz_uint)out_bytes;
-
-    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
-    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
-    pStream->next_out += n;
-    pStream->avail_out -= n;
-    pStream->total_out += n;
-    pState->m_dict_avail -= n;
-    pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
-
-    if (status < 0)
-      return MZ_DATA_ERROR;  // Stream is corrupted (there could be some
-    // uncompressed data left in the output dictionary -
-    // oh well).
-    else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in))
-      return MZ_BUF_ERROR;  // Signal caller that we can't make forward progress
-                            // without supplying more input or by setting flush
-                            // to MZ_FINISH.
-    else if (flush == MZ_FINISH) {
-      // The output buffer MUST be large to hold the remaining uncompressed data
-      // when flush==MZ_FINISH.
-      if (status == TINFL_STATUS_DONE)
-        return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END;
-      // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's
-      // at least 1 more byte on the way. If there's no more room left in the
-      // output buffer then something is wrong.
-      else if (!pStream->avail_out)
-        return MZ_BUF_ERROR;
-    } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) ||
-               (!pStream->avail_out) || (pState->m_dict_avail))
-      break;
-  }
-
-  return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail))
-             ? MZ_STREAM_END
-             : MZ_OK;
-}
-
-int mz_inflateEnd(mz_streamp pStream) {
-  if (!pStream) return MZ_STREAM_ERROR;
-  if (pStream->state) {
-    pStream->zfree(pStream->opaque, pStream->state);
-    pStream->state = NULL;
-  }
-  return MZ_OK;
-}
-
-int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len,
-                  const unsigned char *pSource, mz_ulong source_len) {
-  mz_stream stream;
-  int status;
-  memset(&stream, 0, sizeof(stream));
-
-  // In case mz_ulong is 64-bits (argh I hate longs).
-  if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR;
-
-  stream.next_in = pSource;
-  stream.avail_in = (mz_uint32)source_len;
-  stream.next_out = pDest;
-  stream.avail_out = (mz_uint32)*pDest_len;
-
-  status = mz_inflateInit(&stream);
-  if (status != MZ_OK) return status;
-
-  status = mz_inflate(&stream, MZ_FINISH);
-  if (status != MZ_STREAM_END) {
-    mz_inflateEnd(&stream);
-    return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR
-                                                            : status;
-  }
-  *pDest_len = stream.total_out;
-
-  return mz_inflateEnd(&stream);
-}
-
-const char *mz_error(int err) {
-  static struct {
-    int m_err;
-    const char *m_pDesc;
-  } s_error_descs[] = {{MZ_OK, ""},
-                       {MZ_STREAM_END, "stream end"},
-                       {MZ_NEED_DICT, "need dictionary"},
-                       {MZ_ERRNO, "file error"},
-                       {MZ_STREAM_ERROR, "stream error"},
-                       {MZ_DATA_ERROR, "data error"},
-                       {MZ_MEM_ERROR, "out of memory"},
-                       {MZ_BUF_ERROR, "buf error"},
-                       {MZ_VERSION_ERROR, "version error"},
-                       {MZ_PARAM_ERROR, "parameter error"}};
-  mz_uint i;
-  for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i)
-    if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc;
-  return NULL;
-}
-
-#endif  // MINIZ_NO_ZLIB_APIS
-
-// ------------------- Low-level Decompression (completely independent from all
-// compression API's)
-
-#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
-#define TINFL_MEMSET(p, c, l) memset(p, c, l)
-
-#define TINFL_CR_BEGIN  \
-  switch (r->m_state) { \
-    case 0:
-#define TINFL_CR_RETURN(state_index, result) \
-  do {                                       \
-    status = result;                         \
-    r->m_state = state_index;                \
-    goto common_exit;                        \
-    case state_index:;                       \
-  }                                          \
-  MZ_MACRO_END
-#define TINFL_CR_RETURN_FOREVER(state_index, result) \
-  do {                                               \
-    for (;;) {                                       \
-      TINFL_CR_RETURN(state_index, result);          \
-    }                                                \
-  }                                                  \
-  MZ_MACRO_END
-#define TINFL_CR_FINISH }
-
-// TODO: If the caller has indicated that there's no more input, and we attempt
-// to read beyond the input buf, then something is wrong with the input because
-// the inflator never
-// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of
-// the stream with 0's in this scenario.
-#define TINFL_GET_BYTE(state_index, c)                                 \
-  do {                                                                 \
-    if (pIn_buf_cur >= pIn_buf_end) {                                  \
-      for (;;) {                                                       \
-        if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) {                \
-          TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \
-          if (pIn_buf_cur < pIn_buf_end) {                             \
-            c = *pIn_buf_cur++;                                        \
-            break;                                                     \
-          }                                                            \
-        } else {                                                       \
-          c = 0;                                                       \
-          break;                                                       \
-        }                                                              \
-      }                                                                \
-    } else                                                             \
-      c = *pIn_buf_cur++;                                              \
-  }                                                                    \
-  MZ_MACRO_END
-
-#define TINFL_NEED_BITS(state_index, n)            \
-  do {                                             \
-    mz_uint c;                                     \
-    TINFL_GET_BYTE(state_index, c);                \
-    bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \
-    num_bits += 8;                                 \
-  } while (num_bits < (mz_uint)(n))
-#define TINFL_SKIP_BITS(state_index, n) \
-  do {                                  \
-    if (num_bits < (mz_uint)(n)) {      \
-      TINFL_NEED_BITS(state_index, n);  \
-    }                                   \
-    bit_buf >>= (n);                    \
-    num_bits -= (n);                    \
-  }                                     \
-  MZ_MACRO_END
-#define TINFL_GET_BITS(state_index, b, n) \
-  do {                                    \
-    if (num_bits < (mz_uint)(n)) {        \
-      TINFL_NEED_BITS(state_index, n);    \
-    }                                     \
-    b = bit_buf & ((1 << (n)) - 1);       \
-    bit_buf >>= (n);                      \
-    num_bits -= (n);                      \
-  }                                       \
-  MZ_MACRO_END
-
-// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes
-// remaining in the input buffer falls below 2.
-// It reads just enough bytes from the input stream that are needed to decode
-// the next Huffman code (and absolutely no more). It works by trying to fully
-// decode a
-// Huffman code by using whatever bits are currently present in the bit buffer.
-// If this fails, it reads another byte, and tries again until it succeeds or
-// until the
-// bit buffer contains >=15 bits (deflate's max. Huffman code size).
-#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff)                     \
-  do {                                                                 \
-    temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \
-    if (temp >= 0) {                                                   \
-      code_len = temp >> 9;                                            \
-      if ((code_len) && (num_bits >= code_len)) break;                 \
-    } else if (num_bits > TINFL_FAST_LOOKUP_BITS) {                    \
-      code_len = TINFL_FAST_LOOKUP_BITS;                               \
-      do {                                                             \
-        temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
-      } while ((temp < 0) && (num_bits >= (code_len + 1)));            \
-      if (temp >= 0) break;                                            \
-    }                                                                  \
-    TINFL_GET_BYTE(state_index, c);                                    \
-    bit_buf |= (((tinfl_bit_buf_t)c) << num_bits);                     \
-    num_bits += 8;                                                     \
-  } while (num_bits < 15);
-
-// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex
-// than you would initially expect because the zlib API expects the decompressor
-// to never read
-// beyond the final byte of the deflate stream. (In other words, when this macro
-// wants to read another byte from the input, it REALLY needs another byte in
-// order to fully
-// decode the next Huffman code.) Handling this properly is particularly
-// important on raw deflate (non-zlib) streams, which aren't followed by a byte
-// aligned adler-32.
-// The slow path is only executed at the very end of the input buffer.
-#define TINFL_HUFF_DECODE(state_index, sym, pHuff)                             \
-  do {                                                                         \
-    int temp;                                                                  \
-    mz_uint code_len, c;                                                       \
-    if (num_bits < 15) {                                                       \
-      if ((pIn_buf_end - pIn_buf_cur) < 2) {                                   \
-        TINFL_HUFF_BITBUF_FILL(state_index, pHuff);                            \
-      } else {                                                                 \
-        bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) |           \
-                   (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8));      \
-        pIn_buf_cur += 2;                                                      \
-        num_bits += 16;                                                        \
-      }                                                                        \
-    }                                                                          \
-    if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= \
-        0)                                                                     \
-      code_len = temp >> 9, temp &= 511;                                       \
-    else {                                                                     \
-      code_len = TINFL_FAST_LOOKUP_BITS;                                       \
-      do {                                                                     \
-        temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)];         \
-      } while (temp < 0);                                                      \
-    }                                                                          \
-    sym = temp;                                                                \
-    bit_buf >>= code_len;                                                      \
-    num_bits -= code_len;                                                      \
-  }                                                                            \
-  MZ_MACRO_END
-
-tinfl_status tinfl_decompress(tinfl_decompressor *r,
-                              const mz_uint8 *pIn_buf_next,
-                              size_t *pIn_buf_size, mz_uint8 *pOut_buf_start,
-                              mz_uint8 *pOut_buf_next, size_t *pOut_buf_size,
-                              const mz_uint32 decomp_flags) {
-  static const int s_length_base[31] = {
-      3,  4,  5,  6,  7,  8,  9,  10,  11,  13,  15,  17,  19,  23, 27, 31,
-      35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0,  0};
-  static const int s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
-                                         1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4,
-                                         4, 4, 5, 5, 5, 5, 0, 0, 0};
-  static const int s_dist_base[32] = {
-      1,    2,    3,    4,    5,    7,     9,     13,    17,  25,   33,
-      49,   65,   97,   129,  193,  257,   385,   513,   769, 1025, 1537,
-      2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0,   0};
-  static const int s_dist_extra[32] = {0, 0, 0,  0,  1,  1,  2,  2,  3,  3,
-                                       4, 4, 5,  5,  6,  6,  7,  7,  8,  8,
-                                       9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
-  static const mz_uint8 s_length_dezigzag[19] = {
-      16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-  static const int s_min_table_sizes[3] = {257, 1, 4};
-
-  tinfl_status status = TINFL_STATUS_FAILED;
-  mz_uint32 num_bits, dist, counter, num_extra;
-  tinfl_bit_buf_t bit_buf;
-  const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end =
-                                                  pIn_buf_next + *pIn_buf_size;
-  mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end =
-                                              pOut_buf_next + *pOut_buf_size;
-  size_t out_buf_size_mask =
-             (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)
-                 ? (size_t)-1
-                 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1,
-         dist_from_out_buf_start;
-
-  // Ensure the output buffer's size is a power of 2, unless the output buffer
-  // is large enough to hold the entire output file (in which case it doesn't
-  // matter).
-  if (((out_buf_size_mask + 1) & out_buf_size_mask) ||
-      (pOut_buf_next < pOut_buf_start)) {
-    *pIn_buf_size = *pOut_buf_size = 0;
-    return TINFL_STATUS_BAD_PARAM;
-  }
-
-  num_bits = r->m_num_bits;
-  bit_buf = r->m_bit_buf;
-  dist = r->m_dist;
-  counter = r->m_counter;
-  num_extra = r->m_num_extra;
-  dist_from_out_buf_start = r->m_dist_from_out_buf_start;
-  TINFL_CR_BEGIN
-
-  bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0;
-  r->m_z_adler32 = r->m_check_adler32 = 1;
-  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) {
-    TINFL_GET_BYTE(1, r->m_zhdr0);
-    TINFL_GET_BYTE(2, r->m_zhdr1);
-    counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) ||
-               (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8));
-    if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))
-      counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) ||
-                  ((out_buf_size_mask + 1) <
-                   (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4)))));
-    if (counter) {
-      TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED);
-    }
-  }
-
-  do {
-    TINFL_GET_BITS(3, r->m_final, 3);
-    r->m_type = r->m_final >> 1;
-    if (r->m_type == 0) {
-      TINFL_SKIP_BITS(5, num_bits & 7);
-      for (counter = 0; counter < 4; ++counter) {
-        if (num_bits)
-          TINFL_GET_BITS(6, r->m_raw_header[counter], 8);
-        else
-          TINFL_GET_BYTE(7, r->m_raw_header[counter]);
-      }
-      if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) !=
-          (mz_uint)(0xFFFF ^
-                    (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) {
-        TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED);
-      }
-      while ((counter) && (num_bits)) {
-        TINFL_GET_BITS(51, dist, 8);
-        while (pOut_buf_cur >= pOut_buf_end) {
-          TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT);
-        }
-        *pOut_buf_cur++ = (mz_uint8)dist;
-        counter--;
-      }
-      while (counter) {
-        size_t n;
-        while (pOut_buf_cur >= pOut_buf_end) {
-          TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT);
-        }
-        while (pIn_buf_cur >= pIn_buf_end) {
-          if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) {
-            TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT);
-          } else {
-            TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED);
-          }
-        }
-        n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur),
-                          (size_t)(pIn_buf_end - pIn_buf_cur)),
-                   counter);
-        TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n);
-        pIn_buf_cur += n;
-        pOut_buf_cur += n;
-        counter -= (mz_uint)n;
-      }
-    } else if (r->m_type == 3) {
-      TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED);
-    } else {
-      if (r->m_type == 1) {
-        mz_uint8 *p = r->m_tables[0].m_code_size;
-        mz_uint i;
-        r->m_table_sizes[0] = 288;
-        r->m_table_sizes[1] = 32;
-        TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32);
-        for (i = 0; i <= 143; ++i) *p++ = 8;
-        for (; i <= 255; ++i) *p++ = 9;
-        for (; i <= 279; ++i) *p++ = 7;
-        for (; i <= 287; ++i) *p++ = 8;
-      } else {
-        for (counter = 0; counter < 3; counter++) {
-          TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]);
-          r->m_table_sizes[counter] += s_min_table_sizes[counter];
-        }
-        MZ_CLEAR_OBJ(r->m_tables[2].m_code_size);
-        for (counter = 0; counter < r->m_table_sizes[2]; counter++) {
-          mz_uint s;
-          TINFL_GET_BITS(14, s, 3);
-          r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s;
-        }
-        r->m_table_sizes[2] = 19;
-      }
-      for (; (int)r->m_type >= 0; r->m_type--) {
-        int tree_next, tree_cur;
-        tinfl_huff_table *pTable;
-        mz_uint i, j, used_syms, total, sym_index, next_code[17],
-            total_syms[16];
-        pTable = &r->m_tables[r->m_type];
-        MZ_CLEAR_OBJ(total_syms);
-        MZ_CLEAR_OBJ(pTable->m_look_up);
-        MZ_CLEAR_OBJ(pTable->m_tree);
-        for (i = 0; i < r->m_table_sizes[r->m_type]; ++i)
-          total_syms[pTable->m_code_size[i]]++;
-        used_syms = 0, total = 0;
-        next_code[0] = next_code[1] = 0;
-        for (i = 1; i <= 15; ++i) {
-          used_syms += total_syms[i];
-          next_code[i + 1] = (total = ((total + total_syms[i]) << 1));
-        }
-        if ((65536 != total) && (used_syms > 1)) {
-          TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED);
-        }
-        for (tree_next = -1, sym_index = 0;
-             sym_index < r->m_table_sizes[r->m_type]; ++sym_index) {
-          mz_uint rev_code = 0, l, cur_code,
-                  code_size = pTable->m_code_size[sym_index];
-          if (!code_size) continue;
-          cur_code = next_code[code_size]++;
-          for (l = code_size; l > 0; l--, cur_code >>= 1)
-            rev_code = (rev_code << 1) | (cur_code & 1);
-          if (code_size <= TINFL_FAST_LOOKUP_BITS) {
-            mz_int16 k = (mz_int16)((code_size << 9) | sym_index);
-            while (rev_code < TINFL_FAST_LOOKUP_SIZE) {
-              pTable->m_look_up[rev_code] = k;
-              rev_code += (1 << code_size);
-            }
-            continue;
-          }
-          if (0 ==
-              (tree_cur = pTable->m_look_up[rev_code &
-                                            (TINFL_FAST_LOOKUP_SIZE - 1)])) {
-            pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] =
-                (mz_int16)tree_next;
-            tree_cur = tree_next;
-            tree_next -= 2;
-          }
-          rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1);
-          for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) {
-            tree_cur -= ((rev_code >>= 1) & 1);
-            if (!pTable->m_tree[-tree_cur - 1]) {
-              pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next;
-              tree_cur = tree_next;
-              tree_next -= 2;
-            } else
-              tree_cur = pTable->m_tree[-tree_cur - 1];
-          }
-          tree_cur -= ((rev_code >>= 1) & 1);
-          pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index;
-        }
-        if (r->m_type == 2) {
-          for (counter = 0;
-               counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) {
-            mz_uint s;
-            TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]);
-            if (dist < 16) {
-              r->m_len_codes[counter++] = (mz_uint8)dist;
-              continue;
-            }
-            if ((dist == 16) && (!counter)) {
-              TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED);
-            }
-            num_extra = "\02\03\07"[dist - 16];
-            TINFL_GET_BITS(18, s, num_extra);
-            s += "\03\03\013"[dist - 16];
-            TINFL_MEMSET(r->m_len_codes + counter,
-                         (dist == 16) ? r->m_len_codes[counter - 1] : 0, s);
-            counter += s;
-          }
-          if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) {
-            TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED);
-          }
-          TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes,
-                       r->m_table_sizes[0]);
-          TINFL_MEMCPY(r->m_tables[1].m_code_size,
-                       r->m_len_codes + r->m_table_sizes[0],
-                       r->m_table_sizes[1]);
-        }
-      }
-      for (;;) {
-        mz_uint8 *pSrc;
-        for (;;) {
-          if (((pIn_buf_end - pIn_buf_cur) < 4) ||
-              ((pOut_buf_end - pOut_buf_cur) < 2)) {
-            TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]);
-            if (counter >= 256) break;
-            while (pOut_buf_cur >= pOut_buf_end) {
-              TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT);
-            }
-            *pOut_buf_cur++ = (mz_uint8)counter;
-          } else {
-            int sym2;
-            mz_uint code_len;
-#if TINFL_USE_64BIT_BITBUF
-            if (num_bits < 30) {
-              bit_buf |=
-                  (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits);
-              pIn_buf_cur += 4;
-              num_bits += 32;
-            }
-#else
-            if (num_bits < 15) {
-              bit_buf |=
-                  (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits);
-              pIn_buf_cur += 2;
-              num_bits += 16;
-            }
-#endif
-            if ((sym2 =
-                     r->m_tables[0]
-                         .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >=
-                0)
-              code_len = sym2 >> 9;
-            else {
-              code_len = TINFL_FAST_LOOKUP_BITS;
-              do {
-                sym2 = r->m_tables[0]
-                           .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)];
-              } while (sym2 < 0);
-            }
-            counter = sym2;
-            bit_buf >>= code_len;
-            num_bits -= code_len;
-            if (counter & 256) break;
-
-#if !TINFL_USE_64BIT_BITBUF
-            if (num_bits < 15) {
-              bit_buf |=
-                  (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits);
-              pIn_buf_cur += 2;
-              num_bits += 16;
-            }
-#endif
-            if ((sym2 =
-                     r->m_tables[0]
-                         .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >=
-                0)
-              code_len = sym2 >> 9;
-            else {
-              code_len = TINFL_FAST_LOOKUP_BITS;
-              do {
-                sym2 = r->m_tables[0]
-                           .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)];
-              } while (sym2 < 0);
-            }
-            bit_buf >>= code_len;
-            num_bits -= code_len;
-
-            pOut_buf_cur[0] = (mz_uint8)counter;
-            if (sym2 & 256) {
-              pOut_buf_cur++;
-              counter = sym2;
-              break;
-            }
-            pOut_buf_cur[1] = (mz_uint8)sym2;
-            pOut_buf_cur += 2;
-          }
-        }
-        if ((counter &= 511) == 256) break;
-
-        num_extra = s_length_extra[counter - 257];
-        counter = s_length_base[counter - 257];
-        if (num_extra) {
-          mz_uint extra_bits;
-          TINFL_GET_BITS(25, extra_bits, num_extra);
-          counter += extra_bits;
-        }
-
-        TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]);
-        num_extra = s_dist_extra[dist];
-        dist = s_dist_base[dist];
-        if (num_extra) {
-          mz_uint extra_bits;
-          TINFL_GET_BITS(27, extra_bits, num_extra);
-          dist += extra_bits;
-        }
-
-        dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start;
-        if ((dist > dist_from_out_buf_start) &&
-            (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) {
-          TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED);
-        }
-
-        pSrc = pOut_buf_start +
-               ((dist_from_out_buf_start - dist) & out_buf_size_mask);
-
-        if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) {
-          while (counter--) {
-            while (pOut_buf_cur >= pOut_buf_end) {
-              TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT);
-            }
-            *pOut_buf_cur++ =
-                pOut_buf_start[(dist_from_out_buf_start++ - dist) &
-                               out_buf_size_mask];
-          }
-          continue;
-        }
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
-        else if ((counter >= 9) && (counter <= dist)) {
-          const mz_uint8 *pSrc_end = pSrc + (counter & ~7);
-          do {
-            ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0];
-            ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1];
-            pOut_buf_cur += 8;
-          } while ((pSrc += 8) < pSrc_end);
-          if ((counter &= 7) < 3) {
-            if (counter) {
-              pOut_buf_cur[0] = pSrc[0];
-              if (counter > 1) pOut_buf_cur[1] = pSrc[1];
-              pOut_buf_cur += counter;
-            }
-            continue;
-          }
-        }
-#endif
-        do {
-          pOut_buf_cur[0] = pSrc[0];
-          pOut_buf_cur[1] = pSrc[1];
-          pOut_buf_cur[2] = pSrc[2];
-          pOut_buf_cur += 3;
-          pSrc += 3;
-        } while ((int)(counter -= 3) > 2);
-        if ((int)counter > 0) {
-          pOut_buf_cur[0] = pSrc[0];
-          if ((int)counter > 1) pOut_buf_cur[1] = pSrc[1];
-          pOut_buf_cur += counter;
-        }
-      }
-    }
-  } while (!(r->m_final & 1));
-  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) {
-    TINFL_SKIP_BITS(32, num_bits & 7);
-    for (counter = 0; counter < 4; ++counter) {
-      mz_uint s;
-      if (num_bits)
-        TINFL_GET_BITS(41, s, 8);
-      else
-        TINFL_GET_BYTE(42, s);
-      r->m_z_adler32 = (r->m_z_adler32 << 8) | s;
-    }
-  }
-  TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE);
-  TINFL_CR_FINISH
-
-common_exit:
-  r->m_num_bits = num_bits;
-  r->m_bit_buf = bit_buf;
-  r->m_dist = dist;
-  r->m_counter = counter;
-  r->m_num_extra = num_extra;
-  r->m_dist_from_out_buf_start = dist_from_out_buf_start;
-  *pIn_buf_size = pIn_buf_cur - pIn_buf_next;
-  *pOut_buf_size = pOut_buf_cur - pOut_buf_next;
-  if ((decomp_flags &
-       (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) &&
-      (status >= 0)) {
-    const mz_uint8 *ptr = pOut_buf_next;
-    size_t buf_len = *pOut_buf_size;
-    mz_uint32 i, s1 = r->m_check_adler32 & 0xffff,
-                 s2 = r->m_check_adler32 >> 16;
-    size_t block_len = buf_len % 5552;
-    while (buf_len) {
-      for (i = 0; i + 7 < block_len; i += 8, ptr += 8) {
-        s1 += ptr[0], s2 += s1;
-        s1 += ptr[1], s2 += s1;
-        s1 += ptr[2], s2 += s1;
-        s1 += ptr[3], s2 += s1;
-        s1 += ptr[4], s2 += s1;
-        s1 += ptr[5], s2 += s1;
-        s1 += ptr[6], s2 += s1;
-        s1 += ptr[7], s2 += s1;
-      }
-      for (; i < block_len; ++i) s1 += *ptr++, s2 += s1;
-      s1 %= 65521U, s2 %= 65521U;
-      buf_len -= block_len;
-      block_len = 5552;
-    }
-    r->m_check_adler32 = (s2 << 16) + s1;
-    if ((status == TINFL_STATUS_DONE) &&
-        (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) &&
-        (r->m_check_adler32 != r->m_z_adler32))
-      status = TINFL_STATUS_ADLER32_MISMATCH;
-  }
-  return status;
-}
-
-// Higher level helper functions.
-void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len,
-                                   size_t *pOut_len, int flags) {
-  tinfl_decompressor decomp;
-  void *pBuf = NULL, *pNew_buf;
-  size_t src_buf_ofs = 0, out_buf_capacity = 0;
-  *pOut_len = 0;
-  tinfl_init(&decomp);
-  for (;;) {
-    size_t src_buf_size = src_buf_len - src_buf_ofs,
-           dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity;
-    tinfl_status status = tinfl_decompress(
-        &decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size,
-        (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL,
-        &dst_buf_size,
-        (flags & ~TINFL_FLAG_HAS_MORE_INPUT) |
-            TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
-    if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) {
-      MZ_FREE(pBuf);
-      *pOut_len = 0;
-      return NULL;
-    }
-    src_buf_ofs += src_buf_size;
-    *pOut_len += dst_buf_size;
-    if (status == TINFL_STATUS_DONE) break;
-    new_out_buf_capacity = out_buf_capacity * 2;
-    if (new_out_buf_capacity < 128) new_out_buf_capacity = 128;
-    pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity);
-    if (!pNew_buf) {
-      MZ_FREE(pBuf);
-      *pOut_len = 0;
-      return NULL;
-    }
-    pBuf = pNew_buf;
-    out_buf_capacity = new_out_buf_capacity;
-  }
-  return pBuf;
-}
-
-size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len,
-                                   const void *pSrc_buf, size_t src_buf_len,
-                                   int flags) {
-  tinfl_decompressor decomp;
-  tinfl_status status;
-  tinfl_init(&decomp);
-  status =
-      tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len,
-                       (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len,
-                       (flags & ~TINFL_FLAG_HAS_MORE_INPUT) |
-                           TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
-  return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED
-                                       : out_buf_len;
-}
-
-int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size,
-                                     tinfl_put_buf_func_ptr pPut_buf_func,
-                                     void *pPut_buf_user, int flags) {
-  int result = 0;
-  tinfl_decompressor decomp;
-  mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE);
-  size_t in_buf_ofs = 0, dict_ofs = 0;
-  if (!pDict) return TINFL_STATUS_FAILED;
-  tinfl_init(&decomp);
-  for (;;) {
-    size_t in_buf_size = *pIn_buf_size - in_buf_ofs,
-           dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs;
-    tinfl_status status =
-        tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs,
-                         &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size,
-                         (flags & ~(TINFL_FLAG_HAS_MORE_INPUT |
-                                    TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)));
-    in_buf_ofs += in_buf_size;
-    if ((dst_buf_size) &&
-        (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user)))
-      break;
-    if (status != TINFL_STATUS_HAS_MORE_OUTPUT) {
-      result = (status == TINFL_STATUS_DONE);
-      break;
-    }
-    dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1);
-  }
-  MZ_FREE(pDict);
-  *pIn_buf_size = in_buf_ofs;
-  return result;
-}
-
-// ------------------- Low-level Compression (independent from all decompression
-// API's)
-
-// Purposely making these tables static for faster init and thread safety.
-static const mz_uint16 s_tdefl_len_sym[256] = {
-    257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268,
-    268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272,
-    272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274,
-    274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276,
-    276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
-    277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
-    278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279,
-    279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280,
-    280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281,
-    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
-    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282,
-    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
-    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283,
-    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
-    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284,
-    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
-    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
-    285};
-
-static const mz_uint8 s_tdefl_len_extra[256] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0};
-
-static const mz_uint8 s_tdefl_small_dist_sym[512] = {
-    0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,
-    8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-    14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-    15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
-    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17};
-
-static const mz_uint8 s_tdefl_small_dist_extra[512] = {
-    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
-
-static const mz_uint8 s_tdefl_large_dist_sym[128] = {
-    0,  0,  18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24,
-    24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26,
-    26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-    27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-    28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-    28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-    29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29};
-
-static const mz_uint8 s_tdefl_large_dist_extra[128] = {
-    0,  0,  8,  8,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
-    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13};
-
-// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted
-// values.
-typedef struct {
-  mz_uint16 m_key, m_sym_index;
-} tdefl_sym_freq;
-static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms,
-                                             tdefl_sym_freq *pSyms0,
-                                             tdefl_sym_freq *pSyms1) {
-  mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2];
-  tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1;
-  MZ_CLEAR_OBJ(hist);
-  for (i = 0; i < num_syms; i++) {
-    mz_uint freq = pSyms0[i].m_key;
-    hist[freq & 0xFF]++;
-    hist[256 + ((freq >> 8) & 0xFF)]++;
-  }
-  while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256]))
-    total_passes--;
-  for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) {
-    const mz_uint32 *pHist = &hist[pass << 8];
-    mz_uint offsets[256], cur_ofs = 0;
-    for (i = 0; i < 256; i++) {
-      offsets[i] = cur_ofs;
-      cur_ofs += pHist[i];
-    }
-    for (i = 0; i < num_syms; i++)
-      pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] =
-          pCur_syms[i];
-    {
-      tdefl_sym_freq *t = pCur_syms;
-      pCur_syms = pNew_syms;
-      pNew_syms = t;
-    }
-  }
-  return pCur_syms;
-}
-
-// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat,
-// alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
-static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) {
-  int root, leaf, next, avbl, used, dpth;
-  if (n == 0)
-    return;
-  else if (n == 1) {
-    A[0].m_key = 1;
-    return;
-  }
-  A[0].m_key += A[1].m_key;
-  root = 0;
-  leaf = 2;
-  for (next = 1; next < n - 1; next++) {
-    if (leaf >= n || A[root].m_key < A[leaf].m_key) {
-      A[next].m_key = A[root].m_key;
-      A[root++].m_key = (mz_uint16)next;
-    } else
-      A[next].m_key = A[leaf++].m_key;
-    if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) {
-      A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key);
-      A[root++].m_key = (mz_uint16)next;
-    } else
-      A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
-  }
-  A[n - 2].m_key = 0;
-  for (next = n - 3; next >= 0; next--)
-    A[next].m_key = A[A[next].m_key].m_key + 1;
-  avbl = 1;
-  used = dpth = 0;
-  root = n - 2;
-  next = n - 1;
-  while (avbl > 0) {
-    while (root >= 0 && (int)A[root].m_key == dpth) {
-      used++;
-      root--;
-    }
-    while (avbl > used) {
-      A[next--].m_key = (mz_uint16)(dpth);
-      avbl--;
-    }
-    avbl = 2 * used;
-    dpth++;
-    used = 0;
-  }
-}
-
-// Limits canonical Huffman code table's max code size.
-enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 };
-static void tdefl_huffman_enforce_max_code_size(int *pNum_codes,
-                                                int code_list_len,
-                                                int max_code_size) {
-  int i;
-  mz_uint32 total = 0;
-  if (code_list_len <= 1) return;
-  for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++)
-    pNum_codes[max_code_size] += pNum_codes[i];
-  for (i = max_code_size; i > 0; i--)
-    total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i));
-  while (total != (1UL << max_code_size)) {
-    pNum_codes[max_code_size]--;
-    for (i = max_code_size - 1; i > 0; i--)
-      if (pNum_codes[i]) {
-        pNum_codes[i]--;
-        pNum_codes[i + 1] += 2;
-        break;
-      }
-    total--;
-  }
-}
-
-static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num,
-                                         int table_len, int code_size_limit,
-                                         int static_table) {
-  int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE];
-  mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1];
-  MZ_CLEAR_OBJ(num_codes);
-  if (static_table) {
-    for (i = 0; i < table_len; i++)
-      num_codes[d->m_huff_code_sizes[table_num][i]]++;
-  } else {
-    tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS],
-        *pSyms;
-    int num_used_syms = 0;
-    const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0];
-    for (i = 0; i < table_len; i++)
-      if (pSym_count[i]) {
-        syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i];
-        syms0[num_used_syms++].m_sym_index = (mz_uint16)i;
-      }
-
-    pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1);
-    tdefl_calculate_minimum_redundancy(pSyms, num_used_syms);
-
-    for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;
-
-    tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms,
-                                        code_size_limit);
-
-    MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]);
-    MZ_CLEAR_OBJ(d->m_huff_codes[table_num]);
-    for (i = 1, j = num_used_syms; i <= code_size_limit; i++)
-      for (l = num_codes[i]; l > 0; l--)
-        d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i);
-  }
-
-  next_code[1] = 0;
-  for (j = 0, i = 2; i <= code_size_limit; i++)
-    next_code[i] = j = ((j + num_codes[i - 1]) << 1);
-
-  for (i = 0; i < table_len; i++) {
-    mz_uint rev_code = 0, code, code_size;
-    if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue;
-    code = next_code[code_size]++;
-    for (l = code_size; l > 0; l--, code >>= 1)
-      rev_code = (rev_code << 1) | (code & 1);
-    d->m_huff_codes[table_num][i] = (mz_uint16)rev_code;
-  }
-}
-
-#define TDEFL_PUT_BITS(b, l)                               \
-  do {                                                     \
-    mz_uint bits = b;                                      \
-    mz_uint len = l;                                       \
-    MZ_ASSERT(bits <= ((1U << len) - 1U));                 \
-    d->m_bit_buffer |= (bits << d->m_bits_in);             \
-    d->m_bits_in += len;                                   \
-    while (d->m_bits_in >= 8) {                            \
-      if (d->m_pOutput_buf < d->m_pOutput_buf_end)         \
-        *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \
-      d->m_bit_buffer >>= 8;                               \
-      d->m_bits_in -= 8;                                   \
-    }                                                      \
-  }                                                        \
-  MZ_MACRO_END
-
-#define TDEFL_RLE_PREV_CODE_SIZE()                                        \
-  {                                                                       \
-    if (rle_repeat_count) {                                               \
-      if (rle_repeat_count < 3) {                                         \
-        d->m_huff_count[2][prev_code_size] = (mz_uint16)(                 \
-            d->m_huff_count[2][prev_code_size] + rle_repeat_count);       \
-        while (rle_repeat_count--)                                        \
-          packed_code_sizes[num_packed_code_sizes++] = prev_code_size;    \
-      } else {                                                            \
-        d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \
-        packed_code_sizes[num_packed_code_sizes++] = 16;                  \
-        packed_code_sizes[num_packed_code_sizes++] =                      \
-            (mz_uint8)(rle_repeat_count - 3);                             \
-      }                                                                   \
-      rle_repeat_count = 0;                                               \
-    }                                                                     \
-  }
-
-#define TDEFL_RLE_ZERO_CODE_SIZE()                                            \
-  {                                                                           \
-    if (rle_z_count) {                                                        \
-      if (rle_z_count < 3) {                                                  \
-        d->m_huff_count[2][0] =                                               \
-            (mz_uint16)(d->m_huff_count[2][0] + rle_z_count);                 \
-        while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \
-      } else if (rle_z_count <= 10) {                                         \
-        d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1);     \
-        packed_code_sizes[num_packed_code_sizes++] = 17;                      \
-        packed_code_sizes[num_packed_code_sizes++] =                          \
-            (mz_uint8)(rle_z_count - 3);                                      \
-      } else {                                                                \
-        d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1);     \
-        packed_code_sizes[num_packed_code_sizes++] = 18;                      \
-        packed_code_sizes[num_packed_code_sizes++] =                          \
-            (mz_uint8)(rle_z_count - 11);                                     \
-      }                                                                       \
-      rle_z_count = 0;                                                        \
-    }                                                                         \
-  }
-
-static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = {
-    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-
-static void tdefl_start_dynamic_block(tdefl_compressor *d) {
-  int num_lit_codes, num_dist_codes, num_bit_lengths;
-  mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count,
-      rle_repeat_count, packed_code_sizes_index;
-  mz_uint8
-      code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1],
-      packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1],
-      prev_code_size = 0xFF;
-
-  d->m_huff_count[0][256] = 1;
-
-  tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE);
-  tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE);
-
-  for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--)
-    if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break;
-  for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--)
-    if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break;
-
-  memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes);
-  memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0],
-         num_dist_codes);
-  total_code_sizes_to_pack = num_lit_codes + num_dist_codes;
-  num_packed_code_sizes = 0;
-  rle_z_count = 0;
-  rle_repeat_count = 0;
-
-  memset(&d->m_huff_count[2][0], 0,
-         sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2);
-  for (i = 0; i < total_code_sizes_to_pack; i++) {
-    mz_uint8 code_size = code_sizes_to_pack[i];
-    if (!code_size) {
-      TDEFL_RLE_PREV_CODE_SIZE();
-      if (++rle_z_count == 138) {
-        TDEFL_RLE_ZERO_CODE_SIZE();
-      }
-    } else {
-      TDEFL_RLE_ZERO_CODE_SIZE();
-      if (code_size != prev_code_size) {
-        TDEFL_RLE_PREV_CODE_SIZE();
-        d->m_huff_count[2][code_size] =
-            (mz_uint16)(d->m_huff_count[2][code_size] + 1);
-        packed_code_sizes[num_packed_code_sizes++] = code_size;
-      } else if (++rle_repeat_count == 6) {
-        TDEFL_RLE_PREV_CODE_SIZE();
-      }
-    }
-    prev_code_size = code_size;
-  }
-  if (rle_repeat_count) {
-    TDEFL_RLE_PREV_CODE_SIZE();
-  } else {
-    TDEFL_RLE_ZERO_CODE_SIZE();
-  }
-
-  tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE);
-
-  TDEFL_PUT_BITS(2, 2);
-
-  TDEFL_PUT_BITS(num_lit_codes - 257, 5);
-  TDEFL_PUT_BITS(num_dist_codes - 1, 5);
-
-  for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--)
-    if (d->m_huff_code_sizes
-            [2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]])
-      break;
-  num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1));
-  TDEFL_PUT_BITS(num_bit_lengths - 4, 4);
-  for (i = 0; (int)i < num_bit_lengths; i++)
-    TDEFL_PUT_BITS(
-        d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3);
-
-  for (packed_code_sizes_index = 0;
-       packed_code_sizes_index < num_packed_code_sizes;) {
-    mz_uint code = packed_code_sizes[packed_code_sizes_index++];
-    MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2);
-    TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]);
-    if (code >= 16)
-      TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++],
-                     "\02\03\07"[code - 16]);
-  }
-}
-
-static void tdefl_start_static_block(tdefl_compressor *d) {
-  mz_uint i;
-  mz_uint8 *p = &d->m_huff_code_sizes[0][0];
-
-  for (i = 0; i <= 143; ++i) *p++ = 8;
-  for (; i <= 255; ++i) *p++ = 9;
-  for (; i <= 279; ++i) *p++ = 7;
-  for (; i <= 287; ++i) *p++ = 8;
-
-  memset(d->m_huff_code_sizes[1], 5, 32);
-
-  tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE);
-  tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE);
-
-  TDEFL_PUT_BITS(1, 2);
-}
-
-static const mz_uint mz_bitmasks[17] = {
-    0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
-    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF};
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && \
-    MINIZ_HAS_64BIT_REGISTERS
-static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) {
-  mz_uint flags;
-  mz_uint8 *pLZ_codes;
-  mz_uint8 *pOutput_buf = d->m_pOutput_buf;
-  mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf;
-  mz_uint64 bit_buffer = d->m_bit_buffer;
-  mz_uint bits_in = d->m_bits_in;
-
-#define TDEFL_PUT_BITS_FAST(b, l)                \
-  {                                              \
-    bit_buffer |= (((mz_uint64)(b)) << bits_in); \
-    bits_in += (l);                              \
-  }
-
-  flags = 1;
-  for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end;
-       flags >>= 1) {
-    if (flags == 1) flags = *pLZ_codes++ | 0x100;
-
-    if (flags & 1) {
-      mz_uint s0, s1, n0, n1, sym, num_extra_bits;
-      mz_uint match_len = pLZ_codes[0],
-              match_dist = *(const mz_uint16 *)(pLZ_codes + 1);
-      pLZ_codes += 3;
-
-      MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
-      TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]],
-                          d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
-      TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]],
-                          s_tdefl_len_extra[match_len]);
-
-      // This sequence coaxes MSVC into using cmov's vs. jmp's.
-      s0 = s_tdefl_small_dist_sym[match_dist & 511];
-      n0 = s_tdefl_small_dist_extra[match_dist & 511];
-      s1 = s_tdefl_large_dist_sym[match_dist >> 8];
-      n1 = s_tdefl_large_dist_extra[match_dist >> 8];
-      sym = (match_dist < 512) ? s0 : s1;
-      num_extra_bits = (match_dist < 512) ? n0 : n1;
-
-      MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
-      TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym],
-                          d->m_huff_code_sizes[1][sym]);
-      TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits],
-                          num_extra_bits);
-    } else {
-      mz_uint lit = *pLZ_codes++;
-      MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
-      TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit],
-                          d->m_huff_code_sizes[0][lit]);
-
-      if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) {
-        flags >>= 1;
-        lit = *pLZ_codes++;
-        MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
-        TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit],
-                            d->m_huff_code_sizes[0][lit]);
-
-        if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) {
-          flags >>= 1;
-          lit = *pLZ_codes++;
-          MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
-          TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit],
-                              d->m_huff_code_sizes[0][lit]);
-        }
-      }
-    }
-
-    if (pOutput_buf >= d->m_pOutput_buf_end) return MZ_FALSE;
-
-    *(mz_uint64 *)pOutput_buf = bit_buffer;
-    pOutput_buf += (bits_in >> 3);
-    bit_buffer >>= (bits_in & ~7);
-    bits_in &= 7;
-  }
-
-#undef TDEFL_PUT_BITS_FAST
-
-  d->m_pOutput_buf = pOutput_buf;
-  d->m_bits_in = 0;
-  d->m_bit_buffer = 0;
-
-  while (bits_in) {
-    mz_uint32 n = MZ_MIN(bits_in, 16);
-    TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n);
-    bit_buffer >>= n;
-    bits_in -= n;
-  }
-
-  TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);
-
-  return (d->m_pOutput_buf < d->m_pOutput_buf_end);
-}
-#else
-static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) {
-  mz_uint flags;
-  mz_uint8 *pLZ_codes;
-
-  flags = 1;
-  for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf;
-       flags >>= 1) {
-    if (flags == 1) flags = *pLZ_codes++ | 0x100;
-    if (flags & 1) {
-      mz_uint sym, num_extra_bits;
-      mz_uint match_len = pLZ_codes[0],
-              match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8));
-      pLZ_codes += 3;
-
-      MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
-      TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]],
-                     d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
-      TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]],
-                     s_tdefl_len_extra[match_len]);
-
-      if (match_dist < 512) {
-        sym = s_tdefl_small_dist_sym[match_dist];
-        num_extra_bits = s_tdefl_small_dist_extra[match_dist];
-      } else {
-        sym = s_tdefl_large_dist_sym[match_dist >> 8];
-        num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8];
-      }
-      MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
-      TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
-      TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
-    } else {
-      mz_uint lit = *pLZ_codes++;
-      MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
-      TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
-    }
-  }
-
-  TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);
-
-  return (d->m_pOutput_buf < d->m_pOutput_buf_end);
-}
-#endif  // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN &&
-        // MINIZ_HAS_64BIT_REGISTERS
-
-static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) {
-  if (static_block)
-    tdefl_start_static_block(d);
-  else
-    tdefl_start_dynamic_block(d);
-  return tdefl_compress_lz_codes(d);
-}
-
-static int tdefl_flush_block(tdefl_compressor *d, int flush) {
-  mz_uint saved_bit_buf, saved_bits_in;
-  mz_uint8 *pSaved_output_buf;
-  mz_bool comp_block_succeeded = MZ_FALSE;
-  int n, use_raw_block =
-             ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) &&
-             (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size;
-  mz_uint8 *pOutput_buf_start =
-      ((d->m_pPut_buf_func == NULL) &&
-       ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE))
-          ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs)
-          : d->m_output_buf;
-
-  d->m_pOutput_buf = pOutput_buf_start;
-  d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16;
-
-  MZ_ASSERT(!d->m_output_flush_remaining);
-  d->m_output_flush_ofs = 0;
-  d->m_output_flush_remaining = 0;
-
-  *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left);
-  d->m_pLZ_code_buf -= (d->m_num_flags_left == 8);
-
-  if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) {
-    TDEFL_PUT_BITS(0x78, 8);
-    TDEFL_PUT_BITS(0x01, 8);
-  }
-
-  TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1);
-
-  pSaved_output_buf = d->m_pOutput_buf;
-  saved_bit_buf = d->m_bit_buffer;
-  saved_bits_in = d->m_bits_in;
-
-  if (!use_raw_block)
-    comp_block_succeeded =
-        tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) ||
-                                    (d->m_total_lz_bytes < 48));
-
-  // If the block gets expanded, forget the current contents of the output
-  // buffer and send a raw block instead.
-  if (((use_raw_block) ||
-       ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >=
-                                  d->m_total_lz_bytes))) &&
-      ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) {
-    mz_uint i;
-    d->m_pOutput_buf = pSaved_output_buf;
-    d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
-    TDEFL_PUT_BITS(0, 2);
-    if (d->m_bits_in) {
-      TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
-    }
-    for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) {
-      TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16);
-    }
-    for (i = 0; i < d->m_total_lz_bytes; ++i) {
-      TDEFL_PUT_BITS(
-          d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK],
-          8);
-    }
-  }
-  // Check for the extremely unlikely (if not impossible) case of the compressed
-  // block not fitting into the output buffer when using dynamic codes.
-  else if (!comp_block_succeeded) {
-    d->m_pOutput_buf = pSaved_output_buf;
-    d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
-    tdefl_compress_block(d, MZ_TRUE);
-  }
-
-  if (flush) {
-    if (flush == TDEFL_FINISH) {
-      if (d->m_bits_in) {
-        TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
-      }
-      if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) {
-        mz_uint i, a = d->m_adler32;
-        for (i = 0; i < 4; i++) {
-          TDEFL_PUT_BITS((a >> 24) & 0xFF, 8);
-          a <<= 8;
-        }
-      }
-    } else {
-      mz_uint i, z = 0;
-      TDEFL_PUT_BITS(0, 3);
-      if (d->m_bits_in) {
-        TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
-      }
-      for (i = 2; i; --i, z ^= 0xFFFF) {
-        TDEFL_PUT_BITS(z & 0xFFFF, 16);
-      }
-    }
-  }
-
-  MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end);
-
-  memset(&d->m_huff_count[0][0], 0,
-         sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
-  memset(&d->m_huff_count[1][0], 0,
-         sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);
-
-  d->m_pLZ_code_buf = d->m_lz_code_buf + 1;
-  d->m_pLZ_flags = d->m_lz_code_buf;
-  d->m_num_flags_left = 8;
-  d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes;
-  d->m_total_lz_bytes = 0;
-  d->m_block_index++;
-
-  if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) {
-    if (d->m_pPut_buf_func) {
-      *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
-      if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user))
-        return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED);
-    } else if (pOutput_buf_start == d->m_output_buf) {
-      int bytes_to_copy = (int)MZ_MIN(
-          (size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs));
-      memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf,
-             bytes_to_copy);
-      d->m_out_buf_ofs += bytes_to_copy;
-      if ((n -= bytes_to_copy) != 0) {
-        d->m_output_flush_ofs = bytes_to_copy;
-        d->m_output_flush_remaining = n;
-      }
-    } else {
-      d->m_out_buf_ofs += n;
-    }
-  }
-
-  return d->m_output_flush_remaining;
-}
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
-#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16 *)(p)
-static MZ_FORCEINLINE void tdefl_find_match(
-    tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist,
-    mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) {
-  mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK,
-                match_len = *pMatch_len, probe_pos = pos, next_probe_pos,
-                probe_len;
-  mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
-  const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q;
-  mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]),
-            s01 = TDEFL_READ_UNALIGNED_WORD(s);
-  MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN);
-  if (max_match_len <= match_len) return;
-  for (;;) {
-    for (;;) {
-      if (--num_probes_left == 0) return;
-#define TDEFL_PROBE                                                            \
-  next_probe_pos = d->m_next[probe_pos];                                       \
-  if ((!next_probe_pos) ||                                                     \
-      ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist))       \
-    return;                                                                    \
-  probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK;                        \
-  if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \
-    break;
-      TDEFL_PROBE;
-      TDEFL_PROBE;
-      TDEFL_PROBE;
-    }
-    if (!dist) break;
-    q = (const mz_uint16 *)(d->m_dict + probe_pos);
-    if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue;
-    p = s;
-    probe_len = 32;
-    do {
-    } while (
-        (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
-        (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
-        (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
-        (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) &&
-        (--probe_len > 0));
-    if (!probe_len) {
-      *pMatch_dist = dist;
-      *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN);
-      break;
-    } else if ((probe_len = ((mz_uint)(p - s) * 2) +
-                            (mz_uint)(*(const mz_uint8 *)p ==
-                                      *(const mz_uint8 *)q)) > match_len) {
-      *pMatch_dist = dist;
-      if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) ==
-          max_match_len)
-        break;
-      c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]);
-    }
-  }
-}
-#else
-static MZ_FORCEINLINE void tdefl_find_match(
-    tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist,
-    mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) {
-  mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK,
-                match_len = *pMatch_len, probe_pos = pos, next_probe_pos,
-                probe_len;
-  mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
-  const mz_uint8 *s = d->m_dict + pos, *p, *q;
-  mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1];
-  MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN);
-  if (max_match_len <= match_len) return;
-  for (;;) {
-    for (;;) {
-      if (--num_probes_left == 0) return;
-#define TDEFL_PROBE                                                      \
-  next_probe_pos = d->m_next[probe_pos];                                 \
-  if ((!next_probe_pos) ||                                               \
-      ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \
-    return;                                                              \
-  probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK;                  \
-  if ((d->m_dict[probe_pos + match_len] == c0) &&                        \
-      (d->m_dict[probe_pos + match_len - 1] == c1))                      \
-    break;
-      TDEFL_PROBE;
-      TDEFL_PROBE;
-      TDEFL_PROBE;
-    }
-    if (!dist) break;
-    p = s;
-    q = d->m_dict + probe_pos;
-    for (probe_len = 0; probe_len < max_match_len; probe_len++)
-      if (*p++ != *q++) break;
-    if (probe_len > match_len) {
-      *pMatch_dist = dist;
-      if ((*pMatch_len = match_len = probe_len) == max_match_len) return;
-      c0 = d->m_dict[pos + match_len];
-      c1 = d->m_dict[pos + match_len - 1];
-    }
-  }
-}
-#endif  // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-static mz_bool tdefl_compress_fast(tdefl_compressor *d) {
-  // Faster, minimally featured LZRW1-style match+parse loop with better
-  // register utilization. Intended for applications where raw throughput is
-  // valued more highly than ratio.
-  mz_uint lookahead_pos = d->m_lookahead_pos,
-          lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size,
-          total_lz_bytes = d->m_total_lz_bytes,
-          num_flags_left = d->m_num_flags_left;
-  mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags;
-  mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
-
-  while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) {
-    const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096;
-    mz_uint dst_pos =
-        (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
-    mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(
-        d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size);
-    d->m_src_buf_left -= num_bytes_to_process;
-    lookahead_size += num_bytes_to_process;
-
-    while (num_bytes_to_process) {
-      mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process);
-      memcpy(d->m_dict + dst_pos, d->m_pSrc, n);
-      if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
-        memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc,
-               MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos));
-      d->m_pSrc += n;
-      dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK;
-      num_bytes_to_process -= n;
-    }
-
-    dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size);
-    if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE))
-      break;
-
-    while (lookahead_size >= 4) {
-      mz_uint cur_match_dist, cur_match_len = 1;
-      mz_uint8 *pCur_dict = d->m_dict + cur_pos;
-      mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF;
-      mz_uint hash =
-          (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) &
-          TDEFL_LEVEL1_HASH_SIZE_MASK;
-      mz_uint probe_pos = d->m_hash[hash];
-      d->m_hash[hash] = (mz_uint16)lookahead_pos;
-
-      if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <=
-           dict_size) &&
-          ((*(const mz_uint32 *)(d->m_dict +
-                                 (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) &
-            0xFFFFFF) == first_trigram)) {
-        const mz_uint16 *p = (const mz_uint16 *)pCur_dict;
-        const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos);
-        mz_uint32 probe_len = 32;
-        do {
-        } while ((TDEFL_READ_UNALIGNED_WORD(++p) ==
-                  TDEFL_READ_UNALIGNED_WORD(++q)) &&
-                 (TDEFL_READ_UNALIGNED_WORD(++p) ==
-                  TDEFL_READ_UNALIGNED_WORD(++q)) &&
-                 (TDEFL_READ_UNALIGNED_WORD(++p) ==
-                  TDEFL_READ_UNALIGNED_WORD(++q)) &&
-                 (TDEFL_READ_UNALIGNED_WORD(++p) ==
-                  TDEFL_READ_UNALIGNED_WORD(++q)) &&
-                 (--probe_len > 0));
-        cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) +
-                        (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q);
-        if (!probe_len)
-          cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0;
-
-        if ((cur_match_len < TDEFL_MIN_MATCH_LEN) ||
-            ((cur_match_len == TDEFL_MIN_MATCH_LEN) &&
-             (cur_match_dist >= 8U * 1024U))) {
-          cur_match_len = 1;
-          *pLZ_code_buf++ = (mz_uint8)first_trigram;
-          *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
-          d->m_huff_count[0][(mz_uint8)first_trigram]++;
-        } else {
-          mz_uint32 s0, s1;
-          cur_match_len = MZ_MIN(cur_match_len, lookahead_size);
-
-          MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) &&
-                    (cur_match_dist >= 1) &&
-                    (cur_match_dist <= TDEFL_LZ_DICT_SIZE));
-
-          cur_match_dist--;
-
-          pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN);
-          *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist;
-          pLZ_code_buf += 3;
-          *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80);
-
-          s0 = s_tdefl_small_dist_sym[cur_match_dist & 511];
-          s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8];
-          d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++;
-
-          d->m_huff_count[0][s_tdefl_len_sym[cur_match_len -
-                                             TDEFL_MIN_MATCH_LEN]]++;
-        }
-      } else {
-        *pLZ_code_buf++ = (mz_uint8)first_trigram;
-        *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
-        d->m_huff_count[0][(mz_uint8)first_trigram]++;
-      }
-
-      if (--num_flags_left == 0) {
-        num_flags_left = 8;
-        pLZ_flags = pLZ_code_buf++;
-      }
-
-      total_lz_bytes += cur_match_len;
-      lookahead_pos += cur_match_len;
-      dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE);
-      cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK;
-      MZ_ASSERT(lookahead_size >= cur_match_len);
-      lookahead_size -= cur_match_len;
-
-      if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) {
-        int n;
-        d->m_lookahead_pos = lookahead_pos;
-        d->m_lookahead_size = lookahead_size;
-        d->m_dict_size = dict_size;
-        d->m_total_lz_bytes = total_lz_bytes;
-        d->m_pLZ_code_buf = pLZ_code_buf;
-        d->m_pLZ_flags = pLZ_flags;
-        d->m_num_flags_left = num_flags_left;
-        if ((n = tdefl_flush_block(d, 0)) != 0)
-          return (n < 0) ? MZ_FALSE : MZ_TRUE;
-        total_lz_bytes = d->m_total_lz_bytes;
-        pLZ_code_buf = d->m_pLZ_code_buf;
-        pLZ_flags = d->m_pLZ_flags;
-        num_flags_left = d->m_num_flags_left;
-      }
-    }
-
-    while (lookahead_size) {
-      mz_uint8 lit = d->m_dict[cur_pos];
-
-      total_lz_bytes++;
-      *pLZ_code_buf++ = lit;
-      *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
-      if (--num_flags_left == 0) {
-        num_flags_left = 8;
-        pLZ_flags = pLZ_code_buf++;
-      }
-
-      d->m_huff_count[0][lit]++;
-
-      lookahead_pos++;
-      dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE);
-      cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
-      lookahead_size--;
-
-      if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) {
-        int n;
-        d->m_lookahead_pos = lookahead_pos;
-        d->m_lookahead_size = lookahead_size;
-        d->m_dict_size = dict_size;
-        d->m_total_lz_bytes = total_lz_bytes;
-        d->m_pLZ_code_buf = pLZ_code_buf;
-        d->m_pLZ_flags = pLZ_flags;
-        d->m_num_flags_left = num_flags_left;
-        if ((n = tdefl_flush_block(d, 0)) != 0)
-          return (n < 0) ? MZ_FALSE : MZ_TRUE;
-        total_lz_bytes = d->m_total_lz_bytes;
-        pLZ_code_buf = d->m_pLZ_code_buf;
-        pLZ_flags = d->m_pLZ_flags;
-        num_flags_left = d->m_num_flags_left;
-      }
-    }
-  }
-
-  d->m_lookahead_pos = lookahead_pos;
-  d->m_lookahead_size = lookahead_size;
-  d->m_dict_size = dict_size;
-  d->m_total_lz_bytes = total_lz_bytes;
-  d->m_pLZ_code_buf = pLZ_code_buf;
-  d->m_pLZ_flags = pLZ_flags;
-  d->m_num_flags_left = num_flags_left;
-  return MZ_TRUE;
-}
-#endif  // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-
-static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d,
-                                                mz_uint8 lit) {
-  d->m_total_lz_bytes++;
-  *d->m_pLZ_code_buf++ = lit;
-  *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1);
-  if (--d->m_num_flags_left == 0) {
-    d->m_num_flags_left = 8;
-    d->m_pLZ_flags = d->m_pLZ_code_buf++;
-  }
-  d->m_huff_count[0][lit]++;
-}
-
-static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d,
-                                              mz_uint match_len,
-                                              mz_uint match_dist) {
-  mz_uint32 s0, s1;
-
-  MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) &&
-            (match_dist <= TDEFL_LZ_DICT_SIZE));
-
-  d->m_total_lz_bytes += match_len;
-
-  d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN);
-
-  match_dist -= 1;
-  d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF);
-  d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8);
-  d->m_pLZ_code_buf += 3;
-
-  *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80);
-  if (--d->m_num_flags_left == 0) {
-    d->m_num_flags_left = 8;
-    d->m_pLZ_flags = d->m_pLZ_code_buf++;
-  }
-
-  s0 = s_tdefl_small_dist_sym[match_dist & 511];
-  s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127];
-  d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++;
-
-  if (match_len >= TDEFL_MIN_MATCH_LEN)
-    d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++;
-}
-
-static mz_bool tdefl_compress_normal(tdefl_compressor *d) {
-  const mz_uint8 *pSrc = d->m_pSrc;
-  size_t src_buf_left = d->m_src_buf_left;
-  tdefl_flush flush = d->m_flush;
-
-  while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) {
-    mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos;
-    // Update dictionary and hash chains. Keeps the lookahead size equal to
-    // TDEFL_MAX_MATCH_LEN.
-    if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) {
-      mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) &
-                        TDEFL_LZ_DICT_SIZE_MASK,
-              ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
-      mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK]
-                      << TDEFL_LZ_HASH_SHIFT) ^
-                     d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK];
-      mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(
-          src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size);
-      const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process;
-      src_buf_left -= num_bytes_to_process;
-      d->m_lookahead_size += num_bytes_to_process;
-      while (pSrc != pSrc_end) {
-        mz_uint8 c = *pSrc++;
-        d->m_dict[dst_pos] = c;
-        if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
-          d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
-        hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
-        d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
-        d->m_hash[hash] = (mz_uint16)(ins_pos);
-        dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
-        ins_pos++;
-      }
-    } else {
-      while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) {
-        mz_uint8 c = *pSrc++;
-        mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) &
-                          TDEFL_LZ_DICT_SIZE_MASK;
-        src_buf_left--;
-        d->m_dict[dst_pos] = c;
-        if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
-          d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
-        if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) {
-          mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2;
-          mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK]
-                           << (TDEFL_LZ_HASH_SHIFT * 2)) ^
-                          (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]
-                           << TDEFL_LZ_HASH_SHIFT) ^
-                          c) &
-                         (TDEFL_LZ_HASH_SIZE - 1);
-          d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
-          d->m_hash[hash] = (mz_uint16)(ins_pos);
-        }
-      }
-    }
-    d->m_dict_size =
-        MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size);
-    if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) break;
-
-    // Simple lazy/greedy parsing state machine.
-    len_to_move = 1;
-    cur_match_dist = 0;
-    cur_match_len =
-        d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1);
-    cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
-    if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) {
-      if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) {
-        mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK];
-        cur_match_len = 0;
-        while (cur_match_len < d->m_lookahead_size) {
-          if (d->m_dict[cur_pos + cur_match_len] != c) break;
-          cur_match_len++;
-        }
-        if (cur_match_len < TDEFL_MIN_MATCH_LEN)
-          cur_match_len = 0;
-        else
-          cur_match_dist = 1;
-      }
-    } else {
-      tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size,
-                       d->m_lookahead_size, &cur_match_dist, &cur_match_len);
-    }
-    if (((cur_match_len == TDEFL_MIN_MATCH_LEN) &&
-         (cur_match_dist >= 8U * 1024U)) ||
-        (cur_pos == cur_match_dist) ||
-        ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) {
-      cur_match_dist = cur_match_len = 0;
-    }
-    if (d->m_saved_match_len) {
-      if (cur_match_len > d->m_saved_match_len) {
-        tdefl_record_literal(d, (mz_uint8)d->m_saved_lit);
-        if (cur_match_len >= 128) {
-          tdefl_record_match(d, cur_match_len, cur_match_dist);
-          d->m_saved_match_len = 0;
-          len_to_move = cur_match_len;
-        } else {
-          d->m_saved_lit = d->m_dict[cur_pos];
-          d->m_saved_match_dist = cur_match_dist;
-          d->m_saved_match_len = cur_match_len;
-        }
-      } else {
-        tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist);
-        len_to_move = d->m_saved_match_len - 1;
-        d->m_saved_match_len = 0;
-      }
-    } else if (!cur_match_dist)
-      tdefl_record_literal(d,
-                           d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]);
-    else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) ||
-             (cur_match_len >= 128)) {
-      tdefl_record_match(d, cur_match_len, cur_match_dist);
-      len_to_move = cur_match_len;
-    } else {
-      d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)];
-      d->m_saved_match_dist = cur_match_dist;
-      d->m_saved_match_len = cur_match_len;
-    }
-    // Move the lookahead forward by len_to_move bytes.
-    d->m_lookahead_pos += len_to_move;
-    MZ_ASSERT(d->m_lookahead_size >= len_to_move);
-    d->m_lookahead_size -= len_to_move;
-    d->m_dict_size =
-        MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE);
-    // Check if it's time to flush the current LZ codes to the internal output
-    // buffer.
-    if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
-        ((d->m_total_lz_bytes > 31 * 1024) &&
-         (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >=
-           d->m_total_lz_bytes) ||
-          (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) {
-      int n;
-      d->m_pSrc = pSrc;
-      d->m_src_buf_left = src_buf_left;
-      if ((n = tdefl_flush_block(d, 0)) != 0)
-        return (n < 0) ? MZ_FALSE : MZ_TRUE;
-    }
-  }
-
-  d->m_pSrc = pSrc;
-  d->m_src_buf_left = src_buf_left;
-  return MZ_TRUE;
-}
-
-static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) {
-  if (d->m_pIn_buf_size) {
-    *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
-  }
-
-  if (d->m_pOut_buf_size) {
-    size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs,
-                      d->m_output_flush_remaining);
-    memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs,
-           d->m_output_buf + d->m_output_flush_ofs, n);
-    d->m_output_flush_ofs += (mz_uint)n;
-    d->m_output_flush_remaining -= (mz_uint)n;
-    d->m_out_buf_ofs += n;
-
-    *d->m_pOut_buf_size = d->m_out_buf_ofs;
-  }
-
-  return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE
-                                                         : TDEFL_STATUS_OKAY;
-}
-
-tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf,
-                            size_t *pIn_buf_size, void *pOut_buf,
-                            size_t *pOut_buf_size, tdefl_flush flush) {
-  if (!d) {
-    if (pIn_buf_size) *pIn_buf_size = 0;
-    if (pOut_buf_size) *pOut_buf_size = 0;
-    return TDEFL_STATUS_BAD_PARAM;
-  }
-
-  d->m_pIn_buf = pIn_buf;
-  d->m_pIn_buf_size = pIn_buf_size;
-  d->m_pOut_buf = pOut_buf;
-  d->m_pOut_buf_size = pOut_buf_size;
-  d->m_pSrc = (const mz_uint8 *)(pIn_buf);
-  d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0;
-  d->m_out_buf_ofs = 0;
-  d->m_flush = flush;
-
-  if (((d->m_pPut_buf_func != NULL) ==
-       ((pOut_buf != NULL) || (pOut_buf_size != NULL))) ||
-      (d->m_prev_return_status != TDEFL_STATUS_OKAY) ||
-      (d->m_wants_to_finish && (flush != TDEFL_FINISH)) ||
-      (pIn_buf_size && *pIn_buf_size && !pIn_buf) ||
-      (pOut_buf_size && *pOut_buf_size && !pOut_buf)) {
-    if (pIn_buf_size) *pIn_buf_size = 0;
-    if (pOut_buf_size) *pOut_buf_size = 0;
-    return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM);
-  }
-  d->m_wants_to_finish |= (flush == TDEFL_FINISH);
-
-  if ((d->m_output_flush_remaining) || (d->m_finished))
-    return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
-
-#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-  if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) &&
-      ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) &&
-      ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS |
-                      TDEFL_RLE_MATCHES)) == 0)) {
-    if (!tdefl_compress_fast(d)) return d->m_prev_return_status;
-  } else
-#endif  // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
-  {
-    if (!tdefl_compress_normal(d)) return d->m_prev_return_status;
-  }
-
-  if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) &&
-      (pIn_buf))
-    d->m_adler32 =
-        (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf,
-                              d->m_pSrc - (const mz_uint8 *)pIn_buf);
-
-  if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) &&
-      (!d->m_output_flush_remaining)) {
-    if (tdefl_flush_block(d, flush) < 0) return d->m_prev_return_status;
-    d->m_finished = (flush == TDEFL_FINISH);
-    if (flush == TDEFL_FULL_FLUSH) {
-      MZ_CLEAR_OBJ(d->m_hash);
-      MZ_CLEAR_OBJ(d->m_next);
-      d->m_dict_size = 0;
-    }
-  }
-
-  return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
-}
-
-tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf,
-                                   size_t in_buf_size, tdefl_flush flush) {
-  MZ_ASSERT(d->m_pPut_buf_func);
-  return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush);
-}
-
-tdefl_status tdefl_init(tdefl_compressor *d,
-                        tdefl_put_buf_func_ptr pPut_buf_func,
-                        void *pPut_buf_user, int flags) {
-  d->m_pPut_buf_func = pPut_buf_func;
-  d->m_pPut_buf_user = pPut_buf_user;
-  d->m_flags = (mz_uint)(flags);
-  d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3;
-  d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0;
-  d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3;
-  if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash);
-  d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size =
-      d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0;
-  d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished =
-      d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0;
-  d->m_pLZ_code_buf = d->m_lz_code_buf + 1;
-  d->m_pLZ_flags = d->m_lz_code_buf;
-  d->m_num_flags_left = 8;
-  d->m_pOutput_buf = d->m_output_buf;
-  d->m_pOutput_buf_end = d->m_output_buf;
-  d->m_prev_return_status = TDEFL_STATUS_OKAY;
-  d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0;
-  d->m_adler32 = 1;
-  d->m_pIn_buf = NULL;
-  d->m_pOut_buf = NULL;
-  d->m_pIn_buf_size = NULL;
-  d->m_pOut_buf_size = NULL;
-  d->m_flush = TDEFL_NO_FLUSH;
-  d->m_pSrc = NULL;
-  d->m_src_buf_left = 0;
-  d->m_out_buf_ofs = 0;
-  memset(&d->m_huff_count[0][0], 0,
-         sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
-  memset(&d->m_huff_count[1][0], 0,
-         sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);
-  return TDEFL_STATUS_OKAY;
-}
-
-tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) {
-  return d->m_prev_return_status;
-}
-
-mz_uint32 tdefl_get_adler32(tdefl_compressor *d) { return d->m_adler32; }
-
-mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len,
-                                     tdefl_put_buf_func_ptr pPut_buf_func,
-                                     void *pPut_buf_user, int flags) {
-  tdefl_compressor *pComp;
-  mz_bool succeeded;
-  if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE;
-  pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor));
-  if (!pComp) return MZ_FALSE;
-  succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) ==
-               TDEFL_STATUS_OKAY);
-  succeeded =
-      succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) ==
-                    TDEFL_STATUS_DONE);
-  MZ_FREE(pComp);
-  return succeeded;
-}
-
-typedef struct {
-  size_t m_size, m_capacity;
-  mz_uint8 *m_pBuf;
-  mz_bool m_expandable;
-} tdefl_output_buffer;
-
-static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len,
-                                          void *pUser) {
-  tdefl_output_buffer *p = (tdefl_output_buffer *)pUser;
-  size_t new_size = p->m_size + len;
-  if (new_size > p->m_capacity) {
-    size_t new_capacity = p->m_capacity;
-    mz_uint8 *pNew_buf;
-    if (!p->m_expandable) return MZ_FALSE;
-    do {
-      new_capacity = MZ_MAX(128U, new_capacity << 1U);
-    } while (new_size > new_capacity);
-    pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity);
-    if (!pNew_buf) return MZ_FALSE;
-    p->m_pBuf = pNew_buf;
-    p->m_capacity = new_capacity;
-  }
-  memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len);
-  p->m_size = new_size;
-  return MZ_TRUE;
-}
-
-void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len,
-                                 size_t *pOut_len, int flags) {
-  tdefl_output_buffer out_buf;
-  MZ_CLEAR_OBJ(out_buf);
-  if (!pOut_len)
-    return MZ_FALSE;
-  else
-    *pOut_len = 0;
-  out_buf.m_expandable = MZ_TRUE;
-  if (!tdefl_compress_mem_to_output(
-          pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags))
-    return NULL;
-  *pOut_len = out_buf.m_size;
-  return out_buf.m_pBuf;
-}
-
-size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len,
-                                 const void *pSrc_buf, size_t src_buf_len,
-                                 int flags) {
-  tdefl_output_buffer out_buf;
-  MZ_CLEAR_OBJ(out_buf);
-  if (!pOut_buf) return 0;
-  out_buf.m_pBuf = (mz_uint8 *)pOut_buf;
-  out_buf.m_capacity = out_buf_len;
-  if (!tdefl_compress_mem_to_output(
-          pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags))
-    return 0;
-  return out_buf.m_size;
-}
-
-#ifndef MINIZ_NO_ZLIB_APIS
-static const mz_uint s_tdefl_num_probes[11] = {0,   1,   6,   32,  16,  32,
-                                               128, 256, 512, 768, 1500};
-
-// level may actually range from [0,10] (10 is a "hidden" max level, where we
-// want a bit more compression and it's fine if throughput to fall off a cliff
-// on some files).
-mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits,
-                                                int strategy) {
-  mz_uint comp_flags =
-      s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] |
-      ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0);
-  if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER;
-
-  if (!level)
-    comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS;
-  else if (strategy == MZ_FILTERED)
-    comp_flags |= TDEFL_FILTER_MATCHES;
-  else if (strategy == MZ_HUFFMAN_ONLY)
-    comp_flags &= ~TDEFL_MAX_PROBES_MASK;
-  else if (strategy == MZ_FIXED)
-    comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS;
-  else if (strategy == MZ_RLE)
-    comp_flags |= TDEFL_RLE_MATCHES;
-
-  return comp_flags;
-}
-#endif  // MINIZ_NO_ZLIB_APIS
-
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4204)  // nonstandard extension used : non-constant
-                                 // aggregate initializer (also supported by GNU
-                                 // C and C99, so no big deal)
-#pragma warning(disable : 4244)  // 'initializing': conversion from '__int64' to
-                                 // 'int', possible loss of data
-#pragma warning(disable : 4267)  // 'argument': conversion from '__int64' to
-                                 // 'int', possible loss of data
-#pragma warning(disable : 4996)  // 'strdup': The POSIX name for this item is
-                                 // deprecated. Instead, use the ISO C and C++
-                                 // conformant name: _strdup.
-#endif
-
-// Simple PNG writer function by Alex Evans, 2011. Released into the public
-// domain: https://gist.github.com/908299, more context at
-// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/.
-// This is actually a modification of Alex's original code so PNG files
-// generated by this function pass pngcheck.
-void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w,
-                                                 int h, int num_chans,
-                                                 size_t *pLen_out,
-                                                 mz_uint level, mz_bool flip) {
-  // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was
-  // defined.
-  static const mz_uint s_tdefl_png_num_probes[11] = {
-      0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500};
-  tdefl_compressor *pComp =
-      (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor));
-  tdefl_output_buffer out_buf;
-  int i, bpl = w * num_chans, y, z;
-  mz_uint32 c;
-  *pLen_out = 0;
-  if (!pComp) return NULL;
-  MZ_CLEAR_OBJ(out_buf);
-  out_buf.m_expandable = MZ_TRUE;
-  out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h);
-  if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) {
-    MZ_FREE(pComp);
-    return NULL;
-  }
-  // write dummy header
-  for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf);
-  // compress image data
-  tdefl_init(
-      pComp, tdefl_output_buffer_putter, &out_buf,
-      s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER);
-  for (y = 0; y < h; ++y) {
-    tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH);
-    tdefl_compress_buffer(pComp,
-                          (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl,
-                          bpl, TDEFL_NO_FLUSH);
-  }
-  if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) !=
-      TDEFL_STATUS_DONE) {
-    MZ_FREE(pComp);
-    MZ_FREE(out_buf.m_pBuf);
-    return NULL;
-  }
-  // write real header
-  *pLen_out = out_buf.m_size - 41;
-  {
-    static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06};
-    mz_uint8 pnghdr[41] = {0x89,
-                           0x50,
-                           0x4e,
-                           0x47,
-                           0x0d,
-                           0x0a,
-                           0x1a,
-                           0x0a,
-                           0x00,
-                           0x00,
-                           0x00,
-                           0x0d,
-                           0x49,
-                           0x48,
-                           0x44,
-                           0x52,
-                           0,
-                           0,
-                           (mz_uint8)(w >> 8),
-                           (mz_uint8)w,
-                           0,
-                           0,
-                           (mz_uint8)(h >> 8),
-                           (mz_uint8)h,
-                           8,
-                           chans[num_chans],
-                           0,
-                           0,
-                           0,
-                           0,
-                           0,
-                           0,
-                           0,
-                           (mz_uint8)(*pLen_out >> 24),
-                           (mz_uint8)(*pLen_out >> 16),
-                           (mz_uint8)(*pLen_out >> 8),
-                           (mz_uint8)*pLen_out,
-                           0x49,
-                           0x44,
-                           0x41,
-                           0x54};
-    c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17);
-    for (i = 0; i < 4; ++i, c <<= 8)
-      ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24);
-    memcpy(out_buf.m_pBuf, pnghdr, 41);
-  }
-  // write footer (IDAT CRC-32, followed by IEND chunk)
-  if (!tdefl_output_buffer_putter(
-          "\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) {
-    *pLen_out = 0;
-    MZ_FREE(pComp);
-    MZ_FREE(out_buf.m_pBuf);
-    return NULL;
-  }
-  c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4,
-                          *pLen_out + 4);
-  for (i = 0; i < 4; ++i, c <<= 8)
-    (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24);
-  // compute final size of file, grab compressed data buffer and return
-  *pLen_out += 57;
-  MZ_FREE(pComp);
-  return out_buf.m_pBuf;
-}
-void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h,
-                                              int num_chans, size_t *pLen_out) {
-  // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we
-  // can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's
-  // where #defined out)
-  return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans,
-                                                    pLen_out, 6, MZ_FALSE);
-}
-
-// ------------------- .ZIP archive reading
-
-#ifndef MINIZ_NO_ARCHIVE_APIS
-#error "No arvhive APIs"
-
-#ifdef MINIZ_NO_STDIO
-#define MZ_FILE void *
-#else
-#include <stdio.h>
-#include <sys/stat.h>
-
-#if defined(_MSC_VER) || defined(__MINGW64__)
-static FILE *mz_fopen(const char *pFilename, const char *pMode) {
-  FILE *pFile = NULL;
-  fopen_s(&pFile, pFilename, pMode);
-  return pFile;
-}
-static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) {
-  FILE *pFile = NULL;
-  if (freopen_s(&pFile, pPath, pMode, pStream)) return NULL;
-  return pFile;
-}
-#ifndef MINIZ_NO_TIME
-#include <sys/utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN mz_fopen
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 _ftelli64
-#define MZ_FSEEK64 _fseeki64
-#define MZ_FILE_STAT_STRUCT _stat
-#define MZ_FILE_STAT _stat
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN mz_freopen
-#define MZ_DELETE_FILE remove
-#elif defined(__MINGW32__)
-#ifndef MINIZ_NO_TIME
-#include <sys/utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN(f, m) fopen(f, m)
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 ftello64
-#define MZ_FSEEK64 fseeko64
-#define MZ_FILE_STAT_STRUCT _stat
-#define MZ_FILE_STAT _stat
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN(f, m, s) freopen(f, m, s)
-#define MZ_DELETE_FILE remove
-#elif defined(__TINYC__)
-#ifndef MINIZ_NO_TIME
-#include <sys/utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN(f, m) fopen(f, m)
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 ftell
-#define MZ_FSEEK64 fseek
-#define MZ_FILE_STAT_STRUCT stat
-#define MZ_FILE_STAT stat
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN(f, m, s) freopen(f, m, s)
-#define MZ_DELETE_FILE remove
-#elif defined(__GNUC__) && defined(_LARGEFILE64_SOURCE) && _LARGEFILE64_SOURCE
-#ifndef MINIZ_NO_TIME
-#include <utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN(f, m) fopen64(f, m)
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 ftello64
-#define MZ_FSEEK64 fseeko64
-#define MZ_FILE_STAT_STRUCT stat64
-#define MZ_FILE_STAT stat64
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN(p, m, s) freopen64(p, m, s)
-#define MZ_DELETE_FILE remove
-#else
-#ifndef MINIZ_NO_TIME
-#include <utime.h>
-#endif
-#define MZ_FILE FILE
-#define MZ_FOPEN(f, m) fopen(f, m)
-#define MZ_FCLOSE fclose
-#define MZ_FREAD fread
-#define MZ_FWRITE fwrite
-#define MZ_FTELL64 ftello
-#define MZ_FSEEK64 fseeko
-#define MZ_FILE_STAT_STRUCT stat
-#define MZ_FILE_STAT stat
-#define MZ_FFLUSH fflush
-#define MZ_FREOPEN(f, m, s) freopen(f, m, s)
-#define MZ_DELETE_FILE remove
-#endif  // #ifdef _MSC_VER
-#endif  // #ifdef MINIZ_NO_STDIO
-
-#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c))
-
-// Various ZIP archive enums. To completely avoid cross platform compiler
-// alignment and platform endian issues, miniz.c doesn't use structs for any of
-// this stuff.
-enum {
-  // ZIP archive identifiers and record sizes
-  MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50,
-  MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50,
-  MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50,
-  MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30,
-  MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46,
-  MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22,
-  // Central directory header record offsets
-  MZ_ZIP_CDH_SIG_OFS = 0,
-  MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4,
-  MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6,
-  MZ_ZIP_CDH_BIT_FLAG_OFS = 8,
-  MZ_ZIP_CDH_METHOD_OFS = 10,
-  MZ_ZIP_CDH_FILE_TIME_OFS = 12,
-  MZ_ZIP_CDH_FILE_DATE_OFS = 14,
-  MZ_ZIP_CDH_CRC32_OFS = 16,
-  MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20,
-  MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24,
-  MZ_ZIP_CDH_FILENAME_LEN_OFS = 28,
-  MZ_ZIP_CDH_EXTRA_LEN_OFS = 30,
-  MZ_ZIP_CDH_COMMENT_LEN_OFS = 32,
-  MZ_ZIP_CDH_DISK_START_OFS = 34,
-  MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36,
-  MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38,
-  MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42,
-  // Local directory header offsets
-  MZ_ZIP_LDH_SIG_OFS = 0,
-  MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4,
-  MZ_ZIP_LDH_BIT_FLAG_OFS = 6,
-  MZ_ZIP_LDH_METHOD_OFS = 8,
-  MZ_ZIP_LDH_FILE_TIME_OFS = 10,
-  MZ_ZIP_LDH_FILE_DATE_OFS = 12,
-  MZ_ZIP_LDH_CRC32_OFS = 14,
-  MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18,
-  MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22,
-  MZ_ZIP_LDH_FILENAME_LEN_OFS = 26,
-  MZ_ZIP_LDH_EXTRA_LEN_OFS = 28,
-  // End of central directory offsets
-  MZ_ZIP_ECDH_SIG_OFS = 0,
-  MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4,
-  MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6,
-  MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8,
-  MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10,
-  MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12,
-  MZ_ZIP_ECDH_CDIR_OFS_OFS = 16,
-  MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20,
-};
-
-typedef struct {
-  void *m_p;
-  size_t m_size, m_capacity;
-  mz_uint m_element_size;
-} mz_zip_array;
-
-struct mz_zip_internal_state_tag {
-  mz_zip_array m_central_dir;
-  mz_zip_array m_central_dir_offsets;
-  mz_zip_array m_sorted_central_dir_offsets;
-  MZ_FILE *m_pFile;
-  void *m_pMem;
-  size_t m_mem_size;
-  size_t m_mem_capacity;
-};
-
-#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) \
-  (array_ptr)->m_element_size = element_size
-#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) \
-  ((element_type *)((array_ptr)->m_p))[index]
-
-static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip,
-                                              mz_zip_array *pArray) {
-  pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p);
-  memset(pArray, 0, sizeof(mz_zip_array));
-}
-
-static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip,
-                                            mz_zip_array *pArray,
-                                            size_t min_new_capacity,
-                                            mz_uint growing) {
-  void *pNew_p;
-  size_t new_capacity = min_new_capacity;
-  MZ_ASSERT(pArray->m_element_size);
-  if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE;
-  if (growing) {
-    new_capacity = MZ_MAX(1, pArray->m_capacity);
-    while (new_capacity < min_new_capacity) new_capacity *= 2;
-  }
-  if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p,
-                                         pArray->m_element_size, new_capacity)))
-    return MZ_FALSE;
-  pArray->m_p = pNew_p;
-  pArray->m_capacity = new_capacity;
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip,
-                                                   mz_zip_array *pArray,
-                                                   size_t new_capacity,
-                                                   mz_uint growing) {
-  if (new_capacity > pArray->m_capacity) {
-    if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing))
-      return MZ_FALSE;
-  }
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip,
-                                                  mz_zip_array *pArray,
-                                                  size_t new_size,
-                                                  mz_uint growing) {
-  if (new_size > pArray->m_capacity) {
-    if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing))
-      return MZ_FALSE;
-  }
-  pArray->m_size = new_size;
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip,
-                                                       mz_zip_array *pArray,
-                                                       size_t n) {
-  return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE);
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip,
-                                                     mz_zip_array *pArray,
-                                                     const void *pElements,
-                                                     size_t n) {
-  size_t orig_size = pArray->m_size;
-  if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE))
-    return MZ_FALSE;
-  memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size,
-         pElements, n * pArray->m_element_size);
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_TIME
-static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) {
-  struct tm tm;
-  memset(&tm, 0, sizeof(tm));
-  tm.tm_isdst = -1;
-  tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900;
-  tm.tm_mon = ((dos_date >> 5) & 15) - 1;
-  tm.tm_mday = dos_date & 31;
-  tm.tm_hour = (dos_time >> 11) & 31;
-  tm.tm_min = (dos_time >> 5) & 63;
-  tm.tm_sec = (dos_time << 1) & 62;
-  return mktime(&tm);
-}
-
-static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time,
-                                    mz_uint16 *pDOS_date) {
-#ifdef _MSC_VER
-  struct tm tm_struct;
-  struct tm *tm = &tm_struct;
-  errno_t err = localtime_s(tm, &time);
-  if (err) {
-    *pDOS_date = 0;
-    *pDOS_time = 0;
-    return;
-  }
-#else
-  struct tm *tm = localtime(&time);
-#endif
-  *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) +
-                           ((tm->tm_sec) >> 1));
-  *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) +
-                           ((tm->tm_mon + 1) << 5) + tm->tm_mday);
-}
-#endif
-
-#ifndef MINIZ_NO_STDIO
-static mz_bool mz_zip_get_file_modified_time(const char *pFilename,
-                                             mz_uint16 *pDOS_time,
-                                             mz_uint16 *pDOS_date) {
-#ifdef MINIZ_NO_TIME
-  (void)pFilename;
-  *pDOS_date = *pDOS_time = 0;
-#else
-  struct MZ_FILE_STAT_STRUCT file_stat;
-  // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000
-  // bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh.
-  if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE;
-  mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date);
-#endif  // #ifdef MINIZ_NO_TIME
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_TIME
-static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time,
-                                     time_t modified_time) {
-  struct utimbuf t;
-  t.actime = access_time;
-  t.modtime = modified_time;
-  return !utime(pFilename, &t);
-}
-#endif  // #ifndef MINIZ_NO_TIME
-#endif  // #ifndef MINIZ_NO_STDIO
-
-static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip,
-                                           mz_uint32 flags) {
-  (void)flags;
-  if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID))
-    return MZ_FALSE;
-
-  if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func;
-  if (!pZip->m_pFree) pZip->m_pFree = def_free_func;
-  if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func;
-
-  pZip->m_zip_mode = MZ_ZIP_MODE_READING;
-  pZip->m_archive_size = 0;
-  pZip->m_central_directory_file_ofs = 0;
-  pZip->m_total_files = 0;
-
-  if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(
-                   pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state))))
-    return MZ_FALSE;
-  memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir,
-                                sizeof(mz_uint8));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets,
-                                sizeof(mz_uint32));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets,
-                                sizeof(mz_uint32));
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE mz_bool
-mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array,
-                            const mz_zip_array *pCentral_dir_offsets,
-                            mz_uint l_index, mz_uint r_index) {
-  const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(
-                     pCentral_dir_array, mz_uint8,
-                     MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32,
-                                          l_index)),
-                 *pE;
-  const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(
-      pCentral_dir_array, mz_uint8,
-      MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index));
-  mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS),
-          r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  mz_uint8 l = 0, r = 0;
-  pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
-  pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
-  pE = pL + MZ_MIN(l_len, r_len);
-  while (pL < pE) {
-    if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break;
-    pL++;
-    pR++;
-  }
-  return (pL == pE) ? (l_len < r_len) : (l < r);
-}
-
-#define MZ_SWAP_UINT32(a, b) \
-  do {                       \
-    mz_uint32 t = a;         \
-    a = b;                   \
-    b = t;                   \
-  }                          \
-  MZ_MACRO_END
-
-// Heap sort of lowercased filenames, used to help accelerate plain central
-// directory searches by mz_zip_reader_locate_file(). (Could also use qsort(),
-// but it could allocate memory.)
-static void mz_zip_reader_sort_central_dir_offsets_by_filename(
-    mz_zip_archive *pZip) {
-  mz_zip_internal_state *pState = pZip->m_pState;
-  const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
-  const mz_zip_array *pCentral_dir = &pState->m_central_dir;
-  mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(
-      &pState->m_sorted_central_dir_offsets, mz_uint32, 0);
-  const int size = pZip->m_total_files;
-  int start = (size - 2) >> 1, end;
-  while (start >= 0) {
-    int child, root = start;
-    for (;;) {
-      if ((child = (root << 1) + 1) >= size) break;
-      child +=
-          (((child + 1) < size) &&
-           (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets,
-                                        pIndices[child], pIndices[child + 1])));
-      if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets,
-                                       pIndices[root], pIndices[child]))
-        break;
-      MZ_SWAP_UINT32(pIndices[root], pIndices[child]);
-      root = child;
-    }
-    start--;
-  }
-
-  end = size - 1;
-  while (end > 0) {
-    int child, root = 0;
-    MZ_SWAP_UINT32(pIndices[end], pIndices[0]);
-    for (;;) {
-      if ((child = (root << 1) + 1) >= end) break;
-      child +=
-          (((child + 1) < end) &&
-           mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets,
-                                       pIndices[child], pIndices[child + 1]));
-      if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets,
-                                       pIndices[root], pIndices[child]))
-        break;
-      MZ_SWAP_UINT32(pIndices[root], pIndices[child]);
-      root = child;
-    }
-    end--;
-  }
-}
-
-static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip,
-                                              mz_uint32 flags) {
-  mz_uint cdir_size, num_this_disk, cdir_disk_index;
-  mz_uint64 cdir_ofs;
-  mz_int64 cur_file_ofs;
-  const mz_uint8 *p;
-  mz_uint32 buf_u32[4096 / sizeof(mz_uint32)];
-  mz_uint8 *pBuf = (mz_uint8 *)buf_u32;
-  mz_bool sort_central_dir =
-      ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0);
-  // Basic sanity checks - reject files which are too small, and check the first
-  // 4 bytes of the file to make sure a local header is there.
-  if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  // Find the end of central directory record by scanning the file from the end
-  // towards the beginning.
-  cur_file_ofs =
-      MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0);
-  for (;;) {
-    int i,
-        n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs);
-    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n)
-      return MZ_FALSE;
-    for (i = n - 4; i >= 0; --i)
-      if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) break;
-    if (i >= 0) {
-      cur_file_ofs += i;
-      break;
-    }
-    if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >=
-                            (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)))
-      return MZ_FALSE;
-    cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0);
-  }
-  // Read and verify the end of central directory record.
-  if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf,
-                    MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) !=
-       MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) ||
-      ((pZip->m_total_files =
-            MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) !=
-       MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS)))
-    return MZ_FALSE;
-
-  num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS);
-  cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS);
-  if (((num_this_disk | cdir_disk_index) != 0) &&
-      ((num_this_disk != 1) || (cdir_disk_index != 1)))
-    return MZ_FALSE;
-
-  if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) <
-      pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-
-  cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS);
-  if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) return MZ_FALSE;
-
-  pZip->m_central_directory_file_ofs = cdir_ofs;
-
-  if (pZip->m_total_files) {
-    mz_uint i, n;
-
-    // Read the entire central directory into a heap block, and allocate another
-    // heap block to hold the unsorted central dir file record offsets, and
-    // another to hold the sorted indices.
-    if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size,
-                              MZ_FALSE)) ||
-        (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets,
-                              pZip->m_total_files, MZ_FALSE)))
-      return MZ_FALSE;
-
-    if (sort_central_dir) {
-      if (!mz_zip_array_resize(pZip,
-                               &pZip->m_pState->m_sorted_central_dir_offsets,
-                               pZip->m_total_files, MZ_FALSE))
-        return MZ_FALSE;
-    }
-
-    if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs,
-                      pZip->m_pState->m_central_dir.m_p,
-                      cdir_size) != cdir_size)
-      return MZ_FALSE;
-
-    // Now create an index into the central directory file records, do some
-    // basic sanity checking on each record, and check for zip64 entries (which
-    // are not yet supported).
-    p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p;
-    for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) {
-      mz_uint total_header_size, comp_size, decomp_size, disk_index;
-      if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) ||
-          (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG))
-        return MZ_FALSE;
-      MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32,
-                           i) =
-          (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p);
-      if (sort_central_dir)
-        MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets,
-                             mz_uint32, i) = i;
-      comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
-      decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
-      if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) &&
-           (decomp_size != comp_size)) ||
-          (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) ||
-          (comp_size == 0xFFFFFFFF))
-        return MZ_FALSE;
-      disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS);
-      if ((disk_index != num_this_disk) && (disk_index != 1)) return MZ_FALSE;
-      if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) +
-           MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size)
-        return MZ_FALSE;
-      if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-                               MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) +
-                               MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) +
-                               MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) >
-          n)
-        return MZ_FALSE;
-      n -= total_header_size;
-      p += total_header_size;
-    }
-  }
-
-  if (sort_central_dir)
-    mz_zip_reader_sort_central_dir_offsets_by_filename(pZip);
-
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size,
-                           mz_uint32 flags) {
-  if ((!pZip) || (!pZip->m_pRead)) return MZ_FALSE;
-  if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE;
-  pZip->m_archive_size = size;
-  if (!mz_zip_reader_read_central_dir(pZip, flags)) {
-    mz_zip_reader_end(pZip);
-    return MZ_FALSE;
-  }
-  return MZ_TRUE;
-}
-
-static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64 file_ofs,
-                                   void *pBuf, size_t n) {
-  mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
-  size_t s = (file_ofs >= pZip->m_archive_size)
-                 ? 0
-                 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n);
-  memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s);
-  return s;
-}
-
-mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem,
-                               size_t size, mz_uint32 flags) {
-  if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE;
-  pZip->m_archive_size = size;
-  pZip->m_pRead = mz_zip_mem_read_func;
-  pZip->m_pIO_opaque = pZip;
-#ifdef __cplusplus
-  pZip->m_pState->m_pMem = const_cast<void *>(pMem);
-#else
-  pZip->m_pState->m_pMem = (void *)pMem;
-#endif
-  pZip->m_pState->m_mem_size = size;
-  if (!mz_zip_reader_read_central_dir(pZip, flags)) {
-    mz_zip_reader_end(pZip);
-    return MZ_FALSE;
-  }
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_STDIO
-static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs,
-                                    void *pBuf, size_t n) {
-  mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
-  mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile);
-  if (((mz_int64)file_ofs < 0) ||
-      (((cur_ofs != (mz_int64)file_ofs)) &&
-       (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET))))
-    return 0;
-  return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile);
-}
-
-mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename,
-                                mz_uint32 flags) {
-  mz_uint64 file_size;
-  MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb");
-  if (!pFile) return MZ_FALSE;
-  if (MZ_FSEEK64(pFile, 0, SEEK_END)) {
-    MZ_FCLOSE(pFile);
-    return MZ_FALSE;
-  }
-  file_size = MZ_FTELL64(pFile);
-  if (!mz_zip_reader_init_internal(pZip, flags)) {
-    MZ_FCLOSE(pFile);
-    return MZ_FALSE;
-  }
-  pZip->m_pRead = mz_zip_file_read_func;
-  pZip->m_pIO_opaque = pZip;
-  pZip->m_pState->m_pFile = pFile;
-  pZip->m_archive_size = file_size;
-  if (!mz_zip_reader_read_central_dir(pZip, flags)) {
-    mz_zip_reader_end(pZip);
-    return MZ_FALSE;
-  }
-  return MZ_TRUE;
-}
-#endif  // #ifndef MINIZ_NO_STDIO
-
-mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) {
-  return pZip ? pZip->m_total_files : 0;
-}
-
-static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh(
-    mz_zip_archive *pZip, mz_uint file_index) {
-  if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
-    return NULL;
-  return &MZ_ZIP_ARRAY_ELEMENT(
-      &pZip->m_pState->m_central_dir, mz_uint8,
-      MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32,
-                           file_index));
-}
-
-mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip,
-                                        mz_uint file_index) {
-  mz_uint m_bit_flag;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  if (!p) return MZ_FALSE;
-  m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
-  return (m_bit_flag & 1);
-}
-
-mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip,
-                                          mz_uint file_index) {
-  mz_uint filename_len, external_attr;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  if (!p) return MZ_FALSE;
-
-  // First see if the filename ends with a '/' character.
-  filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  if (filename_len) {
-    if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/')
-      return MZ_TRUE;
-  }
-
-  // Bugfix: This code was also checking if the internal attribute was non-zero,
-  // which wasn't correct.
-  // Most/all zip writers (hopefully) set DOS file/directory attributes in the
-  // low 16-bits, so check for the DOS directory flag and ignore the source OS
-  // ID in the created by field.
-  // FIXME: Remove this check? Is it necessary - we already check the filename.
-  external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
-  if ((external_attr & 0x10) != 0) return MZ_TRUE;
-
-  return MZ_FALSE;
-}
-
-mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index,
-                                mz_zip_archive_file_stat *pStat) {
-  mz_uint n;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  if ((!p) || (!pStat)) return MZ_FALSE;
-
-  // Unpack the central directory record.
-  pStat->m_file_index = file_index;
-  pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(
-      &pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index);
-  pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS);
-  pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS);
-  pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
-  pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS);
-#ifndef MINIZ_NO_TIME
-  pStat->m_time =
-      mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS),
-                           MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS));
-#endif
-  pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS);
-  pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
-  pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
-  pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS);
-  pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
-  pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS);
-
-  // Copy as much of the filename and comment as possible.
-  n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1);
-  memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n);
-  pStat->m_filename[n] = '\0';
-
-  n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS);
-  n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1);
-  pStat->m_comment_size = n;
-  memcpy(pStat->m_comment,
-         p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-             MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) +
-             MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS),
-         n);
-  pStat->m_comment[n] = '\0';
-
-  return MZ_TRUE;
-}
-
-mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index,
-                                   char *pFilename, mz_uint filename_buf_size) {
-  mz_uint n;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  if (!p) {
-    if (filename_buf_size) pFilename[0] = '\0';
-    return 0;
-  }
-  n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  if (filename_buf_size) {
-    n = MZ_MIN(n, filename_buf_size - 1);
-    memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n);
-    pFilename[n] = '\0';
-  }
-  return n + 1;
-}
-
-static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA,
-                                                         const char *pB,
-                                                         mz_uint len,
-                                                         mz_uint flags) {
-  mz_uint i;
-  if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) return 0 == memcmp(pA, pB, len);
-  for (i = 0; i < len; ++i)
-    if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) return MZ_FALSE;
-  return MZ_TRUE;
-}
-
-static MZ_FORCEINLINE int mz_zip_reader_filename_compare(
-    const mz_zip_array *pCentral_dir_array,
-    const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR,
-    mz_uint r_len) {
-  const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(
-                     pCentral_dir_array, mz_uint8,
-                     MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32,
-                                          l_index)),
-                 *pE;
-  mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-  mz_uint8 l = 0, r = 0;
-  pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
-  pE = pL + MZ_MIN(l_len, r_len);
-  while (pL < pE) {
-    if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break;
-    pL++;
-    pR++;
-  }
-  return (pL == pE) ? (int)(l_len - r_len) : (l - r);
-}
-
-static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip,
-                                                   const char *pFilename) {
-  mz_zip_internal_state *pState = pZip->m_pState;
-  const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
-  const mz_zip_array *pCentral_dir = &pState->m_central_dir;
-  mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(
-      &pState->m_sorted_central_dir_offsets, mz_uint32, 0);
-  const int size = pZip->m_total_files;
-  const mz_uint filename_len = (mz_uint)strlen(pFilename);
-  int l = 0, h = size - 1;
-  while (l <= h) {
-    int m = (l + h) >> 1, file_index = pIndices[m],
-        comp =
-            mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets,
-                                           file_index, pFilename, filename_len);
-    if (!comp)
-      return file_index;
-    else if (comp < 0)
-      l = m + 1;
-    else
-      h = m - 1;
-  }
-  return -1;
-}
-
-int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName,
-                              const char *pComment, mz_uint flags) {
-  mz_uint file_index;
-  size_t name_len, comment_len;
-  if ((!pZip) || (!pZip->m_pState) || (!pName) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
-    return -1;
-  if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) &&
-      (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size))
-    return mz_zip_reader_locate_file_binary_search(pZip, pName);
-  name_len = strlen(pName);
-  if (name_len > 0xFFFF) return -1;
-  comment_len = pComment ? strlen(pComment) : 0;
-  if (comment_len > 0xFFFF) return -1;
-  for (file_index = 0; file_index < pZip->m_total_files; file_index++) {
-    const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT(
-        &pZip->m_pState->m_central_dir, mz_uint8,
-        MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32,
-                             file_index));
-    mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS);
-    const char *pFilename =
-        (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
-    if (filename_len < name_len) continue;
-    if (comment_len) {
-      mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS),
-              file_comment_len =
-                  MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS);
-      const char *pFile_comment = pFilename + filename_len + file_extra_len;
-      if ((file_comment_len != comment_len) ||
-          (!mz_zip_reader_string_equal(pComment, pFile_comment,
-                                       file_comment_len, flags)))
-        continue;
-    }
-    if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) {
-      int ofs = filename_len - 1;
-      do {
-        if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') ||
-            (pFilename[ofs] == ':'))
-          break;
-      } while (--ofs >= 0);
-      ofs++;
-      pFilename += ofs;
-      filename_len -= ofs;
-    }
-    if ((filename_len == name_len) &&
-        (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags)))
-      return file_index;
-  }
-  return -1;
-}
-
-mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip,
-                                              mz_uint file_index, void *pBuf,
-                                              size_t buf_size, mz_uint flags,
-                                              void *pUser_read_buf,
-                                              size_t user_read_buf_size) {
-  int status = TINFL_STATUS_DONE;
-  mz_uint64 needed_size, cur_file_ofs, comp_remaining,
-      out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail;
-  mz_zip_archive_file_stat file_stat;
-  void *pRead_buf;
-  mz_uint32
-      local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) /
-                       sizeof(mz_uint32)];
-  mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
-  tinfl_decompressor inflator;
-
-  if ((buf_size) && (!pBuf)) return MZ_FALSE;
-
-  if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE;
-
-  // Empty file, or a directory (but not always a directory - I've seen odd zips
-  // with directories that have compressed data which inflates to 0 bytes)
-  if (!file_stat.m_comp_size) return MZ_TRUE;
-
-  // Entry is a subdirectory (I've seen old zips with dir entries which have
-  // compressed deflate data which inflates to 0 bytes, but these entries claim
-  // to uncompress to 512 bytes in the headers).
-  // I'm torn how to handle this case - should it fail instead?
-  if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE;
-
-  // Encryption and patch files are not supported.
-  if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE;
-
-  // This function only supports stored and deflate.
-  if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) &&
-      (file_stat.m_method != MZ_DEFLATED))
-    return MZ_FALSE;
-
-  // Ensure supplied output buffer is large enough.
-  needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size
-                                                      : file_stat.m_uncomp_size;
-  if (buf_size < needed_size) return MZ_FALSE;
-
-  // Read and parse the local directory entry.
-  cur_file_ofs = file_stat.m_local_header_ofs;
-  if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header,
-                    MZ_ZIP_LOCAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
-    return MZ_FALSE;
-
-  cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE +
-                  MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) +
-                  MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
-  if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
-    return MZ_FALSE;
-
-  if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) {
-    // The file is stored or the caller has requested the compressed data.
-    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf,
-                      (size_t)needed_size) != needed_size)
-      return MZ_FALSE;
-    return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) ||
-           (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf,
-                     (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32);
-  }
-
-  // Decompress the file either directly from memory or from a file input
-  // buffer.
-  tinfl_init(&inflator);
-
-  if (pZip->m_pState->m_pMem) {
-    // Read directly from the archive in memory.
-    pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
-    read_buf_size = read_buf_avail = file_stat.m_comp_size;
-    comp_remaining = 0;
-  } else if (pUser_read_buf) {
-    // Use a user provided read buffer.
-    if (!user_read_buf_size) return MZ_FALSE;
-    pRead_buf = (mz_uint8 *)pUser_read_buf;
-    read_buf_size = user_read_buf_size;
-    read_buf_avail = 0;
-    comp_remaining = file_stat.m_comp_size;
-  } else {
-    // Temporarily allocate a read buffer.
-    read_buf_size =
-        MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE);
-#ifdef _MSC_VER
-    if (((0, sizeof(size_t) == sizeof(mz_uint32))) &&
-        (read_buf_size > 0x7FFFFFFF))
-#else
-    if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF))
-#endif
-      return MZ_FALSE;
-    if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1,
-                                            (size_t)read_buf_size)))
-      return MZ_FALSE;
-    read_buf_avail = 0;
-    comp_remaining = file_stat.m_comp_size;
-  }
-
-  do {
-    size_t in_buf_size,
-        out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs);
-    if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) {
-      read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
-      if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf,
-                        (size_t)read_buf_avail) != read_buf_avail) {
-        status = TINFL_STATUS_FAILED;
-        break;
-      }
-      cur_file_ofs += read_buf_avail;
-      comp_remaining -= read_buf_avail;
-      read_buf_ofs = 0;
-    }
-    in_buf_size = (size_t)read_buf_avail;
-    status = tinfl_decompress(
-        &inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size,
-        (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size,
-        TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF |
-            (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0));
-    read_buf_avail -= in_buf_size;
-    read_buf_ofs += in_buf_size;
-    out_buf_ofs += out_buf_size;
-  } while (status == TINFL_STATUS_NEEDS_MORE_INPUT);
-
-  if (status == TINFL_STATUS_DONE) {
-    // Make sure the entire file was decompressed, and check its CRC.
-    if ((out_buf_ofs != file_stat.m_uncomp_size) ||
-        (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf,
-                  (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32))
-      status = TINFL_STATUS_FAILED;
-  }
-
-  if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf))
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-
-  return status == TINFL_STATUS_DONE;
-}
-
-mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(
-    mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size,
-    mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) {
-  int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
-  if (file_index < 0) return MZ_FALSE;
-  return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size,
-                                               flags, pUser_read_buf,
-                                               user_read_buf_size);
-}
-
-mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index,
-                                     void *pBuf, size_t buf_size,
-                                     mz_uint flags) {
-  return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size,
-                                               flags, NULL, 0);
-}
-
-mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip,
-                                          const char *pFilename, void *pBuf,
-                                          size_t buf_size, mz_uint flags) {
-  return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf,
-                                                    buf_size, flags, NULL, 0);
-}
-
-void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index,
-                                    size_t *pSize, mz_uint flags) {
-  mz_uint64 comp_size, uncomp_size, alloc_size;
-  const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
-  void *pBuf;
-
-  if (pSize) *pSize = 0;
-  if (!p) return NULL;
-
-  comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
-  uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
-
-  alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size;
-#ifdef _MSC_VER
-  if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF))
-#else
-  if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF))
-#endif
-    return NULL;
-  if (NULL ==
-      (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size)))
-    return NULL;
-
-  if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size,
-                                    flags)) {
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-    return NULL;
-  }
-
-  if (pSize) *pSize = (size_t)alloc_size;
-  return pBuf;
-}
-
-void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip,
-                                         const char *pFilename, size_t *pSize,
-                                         mz_uint flags) {
-  int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
-  if (file_index < 0) {
-    if (pSize) *pSize = 0;
-    return MZ_FALSE;
-  }
-  return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags);
-}
-
-mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip,
-                                          mz_uint file_index,
-                                          mz_file_write_func pCallback,
-                                          void *pOpaque, mz_uint flags) {
-  int status = TINFL_STATUS_DONE;
-  mz_uint file_crc32 = MZ_CRC32_INIT;
-  mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining,
-                           out_buf_ofs = 0, cur_file_ofs;
-  mz_zip_archive_file_stat file_stat;
-  void *pRead_buf = NULL;
-  void *pWrite_buf = NULL;
-  mz_uint32
-      local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) /
-                       sizeof(mz_uint32)];
-  mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
-
-  if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE;
-
-  // Empty file, or a directory (but not always a directory - I've seen odd zips
-  // with directories that have compressed data which inflates to 0 bytes)
-  if (!file_stat.m_comp_size) return MZ_TRUE;
-
-  // Entry is a subdirectory (I've seen old zips with dir entries which have
-  // compressed deflate data which inflates to 0 bytes, but these entries claim
-  // to uncompress to 512 bytes in the headers).
-  // I'm torn how to handle this case - should it fail instead?
-  if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE;
-
-  // Encryption and patch files are not supported.
-  if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE;
-
-  // This function only supports stored and deflate.
-  if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) &&
-      (file_stat.m_method != MZ_DEFLATED))
-    return MZ_FALSE;
-
-  // Read and parse the local directory entry.
-  cur_file_ofs = file_stat.m_local_header_ofs;
-  if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header,
-                    MZ_ZIP_LOCAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
-    return MZ_FALSE;
-
-  cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE +
-                  MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) +
-                  MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
-  if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
-    return MZ_FALSE;
-
-  // Decompress the file either directly from memory or from a file input
-  // buffer.
-  if (pZip->m_pState->m_pMem) {
-    pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
-    read_buf_size = read_buf_avail = file_stat.m_comp_size;
-    comp_remaining = 0;
-  } else {
-    read_buf_size =
-        MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE);
-    if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1,
-                                            (size_t)read_buf_size)))
-      return MZ_FALSE;
-    read_buf_avail = 0;
-    comp_remaining = file_stat.m_comp_size;
-  }
-
-  if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) {
-    // The file is stored or the caller has requested the compressed data.
-    if (pZip->m_pState->m_pMem) {
-#ifdef _MSC_VER
-      if (((0, sizeof(size_t) == sizeof(mz_uint32))) &&
-          (file_stat.m_comp_size > 0xFFFFFFFF))
-#else
-      if (((sizeof(size_t) == sizeof(mz_uint32))) &&
-          (file_stat.m_comp_size > 0xFFFFFFFF))
-#endif
-        return MZ_FALSE;
-      if (pCallback(pOpaque, out_buf_ofs, pRead_buf,
-                    (size_t)file_stat.m_comp_size) != file_stat.m_comp_size)
-        status = TINFL_STATUS_FAILED;
-      else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
-        file_crc32 =
-            (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf,
-                                (size_t)file_stat.m_comp_size);
-      cur_file_ofs += file_stat.m_comp_size;
-      out_buf_ofs += file_stat.m_comp_size;
-      comp_remaining = 0;
-    } else {
-      while (comp_remaining) {
-        read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
-        if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf,
-                          (size_t)read_buf_avail) != read_buf_avail) {
-          status = TINFL_STATUS_FAILED;
-          break;
-        }
-
-        if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
-          file_crc32 = (mz_uint32)mz_crc32(
-              file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail);
-
-        if (pCallback(pOpaque, out_buf_ofs, pRead_buf,
-                      (size_t)read_buf_avail) != read_buf_avail) {
-          status = TINFL_STATUS_FAILED;
-          break;
-        }
-        cur_file_ofs += read_buf_avail;
-        out_buf_ofs += read_buf_avail;
-        comp_remaining -= read_buf_avail;
-      }
-    }
-  } else {
-    tinfl_decompressor inflator;
-    tinfl_init(&inflator);
-
-    if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1,
-                                             TINFL_LZ_DICT_SIZE)))
-      status = TINFL_STATUS_FAILED;
-    else {
-      do {
-        mz_uint8 *pWrite_buf_cur =
-            (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
-        size_t in_buf_size,
-            out_buf_size =
-                TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
-        if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) {
-          read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
-          if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf,
-                            (size_t)read_buf_avail) != read_buf_avail) {
-            status = TINFL_STATUS_FAILED;
-            break;
-          }
-          cur_file_ofs += read_buf_avail;
-          comp_remaining -= read_buf_avail;
-          read_buf_ofs = 0;
-        }
-
-        in_buf_size = (size_t)read_buf_avail;
-        status = tinfl_decompress(
-            &inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size,
-            (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size,
-            comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0);
-        read_buf_avail -= in_buf_size;
-        read_buf_ofs += in_buf_size;
-
-        if (out_buf_size) {
-          if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) !=
-              out_buf_size) {
-            status = TINFL_STATUS_FAILED;
-            break;
-          }
-          file_crc32 =
-              (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size);
-          if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) {
-            status = TINFL_STATUS_FAILED;
-            break;
-          }
-        }
-      } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) ||
-               (status == TINFL_STATUS_HAS_MORE_OUTPUT));
-    }
-  }
-
-  if ((status == TINFL_STATUS_DONE) &&
-      (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) {
-    // Make sure the entire file was decompressed, and check its CRC.
-    if ((out_buf_ofs != file_stat.m_uncomp_size) ||
-        (file_crc32 != file_stat.m_crc32))
-      status = TINFL_STATUS_FAILED;
-  }
-
-  if (!pZip->m_pState->m_pMem) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-  if (pWrite_buf) pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf);
-
-  return status == TINFL_STATUS_DONE;
-}
-
-mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip,
-                                               const char *pFilename,
-                                               mz_file_write_func pCallback,
-                                               void *pOpaque, mz_uint flags) {
-  int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
-  if (file_index < 0) return MZ_FALSE;
-  return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque,
-                                           flags);
-}
-
-#ifndef MINIZ_NO_STDIO
-static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs,
-                                         const void *pBuf, size_t n) {
-  (void)ofs;
-  return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque);
-}
-
-mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index,
-                                      const char *pDst_filename,
-                                      mz_uint flags) {
-  mz_bool status;
-  mz_zip_archive_file_stat file_stat;
-  MZ_FILE *pFile;
-  if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE;
-  pFile = MZ_FOPEN(pDst_filename, "wb");
-  if (!pFile) return MZ_FALSE;
-  status = mz_zip_reader_extract_to_callback(
-      pZip, file_index, mz_zip_file_write_callback, pFile, flags);
-  if (MZ_FCLOSE(pFile) == EOF) return MZ_FALSE;
-#ifndef MINIZ_NO_TIME
-  if (status)
-    mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time);
-#endif
-  return status;
-}
-#endif  // #ifndef MINIZ_NO_STDIO
-
-mz_bool mz_zip_reader_end(mz_zip_archive *pZip) {
-  if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
-    return MZ_FALSE;
-
-  if (pZip->m_pState) {
-    mz_zip_internal_state *pState = pZip->m_pState;
-    pZip->m_pState = NULL;
-    mz_zip_array_clear(pZip, &pState->m_central_dir);
-    mz_zip_array_clear(pZip, &pState->m_central_dir_offsets);
-    mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets);
-
-#ifndef MINIZ_NO_STDIO
-    if (pState->m_pFile) {
-      MZ_FCLOSE(pState->m_pFile);
-      pState->m_pFile = NULL;
-    }
-#endif  // #ifndef MINIZ_NO_STDIO
-
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
-  }
-  pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
-
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip,
-                                           const char *pArchive_filename,
-                                           const char *pDst_filename,
-                                           mz_uint flags) {
-  int file_index =
-      mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags);
-  if (file_index < 0) return MZ_FALSE;
-  return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags);
-}
-#endif
-
-// ------------------- .ZIP archive writing
-
-#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
-
-static void mz_write_le16(mz_uint8 *p, mz_uint16 v) {
-  p[0] = (mz_uint8)v;
-  p[1] = (mz_uint8)(v >> 8);
-}
-static void mz_write_le32(mz_uint8 *p, mz_uint32 v) {
-  p[0] = (mz_uint8)v;
-  p[1] = (mz_uint8)(v >> 8);
-  p[2] = (mz_uint8)(v >> 16);
-  p[3] = (mz_uint8)(v >> 24);
-}
-#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v))
-#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v))
-
-mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) {
-  if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID))
-    return MZ_FALSE;
-
-  if (pZip->m_file_offset_alignment) {
-    // Ensure user specified file offset alignment is a power of 2.
-    if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1))
-      return MZ_FALSE;
-  }
-
-  if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func;
-  if (!pZip->m_pFree) pZip->m_pFree = def_free_func;
-  if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func;
-
-  pZip->m_zip_mode = MZ_ZIP_MODE_WRITING;
-  pZip->m_archive_size = existing_size;
-  pZip->m_central_directory_file_ofs = 0;
-  pZip->m_total_files = 0;
-
-  if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(
-                   pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state))))
-    return MZ_FALSE;
-  memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir,
-                                sizeof(mz_uint8));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets,
-                                sizeof(mz_uint32));
-  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets,
-                                sizeof(mz_uint32));
-  return MZ_TRUE;
-}
-
-static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs,
-                                     const void *pBuf, size_t n) {
-  mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
-  mz_zip_internal_state *pState = pZip->m_pState;
-  mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size);
-#ifdef _MSC_VER
-  if ((!n) ||
-      ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF)))
-#else
-  if ((!n) ||
-      ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF)))
-#endif
-    return 0;
-  if (new_size > pState->m_mem_capacity) {
-    void *pNew_block;
-    size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity);
-    while (new_capacity < new_size) new_capacity *= 2;
-    if (NULL == (pNew_block = pZip->m_pRealloc(
-                     pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity)))
-      return 0;
-    pState->m_pMem = pNew_block;
-    pState->m_mem_capacity = new_capacity;
-  }
-  memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n);
-  pState->m_mem_size = (size_t)new_size;
-  return n;
-}
-
-mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip,
-                                size_t size_to_reserve_at_beginning,
-                                size_t initial_allocation_size) {
-  pZip->m_pWrite = mz_zip_heap_write_func;
-  pZip->m_pIO_opaque = pZip;
-  if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE;
-  if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size,
-                                             size_to_reserve_at_beginning))) {
-    if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(
-                     pZip->m_pAlloc_opaque, 1, initial_allocation_size))) {
-      mz_zip_writer_end(pZip);
-      return MZ_FALSE;
-    }
-    pZip->m_pState->m_mem_capacity = initial_allocation_size;
-  }
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_STDIO
-static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs,
-                                     const void *pBuf, size_t n) {
-  mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
-  mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile);
-  if (((mz_int64)file_ofs < 0) ||
-      (((cur_ofs != (mz_int64)file_ofs)) &&
-       (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET))))
-    return 0;
-  return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile);
-}
-
-mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename,
-                                mz_uint64 size_to_reserve_at_beginning) {
-  MZ_FILE *pFile;
-  pZip->m_pWrite = mz_zip_file_write_func;
-  pZip->m_pIO_opaque = pZip;
-  if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE;
-  if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) {
-    mz_zip_writer_end(pZip);
-    return MZ_FALSE;
-  }
-  pZip->m_pState->m_pFile = pFile;
-  if (size_to_reserve_at_beginning) {
-    mz_uint64 cur_ofs = 0;
-    char buf[4096];
-    MZ_CLEAR_OBJ(buf);
-    do {
-      size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning);
-      if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) {
-        mz_zip_writer_end(pZip);
-        return MZ_FALSE;
-      }
-      cur_ofs += n;
-      size_to_reserve_at_beginning -= n;
-    } while (size_to_reserve_at_beginning);
-  }
-  return MZ_TRUE;
-}
-#endif  // #ifndef MINIZ_NO_STDIO
-
-mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip,
-                                       const char *pFilename) {
-  mz_zip_internal_state *pState;
-  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
-    return MZ_FALSE;
-  // No sense in trying to write to an archive that's already at the support max
-  // size
-  if ((pZip->m_total_files == 0xFFFF) ||
-      ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-        MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  pState = pZip->m_pState;
-
-  if (pState->m_pFile) {
-#ifdef MINIZ_NO_STDIO
-    pFilename;
-    return MZ_FALSE;
-#else
-    // Archive is being read from stdio - try to reopen as writable.
-    if (pZip->m_pIO_opaque != pZip) return MZ_FALSE;
-    if (!pFilename) return MZ_FALSE;
-    pZip->m_pWrite = mz_zip_file_write_func;
-    if (NULL ==
-        (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) {
-      // The mz_zip_archive is now in a bogus state because pState->m_pFile is
-      // NULL, so just close it.
-      mz_zip_reader_end(pZip);
-      return MZ_FALSE;
-    }
-#endif  // #ifdef MINIZ_NO_STDIO
-  } else if (pState->m_pMem) {
-    // Archive lives in a memory block. Assume it's from the heap that we can
-    // resize using the realloc callback.
-    if (pZip->m_pIO_opaque != pZip) return MZ_FALSE;
-    pState->m_mem_capacity = pState->m_mem_size;
-    pZip->m_pWrite = mz_zip_heap_write_func;
-  }
-  // Archive is being read via a user provided read function - make sure the
-  // user has specified a write function too.
-  else if (!pZip->m_pWrite)
-    return MZ_FALSE;
-
-  // Start writing new files at the archive's current central directory
-  // location.
-  pZip->m_archive_size = pZip->m_central_directory_file_ofs;
-  pZip->m_zip_mode = MZ_ZIP_MODE_WRITING;
-  pZip->m_central_directory_file_ofs = 0;
-
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name,
-                              const void *pBuf, size_t buf_size,
-                              mz_uint level_and_flags) {
-  return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0,
-                                  level_and_flags, 0, 0);
-}
-
-typedef struct {
-  mz_zip_archive *m_pZip;
-  mz_uint64 m_cur_archive_file_ofs;
-  mz_uint64 m_comp_size;
-} mz_zip_writer_add_state;
-
-static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len,
-                                                  void *pUser) {
-  mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser;
-  if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque,
-                                    pState->m_cur_archive_file_ofs, pBuf,
-                                    len) != len)
-    return MZ_FALSE;
-  pState->m_cur_archive_file_ofs += len;
-  pState->m_comp_size += len;
-  return MZ_TRUE;
-}
-
-static mz_bool mz_zip_writer_create_local_dir_header(
-    mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size,
-    mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size,
-    mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags,
-    mz_uint16 dos_time, mz_uint16 dos_date) {
-  (void)pZip;
-  memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 20 : 0);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size);
-  return MZ_TRUE;
-}
-
-static mz_bool mz_zip_writer_create_central_dir_header(
-    mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size,
-    mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size,
-    mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method,
-    mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date,
-    mz_uint64 local_header_ofs, mz_uint32 ext_attributes) {
-  (void)pZip;
-  memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 20 : 0);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size);
-  MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes);
-  MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs);
-  return MZ_TRUE;
-}
-
-static mz_bool mz_zip_writer_add_to_central_dir(
-    mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size,
-    const void *pExtra, mz_uint16 extra_size, const void *pComment,
-    mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size,
-    mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags,
-    mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs,
-    mz_uint32 ext_attributes) {
-  mz_zip_internal_state *pState = pZip->m_pState;
-  mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size;
-  size_t orig_central_dir_size = pState->m_central_dir.m_size;
-  mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE];
-
-  // No zip64 support yet
-  if ((local_header_ofs > 0xFFFFFFFF) ||
-      (((mz_uint64)pState->m_central_dir.m_size +
-        MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size +
-        comment_size) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_create_central_dir_header(
-          pZip, central_dir_header, filename_size, extra_size, comment_size,
-          uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time,
-          dos_date, local_header_ofs, ext_attributes))
-    return MZ_FALSE;
-
-  if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header,
-                               MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) ||
-      (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename,
-                               filename_size)) ||
-      (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra,
-                               extra_size)) ||
-      (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment,
-                               comment_size)) ||
-      (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets,
-                               &central_dir_ofs, 1))) {
-    // Try to push the central directory array back into its original state.
-    mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size,
-                        MZ_FALSE);
-    return MZ_FALSE;
-  }
-
-  return MZ_TRUE;
-}
-
-static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) {
-  // Basic ZIP archive filename validity checks: Valid filenames cannot start
-  // with a forward slash, cannot contain a drive letter, and cannot use
-  // DOS-style backward slashes.
-  if (*pArchive_name == '/') return MZ_FALSE;
-  while (*pArchive_name) {
-    if ((*pArchive_name == '\\') || (*pArchive_name == ':')) return MZ_FALSE;
-    pArchive_name++;
-  }
-  return MZ_TRUE;
-}
-
-static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(
-    mz_zip_archive *pZip) {
-  mz_uint32 n;
-  if (!pZip->m_file_offset_alignment) return 0;
-  n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1));
-  return (pZip->m_file_offset_alignment - n) &
-         (pZip->m_file_offset_alignment - 1);
-}
-
-static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip,
-                                         mz_uint64 cur_file_ofs, mz_uint32 n) {
-  char buf[4096];
-  memset(buf, 0, MZ_MIN(sizeof(buf), n));
-  while (n) {
-    mz_uint32 s = MZ_MIN(sizeof(buf), n);
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s)
-      return MZ_FALSE;
-    cur_file_ofs += s;
-    n -= s;
-  }
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip,
-                                 const char *pArchive_name, const void *pBuf,
-                                 size_t buf_size, const void *pComment,
-                                 mz_uint16 comment_size,
-                                 mz_uint level_and_flags, mz_uint64 uncomp_size,
-                                 mz_uint32 uncomp_crc32) {
-  mz_uint16 method = 0, dos_time = 0, dos_date = 0;
-  mz_uint level, ext_attributes = 0, num_alignment_padding_bytes;
-  mz_uint64 local_dir_header_ofs = pZip->m_archive_size,
-            cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0;
-  size_t archive_name_size;
-  mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE];
-  tdefl_compressor *pComp = NULL;
-  mz_bool store_data_uncompressed;
-  mz_zip_internal_state *pState;
-
-  if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL;
-  level = level_and_flags & 0xF;
-  store_data_uncompressed =
-      ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA));
-
-  if ((!pZip) || (!pZip->m_pState) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) ||
-      (!pArchive_name) || ((comment_size) && (!pComment)) ||
-      (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION))
-    return MZ_FALSE;
-
-  pState = pZip->m_pState;
-
-  if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size))
-    return MZ_FALSE;
-  // No zip64 support yet
-  if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) return MZ_FALSE;
-  if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE;
-
-#ifndef MINIZ_NO_TIME
-  {
-    time_t cur_time;
-    time(&cur_time);
-    mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date);
-  }
-#endif  // #ifndef MINIZ_NO_TIME
-
-  archive_name_size = strlen(pArchive_name);
-  if (archive_name_size > 0xFFFF) return MZ_FALSE;
-
-  num_alignment_padding_bytes =
-      mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
-
-  // no zip64 support yet
-  if ((pZip->m_total_files == 0xFFFF) ||
-      ((pZip->m_archive_size + num_alignment_padding_bytes +
-        MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-        comment_size + archive_name_size) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) {
-    // Set DOS Subdirectory attribute bit.
-    ext_attributes |= 0x10;
-    // Subdirectories cannot contain data.
-    if ((buf_size) || (uncomp_size)) return MZ_FALSE;
-  }
-
-  // Try to do any allocations before writing to the archive, so if an
-  // allocation fails the file remains unmodified. (A good idea if we're doing
-  // an in-place modification.)
-  if ((!mz_zip_array_ensure_room(
-          pZip, &pState->m_central_dir,
-          MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) ||
-      (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1)))
-    return MZ_FALSE;
-
-  if ((!store_data_uncompressed) && (buf_size)) {
-    if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(
-                     pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor))))
-      return MZ_FALSE;
-  }
-
-  if (!mz_zip_writer_write_zeros(
-          pZip, cur_archive_file_ofs,
-          num_alignment_padding_bytes + sizeof(local_dir_header))) {
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-    return MZ_FALSE;
-  }
-  local_dir_header_ofs += num_alignment_padding_bytes;
-  if (pZip->m_file_offset_alignment) {
-    MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) ==
-              0);
-  }
-  cur_archive_file_ofs +=
-      num_alignment_padding_bytes + sizeof(local_dir_header);
-
-  MZ_CLEAR_OBJ(local_dir_header);
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name,
-                     archive_name_size) != archive_name_size) {
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-    return MZ_FALSE;
-  }
-  cur_archive_file_ofs += archive_name_size;
-
-  if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) {
-    uncomp_crc32 =
-        (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size);
-    uncomp_size = buf_size;
-    if (uncomp_size <= 3) {
-      level = 0;
-      store_data_uncompressed = MZ_TRUE;
-    }
-  }
-
-  if (store_data_uncompressed) {
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf,
-                       buf_size) != buf_size) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-      return MZ_FALSE;
-    }
-
-    cur_archive_file_ofs += buf_size;
-    comp_size = buf_size;
-
-    if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) method = MZ_DEFLATED;
-  } else if (buf_size) {
-    mz_zip_writer_add_state state;
-
-    state.m_pZip = pZip;
-    state.m_cur_archive_file_ofs = cur_archive_file_ofs;
-    state.m_comp_size = 0;
-
-    if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state,
-                    tdefl_create_comp_flags_from_zip_params(
-                        level, -15, MZ_DEFAULT_STRATEGY)) !=
-         TDEFL_STATUS_OKAY) ||
-        (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) !=
-         TDEFL_STATUS_DONE)) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-      return MZ_FALSE;
-    }
-
-    comp_size = state.m_comp_size;
-    cur_archive_file_ofs = state.m_cur_archive_file_ofs;
-
-    method = MZ_DEFLATED;
-  }
-
-  pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-  pComp = NULL;
-
-  // no zip64 support yet
-  if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_create_local_dir_header(
-          pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size,
-          comp_size, uncomp_crc32, method, 0, dos_time, dos_date))
-    return MZ_FALSE;
-
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header,
-                     sizeof(local_dir_header)) != sizeof(local_dir_header))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_add_to_central_dir(
-          pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment,
-          comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0,
-          dos_time, dos_date, local_dir_header_ofs, ext_attributes))
-    return MZ_FALSE;
-
-  pZip->m_total_files++;
-  pZip->m_archive_size = cur_archive_file_ofs;
-
-  return MZ_TRUE;
-}
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name,
-                               const char *pSrc_filename, const void *pComment,
-                               mz_uint16 comment_size,
-                               mz_uint level_and_flags) {
-  mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes;
-  mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0;
-  mz_uint64 local_dir_header_ofs = pZip->m_archive_size,
-            cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0,
-            comp_size = 0;
-  size_t archive_name_size;
-  mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE];
-  MZ_FILE *pSrc_file = NULL;
-
-  if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL;
-  level = level_and_flags & 0xF;
-
-  if ((!pZip) || (!pZip->m_pState) ||
-      (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) ||
-      ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION))
-    return MZ_FALSE;
-  if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) return MZ_FALSE;
-  if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE;
-
-  archive_name_size = strlen(pArchive_name);
-  if (archive_name_size > 0xFFFF) return MZ_FALSE;
-
-  num_alignment_padding_bytes =
-      mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
-
-  // no zip64 support yet
-  if ((pZip->m_total_files == 0xFFFF) ||
-      ((pZip->m_archive_size + num_alignment_padding_bytes +
-        MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE +
-        comment_size + archive_name_size) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date))
-    return MZ_FALSE;
-
-  pSrc_file = MZ_FOPEN(pSrc_filename, "rb");
-  if (!pSrc_file) return MZ_FALSE;
-  MZ_FSEEK64(pSrc_file, 0, SEEK_END);
-  uncomp_size = MZ_FTELL64(pSrc_file);
-  MZ_FSEEK64(pSrc_file, 0, SEEK_SET);
-
-  if (uncomp_size > 0xFFFFFFFF) {
-    // No zip64 support yet
-    MZ_FCLOSE(pSrc_file);
-    return MZ_FALSE;
-  }
-  if (uncomp_size <= 3) level = 0;
-
-  if (!mz_zip_writer_write_zeros(
-          pZip, cur_archive_file_ofs,
-          num_alignment_padding_bytes + sizeof(local_dir_header))) {
-    MZ_FCLOSE(pSrc_file);
-    return MZ_FALSE;
-  }
-  local_dir_header_ofs += num_alignment_padding_bytes;
-  if (pZip->m_file_offset_alignment) {
-    MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) ==
-              0);
-  }
-  cur_archive_file_ofs +=
-      num_alignment_padding_bytes + sizeof(local_dir_header);
-
-  MZ_CLEAR_OBJ(local_dir_header);
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name,
-                     archive_name_size) != archive_name_size) {
-    MZ_FCLOSE(pSrc_file);
-    return MZ_FALSE;
-  }
-  cur_archive_file_ofs += archive_name_size;
-
-  if (uncomp_size) {
-    mz_uint64 uncomp_remaining = uncomp_size;
-    void *pRead_buf =
-        pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE);
-    if (!pRead_buf) {
-      MZ_FCLOSE(pSrc_file);
-      return MZ_FALSE;
-    }
-
-    if (!level) {
-      while (uncomp_remaining) {
-        mz_uint n =
-            (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining);
-        if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) ||
-            (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf,
-                            n) != n)) {
-          pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-          MZ_FCLOSE(pSrc_file);
-          return MZ_FALSE;
-        }
-        uncomp_crc32 =
-            (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n);
-        uncomp_remaining -= n;
-        cur_archive_file_ofs += n;
-      }
-      comp_size = uncomp_size;
-    } else {
-      mz_bool result = MZ_FALSE;
-      mz_zip_writer_add_state state;
-      tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(
-          pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor));
-      if (!pComp) {
-        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-        MZ_FCLOSE(pSrc_file);
-        return MZ_FALSE;
-      }
-
-      state.m_pZip = pZip;
-      state.m_cur_archive_file_ofs = cur_archive_file_ofs;
-      state.m_comp_size = 0;
-
-      if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state,
-                     tdefl_create_comp_flags_from_zip_params(
-                         level, -15, MZ_DEFAULT_STRATEGY)) !=
-          TDEFL_STATUS_OKAY) {
-        pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-        MZ_FCLOSE(pSrc_file);
-        return MZ_FALSE;
-      }
-
-      for (;;) {
-        size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining,
-                                               (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE);
-        tdefl_status status;
-
-        if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size)
-          break;
-
-        uncomp_crc32 = (mz_uint32)mz_crc32(
-            uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size);
-        uncomp_remaining -= in_buf_size;
-
-        status = tdefl_compress_buffer(
-            pComp, pRead_buf, in_buf_size,
-            uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH);
-        if (status == TDEFL_STATUS_DONE) {
-          result = MZ_TRUE;
-          break;
-        } else if (status != TDEFL_STATUS_OKAY)
-          break;
-      }
-
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
-
-      if (!result) {
-        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-        MZ_FCLOSE(pSrc_file);
-        return MZ_FALSE;
-      }
-
-      comp_size = state.m_comp_size;
-      cur_archive_file_ofs = state.m_cur_archive_file_ofs;
-
-      method = MZ_DEFLATED;
-    }
-
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
-  }
-
-  MZ_FCLOSE(pSrc_file);
-  pSrc_file = NULL;
-
-  // no zip64 support yet
-  if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_create_local_dir_header(
-          pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size,
-          comp_size, uncomp_crc32, method, 0, dos_time, dos_date))
-    return MZ_FALSE;
-
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header,
-                     sizeof(local_dir_header)) != sizeof(local_dir_header))
-    return MZ_FALSE;
-
-  if (!mz_zip_writer_add_to_central_dir(
-          pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment,
-          comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0,
-          dos_time, dos_date, local_dir_header_ofs, ext_attributes))
-    return MZ_FALSE;
-
-  pZip->m_total_files++;
-  pZip->m_archive_size = cur_archive_file_ofs;
-
-  return MZ_TRUE;
-}
-#endif  // #ifndef MINIZ_NO_STDIO
-
-mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip,
-                                          mz_zip_archive *pSource_zip,
-                                          mz_uint file_index) {
-  mz_uint n, bit_flags, num_alignment_padding_bytes;
-  mz_uint64 comp_bytes_remaining, local_dir_header_ofs;
-  mz_uint64 cur_src_file_ofs, cur_dst_file_ofs;
-  mz_uint32
-      local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) /
-                       sizeof(mz_uint32)];
-  mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
-  mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE];
-  size_t orig_central_dir_size;
-  mz_zip_internal_state *pState;
-  void *pBuf;
-  const mz_uint8 *pSrc_central_header;
-
-  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING))
-    return MZ_FALSE;
-  if (NULL ==
-      (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index)))
-    return MZ_FALSE;
-  pState = pZip->m_pState;
-
-  num_alignment_padding_bytes =
-      mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
-
-  // no zip64 support yet
-  if ((pZip->m_total_files == 0xFFFF) ||
-      ((pZip->m_archive_size + num_alignment_padding_bytes +
-        MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) >
-       0xFFFFFFFF))
-    return MZ_FALSE;
-
-  cur_src_file_ofs =
-      MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS);
-  cur_dst_file_ofs = pZip->m_archive_size;
-
-  if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs,
-                           pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
-    return MZ_FALSE;
-  cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE;
-
-  if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs,
-                                 num_alignment_padding_bytes))
-    return MZ_FALSE;
-  cur_dst_file_ofs += num_alignment_padding_bytes;
-  local_dir_header_ofs = cur_dst_file_ofs;
-  if (pZip->m_file_offset_alignment) {
-    MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) ==
-              0);
-  }
-
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header,
-                     MZ_ZIP_LOCAL_DIR_HEADER_SIZE) !=
-      MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
-    return MZ_FALSE;
-  cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE;
-
-  n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) +
-      MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
-  comp_bytes_remaining =
-      n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
-
-  if (NULL == (pBuf = pZip->m_pAlloc(
-                   pZip->m_pAlloc_opaque, 1,
-                   (size_t)MZ_MAX(sizeof(mz_uint32) * 4,
-                                  MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE,
-                                         comp_bytes_remaining)))))
-    return MZ_FALSE;
-
-  while (comp_bytes_remaining) {
-    n = (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining);
-    if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf,
-                             n) != n) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-      return MZ_FALSE;
-    }
-    cur_src_file_ofs += n;
-
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-      return MZ_FALSE;
-    }
-    cur_dst_file_ofs += n;
-
-    comp_bytes_remaining -= n;
-  }
-
-  bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS);
-  if (bit_flags & 8) {
-    // Copy data descriptor
-    if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf,
-                             sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-      return MZ_FALSE;
-    }
-
-    n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 4 : 3);
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) {
-      pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-      return MZ_FALSE;
-    }
-
-    cur_src_file_ofs += n;
-    cur_dst_file_ofs += n;
-  }
-  pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
-
-  // no zip64 support yet
-  if (cur_dst_file_ofs > 0xFFFFFFFF) return MZ_FALSE;
-
-  orig_central_dir_size = pState->m_central_dir.m_size;
-
-  memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE);
-  MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS,
-                local_dir_header_ofs);
-  if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header,
-                              MZ_ZIP_CENTRAL_DIR_HEADER_SIZE))
-    return MZ_FALSE;
-
-  n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) +
-      MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) +
-      MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS);
-  if (!mz_zip_array_push_back(
-          pZip, &pState->m_central_dir,
-          pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) {
-    mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size,
-                        MZ_FALSE);
-    return MZ_FALSE;
-  }
-
-  if (pState->m_central_dir.m_size > 0xFFFFFFFF) return MZ_FALSE;
-  n = (mz_uint32)orig_central_dir_size;
-  if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) {
-    mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size,
-                        MZ_FALSE);
-    return MZ_FALSE;
-  }
-
-  pZip->m_total_files++;
-  pZip->m_archive_size = cur_dst_file_ofs;
-
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) {
-  mz_zip_internal_state *pState;
-  mz_uint64 central_dir_ofs, central_dir_size;
-  mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE];
-
-  if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING))
-    return MZ_FALSE;
-
-  pState = pZip->m_pState;
-
-  // no zip64 support yet
-  if ((pZip->m_total_files > 0xFFFF) ||
-      ((pZip->m_archive_size + pState->m_central_dir.m_size +
-        MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF))
-    return MZ_FALSE;
-
-  central_dir_ofs = 0;
-  central_dir_size = 0;
-  if (pZip->m_total_files) {
-    // Write central directory
-    central_dir_ofs = pZip->m_archive_size;
-    central_dir_size = pState->m_central_dir.m_size;
-    pZip->m_central_directory_file_ofs = central_dir_ofs;
-    if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs,
-                       pState->m_central_dir.m_p,
-                       (size_t)central_dir_size) != central_dir_size)
-      return MZ_FALSE;
-    pZip->m_archive_size += central_dir_size;
-  }
-
-  // Write end of central directory record
-  MZ_CLEAR_OBJ(hdr);
-  MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS,
-                MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG);
-  MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS,
-                pZip->m_total_files);
-  MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files);
-  MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size);
-  MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs);
-
-  if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr,
-                     sizeof(hdr)) != sizeof(hdr))
-    return MZ_FALSE;
-#ifndef MINIZ_NO_STDIO
-  if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) return MZ_FALSE;
-#endif  // #ifndef MINIZ_NO_STDIO
-
-  pZip->m_archive_size += sizeof(hdr);
-
-  pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED;
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf,
-                                            size_t *pSize) {
-  if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) return MZ_FALSE;
-  if (pZip->m_pWrite != mz_zip_heap_write_func) return MZ_FALSE;
-  if (!mz_zip_writer_finalize_archive(pZip)) return MZ_FALSE;
-
-  *pBuf = pZip->m_pState->m_pMem;
-  *pSize = pZip->m_pState->m_mem_size;
-  pZip->m_pState->m_pMem = NULL;
-  pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0;
-  return MZ_TRUE;
-}
-
-mz_bool mz_zip_writer_end(mz_zip_archive *pZip) {
-  mz_zip_internal_state *pState;
-  mz_bool status = MZ_TRUE;
-  if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) ||
-      ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) &&
-       (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED)))
-    return MZ_FALSE;
-
-  pState = pZip->m_pState;
-  pZip->m_pState = NULL;
-  mz_zip_array_clear(pZip, &pState->m_central_dir);
-  mz_zip_array_clear(pZip, &pState->m_central_dir_offsets);
-  mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets);
-
-#ifndef MINIZ_NO_STDIO
-  if (pState->m_pFile) {
-    MZ_FCLOSE(pState->m_pFile);
-    pState->m_pFile = NULL;
-  }
-#endif  // #ifndef MINIZ_NO_STDIO
-
-  if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) {
-    pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem);
-    pState->m_pMem = NULL;
-  }
-
-  pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
-  pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
-  return status;
-}
-
-#ifndef MINIZ_NO_STDIO
-mz_bool mz_zip_add_mem_to_archive_file_in_place(
-    const char *pZip_filename, const char *pArchive_name, const void *pBuf,
-    size_t buf_size, const void *pComment, mz_uint16 comment_size,
-    mz_uint level_and_flags) {
-  mz_bool status, created_new_archive = MZ_FALSE;
-  mz_zip_archive zip_archive;
-  struct MZ_FILE_STAT_STRUCT file_stat;
-  MZ_CLEAR_OBJ(zip_archive);
-  if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL;
-  if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) ||
-      ((comment_size) && (!pComment)) ||
-      ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION))
-    return MZ_FALSE;
-  if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE;
-  if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) {
-    // Create a new archive.
-    if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0))
-      return MZ_FALSE;
-    created_new_archive = MZ_TRUE;
-  } else {
-    // Append to an existing archive.
-    if (!mz_zip_reader_init_file(
-            &zip_archive, pZip_filename,
-            level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY))
-      return MZ_FALSE;
-    if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) {
-      mz_zip_reader_end(&zip_archive);
-      return MZ_FALSE;
-    }
-  }
-  status =
-      mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size,
-                               pComment, comment_size, level_and_flags, 0, 0);
-  // Always finalize, even if adding failed for some reason, so we have a valid
-  // central directory. (This may not always succeed, but we can try.)
-  if (!mz_zip_writer_finalize_archive(&zip_archive)) status = MZ_FALSE;
-  if (!mz_zip_writer_end(&zip_archive)) status = MZ_FALSE;
-  if ((!status) && (created_new_archive)) {
-    // It's a new archive and something went wrong, so just delete it.
-    int ignoredStatus = MZ_DELETE_FILE(pZip_filename);
-    (void)ignoredStatus;
-  }
-  return status;
-}
-
-void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename,
-                                          const char *pArchive_name,
-                                          size_t *pSize, mz_uint flags) {
-  int file_index;
-  mz_zip_archive zip_archive;
-  void *p = NULL;
-
-  if (pSize) *pSize = 0;
-
-  if ((!pZip_filename) || (!pArchive_name)) return NULL;
-
-  MZ_CLEAR_OBJ(zip_archive);
-  if (!mz_zip_reader_init_file(
-          &zip_archive, pZip_filename,
-          flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY))
-    return NULL;
-
-  if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL,
-                                              flags)) >= 0)
-    p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags);
-
-  mz_zip_reader_end(&zip_archive);
-  return p;
-}
-
-#endif  // #ifndef MINIZ_NO_STDIO
-
-#endif  // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
-
-#endif  // #ifndef MINIZ_NO_ARCHIVE_APIS
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINIZ_HEADER_FILE_ONLY
-
-/*
-  This is free and unencumbered software released into the public domain.
-
-  Anyone is free to copy, modify, publish, use, compile, sell, or
-  distribute this software, either in source code form or as a compiled
-  binary, for any purpose, commercial or non-commercial, and by any
-  means.
-
-  In jurisdictions that recognize copyright laws, the author or authors
-  of this software dedicate any and all copyright interest in the
-  software to the public domain. We make this dedication for the benefit
-  of the public at large and to the detriment of our heirs and
-  successors. We intend this dedication to be an overt act of
-  relinquishment in perpetuity of all present and future rights to this
-  software under copyright law.
-
-  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-  OTHER DEALINGS IN THE SOFTWARE.
-
-  For more information, please refer to <http://unlicense.org/>
-*/
-
-// ---------------------- end of miniz ----------------------------------------
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-}  // namespace miniz
-#else
-
-// Reuse MINIZ_LITTE_ENDIAN macro
-
-#if defined(__sparcv9)
-// Big endian
-#else
-#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU
-// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
-#define MINIZ_LITTLE_ENDIAN 1
-#endif
-#endif
-
-#endif  // TINYEXR_USE_MINIZ
-
-// static bool IsBigEndian(void) {
-//  union {
-//    unsigned int i;
-//    char c[4];
-//  } bint = {0x01020304};
-//
-//  return bint.c[0] == 1;
-//}
-
-static void SetErrorMessage(const std::string &msg, const char **err) {
-  if (err) {
-#ifdef _WIN32
-    (*err) = _strdup(msg.c_str());
-#else
-    (*err) = strdup(msg.c_str());
-#endif
-  }
-}
-
-static const int kEXRVersionSize = 8;
-
-static void cpy2(unsigned short *dst_val, const unsigned short *src_val) {
-  unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
-  const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
-
-  dst[0] = src[0];
-  dst[1] = src[1];
-}
-
-static void swap2(unsigned short *val) {
-#ifdef MINIZ_LITTLE_ENDIAN
-  (void)val;
-#else
-  unsigned short tmp = *val;
-  unsigned char *dst = reinterpret_cast<unsigned char *>(val);
-  unsigned char *src = reinterpret_cast<unsigned char *>(&tmp);
-
-  dst[0] = src[1];
-  dst[1] = src[0];
-#endif
-}
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-function"
-#endif
-
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-function"
-#endif
-static void cpy4(int *dst_val, const int *src_val) {
-  unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
-  const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
-
-  dst[0] = src[0];
-  dst[1] = src[1];
-  dst[2] = src[2];
-  dst[3] = src[3];
-}
-
-static void cpy4(unsigned int *dst_val, const unsigned int *src_val) {
-  unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
-  const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
-
-  dst[0] = src[0];
-  dst[1] = src[1];
-  dst[2] = src[2];
-  dst[3] = src[3];
-}
-
-static void cpy4(float *dst_val, const float *src_val) {
-  unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
-  const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
-
-  dst[0] = src[0];
-  dst[1] = src[1];
-  dst[2] = src[2];
-  dst[3] = src[3];
-}
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
-
-static void swap4(unsigned int *val) {
-#ifdef MINIZ_LITTLE_ENDIAN
-  (void)val;
-#else
-  unsigned int tmp = *val;
-  unsigned char *dst = reinterpret_cast<unsigned char *>(val);
-  unsigned char *src = reinterpret_cast<unsigned char *>(&tmp);
-
-  dst[0] = src[3];
-  dst[1] = src[2];
-  dst[2] = src[1];
-  dst[3] = src[0];
-#endif
-}
-
-#if 0
-static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) {
-  unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
-  const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
-
-  dst[0] = src[0];
-  dst[1] = src[1];
-  dst[2] = src[2];
-  dst[3] = src[3];
-  dst[4] = src[4];
-  dst[5] = src[5];
-  dst[6] = src[6];
-  dst[7] = src[7];
-}
-#endif
-
-static void swap8(tinyexr::tinyexr_uint64 *val) {
-#ifdef MINIZ_LITTLE_ENDIAN
-  (void)val;
-#else
-  tinyexr::tinyexr_uint64 tmp = (*val);
-  unsigned char *dst = reinterpret_cast<unsigned char *>(val);
-  unsigned char *src = reinterpret_cast<unsigned char *>(&tmp);
-
-  dst[0] = src[7];
-  dst[1] = src[6];
-  dst[2] = src[5];
-  dst[3] = src[4];
-  dst[4] = src[3];
-  dst[5] = src[2];
-  dst[6] = src[1];
-  dst[7] = src[0];
-#endif
-}
-
-// https://gist.github.com/rygorous/2156668
-// Reuse MINIZ_LITTLE_ENDIAN flag from miniz.
-union FP32 {
-  unsigned int u;
-  float f;
-  struct {
-#if MINIZ_LITTLE_ENDIAN
-    unsigned int Mantissa : 23;
-    unsigned int Exponent : 8;
-    unsigned int Sign : 1;
-#else
-    unsigned int Sign : 1;
-    unsigned int Exponent : 8;
-    unsigned int Mantissa : 23;
-#endif
-  } s;
-};
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wpadded"
-#endif
-
-union FP16 {
-  unsigned short u;
-  struct {
-#if MINIZ_LITTLE_ENDIAN
-    unsigned int Mantissa : 10;
-    unsigned int Exponent : 5;
-    unsigned int Sign : 1;
-#else
-    unsigned int Sign : 1;
-    unsigned int Exponent : 5;
-    unsigned int Mantissa : 10;
-#endif
-  } s;
-};
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-
-static FP32 half_to_float(FP16 h) {
-  static const FP32 magic = {113 << 23};
-  static const unsigned int shifted_exp = 0x7c00
-                                          << 13;  // exponent mask after shift
-  FP32 o;
-
-  o.u = (h.u & 0x7fffU) << 13U;           // exponent/mantissa bits
-  unsigned int exp_ = shifted_exp & o.u;  // just the exponent
-  o.u += (127 - 15) << 23;                // exponent adjust
-
-  // handle exponent special cases
-  if (exp_ == shifted_exp)    // Inf/NaN?
-    o.u += (128 - 16) << 23;  // extra exp adjust
-  else if (exp_ == 0)         // Zero/Denormal?
-  {
-    o.u += 1 << 23;  // extra exp adjust
-    o.f -= magic.f;  // renormalize
-  }
-
-  o.u |= (h.u & 0x8000U) << 16U;  // sign bit
-  return o;
-}
-
-static FP16 float_to_half_full(FP32 f) {
-  FP16 o = {0};
-
-  // Based on ISPC reference code (with minor modifications)
-  if (f.s.Exponent == 0)  // Signed zero/denormal (which will underflow)
-    o.s.Exponent = 0;
-  else if (f.s.Exponent == 255)  // Inf or NaN (all exponent bits set)
-  {
-    o.s.Exponent = 31;
-    o.s.Mantissa = f.s.Mantissa ? 0x200 : 0;  // NaN->qNaN and Inf->Inf
-  } else                                      // Normalized number
-  {
-    // Exponent unbias the single, then bias the halfp
-    int newexp = f.s.Exponent - 127 + 15;
-    if (newexp >= 31)  // Overflow, return signed infinity
-      o.s.Exponent = 31;
-    else if (newexp <= 0)  // Underflow
-    {
-      if ((14 - newexp) <= 24)  // Mantissa might be non-zero
-      {
-        unsigned int mant = f.s.Mantissa | 0x800000;  // Hidden 1 bit
-        o.s.Mantissa = mant >> (14 - newexp);
-        if ((mant >> (13 - newexp)) & 1)  // Check for rounding
-          o.u++;  // Round, might overflow into exp bit, but this is OK
-      }
-    } else {
-      o.s.Exponent = static_cast<unsigned int>(newexp);
-      o.s.Mantissa = f.s.Mantissa >> 13;
-      if (f.s.Mantissa & 0x1000)  // Check for rounding
-        o.u++;                    // Round, might overflow to inf, this is OK
-    }
-  }
-
-  o.s.Sign = f.s.Sign;
-  return o;
-}
-
-// NOTE: From OpenEXR code
-// #define IMF_INCREASING_Y  0
-// #define IMF_DECREASING_Y  1
-// #define IMF_RAMDOM_Y    2
-//
-// #define IMF_NO_COMPRESSION  0
-// #define IMF_RLE_COMPRESSION 1
-// #define IMF_ZIPS_COMPRESSION  2
-// #define IMF_ZIP_COMPRESSION 3
-// #define IMF_PIZ_COMPRESSION 4
-// #define IMF_PXR24_COMPRESSION 5
-// #define IMF_B44_COMPRESSION 6
-// #define IMF_B44A_COMPRESSION  7
-
-#ifdef __clang__
-#pragma clang diagnostic push
-
-#if __has_warning("-Wzero-as-null-pointer-constant")
-#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
-#endif
-
-#endif
-
-static const char *ReadString(std::string *s, const char *ptr, size_t len) {
-  // Read untile NULL(\0).
-  const char *p = ptr;
-  const char *q = ptr;
-  while ((size_t(q - ptr) < len) && (*q) != 0) {
-    q++;
-  }
-
-  if (size_t(q - ptr) >= len) {
-    (*s) = std::string();
-    return NULL;
-  }
-
-  (*s) = std::string(p, q);
-
-  return q + 1;  // skip '\0'
-}
-
-static bool ReadAttribute(std::string *name, std::string *type,
-                          std::vector<unsigned char> *data, size_t *marker_size,
-                          const char *marker, size_t size) {
-  size_t name_len = strnlen(marker, size);
-  if (name_len == size) {
-    // String does not have a terminating character.
-    return false;
-  }
-  *name = std::string(marker, name_len);
-
-  marker += name_len + 1;
-  size -= name_len + 1;
-
-  size_t type_len = strnlen(marker, size);
-  if (type_len == size) {
-    return false;
-  }
-  *type = std::string(marker, type_len);
-
-  marker += type_len + 1;
-  size -= type_len + 1;
-
-  if (size < sizeof(uint32_t)) {
-    return false;
-  }
-
-  uint32_t data_len;
-  memcpy(&data_len, marker, sizeof(uint32_t));
-  tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len));
-
-  if (data_len == 0) {
-    if ((*type).compare("string") == 0) {
-      // Accept empty string attribute.
-
-      marker += sizeof(uint32_t);
-      size -= sizeof(uint32_t);
-
-      *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t);
-
-      data->resize(1);
-      (*data)[0] = '\0';
-
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  marker += sizeof(uint32_t);
-  size -= sizeof(uint32_t);
-
-  if (size < data_len) {
-    return false;
-  }
-
-  data->resize(static_cast<size_t>(data_len));
-  memcpy(&data->at(0), marker, static_cast<size_t>(data_len));
-
-  *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t) + data_len;
-  return true;
-}
-
-static void WriteAttributeToMemory(std::vector<unsigned char> *out,
-                                   const char *name, const char *type,
-                                   const unsigned char *data, int len) {
-  out->insert(out->end(), name, name + strlen(name) + 1);
-  out->insert(out->end(), type, type + strlen(type) + 1);
-
-  int outLen = len;
-  tinyexr::swap4(reinterpret_cast<unsigned int *>(&outLen));
-  out->insert(out->end(), reinterpret_cast<unsigned char *>(&outLen),
-              reinterpret_cast<unsigned char *>(&outLen) + sizeof(int));
-  out->insert(out->end(), data, data + len);
-}
-
-typedef struct {
-  std::string name;  // less than 255 bytes long
-  int pixel_type;
-  int x_sampling;
-  int y_sampling;
-  unsigned char p_linear;
-  unsigned char pad[3];
-} ChannelInfo;
-
-typedef struct HeaderInfo {
-  std::vector<tinyexr::ChannelInfo> channels;
-  std::vector<EXRAttribute> attributes;
-
-  int data_window[4];
-  int line_order;
-  int display_window[4];
-  float screen_window_center[2];
-  float screen_window_width;
-  float pixel_aspect_ratio;
-
-  int chunk_count;
-
-  // Tiled format
-  int tile_size_x;
-  int tile_size_y;
-  int tile_level_mode;
-  int tile_rounding_mode;
-
-  unsigned int header_len;
-
-  int compression_type;
-
-  void clear() {
-    channels.clear();
-    attributes.clear();
-
-    data_window[0] = 0;
-    data_window[1] = 0;
-    data_window[2] = 0;
-    data_window[3] = 0;
-    line_order = 0;
-    display_window[0] = 0;
-    display_window[1] = 0;
-    display_window[2] = 0;
-    display_window[3] = 0;
-    screen_window_center[0] = 0.0f;
-    screen_window_center[1] = 0.0f;
-    screen_window_width = 0.0f;
-    pixel_aspect_ratio = 0.0f;
-
-    chunk_count = 0;
-
-    // Tiled format
-    tile_size_x = 0;
-    tile_size_y = 0;
-    tile_level_mode = 0;
-    tile_rounding_mode = 0;
-
-    header_len = 0;
-    compression_type = 0;
-  }
-} HeaderInfo;
-
-static bool ReadChannelInfo(std::vector<ChannelInfo> &channels,
-                            const std::vector<unsigned char> &data) {
-  const char *p = reinterpret_cast<const char *>(&data.at(0));
-
-  for (;;) {
-    if ((*p) == 0) {
-      break;
-    }
-    ChannelInfo info;
-
-    tinyexr_int64 data_len = static_cast<tinyexr_int64>(data.size()) -
-                             (p - reinterpret_cast<const char *>(data.data()));
-    if (data_len < 0) {
-      return false;
-    }
-
-    p = ReadString(&info.name, p, size_t(data_len));
-    if ((p == NULL) && (info.name.empty())) {
-      // Buffer overrun. Issue #51.
-      return false;
-    }
-
-    const unsigned char *data_end =
-        reinterpret_cast<const unsigned char *>(p) + 16;
-    if (data_end >= (data.data() + data.size())) {
-      return false;
-    }
-
-    memcpy(&info.pixel_type, p, sizeof(int));
-    p += 4;
-    info.p_linear = static_cast<unsigned char>(p[0]);  // uchar
-    p += 1 + 3;                                        // reserved: uchar[3]
-    memcpy(&info.x_sampling, p, sizeof(int));          // int
-    p += 4;
-    memcpy(&info.y_sampling, p, sizeof(int));  // int
-    p += 4;
-
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&info.pixel_type));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&info.x_sampling));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&info.y_sampling));
-
-    channels.push_back(info);
-  }
-
-  return true;
-}
-
-static void WriteChannelInfo(std::vector<unsigned char> &data,
-                             const std::vector<ChannelInfo> &channels) {
-  size_t sz = 0;
-
-  // Calculate total size.
-  for (size_t c = 0; c < channels.size(); c++) {
-    sz += strlen(channels[c].name.c_str()) + 1;  // +1 for \0
-    sz += 16;                                    // 4 * int
-  }
-  data.resize(sz + 1);
-
-  unsigned char *p = &data.at(0);
-
-  for (size_t c = 0; c < channels.size(); c++) {
-    memcpy(p, channels[c].name.c_str(), strlen(channels[c].name.c_str()));
-    p += strlen(channels[c].name.c_str());
-    (*p) = '\0';
-    p++;
-
-    int pixel_type = channels[c].pixel_type;
-    int x_sampling = channels[c].x_sampling;
-    int y_sampling = channels[c].y_sampling;
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&pixel_type));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&x_sampling));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&y_sampling));
-
-    memcpy(p, &pixel_type, sizeof(int));
-    p += sizeof(int);
-
-    (*p) = channels[c].p_linear;
-    p += 4;
-
-    memcpy(p, &x_sampling, sizeof(int));
-    p += sizeof(int);
-
-    memcpy(p, &y_sampling, sizeof(int));
-    p += sizeof(int);
-  }
-
-  (*p) = '\0';
-}
-
-static void CompressZip(unsigned char *dst,
-                        tinyexr::tinyexr_uint64 &compressedSize,
-                        const unsigned char *src, unsigned long src_size) {
-  std::vector<unsigned char> tmpBuf(src_size);
-
-  //
-  // Apply EXR-specific? postprocess. Grabbed from OpenEXR's
-  // ImfZipCompressor.cpp
-  //
-
-  //
-  // Reorder the pixel data.
-  //
-
-  const char *srcPtr = reinterpret_cast<const char *>(src);
-
-  {
-    char *t1 = reinterpret_cast<char *>(&tmpBuf.at(0));
-    char *t2 = reinterpret_cast<char *>(&tmpBuf.at(0)) + (src_size + 1) / 2;
-    const char *stop = srcPtr + src_size;
-
-    for (;;) {
-      if (srcPtr < stop)
-        *(t1++) = *(srcPtr++);
-      else
-        break;
-
-      if (srcPtr < stop)
-        *(t2++) = *(srcPtr++);
-      else
-        break;
-    }
-  }
-
-  //
-  // Predictor.
-  //
-
-  {
-    unsigned char *t = &tmpBuf.at(0) + 1;
-    unsigned char *stop = &tmpBuf.at(0) + src_size;
-    int p = t[-1];
-
-    while (t < stop) {
-      int d = int(t[0]) - p + (128 + 256);
-      p = t[0];
-      t[0] = static_cast<unsigned char>(d);
-      ++t;
-    }
-  }
-
-#if TINYEXR_USE_MINIZ
-  //
-  // Compress the data using miniz
-  //
-
-  miniz::mz_ulong outSize = miniz::mz_compressBound(src_size);
-  int ret = miniz::mz_compress(
-      dst, &outSize, static_cast<const unsigned char *>(&tmpBuf.at(0)),
-      src_size);
-  assert(ret == miniz::MZ_OK);
-  (void)ret;
-
-  compressedSize = outSize;
-#else
-  uLong outSize = compressBound(static_cast<uLong>(src_size));
-  int ret = compress(dst, &outSize, static_cast<const Bytef *>(&tmpBuf.at(0)),
-                     src_size);
-  assert(ret == Z_OK);
-
-  compressedSize = outSize;
-#endif
-
-  // Use uncompressed data when compressed data is larger than uncompressed.
-  // (Issue 40)
-  if (compressedSize >= src_size) {
-    compressedSize = src_size;
-    memcpy(dst, src, src_size);
-  }
-}
-
-static bool DecompressZip(unsigned char *dst,
-                          unsigned long *uncompressed_size /* inout */,
-                          const unsigned char *src, unsigned long src_size) {
-  if ((*uncompressed_size) == src_size) {
-    // Data is not compressed(Issue 40).
-    memcpy(dst, src, src_size);
-    return true;
-  }
-  std::vector<unsigned char> tmpBuf(*uncompressed_size);
-
-#if TINYEXR_USE_MINIZ
-  int ret =
-      miniz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
-  if (miniz::MZ_OK != ret) {
-    return false;
-  }
-#else
-  int ret = uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
-  if (Z_OK != ret) {
-    return false;
-  }
-#endif
-
-  //
-  // Apply EXR-specific? postprocess. Grabbed from OpenEXR's
-  // ImfZipCompressor.cpp
-  //
-
-  // Predictor.
-  {
-    unsigned char *t = &tmpBuf.at(0) + 1;
-    unsigned char *stop = &tmpBuf.at(0) + (*uncompressed_size);
-
-    while (t < stop) {
-      int d = int(t[-1]) + int(t[0]) - 128;
-      t[0] = static_cast<unsigned char>(d);
-      ++t;
-    }
-  }
-
-  // Reorder the pixel data.
-  {
-    const char *t1 = reinterpret_cast<const char *>(&tmpBuf.at(0));
-    const char *t2 = reinterpret_cast<const char *>(&tmpBuf.at(0)) +
-                     (*uncompressed_size + 1) / 2;
-    char *s = reinterpret_cast<char *>(dst);
-    char *stop = s + (*uncompressed_size);
-
-    for (;;) {
-      if (s < stop)
-        *(s++) = *(t1++);
-      else
-        break;
-
-      if (s < stop)
-        *(s++) = *(t2++);
-      else
-        break;
-    }
-  }
-
-  return true;
-}
-
-// RLE code from OpenEXR --------------------------------------
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wsign-conversion"
-#endif
-
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4204)  // nonstandard extension used : non-constant
-                                 // aggregate initializer (also supported by GNU
-                                 // C and C99, so no big deal)
-#pragma warning(disable : 4244)  // 'initializing': conversion from '__int64' to
-                                 // 'int', possible loss of data
-#pragma warning(disable : 4267)  // 'argument': conversion from '__int64' to
-                                 // 'int', possible loss of data
-#pragma warning(disable : 4996)  // 'strdup': The POSIX name for this item is
-                                 // deprecated. Instead, use the ISO C and C++
-                                 // conformant name: _strdup.
-#endif
-
-const int MIN_RUN_LENGTH = 3;
-const int MAX_RUN_LENGTH = 127;
-
-//
-// Compress an array of bytes, using run-length encoding,
-// and return the length of the compressed data.
-//
-
-static int rleCompress(int inLength, const char in[], signed char out[]) {
-  const char *inEnd = in + inLength;
-  const char *runStart = in;
-  const char *runEnd = in + 1;
-  signed char *outWrite = out;
-
-  while (runStart < inEnd) {
-    while (runEnd < inEnd && *runStart == *runEnd &&
-           runEnd - runStart - 1 < MAX_RUN_LENGTH) {
-      ++runEnd;
-    }
-
-    if (runEnd - runStart >= MIN_RUN_LENGTH) {
-      //
-      // Compressable run
-      //
-
-      *outWrite++ = static_cast<char>(runEnd - runStart) - 1;
-      *outWrite++ = *(reinterpret_cast<const signed char *>(runStart));
-      runStart = runEnd;
-    } else {
-      //
-      // Uncompressable run
-      //
-
-      while (runEnd < inEnd &&
-             ((runEnd + 1 >= inEnd || *runEnd != *(runEnd + 1)) ||
-              (runEnd + 2 >= inEnd || *(runEnd + 1) != *(runEnd + 2))) &&
-             runEnd - runStart < MAX_RUN_LENGTH) {
-        ++runEnd;
-      }
-
-      *outWrite++ = static_cast<char>(runStart - runEnd);
-
-      while (runStart < runEnd) {
-        *outWrite++ = *(reinterpret_cast<const signed char *>(runStart++));
-      }
-    }
-
-    ++runEnd;
-  }
-
-  return static_cast<int>(outWrite - out);
-}
-
-//
-// Uncompress an array of bytes compressed with rleCompress().
-// Returns the length of the oncompressed data, or 0 if the
-// length of the uncompressed data would be more than maxLength.
-//
-
-static int rleUncompress(int inLength, int maxLength, const signed char in[],
-                         char out[]) {
-  char *outStart = out;
-
-  while (inLength > 0) {
-    if (*in < 0) {
-      int count = -(static_cast<int>(*in++));
-      inLength -= count + 1;
-
-      // Fixes #116: Add bounds check to in buffer.
-      if ((0 > (maxLength -= count)) || (inLength < 0)) return 0;
-
-      memcpy(out, in, count);
-      out += count;
-      in += count;
-    } else {
-      int count = *in++;
-      inLength -= 2;
-
-      if (0 > (maxLength -= count + 1)) return 0;
-
-      memset(out, *reinterpret_cast<const char *>(in), count + 1);
-      out += count + 1;
-
-      in++;
-    }
-  }
-
-  return static_cast<int>(out - outStart);
-}
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-
-// End of RLE code from OpenEXR -----------------------------------
-
-static void CompressRle(unsigned char *dst,
-                        tinyexr::tinyexr_uint64 &compressedSize,
-                        const unsigned char *src, unsigned long src_size) {
-  std::vector<unsigned char> tmpBuf(src_size);
-
-  //
-  // Apply EXR-specific? postprocess. Grabbed from OpenEXR's
-  // ImfRleCompressor.cpp
-  //
-
-  //
-  // Reorder the pixel data.
-  //
-
-  const char *srcPtr = reinterpret_cast<const char *>(src);
-
-  {
-    char *t1 = reinterpret_cast<char *>(&tmpBuf.at(0));
-    char *t2 = reinterpret_cast<char *>(&tmpBuf.at(0)) + (src_size + 1) / 2;
-    const char *stop = srcPtr + src_size;
-
-    for (;;) {
-      if (srcPtr < stop)
-        *(t1++) = *(srcPtr++);
-      else
-        break;
-
-      if (srcPtr < stop)
-        *(t2++) = *(srcPtr++);
-      else
-        break;
-    }
-  }
-
-  //
-  // Predictor.
-  //
-
-  {
-    unsigned char *t = &tmpBuf.at(0) + 1;
-    unsigned char *stop = &tmpBuf.at(0) + src_size;
-    int p = t[-1];
-
-    while (t < stop) {
-      int d = int(t[0]) - p + (128 + 256);
-      p = t[0];
-      t[0] = static_cast<unsigned char>(d);
-      ++t;
-    }
-  }
-
-  // outSize will be (srcSiz * 3) / 2 at max.
-  int outSize = rleCompress(static_cast<int>(src_size),
-                            reinterpret_cast<const char *>(&tmpBuf.at(0)),
-                            reinterpret_cast<signed char *>(dst));
-  assert(outSize > 0);
-
-  compressedSize = static_cast<tinyexr::tinyexr_uint64>(outSize);
-
-  // Use uncompressed data when compressed data is larger than uncompressed.
-  // (Issue 40)
-  if (compressedSize >= src_size) {
-    compressedSize = src_size;
-    memcpy(dst, src, src_size);
-  }
-}
-
-static bool DecompressRle(unsigned char *dst,
-                          const unsigned long uncompressed_size,
-                          const unsigned char *src, unsigned long src_size) {
-  if (uncompressed_size == src_size) {
-    // Data is not compressed(Issue 40).
-    memcpy(dst, src, src_size);
-    return true;
-  }
-
-  // Workaround for issue #112.
-  // TODO(syoyo): Add more robust out-of-bounds check in `rleUncompress`.
-  if (src_size <= 2) {
-    return false;
-  }
-
-  std::vector<unsigned char> tmpBuf(uncompressed_size);
-
-  int ret = rleUncompress(static_cast<int>(src_size),
-                          static_cast<int>(uncompressed_size),
-                          reinterpret_cast<const signed char *>(src),
-                          reinterpret_cast<char *>(&tmpBuf.at(0)));
-  if (ret != static_cast<int>(uncompressed_size)) {
-    return false;
-  }
-
-  //
-  // Apply EXR-specific? postprocess. Grabbed from OpenEXR's
-  // ImfRleCompressor.cpp
-  //
-
-  // Predictor.
-  {
-    unsigned char *t = &tmpBuf.at(0) + 1;
-    unsigned char *stop = &tmpBuf.at(0) + uncompressed_size;
-
-    while (t < stop) {
-      int d = int(t[-1]) + int(t[0]) - 128;
-      t[0] = static_cast<unsigned char>(d);
-      ++t;
-    }
-  }
-
-  // Reorder the pixel data.
-  {
-    const char *t1 = reinterpret_cast<const char *>(&tmpBuf.at(0));
-    const char *t2 = reinterpret_cast<const char *>(&tmpBuf.at(0)) +
-                     (uncompressed_size + 1) / 2;
-    char *s = reinterpret_cast<char *>(dst);
-    char *stop = s + uncompressed_size;
-
-    for (;;) {
-      if (s < stop)
-        *(s++) = *(t1++);
-      else
-        break;
-
-      if (s < stop)
-        *(s++) = *(t2++);
-      else
-        break;
-    }
-  }
-
-  return true;
-}
-
-#if TINYEXR_USE_PIZ
-
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wc++11-long-long"
-#pragma clang diagnostic ignored "-Wold-style-cast"
-#pragma clang diagnostic ignored "-Wpadded"
-#pragma clang diagnostic ignored "-Wsign-conversion"
-#pragma clang diagnostic ignored "-Wc++11-extensions"
-#pragma clang diagnostic ignored "-Wconversion"
-#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
-
-#if __has_warning("-Wcast-qual")
-#pragma clang diagnostic ignored "-Wcast-qual"
-#endif
-
-#endif
-
-//
-// PIZ compress/uncompress, based on OpenEXR's ImfPizCompressor.cpp
-//
-// -----------------------------------------------------------------
-// Copyright (c) 2004, Industrial Light & Magic, a division of Lucas
-// Digital Ltd. LLC)
-// (3 clause BSD license)
-//
-
-struct PIZChannelData {
-  unsigned short *start;
-  unsigned short *end;
-  int nx;
-  int ny;
-  int ys;
-  int size;
-};
-
-//-----------------------------------------------------------------------------
-//
-//  16-bit Haar Wavelet encoding and decoding
-//
-//  The source code in this file is derived from the encoding
-//  and decoding routines written by Christian Rouet for his
-//  PIZ image file format.
-//
-//-----------------------------------------------------------------------------
-
-//
-// Wavelet basis functions without modulo arithmetic; they produce
-// the best compression ratios when the wavelet-transformed data are
-// Huffman-encoded, but the wavelet transform works only for 14-bit
-// data (untransformed data values must be less than (1 << 14)).
-//
-
-inline void wenc14(unsigned short a, unsigned short b, unsigned short &l,
-                   unsigned short &h) {
-  short as = static_cast<short>(a);
-  short bs = static_cast<short>(b);
-
-  short ms = (as + bs) >> 1;
-  short ds = as - bs;
-
-  l = static_cast<unsigned short>(ms);
-  h = static_cast<unsigned short>(ds);
-}
-
-inline void wdec14(unsigned short l, unsigned short h, unsigned short &a,
-                   unsigned short &b) {
-  short ls = static_cast<short>(l);
-  short hs = static_cast<short>(h);
-
-  int hi = hs;
-  int ai = ls + (hi & 1) + (hi >> 1);
-
-  short as = static_cast<short>(ai);
-  short bs = static_cast<short>(ai - hi);
-
-  a = static_cast<unsigned short>(as);
-  b = static_cast<unsigned short>(bs);
-}
-
-//
-// Wavelet basis functions with modulo arithmetic; they work with full
-// 16-bit data, but Huffman-encoding the wavelet-transformed data doesn't
-// compress the data quite as well.
-//
-
-const int NBITS = 16;
-const int A_OFFSET = 1 << (NBITS - 1);
-const int M_OFFSET = 1 << (NBITS - 1);
-const int MOD_MASK = (1 << NBITS) - 1;
-
-inline void wenc16(unsigned short a, unsigned short b, unsigned short &l,
-                   unsigned short &h) {
-  int ao = (a + A_OFFSET) & MOD_MASK;
-  int m = ((ao + b) >> 1);
-  int d = ao - b;
-
-  if (d < 0) m = (m + M_OFFSET) & MOD_MASK;
-
-  d &= MOD_MASK;
-
-  l = static_cast<unsigned short>(m);
-  h = static_cast<unsigned short>(d);
-}
-
-inline void wdec16(unsigned short l, unsigned short h, unsigned short &a,
-                   unsigned short &b) {
-  int m = l;
-  int d = h;
-  int bb = (m - (d >> 1)) & MOD_MASK;
-  int aa = (d + bb - A_OFFSET) & MOD_MASK;
-  b = static_cast<unsigned short>(bb);
-  a = static_cast<unsigned short>(aa);
-}
-
-//
-// 2D Wavelet encoding:
-//
-
-static void wav2Encode(
-    unsigned short *in,  // io: values are transformed in place
-    int nx,              // i : x size
-    int ox,              // i : x offset
-    int ny,              // i : y size
-    int oy,              // i : y offset
-    unsigned short mx)   // i : maximum in[x][y] value
-{
-  bool w14 = (mx < (1 << 14));
-  int n = (nx > ny) ? ny : nx;
-  int p = 1;   // == 1 <<  level
-  int p2 = 2;  // == 1 << (level+1)
-
-  //
-  // Hierachical loop on smaller dimension n
-  //
-
-  while (p2 <= n) {
-    unsigned short *py = in;
-    unsigned short *ey = in + oy * (ny - p2);
-    int oy1 = oy * p;
-    int oy2 = oy * p2;
-    int ox1 = ox * p;
-    int ox2 = ox * p2;
-    unsigned short i00, i01, i10, i11;
-
-    //
-    // Y loop
-    //
-
-    for (; py <= ey; py += oy2) {
-      unsigned short *px = py;
-      unsigned short *ex = py + ox * (nx - p2);
-
-      //
-      // X loop
-      //
-
-      for (; px <= ex; px += ox2) {
-        unsigned short *p01 = px + ox1;
-        unsigned short *p10 = px + oy1;
-        unsigned short *p11 = p10 + ox1;
-
-        //
-        // 2D wavelet encoding
-        //
-
-        if (w14) {
-          wenc14(*px, *p01, i00, i01);
-          wenc14(*p10, *p11, i10, i11);
-          wenc14(i00, i10, *px, *p10);
-          wenc14(i01, i11, *p01, *p11);
-        } else {
-          wenc16(*px, *p01, i00, i01);
-          wenc16(*p10, *p11, i10, i11);
-          wenc16(i00, i10, *px, *p10);
-          wenc16(i01, i11, *p01, *p11);
-        }
-      }
-
-      //
-      // Encode (1D) odd column (still in Y loop)
-      //
-
-      if (nx & p) {
-        unsigned short *p10 = px + oy1;
-
-        if (w14)
-          wenc14(*px, *p10, i00, *p10);
-        else
-          wenc16(*px, *p10, i00, *p10);
-
-        *px = i00;
-      }
-    }
-
-    //
-    // Encode (1D) odd line (must loop in X)
-    //
-
-    if (ny & p) {
-      unsigned short *px = py;
-      unsigned short *ex = py + ox * (nx - p2);
-
-      for (; px <= ex; px += ox2) {
-        unsigned short *p01 = px + ox1;
-
-        if (w14)
-          wenc14(*px, *p01, i00, *p01);
-        else
-          wenc16(*px, *p01, i00, *p01);
-
-        *px = i00;
-      }
-    }
-
-    //
-    // Next level
-    //
-
-    p = p2;
-    p2 <<= 1;
-  }
-}
-
-//
-// 2D Wavelet decoding:
-//
-
-static void wav2Decode(
-    unsigned short *in,  // io: values are transformed in place
-    int nx,              // i : x size
-    int ox,              // i : x offset
-    int ny,              // i : y size
-    int oy,              // i : y offset
-    unsigned short mx)   // i : maximum in[x][y] value
-{
-  bool w14 = (mx < (1 << 14));
-  int n = (nx > ny) ? ny : nx;
-  int p = 1;
-  int p2;
-
-  //
-  // Search max level
-  //
-
-  while (p <= n) p <<= 1;
-
-  p >>= 1;
-  p2 = p;
-  p >>= 1;
-
-  //
-  // Hierarchical loop on smaller dimension n
-  //
-
-  while (p >= 1) {
-    unsigned short *py = in;
-    unsigned short *ey = in + oy * (ny - p2);
-    int oy1 = oy * p;
-    int oy2 = oy * p2;
-    int ox1 = ox * p;
-    int ox2 = ox * p2;
-    unsigned short i00, i01, i10, i11;
-
-    //
-    // Y loop
-    //
-
-    for (; py <= ey; py += oy2) {
-      unsigned short *px = py;
-      unsigned short *ex = py + ox * (nx - p2);
-
-      //
-      // X loop
-      //
-
-      for (; px <= ex; px += ox2) {
-        unsigned short *p01 = px + ox1;
-        unsigned short *p10 = px + oy1;
-        unsigned short *p11 = p10 + ox1;
-
-        //
-        // 2D wavelet decoding
-        //
-
-        if (w14) {
-          wdec14(*px, *p10, i00, i10);
-          wdec14(*p01, *p11, i01, i11);
-          wdec14(i00, i01, *px, *p01);
-          wdec14(i10, i11, *p10, *p11);
-        } else {
-          wdec16(*px, *p10, i00, i10);
-          wdec16(*p01, *p11, i01, i11);
-          wdec16(i00, i01, *px, *p01);
-          wdec16(i10, i11, *p10, *p11);
-        }
-      }
-
-      //
-      // Decode (1D) odd column (still in Y loop)
-      //
-
-      if (nx & p) {
-        unsigned short *p10 = px + oy1;
-
-        if (w14)
-          wdec14(*px, *p10, i00, *p10);
-        else
-          wdec16(*px, *p10, i00, *p10);
-
-        *px = i00;
-      }
-    }
-
-    //
-    // Decode (1D) odd line (must loop in X)
-    //
-
-    if (ny & p) {
-      unsigned short *px = py;
-      unsigned short *ex = py + ox * (nx - p2);
-
-      for (; px <= ex; px += ox2) {
-        unsigned short *p01 = px + ox1;
-
-        if (w14)
-          wdec14(*px, *p01, i00, *p01);
-        else
-          wdec16(*px, *p01, i00, *p01);
-
-        *px = i00;
-      }
-    }
-
-    //
-    // Next level
-    //
-
-    p2 = p;
-    p >>= 1;
-  }
-}
-
-//-----------------------------------------------------------------------------
-//
-//  16-bit Huffman compression and decompression.
-//
-//  The source code in this file is derived from the 8-bit
-//  Huffman compression and decompression routines written
-//  by Christian Rouet for his PIZ image file format.
-//
-//-----------------------------------------------------------------------------
-
-// Adds some modification for tinyexr.
-
-const int HUF_ENCBITS = 16;  // literal (value) bit length
-const int HUF_DECBITS = 14;  // decoding bit size (>= 8)
-
-const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1;  // encoding table size
-const int HUF_DECSIZE = 1 << HUF_DECBITS;        // decoding table size
-const int HUF_DECMASK = HUF_DECSIZE - 1;
-
-struct HufDec {  // short code    long code
-  //-------------------------------
-  int len : 8;   // code length    0
-  int lit : 24;  // lit      p size
-  int *p;        // 0      lits
-};
-
-inline long long hufLength(long long code) { return code & 63; }
-
-inline long long hufCode(long long code) { return code >> 6; }
-
-inline void outputBits(int nBits, long long bits, long long &c, int &lc,
-                       char *&out) {
-  c <<= nBits;
-  lc += nBits;
-
-  c |= bits;
-
-  while (lc >= 8) *out++ = static_cast<char>((c >> (lc -= 8)));
-}
-
-inline long long getBits(int nBits, long long &c, int &lc, const char *&in) {
-  while (lc < nBits) {
-    c = (c << 8) | *(reinterpret_cast<const unsigned char *>(in++));
-    lc += 8;
-  }
-
-  lc -= nBits;
-  return (c >> lc) & ((1 << nBits) - 1);
-}
-
-//
-// ENCODING TABLE BUILDING & (UN)PACKING
-//
-
-//
-// Build a "canonical" Huffman code table:
-//  - for each (uncompressed) symbol, hcode contains the length
-//    of the corresponding code (in the compressed data)
-//  - canonical codes are computed and stored in hcode
-//  - the rules for constructing canonical codes are as follows:
-//    * shorter codes (if filled with zeroes to the right)
-//      have a numerically higher value than longer codes
-//    * for codes with the same length, numerical values
-//      increase with numerical symbol values
-//  - because the canonical code table can be constructed from
-//    symbol lengths alone, the code table can be transmitted
-//    without sending the actual code values
-//  - see http://www.compressconsult.com/huffman/
-//
-
-static void hufCanonicalCodeTable(long long hcode[HUF_ENCSIZE]) {
-  long long n[59];
-
-  //
-  // For each i from 0 through 58, count the
-  // number of different codes of length i, and
-  // store the count in n[i].
-  //
-
-  for (int i = 0; i <= 58; ++i) n[i] = 0;
-
-  for (int i = 0; i < HUF_ENCSIZE; ++i) n[hcode[i]] += 1;
-
-  //
-  // For each i from 58 through 1, compute the
-  // numerically lowest code with length i, and
-  // store that code in n[i].
-  //
-
-  long long c = 0;
-
-  for (int i = 58; i > 0; --i) {
-    long long nc = ((c + n[i]) >> 1);
-    n[i] = c;
-    c = nc;
-  }
-
-  //
-  // hcode[i] contains the length, l, of the
-  // code for symbol i.  Assign the next available
-  // code of length l to the symbol and store both
-  // l and the code in hcode[i].
-  //
-
-  for (int i = 0; i < HUF_ENCSIZE; ++i) {
-    int l = static_cast<int>(hcode[i]);
-
-    if (l > 0) hcode[i] = l | (n[l]++ << 6);
-  }
-}
-
-//
-// Compute Huffman codes (based on frq input) and store them in frq:
-//  - code structure is : [63:lsb - 6:msb] | [5-0: bit length];
-//  - max code length is 58 bits;
-//  - codes outside the range [im-iM] have a null length (unused values);
-//  - original frequencies are destroyed;
-//  - encoding tables are used by hufEncode() and hufBuildDecTable();
-//
-
-struct FHeapCompare {
-  bool operator()(long long *a, long long *b) { return *a > *b; }
-};
-
-static void hufBuildEncTable(
-    long long *frq,  // io: input frequencies [HUF_ENCSIZE], output table
-    int *im,         //  o: min frq index
-    int *iM)         //  o: max frq index
-{
-  //
-  // This function assumes that when it is called, array frq
-  // indicates the frequency of all possible symbols in the data
-  // that are to be Huffman-encoded.  (frq[i] contains the number
-  // of occurrences of symbol i in the data.)
-  //
-  // The loop below does three things:
-  //
-  // 1) Finds the minimum and maximum indices that point
-  //    to non-zero entries in frq:
-  //
-  //     frq[im] != 0, and frq[i] == 0 for all i < im
-  //     frq[iM] != 0, and frq[i] == 0 for all i > iM
-  //
-  // 2) Fills array fHeap with pointers to all non-zero
-  //    entries in frq.
-  //
-  // 3) Initializes array hlink such that hlink[i] == i
-  //    for all array entries.
-  //
-
-  std::vector<int> hlink(HUF_ENCSIZE);
-  std::vector<long long *> fHeap(HUF_ENCSIZE);
-
-  *im = 0;
-
-  while (!frq[*im]) (*im)++;
-
-  int nf = 0;
-
-  for (int i = *im; i < HUF_ENCSIZE; i++) {
-    hlink[i] = i;
-
-    if (frq[i]) {
-      fHeap[nf] = &frq[i];
-      nf++;
-      *iM = i;
-    }
-  }
-
-  //
-  // Add a pseudo-symbol, with a frequency count of 1, to frq;
-  // adjust the fHeap and hlink array accordingly.  Function
-  // hufEncode() uses the pseudo-symbol for run-length encoding.
-  //
-
-  (*iM)++;
-  frq[*iM] = 1;
-  fHeap[nf] = &frq[*iM];
-  nf++;
-
-  //
-  // Build an array, scode, such that scode[i] contains the number
-  // of bits assigned to symbol i.  Conceptually this is done by
-  // constructing a tree whose leaves are the symbols with non-zero
-  // frequency:
-  //
-  //     Make a heap that contains all symbols with a non-zero frequency,
-  //     with the least frequent symbol on top.
-  //
-  //     Repeat until only one symbol is left on the heap:
-  //
-  //         Take the two least frequent symbols off the top of the heap.
-  //         Create a new node that has first two nodes as children, and
-  //         whose frequency is the sum of the frequencies of the first
-  //         two nodes.  Put the new node back into the heap.
-  //
-  // The last node left on the heap is the root of the tree.  For each
-  // leaf node, the distance between the root and the leaf is the length
-  // of the code for the corresponding symbol.
-  //
-  // The loop below doesn't actually build the tree; instead we compute
-  // the distances of the leaves from the root on the fly.  When a new
-  // node is added to the heap, then that node's descendants are linked
-  // into a single linear list that starts at the new node, and the code
-  // lengths of the descendants (that is, their distance from the root
-  // of the tree) are incremented by one.
-  //
-
-  std::make_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
-
-  std::vector<long long> scode(HUF_ENCSIZE);
-  memset(scode.data(), 0, sizeof(long long) * HUF_ENCSIZE);
-
-  while (nf > 1) {
-    //
-    // Find the indices, mm and m, of the two smallest non-zero frq
-    // values in fHeap, add the smallest frq to the second-smallest
-    // frq, and remove the smallest frq value from fHeap.
-    //
-
-    int mm = fHeap[0] - frq;
-    std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
-    --nf;
-
-    int m = fHeap[0] - frq;
-    std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
-
-    frq[m] += frq[mm];
-    std::push_heap(&fHeap[0], &fHeap[nf], FHeapCompare());
-
-    //
-    // The entries in scode are linked into lists with the
-    // entries in hlink serving as "next" pointers and with
-    // the end of a list marked by hlink[j] == j.
-    //
-    // Traverse the lists that start at scode[m] and scode[mm].
-    // For each element visited, increment the length of the
-    // corresponding code by one bit. (If we visit scode[j]
-    // during the traversal, then the code for symbol j becomes
-    // one bit longer.)
-    //
-    // Merge the lists that start at scode[m] and scode[mm]
-    // into a single list that starts at scode[m].
-    //
-
-    //
-    // Add a bit to all codes in the first list.
-    //
-
-    for (int j = m;; j = hlink[j]) {
-      scode[j]++;
-
-      assert(scode[j] <= 58);
-
-      if (hlink[j] == j) {
-        //
-        // Merge the two lists.
-        //
-
-        hlink[j] = mm;
-        break;
-      }
-    }
-
-    //
-    // Add a bit to all codes in the second list
-    //
-
-    for (int j = mm;; j = hlink[j]) {
-      scode[j]++;
-
-      assert(scode[j] <= 58);
-
-      if (hlink[j] == j) break;
-    }
-  }
-
-  //
-  // Build a canonical Huffman code table, replacing the code
-  // lengths in scode with (code, code length) pairs.  Copy the
-  // code table from scode into frq.
-  //
-
-  hufCanonicalCodeTable(scode.data());
-  memcpy(frq, scode.data(), sizeof(long long) * HUF_ENCSIZE);
-}
-
-//
-// Pack an encoding table:
-//  - only code lengths, not actual codes, are stored
-//  - runs of zeroes are compressed as follows:
-//
-//    unpacked    packed
-//    --------------------------------
-//    1 zero    0  (6 bits)
-//    2 zeroes    59
-//    3 zeroes    60
-//    4 zeroes    61
-//    5 zeroes    62
-//    n zeroes (6 or more)  63 n-6  (6 + 8 bits)
-//
-
-const int SHORT_ZEROCODE_RUN = 59;
-const int LONG_ZEROCODE_RUN = 63;
-const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
-const int LONGEST_LONG_RUN = 255 + SHORTEST_LONG_RUN;
-
-static void hufPackEncTable(
-    const long long *hcode,  // i : encoding table [HUF_ENCSIZE]
-    int im,                  // i : min hcode index
-    int iM,                  // i : max hcode index
-    char **pcode)            //  o: ptr to packed table (updated)
-{
-  char *p = *pcode;
-  long long c = 0;
-  int lc = 0;
-
-  for (; im <= iM; im++) {
-    int l = hufLength(hcode[im]);
-
-    if (l == 0) {
-      int zerun = 1;
-
-      while ((im < iM) && (zerun < LONGEST_LONG_RUN)) {
-        if (hufLength(hcode[im + 1]) > 0) break;
-        im++;
-        zerun++;
-      }
-
-      if (zerun >= 2) {
-        if (zerun >= SHORTEST_LONG_RUN) {
-          outputBits(6, LONG_ZEROCODE_RUN, c, lc, p);
-          outputBits(8, zerun - SHORTEST_LONG_RUN, c, lc, p);
-        } else {
-          outputBits(6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p);
-        }
-        continue;
-      }
-    }
-
-    outputBits(6, l, c, lc, p);
-  }
-
-  if (lc > 0) *p++ = (unsigned char)(c << (8 - lc));
-
-  *pcode = p;
-}
-
-//
-// Unpack an encoding table packed by hufPackEncTable():
-//
-
-static bool hufUnpackEncTable(
-    const char **pcode,  // io: ptr to packed table (updated)
-    int ni,              // i : input size (in bytes)
-    int im,              // i : min hcode index
-    int iM,              // i : max hcode index
-    long long *hcode)    //  o: encoding table [HUF_ENCSIZE]
-{
-  memset(hcode, 0, sizeof(long long) * HUF_ENCSIZE);
-
-  const char *p = *pcode;
-  long long c = 0;
-  int lc = 0;
-
-  for (; im <= iM; im++) {
-    if (p - *pcode >= ni) {
-      return false;
-    }
-
-    long long l = hcode[im] = getBits(6, c, lc, p);  // code length
-
-    if (l == (long long)LONG_ZEROCODE_RUN) {
-      if (p - *pcode > ni) {
-        return false;
-      }
-
-      int zerun = getBits(8, c, lc, p) + SHORTEST_LONG_RUN;
-
-      if (im + zerun > iM + 1) {
-        return false;
-      }
-
-      while (zerun--) hcode[im++] = 0;
-
-      im--;
-    } else if (l >= (long long)SHORT_ZEROCODE_RUN) {
-      int zerun = l - SHORT_ZEROCODE_RUN + 2;
-
-      if (im + zerun > iM + 1) {
-        return false;
-      }
-
-      while (zerun--) hcode[im++] = 0;
-
-      im--;
-    }
-  }
-
-  *pcode = const_cast<char *>(p);
-
-  hufCanonicalCodeTable(hcode);
-
-  return true;
-}
-
-//
-// DECODING TABLE BUILDING
-//
-
-//
-// Clear a newly allocated decoding table so that it contains only zeroes.
-//
-
-static void hufClearDecTable(HufDec *hdecod)  // io: (allocated by caller)
-//     decoding table [HUF_DECSIZE]
-{
-  for (int i = 0; i < HUF_DECSIZE; i++) {
-    hdecod[i].len = 0;
-    hdecod[i].lit = 0;
-    hdecod[i].p = NULL;
-  }
-  // memset(hdecod, 0, sizeof(HufDec) * HUF_DECSIZE);
-}
-
-//
-// Build a decoding hash table based on the encoding table hcode:
-//  - short codes (<= HUF_DECBITS) are resolved with a single table access;
-//  - long code entry allocations are not optimized, because long codes are
-//    unfrequent;
-//  - decoding tables are used by hufDecode();
-//
-
-static bool hufBuildDecTable(const long long *hcode,  // i : encoding table
-                             int im,                  // i : min index in hcode
-                             int iM,                  // i : max index in hcode
-                             HufDec *hdecod)  //  o: (allocated by caller)
-//     decoding table [HUF_DECSIZE]
-{
-  //
-  // Init hashtable & loop on all codes.
-  // Assumes that hufClearDecTable(hdecod) has already been called.
-  //
-
-  for (; im <= iM; im++) {
-    long long c = hufCode(hcode[im]);
-    int l = hufLength(hcode[im]);
-
-    if (c >> l) {
-      //
-      // Error: c is supposed to be an l-bit code,
-      // but c contains a value that is greater
-      // than the largest l-bit number.
-      //
-
-      // invalidTableEntry();
-      return false;
-    }
-
-    if (l > HUF_DECBITS) {
-      //
-      // Long code: add a secondary entry
-      //
-
-      HufDec *pl = hdecod + (c >> (l - HUF_DECBITS));
-
-      if (pl->len) {
-        //
-        // Error: a short code has already
-        // been stored in table entry *pl.
-        //
-
-        // invalidTableEntry();
-        return false;
-      }
-
-      pl->lit++;
-
-      if (pl->p) {
-        int *p = pl->p;
-        pl->p = new int[pl->lit];
-
-        for (int i = 0; i < pl->lit - 1; ++i) pl->p[i] = p[i];
-
-        delete[] p;
-      } else {
-        pl->p = new int[1];
-      }
-
-      pl->p[pl->lit - 1] = im;
-    } else if (l) {
-      //
-      // Short code: init all primary entries
-      //
-
-      HufDec *pl = hdecod + (c << (HUF_DECBITS - l));
-
-      for (long long i = 1ULL << (HUF_DECBITS - l); i > 0; i--, pl++) {
-        if (pl->len || pl->p) {
-          //
-          // Error: a short code or a long code has
-          // already been stored in table entry *pl.
-          //
-
-          // invalidTableEntry();
-          return false;
-        }
-
-        pl->len = l;
-        pl->lit = im;
-      }
-    }
-  }
-
-  return true;
-}
-
-//
-// Free the long code entries of a decoding table built by hufBuildDecTable()
-//
-
-static void hufFreeDecTable(HufDec *hdecod)  // io: Decoding table
-{
-  for (int i = 0; i < HUF_DECSIZE; i++) {
-    if (hdecod[i].p) {
-      delete[] hdecod[i].p;
-      hdecod[i].p = 0;
-    }
-  }
-}
-
-//
-// ENCODING
-//
-
-inline void outputCode(long long code, long long &c, int &lc, char *&out) {
-  outputBits(hufLength(code), hufCode(code), c, lc, out);
-}
-
-inline void sendCode(long long sCode, int runCount, long long runCode,
-                     long long &c, int &lc, char *&out) {
-  //
-  // Output a run of runCount instances of the symbol sCount.
-  // Output the symbols explicitly, or if that is shorter, output
-  // the sCode symbol once followed by a runCode symbol and runCount
-  // expressed as an 8-bit number.
-  //
-
-  if (hufLength(sCode) + hufLength(runCode) + 8 < hufLength(sCode) * runCount) {
-    outputCode(sCode, c, lc, out);
-    outputCode(runCode, c, lc, out);
-    outputBits(8, runCount, c, lc, out);
-  } else {
-    while (runCount-- >= 0) outputCode(sCode, c, lc, out);
-  }
-}
-
-//
-// Encode (compress) ni values based on the Huffman encoding table hcode:
-//
-
-static int hufEncode            // return: output size (in bits)
-    (const long long *hcode,    // i : encoding table
-     const unsigned short *in,  // i : uncompressed input buffer
-     const int ni,              // i : input buffer size (in bytes)
-     int rlc,                   // i : rl code
-     char *out)                 //  o: compressed output buffer
-{
-  char *outStart = out;
-  long long c = 0;  // bits not yet written to out
-  int lc = 0;       // number of valid bits in c (LSB)
-  int s = in[0];
-  int cs = 0;
-
-  //
-  // Loop on input values
-  //
-
-  for (int i = 1; i < ni; i++) {
-    //
-    // Count same values or send code
-    //
-
-    if (s == in[i] && cs < 255) {
-      cs++;
-    } else {
-      sendCode(hcode[s], cs, hcode[rlc], c, lc, out);
-      cs = 0;
-    }
-
-    s = in[i];
-  }
-
-  //
-  // Send remaining code
-  //
-
-  sendCode(hcode[s], cs, hcode[rlc], c, lc, out);
-
-  if (lc) *out = (c << (8 - lc)) & 0xff;
-
-  return (out - outStart) * 8 + lc;
-}
-
-//
-// DECODING
-//
-
-//
-// In order to force the compiler to inline them,
-// getChar() and getCode() are implemented as macros
-// instead of "inline" functions.
-//
-
-#define getChar(c, lc, in)                   \
-  {                                          \
-    c = (c << 8) | *(unsigned char *)(in++); \
-    lc += 8;                                 \
-  }
-
-#if 0
-#define getCode(po, rlc, c, lc, in, out, ob, oe) \
-  {                                              \
-    if (po == rlc) {                             \
-      if (lc < 8) getChar(c, lc, in);            \
-                                                 \
-      lc -= 8;                                   \
-                                                 \
-      unsigned char cs = (c >> lc);              \
-                                                 \
-      if (out + cs > oe) return false;           \
-                                                 \
-      /* TinyEXR issue 78 */                     \
-      unsigned short s = out[-1];                \
-                                                 \
-      while (cs-- > 0) *out++ = s;               \
-    } else if (out < oe) {                       \
-      *out++ = po;                               \
-    } else {                                     \
-      return false;                              \
-    }                                            \
-  }
-#else
-static bool getCode(int po, int rlc, long long &c, int &lc, const char *&in,
-                    const char *in_end, unsigned short *&out,
-                    const unsigned short *ob, const unsigned short *oe) {
-  (void)ob;
-  if (po == rlc) {
-    if (lc < 8) {
-      /* TinyEXR issue 78 */
-      if ((in + 1) >= in_end) {
-        return false;
-      }
-
-      getChar(c, lc, in);
-    }
-
-    lc -= 8;
-
-    unsigned char cs = (c >> lc);
-
-    if (out + cs > oe) return false;
-
-    // Bounds check for safety
-    // Issue 100.
-    if ((out - 1) < ob) return false;
-    unsigned short s = out[-1];
-
-    while (cs-- > 0) *out++ = s;
-  } else if (out < oe) {
-    *out++ = po;
-  } else {
-    return false;
-  }
-  return true;
-}
-#endif
-
-//
-// Decode (uncompress) ni bits based on encoding & decoding tables:
-//
-
-static bool hufDecode(const long long *hcode,  // i : encoding table
-                      const HufDec *hdecod,    // i : decoding table
-                      const char *in,          // i : compressed input buffer
-                      int ni,                  // i : input size (in bits)
-                      int rlc,                 // i : run-length code
-                      int no,  // i : expected output size (in bytes)
-                      unsigned short *out)  //  o: uncompressed output buffer
-{
-  long long c = 0;
-  int lc = 0;
-  unsigned short *outb = out;          // begin
-  unsigned short *oe = out + no;       // end
-  const char *ie = in + (ni + 7) / 8;  // input byte size
-
-  //
-  // Loop on input bytes
-  //
-
-  while (in < ie) {
-    getChar(c, lc, in);
-
-    //
-    // Access decoding table
-    //
-
-    while (lc >= HUF_DECBITS) {
-      const HufDec pl = hdecod[(c >> (lc - HUF_DECBITS)) & HUF_DECMASK];
-
-      if (pl.len) {
-        //
-        // Get short code
-        //
-
-        lc -= pl.len;
-        // std::cout << "lit = " << pl.lit << std::endl;
-        // std::cout << "rlc = " << rlc << std::endl;
-        // std::cout << "c = " << c << std::endl;
-        // std::cout << "lc = " << lc << std::endl;
-        // std::cout << "in = " << in << std::endl;
-        // std::cout << "out = " << out << std::endl;
-        // std::cout << "oe = " << oe << std::endl;
-        if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) {
-          return false;
-        }
-      } else {
-        if (!pl.p) {
-          return false;
-        }
-        // invalidCode(); // wrong code
-
-        //
-        // Search long code
-        //
-
-        int j;
-
-        for (j = 0; j < pl.lit; j++) {
-          int l = hufLength(hcode[pl.p[j]]);
-
-          while (lc < l && in < ie)  // get more bits
-            getChar(c, lc, in);
-
-          if (lc >= l) {
-            if (hufCode(hcode[pl.p[j]]) ==
-                ((c >> (lc - l)) & (((long long)(1) << l) - 1))) {
-              //
-              // Found : get long code
-              //
-
-              lc -= l;
-              if (!getCode(pl.p[j], rlc, c, lc, in, ie, out, outb, oe)) {
-                return false;
-              }
-              break;
-            }
-          }
-        }
-
-        if (j == pl.lit) {
-          return false;
-          // invalidCode(); // Not found
-        }
-      }
-    }
-  }
-
-  //
-  // Get remaining (short) codes
-  //
-
-  int i = (8 - ni) & 7;
-  c >>= i;
-  lc -= i;
-
-  while (lc > 0) {
-    const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK];
-
-    if (pl.len) {
-      lc -= pl.len;
-      if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) {
-        return false;
-      }
-    } else {
-      return false;
-      // invalidCode(); // wrong (long) code
-    }
-  }
-
-  if (out - outb != no) {
-    return false;
-  }
-  // notEnoughData ();
-
-  return true;
-}
-
-static void countFrequencies(std::vector<long long> &freq,
-                             const unsigned short data[/*n*/], int n) {
-  for (int i = 0; i < HUF_ENCSIZE; ++i) freq[i] = 0;
-
-  for (int i = 0; i < n; ++i) ++freq[data[i]];
-}
-
-static void writeUInt(char buf[4], unsigned int i) {
-  unsigned char *b = (unsigned char *)buf;
-
-  b[0] = i;
-  b[1] = i >> 8;
-  b[2] = i >> 16;
-  b[3] = i >> 24;
-}
-
-static unsigned int readUInt(const char buf[4]) {
-  const unsigned char *b = (const unsigned char *)buf;
-
-  return (b[0] & 0x000000ff) | ((b[1] << 8) & 0x0000ff00) |
-         ((b[2] << 16) & 0x00ff0000) | ((b[3] << 24) & 0xff000000);
-}
-
-//
-// EXTERNAL INTERFACE
-//
-
-static int hufCompress(const unsigned short raw[], int nRaw,
-                       char compressed[]) {
-  if (nRaw == 0) return 0;
-
-  std::vector<long long> freq(HUF_ENCSIZE);
-
-  countFrequencies(freq, raw, nRaw);
-
-  int im = 0;
-  int iM = 0;
-  hufBuildEncTable(freq.data(), &im, &iM);
-
-  char *tableStart = compressed + 20;
-  char *tableEnd = tableStart;
-  hufPackEncTable(freq.data(), im, iM, &tableEnd);
-  int tableLength = tableEnd - tableStart;
-
-  char *dataStart = tableEnd;
-  int nBits = hufEncode(freq.data(), raw, nRaw, iM, dataStart);
-  int data_length = (nBits + 7) / 8;
-
-  writeUInt(compressed, im);
-  writeUInt(compressed + 4, iM);
-  writeUInt(compressed + 8, tableLength);
-  writeUInt(compressed + 12, nBits);
-  writeUInt(compressed + 16, 0);  // room for future extensions
-
-  return dataStart + data_length - compressed;
-}
-
-static bool hufUncompress(const char compressed[], int nCompressed,
-                          std::vector<unsigned short> *raw) {
-  if (nCompressed == 0) {
-    if (raw->size() != 0) return false;
-
-    return false;
-  }
-
-  int im = readUInt(compressed);
-  int iM = readUInt(compressed + 4);
-  // int tableLength = readUInt (compressed + 8);
-  int nBits = readUInt(compressed + 12);
-
-  if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE) return false;
-
-  const char *ptr = compressed + 20;
-
-  //
-  // Fast decoder needs at least 2x64-bits of compressed data, and
-  // needs to be run-able on this platform. Otherwise, fall back
-  // to the original decoder
-  //
-
-  // if (FastHufDecoder::enabled() && nBits > 128)
-  //{
-  //    FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM);
-  //    fhd.decode ((unsigned char*)ptr, nBits, raw, nRaw);
-  //}
-  // else
-  {
-    std::vector<long long> freq(HUF_ENCSIZE);
-    std::vector<HufDec> hdec(HUF_DECSIZE);
-
-    hufClearDecTable(&hdec.at(0));
-
-    hufUnpackEncTable(&ptr, nCompressed - (ptr - compressed), im, iM,
-                      &freq.at(0));
-
-    {
-      if (nBits > 8 * (nCompressed - (ptr - compressed))) {
-        return false;
-      }
-
-      hufBuildDecTable(&freq.at(0), im, iM, &hdec.at(0));
-      hufDecode(&freq.at(0), &hdec.at(0), ptr, nBits, iM, raw->size(),
-                raw->data());
-    }
-    // catch (...)
-    //{
-    //    hufFreeDecTable (hdec);
-    //    throw;
-    //}
-
-    hufFreeDecTable(&hdec.at(0));
-  }
-
-  return true;
-}
-
-//
-// Functions to compress the range of values in the pixel data
-//
-
-const int USHORT_RANGE = (1 << 16);
-const int BITMAP_SIZE = (USHORT_RANGE >> 3);
-
-static void bitmapFromData(const unsigned short data[/*nData*/], int nData,
-                           unsigned char bitmap[BITMAP_SIZE],
-                           unsigned short &minNonZero,
-                           unsigned short &maxNonZero) {
-  for (int i = 0; i < BITMAP_SIZE; ++i) bitmap[i] = 0;
-
-  for (int i = 0; i < nData; ++i) bitmap[data[i] >> 3] |= (1 << (data[i] & 7));
-
-  bitmap[0] &= ~1;  // zero is not explicitly stored in
-                    // the bitmap; we assume that the
-                    // data always contain zeroes
-  minNonZero = BITMAP_SIZE - 1;
-  maxNonZero = 0;
-
-  for (int i = 0; i < BITMAP_SIZE; ++i) {
-    if (bitmap[i]) {
-      if (minNonZero > i) minNonZero = i;
-      if (maxNonZero < i) maxNonZero = i;
-    }
-  }
-}
-
-static unsigned short forwardLutFromBitmap(
-    const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) {
-  int k = 0;
-
-  for (int i = 0; i < USHORT_RANGE; ++i) {
-    if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7))))
-      lut[i] = k++;
-    else
-      lut[i] = 0;
-  }
-
-  return k - 1;  // maximum value stored in lut[],
-}  // i.e. number of ones in bitmap minus 1
-
-static unsigned short reverseLutFromBitmap(
-    const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) {
-  int k = 0;
-
-  for (int i = 0; i < USHORT_RANGE; ++i) {
-    if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) lut[k++] = i;
-  }
-
-  int n = k - 1;
-
-  while (k < USHORT_RANGE) lut[k++] = 0;
-
-  return n;  // maximum k where lut[k] is non-zero,
-}  // i.e. number of ones in bitmap minus 1
-
-static void applyLut(const unsigned short lut[USHORT_RANGE],
-                     unsigned short data[/*nData*/], int nData) {
-  for (int i = 0; i < nData; ++i) data[i] = lut[data[i]];
-}
-
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif  // __clang__
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize,
-                        const unsigned char *inPtr, size_t inSize,
-                        const std::vector<ChannelInfo> &channelInfo,
-                        int data_width, int num_lines) {
-  std::vector<unsigned char> bitmap(BITMAP_SIZE);
-  unsigned short minNonZero;
-  unsigned short maxNonZero;
-
-#if !MINIZ_LITTLE_ENDIAN
-  // @todo { PIZ compression on BigEndian architecture. }
-  assert(0);
-  return false;
-#endif
-
-  // Assume `inSize` is multiple of 2 or 4.
-  std::vector<unsigned short> tmpBuffer(inSize / sizeof(unsigned short));
-
-  std::vector<PIZChannelData> channelData(channelInfo.size());
-  unsigned short *tmpBufferEnd = &tmpBuffer.at(0);
-
-  for (size_t c = 0; c < channelData.size(); c++) {
-    PIZChannelData &cd = channelData[c];
-
-    cd.start = tmpBufferEnd;
-    cd.end = cd.start;
-
-    cd.nx = data_width;
-    cd.ny = num_lines;
-    // cd.ys = c.channel().ySampling;
-
-    size_t pixelSize = sizeof(int);  // UINT and FLOAT
-    if (channelInfo[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-      pixelSize = sizeof(short);
-    }
-
-    cd.size = static_cast<int>(pixelSize / sizeof(short));
-
-    tmpBufferEnd += cd.nx * cd.ny * cd.size;
-  }
-
-  const unsigned char *ptr = inPtr;
-  for (int y = 0; y < num_lines; ++y) {
-    for (size_t i = 0; i < channelData.size(); ++i) {
-      PIZChannelData &cd = channelData[i];
-
-      // if (modp (y, cd.ys) != 0)
-      //    continue;
-
-      size_t n = static_cast<size_t>(cd.nx * cd.size);
-      memcpy(cd.end, ptr, n * sizeof(unsigned short));
-      ptr += n * sizeof(unsigned short);
-      cd.end += n;
-    }
-  }
-
-  bitmapFromData(&tmpBuffer.at(0), static_cast<int>(tmpBuffer.size()),
-                 bitmap.data(), minNonZero, maxNonZero);
-
-  std::vector<unsigned short> lut(USHORT_RANGE);
-  unsigned short maxValue = forwardLutFromBitmap(bitmap.data(), lut.data());
-  applyLut(lut.data(), &tmpBuffer.at(0), static_cast<int>(tmpBuffer.size()));
-
-  //
-  // Store range compression info in _outBuffer
-  //
-
-  char *buf = reinterpret_cast<char *>(outPtr);
-
-  memcpy(buf, &minNonZero, sizeof(unsigned short));
-  buf += sizeof(unsigned short);
-  memcpy(buf, &maxNonZero, sizeof(unsigned short));
-  buf += sizeof(unsigned short);
-
-  if (minNonZero <= maxNonZero) {
-    memcpy(buf, reinterpret_cast<char *>(&bitmap[0] + minNonZero),
-           maxNonZero - minNonZero + 1);
-    buf += maxNonZero - minNonZero + 1;
-  }
-
-  //
-  // Apply wavelet encoding
-  //
-
-  for (size_t i = 0; i < channelData.size(); ++i) {
-    PIZChannelData &cd = channelData[i];
-
-    for (int j = 0; j < cd.size; ++j) {
-      wav2Encode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size,
-                 maxValue);
-    }
-  }
-
-  //
-  // Apply Huffman encoding; append the result to _outBuffer
-  //
-
-  // length header(4byte), then huff data. Initialize length header with zero,
-  // then later fill it by `length`.
-  char *lengthPtr = buf;
-  int zero = 0;
-  memcpy(buf, &zero, sizeof(int));
-  buf += sizeof(int);
-
-  int length =
-      hufCompress(&tmpBuffer.at(0), static_cast<int>(tmpBuffer.size()), buf);
-  memcpy(lengthPtr, &length, sizeof(int));
-
-  (*outSize) = static_cast<unsigned int>(
-      (reinterpret_cast<unsigned char *>(buf) - outPtr) +
-      static_cast<unsigned int>(length));
-
-  // Use uncompressed data when compressed data is larger than uncompressed.
-  // (Issue 40)
-  if ((*outSize) >= inSize) {
-    (*outSize) = static_cast<unsigned int>(inSize);
-    memcpy(outPtr, inPtr, inSize);
-  }
-  return true;
-}
-
-static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr,
-                          size_t tmpBufSize, size_t inLen, int num_channels,
-                          const EXRChannelInfo *channels, int data_width,
-                          int num_lines) {
-  if (inLen == tmpBufSize) {
-    // Data is not compressed(Issue 40).
-    memcpy(outPtr, inPtr, inLen);
-    return true;
-  }
-
-  std::vector<unsigned char> bitmap(BITMAP_SIZE);
-  unsigned short minNonZero;
-  unsigned short maxNonZero;
-
-#if !MINIZ_LITTLE_ENDIAN
-  // @todo { PIZ compression on BigEndian architecture. }
-  assert(0);
-  return false;
-#endif
-
-  memset(bitmap.data(), 0, BITMAP_SIZE);
-
-  const unsigned char *ptr = inPtr;
-  // minNonZero = *(reinterpret_cast<const unsigned short *>(ptr));
-  tinyexr::cpy2(&minNonZero, reinterpret_cast<const unsigned short *>(ptr));
-  // maxNonZero = *(reinterpret_cast<const unsigned short *>(ptr + 2));
-  tinyexr::cpy2(&maxNonZero, reinterpret_cast<const unsigned short *>(ptr + 2));
-  ptr += 4;
-
-  if (maxNonZero >= BITMAP_SIZE) {
-    return false;
-  }
-
-  if (minNonZero <= maxNonZero) {
-    memcpy(reinterpret_cast<char *>(&bitmap[0] + minNonZero), ptr,
-           maxNonZero - minNonZero + 1);
-    ptr += maxNonZero - minNonZero + 1;
-  }
-
-  std::vector<unsigned short> lut(USHORT_RANGE);
-  memset(lut.data(), 0, sizeof(unsigned short) * USHORT_RANGE);
-  unsigned short maxValue = reverseLutFromBitmap(bitmap.data(), lut.data());
-
-  //
-  // Huffman decoding
-  //
-
-  int length;
-
-  // length = *(reinterpret_cast<const int *>(ptr));
-  tinyexr::cpy4(&length, reinterpret_cast<const int *>(ptr));
-  ptr += sizeof(int);
-
-  if (size_t((ptr - inPtr) + length) > inLen) {
-    return false;
-  }
-
-  std::vector<unsigned short> tmpBuffer(tmpBufSize);
-  hufUncompress(reinterpret_cast<const char *>(ptr), length, &tmpBuffer);
-
-  //
-  // Wavelet decoding
-  //
-
-  std::vector<PIZChannelData> channelData(static_cast<size_t>(num_channels));
-
-  unsigned short *tmpBufferEnd = &tmpBuffer.at(0);
-
-  for (size_t i = 0; i < static_cast<size_t>(num_channels); ++i) {
-    const EXRChannelInfo &chan = channels[i];
-
-    size_t pixelSize = sizeof(int);  // UINT and FLOAT
-    if (chan.pixel_type == TINYEXR_PIXELTYPE_HALF) {
-      pixelSize = sizeof(short);
-    }
-
-    channelData[i].start = tmpBufferEnd;
-    channelData[i].end = channelData[i].start;
-    channelData[i].nx = data_width;
-    channelData[i].ny = num_lines;
-    // channelData[i].ys = 1;
-    channelData[i].size = static_cast<int>(pixelSize / sizeof(short));
-
-    tmpBufferEnd += channelData[i].nx * channelData[i].ny * channelData[i].size;
-  }
-
-  for (size_t i = 0; i < channelData.size(); ++i) {
-    PIZChannelData &cd = channelData[i];
-
-    for (int j = 0; j < cd.size; ++j) {
-      wav2Decode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size,
-                 maxValue);
-    }
-  }
-
-  //
-  // Expand the pixel data to their original range
-  //
-
-  applyLut(lut.data(), &tmpBuffer.at(0), static_cast<int>(tmpBufSize));
-
-  for (int y = 0; y < num_lines; y++) {
-    for (size_t i = 0; i < channelData.size(); ++i) {
-      PIZChannelData &cd = channelData[i];
-
-      // if (modp (y, cd.ys) != 0)
-      //    continue;
-
-      size_t n = static_cast<size_t>(cd.nx * cd.size);
-      memcpy(outPtr, cd.end, static_cast<size_t>(n * sizeof(unsigned short)));
-      outPtr += n * sizeof(unsigned short);
-      cd.end += n;
-    }
-  }
-
-  return true;
-}
-#endif  // TINYEXR_USE_PIZ
-
-#if TINYEXR_USE_ZFP
-struct ZFPCompressionParam {
-  double rate;
-  int precision;
-  double tolerance;
-  int type;  // TINYEXR_ZFP_COMPRESSIONTYPE_*
-
-  ZFPCompressionParam() {
-    type = TINYEXR_ZFP_COMPRESSIONTYPE_RATE;
-    rate = 2.0;
-    precision = 0;
-    tolerance = 0.0f;
-  }
-};
-
-bool FindZFPCompressionParam(ZFPCompressionParam *param,
-                             const EXRAttribute *attributes,
-                             int num_attributes) {
-  bool foundType = false;
-
-  for (int i = 0; i < num_attributes; i++) {
-    if ((strcmp(attributes[i].name, "zfpCompressionType") == 0) &&
-        (attributes[i].size == 1)) {
-      param->type = static_cast<int>(attributes[i].value[0]);
-
-      foundType = true;
-    }
-  }
-
-  if (!foundType) {
-    return false;
-  }
-
-  if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) {
-    for (int i = 0; i < num_attributes; i++) {
-      if ((strcmp(attributes[i].name, "zfpCompressionRate") == 0) &&
-          (attributes[i].size == 8)) {
-        param->rate = *(reinterpret_cast<double *>(attributes[i].value));
-        return true;
-      }
-    }
-  } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) {
-    for (int i = 0; i < num_attributes; i++) {
-      if ((strcmp(attributes[i].name, "zfpCompressionPrecision") == 0) &&
-          (attributes[i].size == 4)) {
-        param->rate = *(reinterpret_cast<int *>(attributes[i].value));
-        return true;
-      }
-    }
-  } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) {
-    for (int i = 0; i < num_attributes; i++) {
-      if ((strcmp(attributes[i].name, "zfpCompressionTolerance") == 0) &&
-          (attributes[i].size == 8)) {
-        param->tolerance = *(reinterpret_cast<double *>(attributes[i].value));
-        return true;
-      }
-    }
-  } else {
-    assert(0);
-  }
-
-  return false;
-}
-
-// Assume pixel format is FLOAT for all channels.
-static bool DecompressZfp(float *dst, int dst_width, int dst_num_lines,
-                          int num_channels, const unsigned char *src,
-                          unsigned long src_size,
-                          const ZFPCompressionParam &param) {
-  size_t uncompressed_size = dst_width * dst_num_lines * num_channels;
-
-  if (uncompressed_size == src_size) {
-    // Data is not compressed(Issue 40).
-    memcpy(dst, src, src_size);
-  }
-
-  zfp_stream *zfp = NULL;
-  zfp_field *field = NULL;
-
-  assert((dst_width % 4) == 0);
-  assert((dst_num_lines % 4) == 0);
-
-  if ((dst_width & 3U) || (dst_num_lines & 3U)) {
-    return false;
-  }
-
-  field =
-      zfp_field_2d(reinterpret_cast<void *>(const_cast<unsigned char *>(src)),
-                   zfp_type_float, dst_width, dst_num_lines * num_channels);
-  zfp = zfp_stream_open(NULL);
-
-  if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) {
-    zfp_stream_set_rate(zfp, param.rate, zfp_type_float, /* dimention */ 2,
-                        /* write random access */ 0);
-  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) {
-    zfp_stream_set_precision(zfp, param.precision, zfp_type_float);
-  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) {
-    zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float);
-  } else {
-    assert(0);
-  }
-
-  size_t buf_size = zfp_stream_maximum_size(zfp, field);
-  std::vector<unsigned char> buf(buf_size);
-  memcpy(&buf.at(0), src, src_size);
-
-  bitstream *stream = stream_open(&buf.at(0), buf_size);
-  zfp_stream_set_bit_stream(zfp, stream);
-  zfp_stream_rewind(zfp);
-
-  size_t image_size = dst_width * dst_num_lines;
-
-  for (int c = 0; c < num_channels; c++) {
-    // decompress 4x4 pixel block.
-    for (int y = 0; y < dst_num_lines; y += 4) {
-      for (int x = 0; x < dst_width; x += 4) {
-        float fblock[16];
-        zfp_decode_block_float_2(zfp, fblock);
-        for (int j = 0; j < 4; j++) {
-          for (int i = 0; i < 4; i++) {
-            dst[c * image_size + ((y + j) * dst_width + (x + i))] =
-                fblock[j * 4 + i];
-          }
-        }
-      }
-    }
-  }
-
-  zfp_field_free(field);
-  zfp_stream_close(zfp);
-  stream_close(stream);
-
-  return true;
-}
-
-// Assume pixel format is FLOAT for all channels.
-bool CompressZfp(std::vector<unsigned char> *outBuf, unsigned int *outSize,
-                 const float *inPtr, int width, int num_lines, int num_channels,
-                 const ZFPCompressionParam &param) {
-  zfp_stream *zfp = NULL;
-  zfp_field *field = NULL;
-
-  assert((width % 4) == 0);
-  assert((num_lines % 4) == 0);
-
-  if ((width & 3U) || (num_lines & 3U)) {
-    return false;
-  }
-
-  // create input array.
-  field = zfp_field_2d(reinterpret_cast<void *>(const_cast<float *>(inPtr)),
-                       zfp_type_float, width, num_lines * num_channels);
-
-  zfp = zfp_stream_open(NULL);
-
-  if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) {
-    zfp_stream_set_rate(zfp, param.rate, zfp_type_float, 2, 0);
-  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) {
-    zfp_stream_set_precision(zfp, param.precision, zfp_type_float);
-  } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) {
-    zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float);
-  } else {
-    assert(0);
-  }
-
-  size_t buf_size = zfp_stream_maximum_size(zfp, field);
-
-  outBuf->resize(buf_size);
-
-  bitstream *stream = stream_open(&outBuf->at(0), buf_size);
-  zfp_stream_set_bit_stream(zfp, stream);
-  zfp_field_free(field);
-
-  size_t image_size = width * num_lines;
-
-  for (int c = 0; c < num_channels; c++) {
-    // compress 4x4 pixel block.
-    for (int y = 0; y < num_lines; y += 4) {
-      for (int x = 0; x < width; x += 4) {
-        float fblock[16];
-        for (int j = 0; j < 4; j++) {
-          for (int i = 0; i < 4; i++) {
-            fblock[j * 4 + i] =
-                inPtr[c * image_size + ((y + j) * width + (x + i))];
-          }
-        }
-        zfp_encode_block_float_2(zfp, fblock);
-      }
-    }
-  }
-
-  zfp_stream_flush(zfp);
-  (*outSize) = zfp_stream_compressed_size(zfp);
-
-  zfp_stream_close(zfp);
-
-  return true;
-}
-
-#endif
-
-//
-// -----------------------------------------------------------------
-//
-
-// TODO(syoyo): Refactor function arguments.
-static bool DecodePixelData(/* out */ unsigned char **out_images,
-                            const int *requested_pixel_types,
-                            const unsigned char *data_ptr, size_t data_len,
-                            int compression_type, int line_order, int width,
-                            int height, int x_stride, int y, int line_no,
-                            int num_lines, size_t pixel_data_size,
-                            size_t num_attributes,
-                            const EXRAttribute *attributes, size_t num_channels,
-                            const EXRChannelInfo *channels,
-                            const std::vector<size_t> &channel_offset_list) {
-  if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {  // PIZ
-#if TINYEXR_USE_PIZ
-    if ((width == 0) || (num_lines == 0) || (pixel_data_size == 0)) {
-      // Invalid input #90
-      return false;
-    }
-
-    // Allocate original data size.
-    std::vector<unsigned char> outBuf(static_cast<size_t>(
-        static_cast<size_t>(width * num_lines) * pixel_data_size));
-    size_t tmpBufLen = outBuf.size();
-
-    bool ret = tinyexr::DecompressPiz(
-        reinterpret_cast<unsigned char *>(&outBuf.at(0)), data_ptr, tmpBufLen,
-        data_len, static_cast<int>(num_channels), channels, width, num_lines);
-
-    if (!ret) {
-      return false;
-    }
-
-    // For PIZ_COMPRESSION:
-    //   pixel sample data for channel 0 for scanline 0
-    //   pixel sample data for channel 1 for scanline 0
-    //   pixel sample data for channel ... for scanline 0
-    //   pixel sample data for channel n for scanline 0
-    //   pixel sample data for channel 0 for scanline 1
-    //   pixel sample data for channel 1 for scanline 1
-    //   pixel sample data for channel ... for scanline 1
-    //   pixel sample data for channel n for scanline 1
-    //   ...
-    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            FP16 hf;
-
-            // hf.u = line_ptr[u];
-            // use `cpy` to avoid unaligned memory access when compiler's
-            // optimization is on.
-            tinyexr::cpy2(&(hf.u), line_ptr + u);
-
-            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-              unsigned short *image =
-                  reinterpret_cast<unsigned short **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += static_cast<size_t>(
-                             (height - 1 - (line_no + static_cast<int>(v)))) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = hf.u;
-            } else {  // HALF -> FLOAT
-              FP32 f32 = half_to_float(hf);
-              float *image = reinterpret_cast<float **>(out_images)[c];
-              size_t offset = 0;
-              if (line_order == 0) {
-                offset = (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                offset = static_cast<size_t>(
-                             (height - 1 - (line_no + static_cast<int>(v)))) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              image += offset;
-              *image = f32.f;
-            }
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT);
-
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            unsigned int val;
-            // val = line_ptr[u];
-            tinyexr::cpy4(&val, line_ptr + u);
-
-            tinyexr::swap4(&val);
-
-            unsigned int *image =
-                reinterpret_cast<unsigned int **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += static_cast<size_t>(
-                           (height - 1 - (line_no + static_cast<int>(v)))) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT);
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const float *line_ptr = reinterpret_cast<float *>(&outBuf.at(
-              v * pixel_data_size * static_cast<size_t>(x_stride) +
-              channel_offset_list[c] * static_cast<size_t>(x_stride)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            float val;
-            // val = line_ptr[u];
-            tinyexr::cpy4(&val, line_ptr + u);
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            float *image = reinterpret_cast<float **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += static_cast<size_t>(
-                           (height - 1 - (line_no + static_cast<int>(v)))) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else {
-        assert(0);
-      }
-    }
-#else
-    assert(0 && "PIZ is enabled in this build");
-    return false;
-#endif
-
-  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS ||
-             compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-    // Allocate original data size.
-    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
-                                      static_cast<size_t>(num_lines) *
-                                      pixel_data_size);
-
-    unsigned long dstLen = static_cast<unsigned long>(outBuf.size());
-    assert(dstLen > 0);
-    if (!tinyexr::DecompressZip(
-            reinterpret_cast<unsigned char *>(&outBuf.at(0)), &dstLen, data_ptr,
-            static_cast<unsigned long>(data_len))) {
-      return false;
-    }
-
-    // For ZIP_COMPRESSION:
-    //   pixel sample data for channel 0 for scanline 0
-    //   pixel sample data for channel 1 for scanline 0
-    //   pixel sample data for channel ... for scanline 0
-    //   pixel sample data for channel n for scanline 0
-    //   pixel sample data for channel 0 for scanline 1
-    //   pixel sample data for channel 1 for scanline 1
-    //   pixel sample data for channel ... for scanline 1
-    //   pixel sample data for channel n for scanline 1
-    //   ...
-    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-              &outBuf.at(v * static_cast<size_t>(pixel_data_size) *
-                             static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            tinyexr::FP16 hf;
-
-            // hf.u = line_ptr[u];
-            tinyexr::cpy2(&(hf.u), line_ptr + u);
-
-            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-              unsigned short *image =
-                  reinterpret_cast<unsigned short **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += (static_cast<size_t>(height) - 1U -
-                          (static_cast<size_t>(line_no) + v)) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = hf.u;
-            } else {  // HALF -> FLOAT
-              tinyexr::FP32 f32 = half_to_float(hf);
-              float *image = reinterpret_cast<float **>(out_images)[c];
-              size_t offset = 0;
-              if (line_order == 0) {
-                offset = (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                offset = (static_cast<size_t>(height) - 1U -
-                          (static_cast<size_t>(line_no) + v)) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              image += offset;
-
-              *image = f32.f;
-            }
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT);
-
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            unsigned int val;
-            // val = line_ptr[u];
-            tinyexr::cpy4(&val, line_ptr + u);
-
-            tinyexr::swap4(&val);
-
-            unsigned int *image =
-                reinterpret_cast<unsigned int **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT);
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const float *line_ptr = reinterpret_cast<float *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            float val;
-            // val = line_ptr[u];
-            tinyexr::cpy4(&val, line_ptr + u);
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            float *image = reinterpret_cast<float **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else {
-        assert(0);
-        return false;
-      }
-    }
-  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) {
-    // Allocate original data size.
-    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
-                                      static_cast<size_t>(num_lines) *
-                                      pixel_data_size);
-
-    unsigned long dstLen = static_cast<unsigned long>(outBuf.size());
-    if (dstLen == 0) {
-      return false;
-    }
-
-    if (!tinyexr::DecompressRle(reinterpret_cast<unsigned char *>(&outBuf.at(0)),
-                           dstLen, data_ptr,
-                           static_cast<unsigned long>(data_len))) {
-      return false;
-    }
-
-    // For RLE_COMPRESSION:
-    //   pixel sample data for channel 0 for scanline 0
-    //   pixel sample data for channel 1 for scanline 0
-    //   pixel sample data for channel ... for scanline 0
-    //   pixel sample data for channel n for scanline 0
-    //   pixel sample data for channel 0 for scanline 1
-    //   pixel sample data for channel 1 for scanline 1
-    //   pixel sample data for channel ... for scanline 1
-    //   pixel sample data for channel n for scanline 1
-    //   ...
-    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-              &outBuf.at(v * static_cast<size_t>(pixel_data_size) *
-                             static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            tinyexr::FP16 hf;
-
-            // hf.u = line_ptr[u];
-            tinyexr::cpy2(&(hf.u), line_ptr + u);
-
-            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-              unsigned short *image =
-                  reinterpret_cast<unsigned short **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += (static_cast<size_t>(height) - 1U -
-                          (static_cast<size_t>(line_no) + v)) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = hf.u;
-            } else {  // HALF -> FLOAT
-              tinyexr::FP32 f32 = half_to_float(hf);
-              float *image = reinterpret_cast<float **>(out_images)[c];
-              if (line_order == 0) {
-                image += (static_cast<size_t>(line_no) + v) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              } else {
-                image += (static_cast<size_t>(height) - 1U -
-                          (static_cast<size_t>(line_no) + v)) *
-                             static_cast<size_t>(x_stride) +
-                         u;
-              }
-              *image = f32.f;
-            }
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT);
-
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            unsigned int val;
-            // val = line_ptr[u];
-            tinyexr::cpy4(&val, line_ptr + u);
-
-            tinyexr::swap4(&val);
-
-            unsigned int *image =
-                reinterpret_cast<unsigned int **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT);
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const float *line_ptr = reinterpret_cast<float *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            float val;
-            // val = line_ptr[u];
-            tinyexr::cpy4(&val, line_ptr + u);
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            float *image = reinterpret_cast<float **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else {
-        assert(0);
-        return false;
-      }
-    }
-  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-#if TINYEXR_USE_ZFP
-    tinyexr::ZFPCompressionParam zfp_compression_param;
-    if (!FindZFPCompressionParam(&zfp_compression_param, attributes,
-                                 num_attributes)) {
-      assert(0);
-      return false;
-    }
-
-    // Allocate original data size.
-    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
-                                      static_cast<size_t>(num_lines) *
-                                      pixel_data_size);
-
-    unsigned long dstLen = outBuf.size();
-    assert(dstLen > 0);
-    tinyexr::DecompressZfp(reinterpret_cast<float *>(&outBuf.at(0)), width,
-                           num_lines, num_channels, data_ptr,
-                           static_cast<unsigned long>(data_len),
-                           zfp_compression_param);
-
-    // For ZFP_COMPRESSION:
-    //   pixel sample data for channel 0 for scanline 0
-    //   pixel sample data for channel 1 for scanline 0
-    //   pixel sample data for channel ... for scanline 0
-    //   pixel sample data for channel n for scanline 0
-    //   pixel sample data for channel 0 for scanline 1
-    //   pixel sample data for channel 1 for scanline 1
-    //   pixel sample data for channel ... for scanline 1
-    //   pixel sample data for channel n for scanline 1
-    //   ...
-    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-      assert(channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT);
-      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-        assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT);
-        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-          const float *line_ptr = reinterpret_cast<float *>(
-              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
-                         channel_offset_list[c] * static_cast<size_t>(width)));
-          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
-            float val;
-            tinyexr::cpy4(&val, line_ptr + u);
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            float *image = reinterpret_cast<float **>(out_images)[c];
-            if (line_order == 0) {
-              image += (static_cast<size_t>(line_no) + v) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            } else {
-              image += (static_cast<size_t>(height) - 1U -
-                        (static_cast<size_t>(line_no) + v)) *
-                           static_cast<size_t>(x_stride) +
-                       u;
-            }
-            *image = val;
-          }
-        }
-      } else {
-        assert(0);
-        return false;
-      }
-    }
-#else
-    (void)attributes;
-    (void)num_attributes;
-    (void)num_channels;
-    assert(0);
-    return false;
-#endif
-  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) {
-    for (size_t c = 0; c < num_channels; c++) {
-      for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
-        if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-          const unsigned short *line_ptr =
-              reinterpret_cast<const unsigned short *>(
-                  data_ptr + v * pixel_data_size * size_t(width) +
-                  channel_offset_list[c] * static_cast<size_t>(width));
-
-          if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-            unsigned short *outLine =
-                reinterpret_cast<unsigned short *>(out_images[c]);
-            if (line_order == 0) {
-              outLine += (size_t(y) + v) * size_t(x_stride);
-            } else {
-              outLine +=
-                  (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride);
-            }
-
-            for (int u = 0; u < width; u++) {
-              tinyexr::FP16 hf;
-
-              // hf.u = line_ptr[u];
-              tinyexr::cpy2(&(hf.u), line_ptr + u);
-
-              tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-              outLine[u] = hf.u;
-            }
-          } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
-            float *outLine = reinterpret_cast<float *>(out_images[c]);
-            if (line_order == 0) {
-              outLine += (size_t(y) + v) * size_t(x_stride);
-            } else {
-              outLine +=
-                  (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride);
-            }
-
-            if (reinterpret_cast<const unsigned char *>(line_ptr + width) >
-                (data_ptr + data_len)) {
-              // Insufficient data size
-              return false;
-            }
-
-            for (int u = 0; u < width; u++) {
-              tinyexr::FP16 hf;
-
-              // address may not be aliged. use byte-wise copy for safety.#76
-              // hf.u = line_ptr[u];
-              tinyexr::cpy2(&(hf.u), line_ptr + u);
-
-              tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
-
-              tinyexr::FP32 f32 = half_to_float(hf);
-
-              outLine[u] = f32.f;
-            }
-          } else {
-            assert(0);
-            return false;
-          }
-        } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-          const float *line_ptr = reinterpret_cast<const float *>(
-              data_ptr + v * pixel_data_size * size_t(width) +
-              channel_offset_list[c] * static_cast<size_t>(width));
-
-          float *outLine = reinterpret_cast<float *>(out_images[c]);
-          if (line_order == 0) {
-            outLine += (size_t(y) + v) * size_t(x_stride);
-          } else {
-            outLine +=
-                (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride);
-          }
-
-          if (reinterpret_cast<const unsigned char *>(line_ptr + width) >
-              (data_ptr + data_len)) {
-            // Insufficient data size
-            return false;
-          }
-
-          for (int u = 0; u < width; u++) {
-            float val;
-            tinyexr::cpy4(&val, line_ptr + u);
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            outLine[u] = val;
-          }
-        } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-          const unsigned int *line_ptr = reinterpret_cast<const unsigned int *>(
-              data_ptr + v * pixel_data_size * size_t(width) +
-              channel_offset_list[c] * static_cast<size_t>(width));
-
-          unsigned int *outLine =
-              reinterpret_cast<unsigned int *>(out_images[c]);
-          if (line_order == 0) {
-            outLine += (size_t(y) + v) * size_t(x_stride);
-          } else {
-            outLine +=
-                (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride);
-          }
-
-          for (int u = 0; u < width; u++) {
-            if (reinterpret_cast<const unsigned char *>(line_ptr + u) >=
-                (data_ptr + data_len)) {
-              // Corrupsed data?
-              return false;
-            }
-
-            unsigned int val;
-            tinyexr::cpy4(&val, line_ptr + u);
-
-            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-            outLine[u] = val;
-          }
-        }
-      }
-    }
-  }
-
-  return true;
-}
-
-static void DecodeTiledPixelData(
-    unsigned char **out_images, int *width, int *height,
-    const int *requested_pixel_types, const unsigned char *data_ptr,
-    size_t data_len, int compression_type, int line_order, int data_width,
-    int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x,
-    int tile_size_y, size_t pixel_data_size, size_t num_attributes,
-    const EXRAttribute *attributes, size_t num_channels,
-    const EXRChannelInfo *channels,
-    const std::vector<size_t> &channel_offset_list) {
-  assert(tile_offset_x * tile_size_x < data_width);
-  assert(tile_offset_y * tile_size_y < data_height);
-
-  // Compute actual image size in a tile.
-  if ((tile_offset_x + 1) * tile_size_x >= data_width) {
-    (*width) = data_width - (tile_offset_x * tile_size_x);
-  } else {
-    (*width) = tile_size_x;
-  }
-
-  if ((tile_offset_y + 1) * tile_size_y >= data_height) {
-    (*height) = data_height - (tile_offset_y * tile_size_y);
-  } else {
-    (*height) = tile_size_y;
-  }
-
-  // Image size = tile size.
-  DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len,
-                  compression_type, line_order, (*width), tile_size_y,
-                  /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0,
-                  (*height), pixel_data_size, num_attributes, attributes,
-                  num_channels, channels, channel_offset_list);
-}
-
-static bool ComputeChannelLayout(std::vector<size_t> *channel_offset_list,
-                                 int *pixel_data_size, size_t *channel_offset,
-                                 int num_channels,
-                                 const EXRChannelInfo *channels) {
-  channel_offset_list->resize(static_cast<size_t>(num_channels));
-
-  (*pixel_data_size) = 0;
-  (*channel_offset) = 0;
-
-  for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-    (*channel_offset_list)[c] = (*channel_offset);
-    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-      (*pixel_data_size) += sizeof(unsigned short);
-      (*channel_offset) += sizeof(unsigned short);
-    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-      (*pixel_data_size) += sizeof(float);
-      (*channel_offset) += sizeof(float);
-    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-      (*pixel_data_size) += sizeof(unsigned int);
-      (*channel_offset) += sizeof(unsigned int);
-    } else {
-      // ???
-      return false;
-    }
-  }
-  return true;
-}
-
-static unsigned char **AllocateImage(int num_channels,
-                                     const EXRChannelInfo *channels,
-                                     const int *requested_pixel_types,
-                                     int data_width, int data_height) {
-  unsigned char **images =
-      reinterpret_cast<unsigned char **>(static_cast<float **>(
-          malloc(sizeof(float *) * static_cast<size_t>(num_channels))));
-
-  for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-    size_t data_len =
-        static_cast<size_t>(data_width) * static_cast<size_t>(data_height);
-    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
-      // pixel_data_size += sizeof(unsigned short);
-      // channel_offset += sizeof(unsigned short);
-      // Alloc internal image for half type.
-      if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-        images[c] =
-            reinterpret_cast<unsigned char *>(static_cast<unsigned short *>(
-                malloc(sizeof(unsigned short) * data_len)));
-      } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
-        images[c] = reinterpret_cast<unsigned char *>(
-            static_cast<float *>(malloc(sizeof(float) * data_len)));
-      } else {
-        assert(0);
-      }
-    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
-      // pixel_data_size += sizeof(float);
-      // channel_offset += sizeof(float);
-      images[c] = reinterpret_cast<unsigned char *>(
-          static_cast<float *>(malloc(sizeof(float) * data_len)));
-    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
-      // pixel_data_size += sizeof(unsigned int);
-      // channel_offset += sizeof(unsigned int);
-      images[c] = reinterpret_cast<unsigned char *>(
-          static_cast<unsigned int *>(malloc(sizeof(unsigned int) * data_len)));
-    } else {
-      assert(0);
-    }
-  }
-
-  return images;
-}
-
-static int ParseEXRHeader(HeaderInfo *info, bool *empty_header,
-                          const EXRVersion *version, std::string *err,
-                          const unsigned char *buf, size_t size) {
-  const char *marker = reinterpret_cast<const char *>(&buf[0]);
-
-  if (empty_header) {
-    (*empty_header) = false;
-  }
-
-  if (version->multipart) {
-    if (size > 0 && marker[0] == '\0') {
-      // End of header list.
-      if (empty_header) {
-        (*empty_header) = true;
-      }
-      return TINYEXR_SUCCESS;
-    }
-  }
-
-  // According to the spec, the header of every OpenEXR file must contain at
-  // least the following attributes:
-  //
-  // channels chlist
-  // compression compression
-  // dataWindow box2i
-  // displayWindow box2i
-  // lineOrder lineOrder
-  // pixelAspectRatio float
-  // screenWindowCenter v2f
-  // screenWindowWidth float
-  bool has_channels = false;
-  bool has_compression = false;
-  bool has_data_window = false;
-  bool has_display_window = false;
-  bool has_line_order = false;
-  bool has_pixel_aspect_ratio = false;
-  bool has_screen_window_center = false;
-  bool has_screen_window_width = false;
-
-  info->data_window[0] = 0;
-  info->data_window[1] = 0;
-  info->data_window[2] = 0;
-  info->data_window[3] = 0;
-  info->line_order = 0;  // @fixme
-  info->display_window[0] = 0;
-  info->display_window[1] = 0;
-  info->display_window[2] = 0;
-  info->display_window[3] = 0;
-  info->screen_window_center[0] = 0.0f;
-  info->screen_window_center[1] = 0.0f;
-  info->screen_window_width = -1.0f;
-  info->pixel_aspect_ratio = -1.0f;
-
-  info->tile_size_x = -1;
-  info->tile_size_y = -1;
-  info->tile_level_mode = -1;
-  info->tile_rounding_mode = -1;
-
-  info->attributes.clear();
-
-  // Read attributes
-  size_t orig_size = size;
-  for (size_t nattr = 0; nattr < TINYEXR_MAX_HEADER_ATTRIBUTES; nattr++) {
-    if (0 == size) {
-      if (err) {
-        (*err) += "Insufficient data size for attributes.\n";
-      }
-      return TINYEXR_ERROR_INVALID_DATA;
-    } else if (marker[0] == '\0') {
-      size--;
-      break;
-    }
-
-    std::string attr_name;
-    std::string attr_type;
-    std::vector<unsigned char> data;
-    size_t marker_size;
-    if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size,
-                                marker, size)) {
-      if (err) {
-        (*err) += "Failed to read attribute.\n";
-      }
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-    marker += marker_size;
-    size -= marker_size;
-
-    if (version->tiled && attr_name.compare("tiles") == 0) {
-      unsigned int x_size, y_size;
-      unsigned char tile_mode;
-      assert(data.size() == 9);
-      memcpy(&x_size, &data.at(0), sizeof(int));
-      memcpy(&y_size, &data.at(4), sizeof(int));
-      tile_mode = data[8];
-      tinyexr::swap4(&x_size);
-      tinyexr::swap4(&y_size);
-
-      info->tile_size_x = static_cast<int>(x_size);
-      info->tile_size_y = static_cast<int>(y_size);
-
-      // mode = levelMode + roundingMode * 16
-      info->tile_level_mode = tile_mode & 0x3;
-      info->tile_rounding_mode = (tile_mode >> 4) & 0x1;
-
-    } else if (attr_name.compare("compression") == 0) {
-      bool ok = false;
-      if (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ) {
-        ok = true;
-      }
-
-      if (data[0] == TINYEXR_COMPRESSIONTYPE_PIZ) {
-#if TINYEXR_USE_PIZ
-        ok = true;
-#else
-        if (err) {
-          (*err) = "PIZ compression is not supported.";
-        }
-        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-#endif
-      }
-
-      if (data[0] == TINYEXR_COMPRESSIONTYPE_ZFP) {
-#if TINYEXR_USE_ZFP
-        ok = true;
-#else
-        if (err) {
-          (*err) = "ZFP compression is not supported.";
-        }
-        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-#endif
-      }
-
-      if (!ok) {
-        if (err) {
-          (*err) = "Unknown compression type.";
-        }
-        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-      }
-
-      info->compression_type = static_cast<int>(data[0]);
-      has_compression = true;
-
-    } else if (attr_name.compare("channels") == 0) {
-      // name: zero-terminated string, from 1 to 255 bytes long
-      // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2
-      // pLinear: unsigned char, possible values are 0 and 1
-      // reserved: three chars, should be zero
-      // xSampling: int
-      // ySampling: int
-
-      if (!ReadChannelInfo(info->channels, data)) {
-        if (err) {
-          (*err) += "Failed to parse channel info.\n";
-        }
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-
-      if (info->channels.size() < 1) {
-        if (err) {
-          (*err) += "# of channels is zero.\n";
-        }
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-
-      has_channels = true;
-
-    } else if (attr_name.compare("dataWindow") == 0) {
-      if (data.size() >= 16) {
-        memcpy(&info->data_window[0], &data.at(0), sizeof(int));
-        memcpy(&info->data_window[1], &data.at(4), sizeof(int));
-        memcpy(&info->data_window[2], &data.at(8), sizeof(int));
-        memcpy(&info->data_window[3], &data.at(12), sizeof(int));
-        tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[0]));
-        tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[1]));
-        tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[2]));
-        tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->data_window[3]));
-        has_data_window = true;
-      }
-    } else if (attr_name.compare("displayWindow") == 0) {
-      if (data.size() >= 16) {
-        memcpy(&info->display_window[0], &data.at(0), sizeof(int));
-        memcpy(&info->display_window[1], &data.at(4), sizeof(int));
-        memcpy(&info->display_window[2], &data.at(8), sizeof(int));
-        memcpy(&info->display_window[3], &data.at(12), sizeof(int));
-        tinyexr::swap4(
-            reinterpret_cast<unsigned int *>(&info->display_window[0]));
-        tinyexr::swap4(
-            reinterpret_cast<unsigned int *>(&info->display_window[1]));
-        tinyexr::swap4(
-            reinterpret_cast<unsigned int *>(&info->display_window[2]));
-        tinyexr::swap4(
-            reinterpret_cast<unsigned int *>(&info->display_window[3]));
-
-        has_display_window = true;
-      }
-    } else if (attr_name.compare("lineOrder") == 0) {
-      if (data.size() >= 1) {
-        info->line_order = static_cast<int>(data[0]);
-        has_line_order = true;
-      }
-    } else if (attr_name.compare("pixelAspectRatio") == 0) {
-      if (data.size() >= sizeof(float)) {
-        memcpy(&info->pixel_aspect_ratio, &data.at(0), sizeof(float));
-        tinyexr::swap4(
-            reinterpret_cast<unsigned int *>(&info->pixel_aspect_ratio));
-        has_pixel_aspect_ratio = true;
-      }
-    } else if (attr_name.compare("screenWindowCenter") == 0) {
-      if (data.size() >= 8) {
-        memcpy(&info->screen_window_center[0], &data.at(0), sizeof(float));
-        memcpy(&info->screen_window_center[1], &data.at(4), sizeof(float));
-        tinyexr::swap4(
-            reinterpret_cast<unsigned int *>(&info->screen_window_center[0]));
-        tinyexr::swap4(
-            reinterpret_cast<unsigned int *>(&info->screen_window_center[1]));
-        has_screen_window_center = true;
-      }
-    } else if (attr_name.compare("screenWindowWidth") == 0) {
-      if (data.size() >= sizeof(float)) {
-        memcpy(&info->screen_window_width, &data.at(0), sizeof(float));
-        tinyexr::swap4(
-            reinterpret_cast<unsigned int *>(&info->screen_window_width));
-
-        has_screen_window_width = true;
-      }
-    } else if (attr_name.compare("chunkCount") == 0) {
-      if (data.size() >= sizeof(int)) {
-        memcpy(&info->chunk_count, &data.at(0), sizeof(int));
-        tinyexr::swap4(reinterpret_cast<unsigned int *>(&info->chunk_count));
-      }
-    } else {
-      // Custom attribute(up to TINYEXR_MAX_CUSTOM_ATTRIBUTES)
-      if (info->attributes.size() < TINYEXR_MAX_CUSTOM_ATTRIBUTES) {
-        EXRAttribute attrib;
-#ifdef _MSC_VER
-        strncpy_s(attrib.name, attr_name.c_str(), 255);
-        strncpy_s(attrib.type, attr_type.c_str(), 255);
-#else
-        strncpy(attrib.name, attr_name.c_str(), 255);
-        strncpy(attrib.type, attr_type.c_str(), 255);
-#endif
-        attrib.name[255] = '\0';
-        attrib.type[255] = '\0';
-        attrib.size = static_cast<int>(data.size());
-        attrib.value = static_cast<unsigned char *>(malloc(data.size()));
-        memcpy(reinterpret_cast<char *>(attrib.value), &data.at(0),
-               data.size());
-        info->attributes.push_back(attrib);
-      }
-    }
-  }
-
-  // Check if required attributes exist
-  {
-    std::stringstream ss_err;
-
-    if (!has_compression) {
-      ss_err << "\"compression\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!has_channels) {
-      ss_err << "\"channels\" attribute not found in the header." << std::endl;
-    }
-
-    if (!has_line_order) {
-      ss_err << "\"lineOrder\" attribute not found in the header." << std::endl;
-    }
-
-    if (!has_display_window) {
-      ss_err << "\"displayWindow\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!has_data_window) {
-      ss_err << "\"dataWindow\" attribute not found in the header or invalid."
-             << std::endl;
-    }
-
-    if (!has_pixel_aspect_ratio) {
-      ss_err << "\"pixelAspectRatio\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!has_screen_window_width) {
-      ss_err << "\"screenWindowWidth\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!has_screen_window_center) {
-      ss_err << "\"screenWindowCenter\" attribute not found in the header."
-             << std::endl;
-    }
-
-    if (!(ss_err.str().empty())) {
-      if (err) {
-        (*err) += ss_err.str();
-      }
-      return TINYEXR_ERROR_INVALID_HEADER;
-    }
-  }
-
-  info->header_len = static_cast<unsigned int>(orig_size - size);
-
-  return TINYEXR_SUCCESS;
-}
-
-// C++ HeaderInfo to C EXRHeader conversion.
-static void ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info) {
-  exr_header->pixel_aspect_ratio = info.pixel_aspect_ratio;
-  exr_header->screen_window_center[0] = info.screen_window_center[0];
-  exr_header->screen_window_center[1] = info.screen_window_center[1];
-  exr_header->screen_window_width = info.screen_window_width;
-  exr_header->chunk_count = info.chunk_count;
-  exr_header->display_window[0] = info.display_window[0];
-  exr_header->display_window[1] = info.display_window[1];
-  exr_header->display_window[2] = info.display_window[2];
-  exr_header->display_window[3] = info.display_window[3];
-  exr_header->data_window[0] = info.data_window[0];
-  exr_header->data_window[1] = info.data_window[1];
-  exr_header->data_window[2] = info.data_window[2];
-  exr_header->data_window[3] = info.data_window[3];
-  exr_header->line_order = info.line_order;
-  exr_header->compression_type = info.compression_type;
-
-  exr_header->tile_size_x = info.tile_size_x;
-  exr_header->tile_size_y = info.tile_size_y;
-  exr_header->tile_level_mode = info.tile_level_mode;
-  exr_header->tile_rounding_mode = info.tile_rounding_mode;
-
-  exr_header->num_channels = static_cast<int>(info.channels.size());
-
-  exr_header->channels = static_cast<EXRChannelInfo *>(malloc(
-      sizeof(EXRChannelInfo) * static_cast<size_t>(exr_header->num_channels)));
-  for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-#ifdef _MSC_VER
-    strncpy_s(exr_header->channels[c].name, info.channels[c].name.c_str(), 255);
-#else
-    strncpy(exr_header->channels[c].name, info.channels[c].name.c_str(), 255);
-#endif
-    // manually add '\0' for safety.
-    exr_header->channels[c].name[255] = '\0';
-
-    exr_header->channels[c].pixel_type = info.channels[c].pixel_type;
-    exr_header->channels[c].p_linear = info.channels[c].p_linear;
-    exr_header->channels[c].x_sampling = info.channels[c].x_sampling;
-    exr_header->channels[c].y_sampling = info.channels[c].y_sampling;
-  }
-
-  exr_header->pixel_types = static_cast<int *>(
-      malloc(sizeof(int) * static_cast<size_t>(exr_header->num_channels)));
-  for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-    exr_header->pixel_types[c] = info.channels[c].pixel_type;
-  }
-
-  // Initially fill with values of `pixel_types`
-  exr_header->requested_pixel_types = static_cast<int *>(
-      malloc(sizeof(int) * static_cast<size_t>(exr_header->num_channels)));
-  for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-    exr_header->requested_pixel_types[c] = info.channels[c].pixel_type;
-  }
-
-  exr_header->num_custom_attributes = static_cast<int>(info.attributes.size());
-
-  if (exr_header->num_custom_attributes > 0) {
-    // TODO(syoyo): Report warning when # of attributes exceeds
-    // `TINYEXR_MAX_CUSTOM_ATTRIBUTES`
-    if (exr_header->num_custom_attributes > TINYEXR_MAX_CUSTOM_ATTRIBUTES) {
-      exr_header->num_custom_attributes = TINYEXR_MAX_CUSTOM_ATTRIBUTES;
-    }
-
-    exr_header->custom_attributes = static_cast<EXRAttribute *>(malloc(
-        sizeof(EXRAttribute) * size_t(exr_header->num_custom_attributes)));
-
-    for (size_t i = 0; i < info.attributes.size(); i++) {
-      memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name,
-             256);
-      memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type,
-             256);
-      exr_header->custom_attributes[i].size = info.attributes[i].size;
-      // Just copy poiner
-      exr_header->custom_attributes[i].value = info.attributes[i].value;
-    }
-
-  } else {
-    exr_header->custom_attributes = NULL;
-  }
-
-  exr_header->header_len = info.header_len;
-}
-
-static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header,
-                       const std::vector<tinyexr::tinyexr_uint64> &offsets,
-                       const unsigned char *head, const size_t size,
-                       std::string *err) {
-  int num_channels = exr_header->num_channels;
-
-  int num_scanline_blocks = 1;
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-    num_scanline_blocks = 16;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    num_scanline_blocks = 32;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    num_scanline_blocks = 16;
-  }
-
-  int data_width = exr_header->data_window[2] - exr_header->data_window[0] + 1;
-  int data_height = exr_header->data_window[3] - exr_header->data_window[1] + 1;
-
-  if ((data_width < 0) || (data_height < 0)) {
-    if (err) {
-      std::stringstream ss;
-      ss << "Invalid data width or data height: " << data_width << ", "
-         << data_height << std::endl;
-      (*err) += ss.str();
-    }
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  // Do not allow too large data_width and data_height. header invalid?
-  {
-    const int threshold = 1024 * 8192;  // heuristics
-    if ((data_width > threshold) || (data_height > threshold)) {
-      if (err) {
-        std::stringstream ss;
-        ss << "data_with or data_height too large. data_width: " << data_width
-           << ", "
-           << "data_height = " << data_height << std::endl;
-        (*err) += ss.str();
-      }
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-  }
-
-  size_t num_blocks = offsets.size();
-
-  std::vector<size_t> channel_offset_list;
-  int pixel_data_size = 0;
-  size_t channel_offset = 0;
-  if (!tinyexr::ComputeChannelLayout(&channel_offset_list, &pixel_data_size,
-                                     &channel_offset, num_channels,
-                                     exr_header->channels)) {
-    if (err) {
-      (*err) += "Failed to compute channel layout.\n";
-    }
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  bool invalid_data = false;  // TODO(LTE): Use atomic lock for MT safety.
-
-  if (exr_header->tiled) {
-    // value check
-    if (exr_header->tile_size_x < 0) {
-      if (err) {
-        std::stringstream ss;
-        ss << "Invalid tile size x : " << exr_header->tile_size_x << "\n";
-        (*err) += ss.str();
-      }
-      return TINYEXR_ERROR_INVALID_HEADER;
-    }
-
-    if (exr_header->tile_size_y < 0) {
-      if (err) {
-        std::stringstream ss;
-        ss << "Invalid tile size y : " << exr_header->tile_size_y << "\n";
-        (*err) += ss.str();
-      }
-      return TINYEXR_ERROR_INVALID_HEADER;
-    }
-
-    size_t num_tiles = offsets.size();  // = # of blocks
-
-    exr_image->tiles = static_cast<EXRTile *>(
-        calloc(sizeof(EXRTile), static_cast<size_t>(num_tiles)));
-
-    for (size_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
-      // Allocate memory for each tile.
-      exr_image->tiles[tile_idx].images = tinyexr::AllocateImage(
-          num_channels, exr_header->channels, exr_header->requested_pixel_types,
-          exr_header->tile_size_x, exr_header->tile_size_y);
-
-      // 16 byte: tile coordinates
-      // 4 byte : data size
-      // ~      : data(uncompressed or compressed)
-      if (offsets[tile_idx] + sizeof(int) * 5 > size) {
-        if (err) {
-          (*err) += "Insufficient data size.\n";
-        }
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-
-      size_t data_size = size_t(size - (offsets[tile_idx] + sizeof(int) * 5));
-      const unsigned char *data_ptr =
-          reinterpret_cast<const unsigned char *>(head + offsets[tile_idx]);
-
-      int tile_coordinates[4];
-      memcpy(tile_coordinates, data_ptr, sizeof(int) * 4);
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[0]));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[1]));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[2]));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&tile_coordinates[3]));
-
-      // @todo{ LoD }
-      if (tile_coordinates[2] != 0) {
-        return TINYEXR_ERROR_UNSUPPORTED_FEATURE;
-      }
-      if (tile_coordinates[3] != 0) {
-        return TINYEXR_ERROR_UNSUPPORTED_FEATURE;
-      }
-
-      int data_len;
-      memcpy(&data_len, data_ptr + 16,
-             sizeof(int));  // 16 = sizeof(tile_coordinates)
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len));
-
-      if (data_len < 4 || size_t(data_len) > data_size) {
-        if (err) {
-          (*err) += "Insufficient data length.\n";
-        }
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-
-      // Move to data addr: 20 = 16 + 4;
-      data_ptr += 20;
-
-      tinyexr::DecodeTiledPixelData(
-          exr_image->tiles[tile_idx].images,
-          &(exr_image->tiles[tile_idx].width),
-          &(exr_image->tiles[tile_idx].height),
-          exr_header->requested_pixel_types, data_ptr,
-          static_cast<size_t>(data_len), exr_header->compression_type,
-          exr_header->line_order, data_width, data_height, tile_coordinates[0],
-          tile_coordinates[1], exr_header->tile_size_x, exr_header->tile_size_y,
-          static_cast<size_t>(pixel_data_size),
-          static_cast<size_t>(exr_header->num_custom_attributes),
-          exr_header->custom_attributes,
-          static_cast<size_t>(exr_header->num_channels), exr_header->channels,
-          channel_offset_list);
-
-      exr_image->tiles[tile_idx].offset_x = tile_coordinates[0];
-      exr_image->tiles[tile_idx].offset_y = tile_coordinates[1];
-      exr_image->tiles[tile_idx].level_x = tile_coordinates[2];
-      exr_image->tiles[tile_idx].level_y = tile_coordinates[3];
-
-      exr_image->num_tiles = static_cast<int>(num_tiles);
-    }
-  } else {  // scanline format
-
-    // Don't allow too large image(256GB * pixel_data_size or more). Workaround
-    // for #104.
-    size_t total_data_len =
-        size_t(data_width) * size_t(data_height) * size_t(num_channels);
-    const bool total_data_len_overflown = sizeof(void*) == 8 ? (total_data_len >= 0x4000000000) : false;
-    if ((total_data_len == 0) || total_data_len_overflown ) {
-      if (err) {
-        std::stringstream ss;
-        ss << "Image data size is zero or too large: width = " << data_width
-           << ", height = " << data_height << ", channels = " << num_channels
-           << std::endl;
-        (*err) += ss.str();
-      }
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    exr_image->images = tinyexr::AllocateImage(
-        num_channels, exr_header->channels, exr_header->requested_pixel_types,
-        data_width, data_height);
-
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
-    for (int y = 0; y < static_cast<int>(num_blocks); y++) {
-      size_t y_idx = static_cast<size_t>(y);
-
-      if (offsets[y_idx] + sizeof(int) * 2 > size) {
-        invalid_data = true;
-      } else {
-        // 4 byte: scan line
-        // 4 byte: data size
-        // ~     : pixel data(uncompressed or compressed)
-        size_t data_size = size_t(size - (offsets[y_idx] + sizeof(int) * 2));
-        const unsigned char *data_ptr =
-            reinterpret_cast<const unsigned char *>(head + offsets[y_idx]);
-
-        int line_no;
-        memcpy(&line_no, data_ptr, sizeof(int));
-        int data_len;
-        memcpy(&data_len, data_ptr + 4, sizeof(int));
-        tinyexr::swap4(reinterpret_cast<unsigned int *>(&line_no));
-        tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len));
-
-        if (size_t(data_len) > data_size) {
-          invalid_data = true;
-
-        } else if ((line_no > (2 << 20)) || (line_no < -(2 << 20))) {
-          // Too large value. Assume this is invalid
-          // 2**20 = 1048576 = heuristic value.
-          invalid_data = true;
-        } else if (data_len == 0) {
-          // TODO(syoyo): May be ok to raise the threshold for example `data_len
-          // < 4`
-          invalid_data = true;
-        } else {
-          // line_no may be negative.
-          int end_line_no = (std::min)(line_no + num_scanline_blocks,
-                                       (exr_header->data_window[3] + 1));
-
-          int num_lines = end_line_no - line_no;
-
-          if (num_lines <= 0) {
-            invalid_data = true;
-          } else {
-            // Move to data addr: 8 = 4 + 4;
-            data_ptr += 8;
-
-            // Adjust line_no with data_window.bmin.y
-
-            // overflow check
-            tinyexr_int64 lno = static_cast<tinyexr_int64>(line_no) - static_cast<tinyexr_int64>(exr_header->data_window[1]);
-            if (lno > std::numeric_limits<int>::max()) {
-              line_no = -1; // invalid
-            } else if (lno < -std::numeric_limits<int>::max()) {
-              line_no = -1; // invalid
-            } else {
-              line_no -= exr_header->data_window[1];
-            }
-
-            if (line_no < 0) {
-              invalid_data = true;
-            } else {
-              if (!tinyexr::DecodePixelData(
-                      exr_image->images, exr_header->requested_pixel_types,
-                      data_ptr, static_cast<size_t>(data_len),
-                      exr_header->compression_type, exr_header->line_order,
-                      data_width, data_height, data_width, y, line_no,
-                      num_lines, static_cast<size_t>(pixel_data_size),
-                      static_cast<size_t>(exr_header->num_custom_attributes),
-                      exr_header->custom_attributes,
-                      static_cast<size_t>(exr_header->num_channels),
-                      exr_header->channels, channel_offset_list)) {
-                invalid_data = true;
-              }
-            }
-          }
-        }
-      }
-    }  // omp parallel
-  }
-
-  if (invalid_data) {
-    if (err) {
-      std::stringstream ss;
-      (*err) += "Invalid data found when decoding pixels.\n";
-    }
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  // Overwrite `pixel_type` with `requested_pixel_type`.
-  {
-    for (int c = 0; c < exr_header->num_channels; c++) {
-      exr_header->pixel_types[c] = exr_header->requested_pixel_types[c];
-    }
-  }
-
-  {
-    exr_image->num_channels = num_channels;
-
-    exr_image->width = data_width;
-    exr_image->height = data_height;
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-static bool ReconstructLineOffsets(
-    std::vector<tinyexr::tinyexr_uint64> *offsets, size_t n,
-    const unsigned char *head, const unsigned char *marker, const size_t size) {
-  assert(head < marker);
-  assert(offsets->size() == n);
-
-  for (size_t i = 0; i < n; i++) {
-    size_t offset = static_cast<size_t>(marker - head);
-    // Offset should not exceed whole EXR file/data size.
-    if ((offset + sizeof(tinyexr::tinyexr_uint64)) >= size) {
-      return false;
-    }
-
-    int y;
-    unsigned int data_len;
-
-    memcpy(&y, marker, sizeof(int));
-    memcpy(&data_len, marker + 4, sizeof(unsigned int));
-
-    if (data_len >= size) {
-      return false;
-    }
-
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&y));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data_len));
-
-    (*offsets)[i] = offset;
-
-    marker += data_len + 8;  // 8 = 4 bytes(y) + 4 bytes(data_len)
-  }
-
-  return true;
-}
-
-static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header,
-                          const unsigned char *head,
-                          const unsigned char *marker, const size_t size,
-                          const char **err) {
-  if (exr_image == NULL || exr_header == NULL || head == NULL ||
-      marker == NULL || (size <= tinyexr::kEXRVersionSize)) {
-    tinyexr::SetErrorMessage("Invalid argument for DecodeEXRImage().", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  int num_scanline_blocks = 1;
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-    num_scanline_blocks = 16;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    num_scanline_blocks = 32;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    num_scanline_blocks = 16;
-  }
-
-  int data_width = exr_header->data_window[2] - exr_header->data_window[0];
-  if (data_width >= std::numeric_limits<int>::max()) {
-    // Issue 63
-    tinyexr::SetErrorMessage("Invalid data width value", err);
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-  data_width++;
-
-  int data_height = exr_header->data_window[3] - exr_header->data_window[1];
-  if (data_height >= std::numeric_limits<int>::max()) {
-    tinyexr::SetErrorMessage("Invalid data height value", err);
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-  data_height++;
-
-  if ((data_width < 0) || (data_height < 0)) {
-    tinyexr::SetErrorMessage("data width or data height is negative.", err);
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  // Do not allow too large data_width and data_height. header invalid?
-  {
-    const int threshold = 1024 * 8192;  // heuristics
-    if (data_width > threshold) {
-      tinyexr::SetErrorMessage("data width too large.", err);
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-    if (data_height > threshold) {
-      tinyexr::SetErrorMessage("data height too large.", err);
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-  }
-
-  // Read offset tables.
-  size_t num_blocks = 0;
-
-  if (exr_header->chunk_count > 0) {
-    // Use `chunkCount` attribute.
-    num_blocks = static_cast<size_t>(exr_header->chunk_count);
-  } else if (exr_header->tiled) {
-    // @todo { LoD }
-    size_t num_x_tiles = static_cast<size_t>(data_width) /
-                         static_cast<size_t>(exr_header->tile_size_x);
-    if (num_x_tiles * static_cast<size_t>(exr_header->tile_size_x) <
-        static_cast<size_t>(data_width)) {
-      num_x_tiles++;
-    }
-    size_t num_y_tiles = static_cast<size_t>(data_height) /
-                         static_cast<size_t>(exr_header->tile_size_y);
-    if (num_y_tiles * static_cast<size_t>(exr_header->tile_size_y) <
-        static_cast<size_t>(data_height)) {
-      num_y_tiles++;
-    }
-
-    num_blocks = num_x_tiles * num_y_tiles;
-  } else {
-    num_blocks = static_cast<size_t>(data_height) /
-                 static_cast<size_t>(num_scanline_blocks);
-    if (num_blocks * static_cast<size_t>(num_scanline_blocks) <
-        static_cast<size_t>(data_height)) {
-      num_blocks++;
-    }
-  }
-
-  std::vector<tinyexr::tinyexr_uint64> offsets(num_blocks);
-
-  for (size_t y = 0; y < num_blocks; y++) {
-    tinyexr::tinyexr_uint64 offset;
-    // Issue #81
-    if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) {
-      tinyexr::SetErrorMessage("Insufficient data size in offset table.", err);
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64));
-    tinyexr::swap8(&offset);
-    if (offset >= size) {
-      tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err);
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-    marker += sizeof(tinyexr::tinyexr_uint64);  // = 8
-    offsets[y] = offset;
-  }
-
-  // If line offsets are invalid, we try to reconstruct it.
-  // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details.
-  for (size_t y = 0; y < num_blocks; y++) {
-    if (offsets[y] <= 0) {
-      // TODO(syoyo) Report as warning?
-      // if (err) {
-      //  stringstream ss;
-      //  ss << "Incomplete lineOffsets." << std::endl;
-      //  (*err) += ss.str();
-      //}
-      bool ret =
-          ReconstructLineOffsets(&offsets, num_blocks, head, marker, size);
-      if (ret) {
-        // OK
-        break;
-      } else {
-        tinyexr::SetErrorMessage(
-            "Cannot reconstruct lineOffset table in DecodeEXRImage.", err);
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-    }
-  }
-
-  {
-    std::string e;
-    int ret = DecodeChunk(exr_image, exr_header, offsets, head, size, &e);
-
-    if (ret != TINYEXR_SUCCESS) {
-      if (!e.empty()) {
-        tinyexr::SetErrorMessage(e, err);
-      }
-
-      // release memory(if exists)
-      if ((exr_header->num_channels > 0) && exr_image && exr_image->images) {
-        for (size_t c = 0; c < size_t(exr_header->num_channels); c++) {
-          if (exr_image->images[c]) {
-            free(exr_image->images[c]);
-            exr_image->images[c] = NULL;
-          }
-        }
-        free(exr_image->images);
-        exr_image->images = NULL;
-      }
-    }
-
-    return ret;
-  }
-}
-
-}  // namespace tinyexr
-
-int LoadEXR(float **out_rgba, int *width, int *height, const char *filename,
-            const char **err) {
-  if (out_rgba == NULL) {
-    tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  EXRVersion exr_version;
-  EXRImage exr_image;
-  EXRHeader exr_header;
-  InitEXRHeader(&exr_header);
-  InitEXRImage(&exr_image);
-
-  {
-    int ret = ParseEXRVersionFromFile(&exr_version, filename);
-    if (ret != TINYEXR_SUCCESS) {
-      tinyexr::SetErrorMessage("Invalid EXR header.", err);
-      return ret;
-    }
-
-    if (exr_version.multipart || exr_version.non_image) {
-      tinyexr::SetErrorMessage(
-          "Loading multipart or DeepImage is not supported  in LoadEXR() API",
-          err);
-      return TINYEXR_ERROR_INVALID_DATA;  // @fixme.
-    }
-  }
-
-  {
-    int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err);
-    if (ret != TINYEXR_SUCCESS) {
-      FreeEXRHeader(&exr_header);
-      return ret;
-    }
-  }
-
-  // Read HALF channel as FLOAT.
-  for (int i = 0; i < exr_header.num_channels; i++) {
-    if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) {
-      exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT;
-    }
-  }
-
-  {
-    int ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, err);
-    if (ret != TINYEXR_SUCCESS) {
-      FreeEXRHeader(&exr_header);
-      return ret;
-    }
-  }
-
-  // RGBA
-  int idxR = -1;
-  int idxG = -1;
-  int idxB = -1;
-  int idxA = -1;
-  for (int c = 0; c < exr_header.num_channels; c++) {
-    if (strcmp(exr_header.channels[c].name, "R") == 0) {
-      idxR = c;
-    } else if (strcmp(exr_header.channels[c].name, "G") == 0) {
-      idxG = c;
-    } else if (strcmp(exr_header.channels[c].name, "B") == 0) {
-      idxB = c;
-    } else if (strcmp(exr_header.channels[c].name, "A") == 0) {
-      idxA = c;
-    }
-  }
-
-  if (exr_header.num_channels == 1) {
-    // Grayscale channel only.
-
-    (*out_rgba) = reinterpret_cast<float *>(
-        malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
-               static_cast<size_t>(exr_image.height)));
-
-    if (exr_header.tiled) {
-      for (int it = 0; it < exr_image.num_tiles; it++) {
-        for (int j = 0; j < exr_header.tile_size_y; j++) {
-          for (int i = 0; i < exr_header.tile_size_x; i++) {
-            const int ii =
-                exr_image.tiles[it].offset_x * exr_header.tile_size_x + i;
-            const int jj =
-                exr_image.tiles[it].offset_y * exr_header.tile_size_y + j;
-            const int idx = ii + jj * exr_image.width;
-
-            // out of region check.
-            if (ii >= exr_image.width) {
-              continue;
-            }
-            if (jj >= exr_image.height) {
-              continue;
-            }
-            const int srcIdx = i + j * exr_header.tile_size_x;
-            unsigned char **src = exr_image.tiles[it].images;
-            (*out_rgba)[4 * idx + 0] =
-                reinterpret_cast<float **>(src)[0][srcIdx];
-            (*out_rgba)[4 * idx + 1] =
-                reinterpret_cast<float **>(src)[0][srcIdx];
-            (*out_rgba)[4 * idx + 2] =
-                reinterpret_cast<float **>(src)[0][srcIdx];
-            (*out_rgba)[4 * idx + 3] =
-                reinterpret_cast<float **>(src)[0][srcIdx];
-          }
-        }
-      }
-    } else {
-      for (int i = 0; i < exr_image.width * exr_image.height; i++) {
-        const float val = reinterpret_cast<float **>(exr_image.images)[0][i];
-        (*out_rgba)[4 * i + 0] = val;
-        (*out_rgba)[4 * i + 1] = val;
-        (*out_rgba)[4 * i + 2] = val;
-        (*out_rgba)[4 * i + 3] = val;
-      }
-    }
-  } else {
-    // Assume RGB(A)
-
-    if (idxR == -1) {
-      tinyexr::SetErrorMessage("R channel not found", err);
-
-      // @todo { free exr_image }
-      FreeEXRHeader(&exr_header);
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    if (idxG == -1) {
-      tinyexr::SetErrorMessage("G channel not found", err);
-      // @todo { free exr_image }
-      FreeEXRHeader(&exr_header);
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    if (idxB == -1) {
-      tinyexr::SetErrorMessage("B channel not found", err);
-      // @todo { free exr_image }
-      FreeEXRHeader(&exr_header);
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    (*out_rgba) = reinterpret_cast<float *>(
-        malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
-               static_cast<size_t>(exr_image.height)));
-    if (exr_header.tiled) {
-      for (int it = 0; it < exr_image.num_tiles; it++) {
-        for (int j = 0; j < exr_header.tile_size_y; j++) {
-          for (int i = 0; i < exr_header.tile_size_x; i++) {
-            const int ii =
-                exr_image.tiles[it].offset_x * exr_header.tile_size_x + i;
-            const int jj =
-                exr_image.tiles[it].offset_y * exr_header.tile_size_y + j;
-            const int idx = ii + jj * exr_image.width;
-
-            // out of region check.
-            if (ii >= exr_image.width) {
-              continue;
-            }
-            if (jj >= exr_image.height) {
-              continue;
-            }
-            const int srcIdx = i + j * exr_header.tile_size_x;
-            unsigned char **src = exr_image.tiles[it].images;
-            (*out_rgba)[4 * idx + 0] =
-                reinterpret_cast<float **>(src)[idxR][srcIdx];
-            (*out_rgba)[4 * idx + 1] =
-                reinterpret_cast<float **>(src)[idxG][srcIdx];
-            (*out_rgba)[4 * idx + 2] =
-                reinterpret_cast<float **>(src)[idxB][srcIdx];
-            if (idxA != -1) {
-              (*out_rgba)[4 * idx + 3] =
-                  reinterpret_cast<float **>(src)[idxA][srcIdx];
-            } else {
-              (*out_rgba)[4 * idx + 3] = 1.0;
-            }
-          }
-        }
-      }
-    } else {
-      for (int i = 0; i < exr_image.width * exr_image.height; i++) {
-        (*out_rgba)[4 * i + 0] =
-            reinterpret_cast<float **>(exr_image.images)[idxR][i];
-        (*out_rgba)[4 * i + 1] =
-            reinterpret_cast<float **>(exr_image.images)[idxG][i];
-        (*out_rgba)[4 * i + 2] =
-            reinterpret_cast<float **>(exr_image.images)[idxB][i];
-        if (idxA != -1) {
-          (*out_rgba)[4 * i + 3] =
-              reinterpret_cast<float **>(exr_image.images)[idxA][i];
-        } else {
-          (*out_rgba)[4 * i + 3] = 1.0;
-        }
-      }
-    }
-  }
-
-  (*width) = exr_image.width;
-  (*height) = exr_image.height;
-
-  FreeEXRHeader(&exr_header);
-  FreeEXRImage(&exr_image);
-
-  return TINYEXR_SUCCESS;
-}
-
-int IsEXR(const char *filename) {
-  EXRVersion exr_version;
-
-  int ret = ParseEXRVersionFromFile(&exr_version, filename);
-  if (ret != TINYEXR_SUCCESS) {
-    return TINYEXR_ERROR_INVALID_HEADER;
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version,
-                             const unsigned char *memory, size_t size,
-                             const char **err) {
-  if (memory == NULL || exr_header == NULL) {
-    tinyexr::SetErrorMessage(
-        "Invalid argument. `memory` or `exr_header` argument is null in "
-        "ParseEXRHeaderFromMemory()",
-        err);
-
-    // Invalid argument
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (size < tinyexr::kEXRVersionSize) {
-    tinyexr::SetErrorMessage("Insufficient header/data size.\n", err);
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  const unsigned char *marker = memory + tinyexr::kEXRVersionSize;
-  size_t marker_size = size - tinyexr::kEXRVersionSize;
-
-  tinyexr::HeaderInfo info;
-  info.clear();
-
-  std::string err_str;
-  int ret = ParseEXRHeader(&info, NULL, version, &err_str, marker, marker_size);
-
-  if (ret != TINYEXR_SUCCESS) {
-    if (err && !err_str.empty()) {
-      tinyexr::SetErrorMessage(err_str, err);
-    }
-  }
-
-  ConvertHeader(exr_header, info);
-
-  // transfoer `tiled` from version.
-  exr_header->tiled = version->tiled;
-
-  return ret;
-}
-
-int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
-                      const unsigned char *memory, size_t size,
-                      const char **err) {
-  if (out_rgba == NULL || memory == NULL) {
-    tinyexr::SetErrorMessage("Invalid argument for LoadEXRFromMemory", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  EXRVersion exr_version;
-  EXRImage exr_image;
-  EXRHeader exr_header;
-
-  InitEXRHeader(&exr_header);
-
-  int ret = ParseEXRVersionFromMemory(&exr_version, memory, size);
-  if (ret != TINYEXR_SUCCESS) {
-    tinyexr::SetErrorMessage("Failed to parse EXR version", err);
-    return ret;
-  }
-
-  ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err);
-  if (ret != TINYEXR_SUCCESS) {
-    return ret;
-  }
-
-  // Read HALF channel as FLOAT.
-  for (int i = 0; i < exr_header.num_channels; i++) {
-    if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) {
-      exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT;
-    }
-  }
-
-  InitEXRImage(&exr_image);
-  ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err);
-  if (ret != TINYEXR_SUCCESS) {
-    return ret;
-  }
-
-  // RGBA
-  int idxR = -1;
-  int idxG = -1;
-  int idxB = -1;
-  int idxA = -1;
-  for (int c = 0; c < exr_header.num_channels; c++) {
-    if (strcmp(exr_header.channels[c].name, "R") == 0) {
-      idxR = c;
-    } else if (strcmp(exr_header.channels[c].name, "G") == 0) {
-      idxG = c;
-    } else if (strcmp(exr_header.channels[c].name, "B") == 0) {
-      idxB = c;
-    } else if (strcmp(exr_header.channels[c].name, "A") == 0) {
-      idxA = c;
-    }
-  }
-
-  // TODO(syoyo): Refactor removing same code as used in LoadEXR().
-  if (exr_header.num_channels == 1) {
-    // Grayscale channel only.
-
-    (*out_rgba) = reinterpret_cast<float *>(
-        malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
-               static_cast<size_t>(exr_image.height)));
-
-    if (exr_header.tiled) {
-      for (int it = 0; it < exr_image.num_tiles; it++) {
-        for (int j = 0; j < exr_header.tile_size_y; j++) {
-          for (int i = 0; i < exr_header.tile_size_x; i++) {
-            const int ii =
-                exr_image.tiles[it].offset_x * exr_header.tile_size_x + i;
-            const int jj =
-                exr_image.tiles[it].offset_y * exr_header.tile_size_y + j;
-            const int idx = ii + jj * exr_image.width;
-
-            // out of region check.
-            if (ii >= exr_image.width) {
-              continue;
-            }
-            if (jj >= exr_image.height) {
-              continue;
-            }
-            const int srcIdx = i + j * exr_header.tile_size_x;
-            unsigned char **src = exr_image.tiles[it].images;
-            (*out_rgba)[4 * idx + 0] =
-                reinterpret_cast<float **>(src)[0][srcIdx];
-            (*out_rgba)[4 * idx + 1] =
-                reinterpret_cast<float **>(src)[0][srcIdx];
-            (*out_rgba)[4 * idx + 2] =
-                reinterpret_cast<float **>(src)[0][srcIdx];
-            (*out_rgba)[4 * idx + 3] =
-                reinterpret_cast<float **>(src)[0][srcIdx];
-          }
-        }
-      }
-    } else {
-      for (int i = 0; i < exr_image.width * exr_image.height; i++) {
-        const float val = reinterpret_cast<float **>(exr_image.images)[0][i];
-        (*out_rgba)[4 * i + 0] = val;
-        (*out_rgba)[4 * i + 1] = val;
-        (*out_rgba)[4 * i + 2] = val;
-        (*out_rgba)[4 * i + 3] = val;
-      }
-    }
-
-  } else {
-    // TODO(syoyo): Support non RGBA image.
-
-    if (idxR == -1) {
-      tinyexr::SetErrorMessage("R channel not found", err);
-
-      // @todo { free exr_image }
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    if (idxG == -1) {
-      tinyexr::SetErrorMessage("G channel not found", err);
-      // @todo { free exr_image }
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    if (idxB == -1) {
-      tinyexr::SetErrorMessage("B channel not found", err);
-      // @todo { free exr_image }
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    (*out_rgba) = reinterpret_cast<float *>(
-        malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
-               static_cast<size_t>(exr_image.height)));
-
-    if (exr_header.tiled) {
-      for (int it = 0; it < exr_image.num_tiles; it++) {
-        for (int j = 0; j < exr_header.tile_size_y; j++)
-          for (int i = 0; i < exr_header.tile_size_x; i++) {
-            const int ii =
-                exr_image.tiles[it].offset_x * exr_header.tile_size_x + i;
-            const int jj =
-                exr_image.tiles[it].offset_y * exr_header.tile_size_y + j;
-            const int idx = ii + jj * exr_image.width;
-
-            // out of region check.
-            if (ii >= exr_image.width) {
-              continue;
-            }
-            if (jj >= exr_image.height) {
-              continue;
-            }
-            const int srcIdx = i + j * exr_header.tile_size_x;
-            unsigned char **src = exr_image.tiles[it].images;
-            (*out_rgba)[4 * idx + 0] =
-                reinterpret_cast<float **>(src)[idxR][srcIdx];
-            (*out_rgba)[4 * idx + 1] =
-                reinterpret_cast<float **>(src)[idxG][srcIdx];
-            (*out_rgba)[4 * idx + 2] =
-                reinterpret_cast<float **>(src)[idxB][srcIdx];
-            if (idxA != -1) {
-              (*out_rgba)[4 * idx + 3] =
-                  reinterpret_cast<float **>(src)[idxA][srcIdx];
-            } else {
-              (*out_rgba)[4 * idx + 3] = 1.0;
-            }
-          }
-      }
-    } else {
-      for (int i = 0; i < exr_image.width * exr_image.height; i++) {
-        (*out_rgba)[4 * i + 0] =
-            reinterpret_cast<float **>(exr_image.images)[idxR][i];
-        (*out_rgba)[4 * i + 1] =
-            reinterpret_cast<float **>(exr_image.images)[idxG][i];
-        (*out_rgba)[4 * i + 2] =
-            reinterpret_cast<float **>(exr_image.images)[idxB][i];
-        if (idxA != -1) {
-          (*out_rgba)[4 * i + 3] =
-              reinterpret_cast<float **>(exr_image.images)[idxA][i];
-        } else {
-          (*out_rgba)[4 * i + 3] = 1.0;
-        }
-      }
-    }
-  }
-
-  (*width) = exr_image.width;
-  (*height) = exr_image.height;
-
-  FreeEXRHeader(&exr_header);
-  FreeEXRImage(&exr_image);
-
-  return TINYEXR_SUCCESS;
-}
-
-int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header,
-                         const char *filename, const char **err) {
-  if (exr_image == NULL) {
-    tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromFile", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err);
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  if (filesize < 16) {
-    tinyexr::SetErrorMessage("File size too short " + std::string(filename),
-                             err);
-    return TINYEXR_ERROR_INVALID_FILE;
-  }
-
-  std::vector<unsigned char> buf(filesize);  // @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    fclose(fp);
-    (void)ret;
-  }
-
-  return LoadEXRImageFromMemory(exr_image, exr_header, &buf.at(0), filesize,
-                                err);
-}
-
-int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header,
-                           const unsigned char *memory, const size_t size,
-                           const char **err) {
-  if (exr_image == NULL || memory == NULL ||
-      (size < tinyexr::kEXRVersionSize)) {
-    tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromMemory",
-                             err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (exr_header->header_len == 0) {
-    tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  const unsigned char *head = memory;
-  const unsigned char *marker = reinterpret_cast<const unsigned char *>(
-      memory + exr_header->header_len +
-      8);  // +8 for magic number + version header.
-  return tinyexr::DecodeEXRImage(exr_image, exr_header, head, marker, size,
-                                 err);
-}
-
-size_t SaveEXRImageToMemory(const EXRImage *exr_image,
-                            const EXRHeader *exr_header,
-                            unsigned char **memory_out, const char **err) {
-  if (exr_image == NULL || memory_out == NULL ||
-      exr_header->compression_type < 0) {
-    tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToMemory", err);
-    return 0;
-  }
-
-#if !TINYEXR_USE_PIZ
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    tinyexr::SetErrorMessage("PIZ compression is not supported in this build",
-                             err);
-    return 0;
-  }
-#endif
-
-#if !TINYEXR_USE_ZFP
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    tinyexr::SetErrorMessage("ZFP compression is not supported in this build",
-                             err);
-    return 0;
-  }
-#endif
-
-#if TINYEXR_USE_ZFP
-  for (size_t i = 0; i < static_cast<size_t>(exr_header->num_channels); i++) {
-    if (exr_header->requested_pixel_types[i] != TINYEXR_PIXELTYPE_FLOAT) {
-      tinyexr::SetErrorMessage("Pixel type must be FLOAT for ZFP compression",
-                               err);
-      return 0;
-    }
-  }
-#endif
-
-  std::vector<unsigned char> memory;
-
-  // Header
-  {
-    const char header[] = {0x76, 0x2f, 0x31, 0x01};
-    memory.insert(memory.end(), header, header + 4);
-  }
-
-  // Version, scanline.
-  {
-    char marker[] = {2, 0, 0, 0};
-    /* @todo
-    if (exr_header->tiled) {
-      marker[1] |= 0x2;
-    }
-    if (exr_header->long_name) {
-      marker[1] |= 0x4;
-    }
-    if (exr_header->non_image) {
-      marker[1] |= 0x8;
-    }
-    if (exr_header->multipart) {
-      marker[1] |= 0x10;
-    }
-    */
-    memory.insert(memory.end(), marker, marker + 4);
-  }
-
-  int num_scanlines = 1;
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-    num_scanlines = 16;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    num_scanlines = 32;
-  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    num_scanlines = 16;
-  }
-
-  // Write attributes.
-  std::vector<tinyexr::ChannelInfo> channels;
-  {
-    std::vector<unsigned char> data;
-
-    for (int c = 0; c < exr_header->num_channels; c++) {
-      tinyexr::ChannelInfo info;
-      info.p_linear = 0;
-      info.pixel_type = exr_header->requested_pixel_types[c];
-      info.x_sampling = 1;
-      info.y_sampling = 1;
-      info.name = std::string(exr_header->channels[c].name);
-      channels.push_back(info);
-    }
-
-    tinyexr::WriteChannelInfo(data, channels);
-
-    tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0),
-                                    static_cast<int>(data.size()));
-  }
-
-  {
-    int comp = exr_header->compression_type;
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&comp));
-    tinyexr::WriteAttributeToMemory(
-        &memory, "compression", "compression",
-        reinterpret_cast<const unsigned char *>(&comp), 1);
-  }
-
-  {
-    int data[4] = {0, 0, exr_image->width - 1, exr_image->height - 1};
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[0]));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[1]));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[2]));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&data[3]));
-    tinyexr::WriteAttributeToMemory(
-        &memory, "dataWindow", "box2i",
-        reinterpret_cast<const unsigned char *>(data), sizeof(int) * 4);
-    tinyexr::WriteAttributeToMemory(
-        &memory, "displayWindow", "box2i",
-        reinterpret_cast<const unsigned char *>(data), sizeof(int) * 4);
-  }
-
-  {
-    unsigned char line_order = 0;  // @fixme { read line_order from EXRHeader }
-    tinyexr::WriteAttributeToMemory(&memory, "lineOrder", "lineOrder",
-                                    &line_order, 1);
-  }
-
-  {
-    float aspectRatio = 1.0f;
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&aspectRatio));
-    tinyexr::WriteAttributeToMemory(
-        &memory, "pixelAspectRatio", "float",
-        reinterpret_cast<const unsigned char *>(&aspectRatio), sizeof(float));
-  }
-
-  {
-    float center[2] = {0.0f, 0.0f};
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&center[0]));
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&center[1]));
-    tinyexr::WriteAttributeToMemory(
-        &memory, "screenWindowCenter", "v2f",
-        reinterpret_cast<const unsigned char *>(center), 2 * sizeof(float));
-  }
-
-  {
-    float w = static_cast<float>(exr_image->width);
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&w));
-    tinyexr::WriteAttributeToMemory(&memory, "screenWindowWidth", "float",
-                                    reinterpret_cast<const unsigned char *>(&w),
-                                    sizeof(float));
-  }
-
-  // Custom attributes
-  if (exr_header->num_custom_attributes > 0) {
-    for (int i = 0; i < exr_header->num_custom_attributes; i++) {
-      tinyexr::WriteAttributeToMemory(
-          &memory, exr_header->custom_attributes[i].name,
-          exr_header->custom_attributes[i].type,
-          reinterpret_cast<const unsigned char *>(
-              exr_header->custom_attributes[i].value),
-          exr_header->custom_attributes[i].size);
-    }
-  }
-
-  {  // end of header
-    unsigned char e = 0;
-    memory.push_back(e);
-  }
-
-  int num_blocks = exr_image->height / num_scanlines;
-  if (num_blocks * num_scanlines < exr_image->height) {
-    num_blocks++;
-  }
-
-  std::vector<tinyexr::tinyexr_uint64> offsets(static_cast<size_t>(num_blocks));
-
-  size_t headerSize = memory.size();
-  tinyexr::tinyexr_uint64 offset =
-      headerSize +
-      static_cast<size_t>(num_blocks) *
-          sizeof(
-              tinyexr::tinyexr_int64);  // sizeof(header) + sizeof(offsetTable)
-
-  std::vector<std::vector<unsigned char> > data_list(
-      static_cast<size_t>(num_blocks));
-  std::vector<size_t> channel_offset_list(
-      static_cast<size_t>(exr_header->num_channels));
-
-  int pixel_data_size = 0;
-  size_t channel_offset = 0;
-  for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-    channel_offset_list[c] = channel_offset;
-    if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-      pixel_data_size += sizeof(unsigned short);
-      channel_offset += sizeof(unsigned short);
-    } else if (exr_header->requested_pixel_types[c] ==
-               TINYEXR_PIXELTYPE_FLOAT) {
-      pixel_data_size += sizeof(float);
-      channel_offset += sizeof(float);
-    } else if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT) {
-      pixel_data_size += sizeof(unsigned int);
-      channel_offset += sizeof(unsigned int);
-    } else {
-      assert(0);
-    }
-  }
-
-#if TINYEXR_USE_ZFP
-  tinyexr::ZFPCompressionParam zfp_compression_param;
-
-  // Use ZFP compression parameter from custom attributes(if such a parameter
-  // exists)
-  {
-    bool ret = tinyexr::FindZFPCompressionParam(
-        &zfp_compression_param, exr_header->custom_attributes,
-        exr_header->num_custom_attributes);
-
-    if (!ret) {
-      // Use predefined compression parameter.
-      zfp_compression_param.type = 0;
-      zfp_compression_param.rate = 2;
-    }
-  }
-#endif
-
-// Use signed int since some OpenMP compiler doesn't allow unsigned type for
-// `parallel for`
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
-  for (int i = 0; i < num_blocks; i++) {
-    size_t ii = static_cast<size_t>(i);
-    int start_y = num_scanlines * i;
-    int endY = (std::min)(num_scanlines * (i + 1), exr_image->height);
-    int h = endY - start_y;
-
-    std::vector<unsigned char> buf(
-        static_cast<size_t>(exr_image->width * h * pixel_data_size));
-
-    for (size_t c = 0; c < static_cast<size_t>(exr_header->num_channels); c++) {
-      if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-        if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
-          for (int y = 0; y < h; y++) {
-            // Assume increasing Y
-            float *line_ptr = reinterpret_cast<float *>(&buf.at(
-                static_cast<size_t>(pixel_data_size * y * exr_image->width) +
-                channel_offset_list[c] *
-                    static_cast<size_t>(exr_image->width)));
-            for (int x = 0; x < exr_image->width; x++) {
-              tinyexr::FP16 h16;
-              h16.u = reinterpret_cast<unsigned short **>(
-                  exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-              tinyexr::FP32 f32 = half_to_float(h16);
-
-              tinyexr::swap4(reinterpret_cast<unsigned int *>(&f32.f));
-
-              // line_ptr[x] = f32.f;
-              tinyexr::cpy4(line_ptr + x, &(f32.f));
-            }
-          }
-        } else if (exr_header->requested_pixel_types[c] ==
-                   TINYEXR_PIXELTYPE_HALF) {
-          for (int y = 0; y < h; y++) {
-            // Assume increasing Y
-            unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-                &buf.at(static_cast<size_t>(pixel_data_size * y *
-                                            exr_image->width) +
-                        channel_offset_list[c] *
-                            static_cast<size_t>(exr_image->width)));
-            for (int x = 0; x < exr_image->width; x++) {
-              unsigned short val = reinterpret_cast<unsigned short **>(
-                  exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-              tinyexr::swap2(&val);
-
-              // line_ptr[x] = val;
-              tinyexr::cpy2(line_ptr + x, &val);
-            }
-          }
-        } else {
-          assert(0);
-        }
-
-      } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) {
-        if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
-          for (int y = 0; y < h; y++) {
-            // Assume increasing Y
-            unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
-                &buf.at(static_cast<size_t>(pixel_data_size * y *
-                                            exr_image->width) +
-                        channel_offset_list[c] *
-                            static_cast<size_t>(exr_image->width)));
-            for (int x = 0; x < exr_image->width; x++) {
-              tinyexr::FP32 f32;
-              f32.f = reinterpret_cast<float **>(
-                  exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-              tinyexr::FP16 h16;
-              h16 = float_to_half_full(f32);
-
-              tinyexr::swap2(reinterpret_cast<unsigned short *>(&h16.u));
-
-              // line_ptr[x] = h16.u;
-              tinyexr::cpy2(line_ptr + x, &(h16.u));
-            }
-          }
-        } else if (exr_header->requested_pixel_types[c] ==
-                   TINYEXR_PIXELTYPE_FLOAT) {
-          for (int y = 0; y < h; y++) {
-            // Assume increasing Y
-            float *line_ptr = reinterpret_cast<float *>(&buf.at(
-                static_cast<size_t>(pixel_data_size * y * exr_image->width) +
-                channel_offset_list[c] *
-                    static_cast<size_t>(exr_image->width)));
-            for (int x = 0; x < exr_image->width; x++) {
-              float val = reinterpret_cast<float **>(
-                  exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-              tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
-
-              // line_ptr[x] = val;
-              tinyexr::cpy4(line_ptr + x, &val);
-            }
-          }
-        } else {
-          assert(0);
-        }
-      } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_UINT) {
-        for (int y = 0; y < h; y++) {
-          // Assume increasing Y
-          unsigned int *line_ptr = reinterpret_cast<unsigned int *>(&buf.at(
-              static_cast<size_t>(pixel_data_size * y * exr_image->width) +
-              channel_offset_list[c] * static_cast<size_t>(exr_image->width)));
-          for (int x = 0; x < exr_image->width; x++) {
-            unsigned int val = reinterpret_cast<unsigned int **>(
-                exr_image->images)[c][(y + start_y) * exr_image->width + x];
-
-            tinyexr::swap4(&val);
-
-            // line_ptr[x] = val;
-            tinyexr::cpy4(line_ptr + x, &val);
-          }
-        }
-      }
-    }
-
-    if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_NONE) {
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(uncompressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = static_cast<unsigned int>(buf.size());
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), buf.begin(),
-                           buf.begin() + data_len);
-
-    } else if ((exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
-               (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) {
-#if TINYEXR_USE_MINIZ
-      std::vector<unsigned char> block(tinyexr::miniz::mz_compressBound(
-          static_cast<unsigned long>(buf.size())));
-#else
-      std::vector<unsigned char> block(
-          compressBound(static_cast<uLong>(buf.size())));
-#endif
-      tinyexr::tinyexr_uint64 outSize = block.size();
-
-      tinyexr::CompressZip(&block.at(0), outSize,
-                           reinterpret_cast<const unsigned char *>(&buf.at(0)),
-                           static_cast<unsigned long>(buf.size()));
-
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(compressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = static_cast<unsigned int>(outSize);  // truncate
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), block.begin(),
-                           block.begin() + data_len);
-
-    } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_RLE) {
-      // (buf.size() * 3) / 2 would be enough.
-      std::vector<unsigned char> block((buf.size() * 3) / 2);
-
-      tinyexr::tinyexr_uint64 outSize = block.size();
-
-      tinyexr::CompressRle(&block.at(0), outSize,
-                           reinterpret_cast<const unsigned char *>(&buf.at(0)),
-                           static_cast<unsigned long>(buf.size()));
-
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(compressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = static_cast<unsigned int>(outSize);  // truncate
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), block.begin(),
-                           block.begin() + data_len);
-
-    } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-#if TINYEXR_USE_PIZ
-      unsigned int bufLen =
-          8192 + static_cast<unsigned int>(
-                     2 * static_cast<unsigned int>(
-                             buf.size()));  // @fixme { compute good bound. }
-      std::vector<unsigned char> block(bufLen);
-      unsigned int outSize = static_cast<unsigned int>(block.size());
-
-      CompressPiz(&block.at(0), &outSize,
-                  reinterpret_cast<const unsigned char *>(&buf.at(0)),
-                  buf.size(), channels, exr_image->width, h);
-
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(compressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = outSize;
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), block.begin(),
-                           block.begin() + data_len);
-
-#else
-      assert(0);
-#endif
-    } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-#if TINYEXR_USE_ZFP
-      std::vector<unsigned char> block;
-      unsigned int outSize;
-
-      tinyexr::CompressZfp(
-          &block, &outSize, reinterpret_cast<const float *>(&buf.at(0)),
-          exr_image->width, h, exr_header->num_channels, zfp_compression_param);
-
-      // 4 byte: scan line
-      // 4 byte: data size
-      // ~     : pixel data(compressed)
-      std::vector<unsigned char> header(8);
-      unsigned int data_len = outSize;
-      memcpy(&header.at(0), &start_y, sizeof(int));
-      memcpy(&header.at(4), &data_len, sizeof(unsigned int));
-
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(0)));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&header.at(4)));
-
-      data_list[ii].insert(data_list[ii].end(), header.begin(), header.end());
-      data_list[ii].insert(data_list[ii].end(), block.begin(),
-                           block.begin() + data_len);
-
-#else
-      assert(0);
-#endif
-    } else {
-      assert(0);
-    }
-  }  // omp parallel
-
-  for (size_t i = 0; i < static_cast<size_t>(num_blocks); i++) {
-    offsets[i] = offset;
-    tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offsets[i]));
-    offset += data_list[i].size();
-  }
-
-  size_t totalSize = static_cast<size_t>(offset);
-  {
-    memory.insert(
-        memory.end(), reinterpret_cast<unsigned char *>(&offsets.at(0)),
-        reinterpret_cast<unsigned char *>(&offsets.at(0)) +
-            sizeof(tinyexr::tinyexr_uint64) * static_cast<size_t>(num_blocks));
-  }
-
-  if (memory.size() == 0) {
-    tinyexr::SetErrorMessage("Output memory size is zero", err);
-    return 0;
-  }
-
-  (*memory_out) = static_cast<unsigned char *>(malloc(totalSize));
-  memcpy((*memory_out), &memory.at(0), memory.size());
-  unsigned char *memory_ptr = *memory_out + memory.size();
-
-  for (size_t i = 0; i < static_cast<size_t>(num_blocks); i++) {
-    memcpy(memory_ptr, &data_list[i].at(0), data_list[i].size());
-    memory_ptr += data_list[i].size();
-  }
-
-  return totalSize;  // OK
-}
-
-int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header,
-                       const char *filename, const char **err) {
-  if (exr_image == NULL || filename == NULL ||
-      exr_header->compression_type < 0) {
-    tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToFile", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#if !TINYEXR_USE_PIZ
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) {
-    tinyexr::SetErrorMessage("PIZ compression is not supported in this build",
-                             err);
-    return TINYEXR_ERROR_UNSUPPORTED_FEATURE;
-  }
-#endif
-
-#if !TINYEXR_USE_ZFP
-  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
-    tinyexr::SetErrorMessage("ZFP compression is not supported in this build",
-                             err);
-    return TINYEXR_ERROR_UNSUPPORTED_FEATURE;
-  }
-#endif
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "wb");
-#else
-  FILE *fp = fopen(filename, "wb");
-#endif
-  if (!fp) {
-    tinyexr::SetErrorMessage("Cannot write a file", err);
-    return TINYEXR_ERROR_CANT_WRITE_FILE;
-  }
-
-  unsigned char *mem = NULL;
-  size_t mem_size = SaveEXRImageToMemory(exr_image, exr_header, &mem, err);
-  if (mem_size == 0) {
-    return TINYEXR_ERROR_SERIALZATION_FAILED;
-  }
-
-  size_t written_size = 0;
-  if ((mem_size > 0) && mem) {
-    written_size = fwrite(mem, 1, mem_size, fp);
-  }
-  free(mem);
-
-  fclose(fp);
-
-  if (written_size != mem_size) {
-    tinyexr::SetErrorMessage("Cannot write a file", err);
-    return TINYEXR_ERROR_CANT_WRITE_FILE;
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) {
-  if (deep_image == NULL) {
-    tinyexr::SetErrorMessage("Invalid argument for LoadDeepEXR", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _MSC_VER
-  FILE *fp = NULL;
-  errno_t errcode = fopen_s(&fp, filename, "rb");
-  if ((0 != errcode) || (!fp)) {
-    tinyexr::SetErrorMessage("Cannot read a file " + std::string(filename),
-                             err);
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-#else
-  FILE *fp = fopen(filename, "rb");
-  if (!fp) {
-    tinyexr::SetErrorMessage("Cannot read a file " + std::string(filename),
-                             err);
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-#endif
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  if (filesize == 0) {
-    fclose(fp);
-    tinyexr::SetErrorMessage("File size is zero : " + std::string(filename),
-                             err);
-    return TINYEXR_ERROR_INVALID_FILE;
-  }
-
-  std::vector<char> buf(filesize);  // @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    (void)ret;
-  }
-  fclose(fp);
-
-  const char *head = &buf[0];
-  const char *marker = &buf[0];
-
-  // Header check.
-  {
-    const char header[] = {0x76, 0x2f, 0x31, 0x01};
-
-    if (memcmp(marker, header, 4) != 0) {
-      tinyexr::SetErrorMessage("Invalid magic number", err);
-      return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
-    }
-    marker += 4;
-  }
-
-  // Version, scanline.
-  {
-    // ver 2.0, scanline, deep bit on(0x800)
-    // must be [2, 0, 0, 0]
-    if (marker[0] != 2 || marker[1] != 8 || marker[2] != 0 || marker[3] != 0) {
-      tinyexr::SetErrorMessage("Unsupported version or scanline", err);
-      return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-    }
-
-    marker += 4;
-  }
-
-  int dx = -1;
-  int dy = -1;
-  int dw = -1;
-  int dh = -1;
-  int num_scanline_blocks = 1;  // 16 for ZIP compression.
-  int compression_type = -1;
-  int num_channels = -1;
-  std::vector<tinyexr::ChannelInfo> channels;
-
-  // Read attributes
-  size_t size = filesize - tinyexr::kEXRVersionSize;
-  for (;;) {
-    if (0 == size) {
-      return TINYEXR_ERROR_INVALID_DATA;
-    } else if (marker[0] == '\0') {
-      marker++;
-      size--;
-      break;
-    }
-
-    std::string attr_name;
-    std::string attr_type;
-    std::vector<unsigned char> data;
-    size_t marker_size;
-    if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size,
-                                marker, size)) {
-      std::stringstream ss;
-      ss << "Failed to parse attribute\n";
-      tinyexr::SetErrorMessage(ss.str(), err);
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-    marker += marker_size;
-    size -= marker_size;
-
-    if (attr_name.compare("compression") == 0) {
-      compression_type = data[0];
-      if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) {
-        std::stringstream ss;
-        ss << "Unsupported compression type : " << compression_type;
-        tinyexr::SetErrorMessage(ss.str(), err);
-        return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-      }
-
-      if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
-        num_scanline_blocks = 16;
-      }
-
-    } else if (attr_name.compare("channels") == 0) {
-      // name: zero-terminated string, from 1 to 255 bytes long
-      // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2
-      // pLinear: unsigned char, possible values are 0 and 1
-      // reserved: three chars, should be zero
-      // xSampling: int
-      // ySampling: int
-
-      if (!tinyexr::ReadChannelInfo(channels, data)) {
-        tinyexr::SetErrorMessage("Failed to parse channel info", err);
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-
-      num_channels = static_cast<int>(channels.size());
-
-      if (num_channels < 1) {
-        tinyexr::SetErrorMessage("Invalid channels format", err);
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-
-    } else if (attr_name.compare("dataWindow") == 0) {
-      memcpy(&dx, &data.at(0), sizeof(int));
-      memcpy(&dy, &data.at(4), sizeof(int));
-      memcpy(&dw, &data.at(8), sizeof(int));
-      memcpy(&dh, &data.at(12), sizeof(int));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&dx));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&dy));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&dw));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&dh));
-
-    } else if (attr_name.compare("displayWindow") == 0) {
-      int x;
-      int y;
-      int w;
-      int h;
-      memcpy(&x, &data.at(0), sizeof(int));
-      memcpy(&y, &data.at(4), sizeof(int));
-      memcpy(&w, &data.at(8), sizeof(int));
-      memcpy(&h, &data.at(12), sizeof(int));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&x));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&y));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&w));
-      tinyexr::swap4(reinterpret_cast<unsigned int *>(&h));
-    }
-  }
-
-  assert(dx >= 0);
-  assert(dy >= 0);
-  assert(dw >= 0);
-  assert(dh >= 0);
-  assert(num_channels >= 1);
-
-  int data_width = dw - dx + 1;
-  int data_height = dh - dy + 1;
-
-  std::vector<float> image(
-      static_cast<size_t>(data_width * data_height * 4));  // 4 = RGBA
-
-  // Read offset tables.
-  int num_blocks = data_height / num_scanline_blocks;
-  if (num_blocks * num_scanline_blocks < data_height) {
-    num_blocks++;
-  }
-
-  std::vector<tinyexr::tinyexr_int64> offsets(static_cast<size_t>(num_blocks));
-
-  for (size_t y = 0; y < static_cast<size_t>(num_blocks); y++) {
-    tinyexr::tinyexr_int64 offset;
-    memcpy(&offset, marker, sizeof(tinyexr::tinyexr_int64));
-    tinyexr::swap8(reinterpret_cast<tinyexr::tinyexr_uint64 *>(&offset));
-    marker += sizeof(tinyexr::tinyexr_int64);  // = 8
-    offsets[y] = offset;
-  }
-
-#if TINYEXR_USE_PIZ
-  if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ)) {
-#else
-  if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
-      (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) {
-#endif
-    // OK
-  } else {
-    tinyexr::SetErrorMessage("Unsupported compression format", err);
-    return TINYEXR_ERROR_UNSUPPORTED_FORMAT;
-  }
-
-  deep_image->image = static_cast<float ***>(
-      malloc(sizeof(float **) * static_cast<size_t>(num_channels)));
-  for (int c = 0; c < num_channels; c++) {
-    deep_image->image[c] = static_cast<float **>(
-        malloc(sizeof(float *) * static_cast<size_t>(data_height)));
-    for (int y = 0; y < data_height; y++) {
-    }
-  }
-
-  deep_image->offset_table = static_cast<int **>(
-      malloc(sizeof(int *) * static_cast<size_t>(data_height)));
-  for (int y = 0; y < data_height; y++) {
-    deep_image->offset_table[y] = static_cast<int *>(
-        malloc(sizeof(int) * static_cast<size_t>(data_width)));
-  }
-
-  for (size_t y = 0; y < static_cast<size_t>(num_blocks); y++) {
-    const unsigned char *data_ptr =
-        reinterpret_cast<const unsigned char *>(head + offsets[y]);
-
-    // int: y coordinate
-    // int64: packed size of pixel offset table
-    // int64: packed size of sample data
-    // int64: unpacked size of sample data
-    // compressed pixel offset table
-    // compressed sample data
-    int line_no;
-    tinyexr::tinyexr_int64 packedOffsetTableSize;
-    tinyexr::tinyexr_int64 packedSampleDataSize;
-    tinyexr::tinyexr_int64 unpackedSampleDataSize;
-    memcpy(&line_no, data_ptr, sizeof(int));
-    memcpy(&packedOffsetTableSize, data_ptr + 4,
-           sizeof(tinyexr::tinyexr_int64));
-    memcpy(&packedSampleDataSize, data_ptr + 12,
-           sizeof(tinyexr::tinyexr_int64));
-    memcpy(&unpackedSampleDataSize, data_ptr + 20,
-           sizeof(tinyexr::tinyexr_int64));
-
-    tinyexr::swap4(reinterpret_cast<unsigned int *>(&line_no));
-    tinyexr::swap8(
-        reinterpret_cast<tinyexr::tinyexr_uint64 *>(&packedOffsetTableSize));
-    tinyexr::swap8(
-        reinterpret_cast<tinyexr::tinyexr_uint64 *>(&packedSampleDataSize));
-    tinyexr::swap8(
-        reinterpret_cast<tinyexr::tinyexr_uint64 *>(&unpackedSampleDataSize));
-
-    std::vector<int> pixelOffsetTable(static_cast<size_t>(data_width));
-
-    // decode pixel offset table.
-    {
-      unsigned long dstLen =
-          static_cast<unsigned long>(pixelOffsetTable.size() * sizeof(int));
-      if (!tinyexr::DecompressZip(
-              reinterpret_cast<unsigned char *>(&pixelOffsetTable.at(0)),
-              &dstLen, data_ptr + 28,
-              static_cast<unsigned long>(packedOffsetTableSize))) {
-        return false;
-      }
-
-      assert(dstLen == pixelOffsetTable.size() * sizeof(int));
-      for (size_t i = 0; i < static_cast<size_t>(data_width); i++) {
-        deep_image->offset_table[y][i] = pixelOffsetTable[i];
-      }
-    }
-
-    std::vector<unsigned char> sample_data(
-        static_cast<size_t>(unpackedSampleDataSize));
-
-    // decode sample data.
-    {
-      unsigned long dstLen = static_cast<unsigned long>(unpackedSampleDataSize);
-      if (dstLen) {
-        if (!tinyexr::DecompressZip(
-                reinterpret_cast<unsigned char *>(&sample_data.at(0)), &dstLen,
-                data_ptr + 28 + packedOffsetTableSize,
-                static_cast<unsigned long>(packedSampleDataSize))) {
-          return false;
-        }
-        assert(dstLen == static_cast<unsigned long>(unpackedSampleDataSize));
-      }
-    }
-
-    // decode sample
-    int sampleSize = -1;
-    std::vector<int> channel_offset_list(static_cast<size_t>(num_channels));
-    {
-      int channel_offset = 0;
-      for (size_t i = 0; i < static_cast<size_t>(num_channels); i++) {
-        channel_offset_list[i] = channel_offset;
-        if (channels[i].pixel_type == TINYEXR_PIXELTYPE_UINT) {  // UINT
-          channel_offset += 4;
-        } else if (channels[i].pixel_type == TINYEXR_PIXELTYPE_HALF) {  // half
-          channel_offset += 2;
-        } else if (channels[i].pixel_type ==
-                   TINYEXR_PIXELTYPE_FLOAT) {  // float
-          channel_offset += 4;
-        } else {
-          assert(0);
-        }
-      }
-      sampleSize = channel_offset;
-    }
-    assert(sampleSize >= 2);
-
-    assert(static_cast<size_t>(
-               pixelOffsetTable[static_cast<size_t>(data_width - 1)] *
-               sampleSize) == sample_data.size());
-    int samples_per_line = static_cast<int>(sample_data.size()) / sampleSize;
-
-    //
-    // Alloc memory
-    //
-
-    //
-    // pixel data is stored as image[channels][pixel_samples]
-    //
-    {
-      tinyexr::tinyexr_uint64 data_offset = 0;
-      for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-        deep_image->image[c][y] = static_cast<float *>(
-            malloc(sizeof(float) * static_cast<size_t>(samples_per_line)));
-
-        if (channels[c].pixel_type == 0) {  // UINT
-          for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) {
-            unsigned int ui;
-            unsigned int *src_ptr = reinterpret_cast<unsigned int *>(
-                &sample_data.at(size_t(data_offset) + x * sizeof(int)));
-            tinyexr::cpy4(&ui, src_ptr);
-            deep_image->image[c][y][x] = static_cast<float>(ui);  // @fixme
-          }
-          data_offset +=
-              sizeof(unsigned int) * static_cast<size_t>(samples_per_line);
-        } else if (channels[c].pixel_type == 1) {  // half
-          for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) {
-            tinyexr::FP16 f16;
-            const unsigned short *src_ptr = reinterpret_cast<unsigned short *>(
-                &sample_data.at(size_t(data_offset) + x * sizeof(short)));
-            tinyexr::cpy2(&(f16.u), src_ptr);
-            tinyexr::FP32 f32 = half_to_float(f16);
-            deep_image->image[c][y][x] = f32.f;
-          }
-          data_offset += sizeof(short) * static_cast<size_t>(samples_per_line);
-        } else {  // float
-          for (size_t x = 0; x < static_cast<size_t>(samples_per_line); x++) {
-            float f;
-            const float *src_ptr = reinterpret_cast<float *>(
-                &sample_data.at(size_t(data_offset) + x * sizeof(float)));
-            tinyexr::cpy4(&f, src_ptr);
-            deep_image->image[c][y][x] = f;
-          }
-          data_offset += sizeof(float) * static_cast<size_t>(samples_per_line);
-        }
-      }
-    }
-  }  // y
-
-  deep_image->width = data_width;
-  deep_image->height = data_height;
-
-  deep_image->channel_names = static_cast<const char **>(
-      malloc(sizeof(const char *) * static_cast<size_t>(num_channels)));
-  for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
-#ifdef _WIN32
-    deep_image->channel_names[c] = _strdup(channels[c].name.c_str());
-#else
-    deep_image->channel_names[c] = strdup(channels[c].name.c_str());
-#endif
-  }
-  deep_image->num_channels = num_channels;
-
-  return TINYEXR_SUCCESS;
-}
-
-void InitEXRImage(EXRImage *exr_image) {
-  if (exr_image == NULL) {
-    return;
-  }
-
-  exr_image->width = 0;
-  exr_image->height = 0;
-  exr_image->num_channels = 0;
-
-  exr_image->images = NULL;
-  exr_image->tiles = NULL;
-
-  exr_image->num_tiles = 0;
-}
-
-void FreeEXRErrorMessage(const char *msg) {
-  if (msg) {
-    free(reinterpret_cast<void *>(const_cast<char *>(msg)));
-  }
-  return;
-}
-
-void InitEXRHeader(EXRHeader *exr_header) {
-  if (exr_header == NULL) {
-    return;
-  }
-
-  memset(exr_header, 0, sizeof(EXRHeader));
-}
-
-int FreeEXRHeader(EXRHeader *exr_header) {
-  if (exr_header == NULL) {
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (exr_header->channels) {
-    free(exr_header->channels);
-  }
-
-  if (exr_header->pixel_types) {
-    free(exr_header->pixel_types);
-  }
-
-  if (exr_header->requested_pixel_types) {
-    free(exr_header->requested_pixel_types);
-  }
-
-  for (int i = 0; i < exr_header->num_custom_attributes; i++) {
-    if (exr_header->custom_attributes[i].value) {
-      free(exr_header->custom_attributes[i].value);
-    }
-  }
-
-  if (exr_header->custom_attributes) {
-    free(exr_header->custom_attributes);
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int FreeEXRImage(EXRImage *exr_image) {
-  if (exr_image == NULL) {
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  for (int i = 0; i < exr_image->num_channels; i++) {
-    if (exr_image->images && exr_image->images[i]) {
-      free(exr_image->images[i]);
-    }
-  }
-
-  if (exr_image->images) {
-    free(exr_image->images);
-  }
-
-  if (exr_image->tiles) {
-    for (int tid = 0; tid < exr_image->num_tiles; tid++) {
-      for (int i = 0; i < exr_image->num_channels; i++) {
-        if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) {
-          free(exr_image->tiles[tid].images[i]);
-        }
-      }
-      if (exr_image->tiles[tid].images) {
-        free(exr_image->tiles[tid].images);
-      }
-    }
-    free(exr_image->tiles);
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version,
-                           const char *filename, const char **err) {
-  if (exr_header == NULL || exr_version == NULL || filename == NULL) {
-    tinyexr::SetErrorMessage("Invalid argument for ParseEXRHeaderFromFile",
-                             err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err);
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  std::vector<unsigned char> buf(filesize);  // @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    fclose(fp);
-
-    if (ret != filesize) {
-      tinyexr::SetErrorMessage("fread() error on " + std::string(filename),
-                               err);
-      return TINYEXR_ERROR_INVALID_FILE;
-    }
-  }
-
-  return ParseEXRHeaderFromMemory(exr_header, exr_version, &buf.at(0), filesize,
-                                  err);
-}
-
-int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers,
-                                      int *num_headers,
-                                      const EXRVersion *exr_version,
-                                      const unsigned char *memory, size_t size,
-                                      const char **err) {
-  if (memory == NULL || exr_headers == NULL || num_headers == NULL ||
-      exr_version == NULL) {
-    // Invalid argument
-    tinyexr::SetErrorMessage(
-        "Invalid argument for ParseEXRMultipartHeaderFromMemory", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (size < tinyexr::kEXRVersionSize) {
-    tinyexr::SetErrorMessage("Data size too short", err);
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  const unsigned char *marker = memory + tinyexr::kEXRVersionSize;
-  size_t marker_size = size - tinyexr::kEXRVersionSize;
-
-  std::vector<tinyexr::HeaderInfo> infos;
-
-  for (;;) {
-    tinyexr::HeaderInfo info;
-    info.clear();
-
-    std::string err_str;
-    bool empty_header = false;
-    int ret = ParseEXRHeader(&info, &empty_header, exr_version, &err_str,
-                             marker, marker_size);
-
-    if (ret != TINYEXR_SUCCESS) {
-      tinyexr::SetErrorMessage(err_str, err);
-      return ret;
-    }
-
-    if (empty_header) {
-      marker += 1;  // skip '\0'
-      break;
-    }
-
-    // `chunkCount` must exist in the header.
-    if (info.chunk_count == 0) {
-      tinyexr::SetErrorMessage(
-          "`chunkCount' attribute is not found in the header.", err);
-      return TINYEXR_ERROR_INVALID_DATA;
-    }
-
-    infos.push_back(info);
-
-    // move to next header.
-    marker += info.header_len;
-    size -= info.header_len;
-  }
-
-  // allocate memory for EXRHeader and create array of EXRHeader pointers.
-  (*exr_headers) =
-      static_cast<EXRHeader **>(malloc(sizeof(EXRHeader *) * infos.size()));
-  for (size_t i = 0; i < infos.size(); i++) {
-    EXRHeader *exr_header = static_cast<EXRHeader *>(malloc(sizeof(EXRHeader)));
-
-    ConvertHeader(exr_header, infos[i]);
-
-    // transfoer `tiled` from version.
-    exr_header->tiled = exr_version->tiled;
-
-    (*exr_headers)[i] = exr_header;
-  }
-
-  (*num_headers) = static_cast<int>(infos.size());
-
-  return TINYEXR_SUCCESS;
-}
-
-int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers,
-                                    const EXRVersion *exr_version,
-                                    const char *filename, const char **err) {
-  if (exr_headers == NULL || num_headers == NULL || exr_version == NULL ||
-      filename == NULL) {
-    tinyexr::SetErrorMessage(
-        "Invalid argument for ParseEXRMultipartHeaderFromFile()", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err);
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  std::vector<unsigned char> buf(filesize);  // @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    fclose(fp);
-
-    if (ret != filesize) {
-      tinyexr::SetErrorMessage("`fread' error. file may be corrupted.", err);
-      return TINYEXR_ERROR_INVALID_FILE;
-    }
-  }
-
-  return ParseEXRMultipartHeaderFromMemory(
-      exr_headers, num_headers, exr_version, &buf.at(0), filesize, err);
-}
-
-int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory,
-                              size_t size) {
-  if (version == NULL || memory == NULL) {
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  if (size < tinyexr::kEXRVersionSize) {
-    return TINYEXR_ERROR_INVALID_DATA;
-  }
-
-  const unsigned char *marker = memory;
-
-  // Header check.
-  {
-    const char header[] = {0x76, 0x2f, 0x31, 0x01};
-
-    if (memcmp(marker, header, 4) != 0) {
-      return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
-    }
-    marker += 4;
-  }
-
-  version->tiled = false;
-  version->long_name = false;
-  version->non_image = false;
-  version->multipart = false;
-
-  // Parse version header.
-  {
-    // must be 2
-    if (marker[0] != 2) {
-      return TINYEXR_ERROR_INVALID_EXR_VERSION;
-    }
-
-    if (version == NULL) {
-      return TINYEXR_SUCCESS;  // May OK
-    }
-
-    version->version = 2;
-
-    if (marker[1] & 0x2) {  // 9th bit
-      version->tiled = true;
-    }
-    if (marker[1] & 0x4) {  // 10th bit
-      version->long_name = true;
-    }
-    if (marker[1] & 0x8) {        // 11th bit
-      version->non_image = true;  // (deep image)
-    }
-    if (marker[1] & 0x10) {  // 12th bit
-      version->multipart = true;
-    }
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) {
-  if (filename == NULL) {
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t file_size;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  file_size = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  if (file_size < tinyexr::kEXRVersionSize) {
-    return TINYEXR_ERROR_INVALID_FILE;
-  }
-
-  unsigned char buf[tinyexr::kEXRVersionSize];
-  size_t ret = fread(&buf[0], 1, tinyexr::kEXRVersionSize, fp);
-  fclose(fp);
-
-  if (ret != tinyexr::kEXRVersionSize) {
-    return TINYEXR_ERROR_INVALID_FILE;
-  }
-
-  return ParseEXRVersionFromMemory(version, buf, tinyexr::kEXRVersionSize);
-}
-
-int LoadEXRMultipartImageFromMemory(EXRImage *exr_images,
-                                    const EXRHeader **exr_headers,
-                                    unsigned int num_parts,
-                                    const unsigned char *memory,
-                                    const size_t size, const char **err) {
-  if (exr_images == NULL || exr_headers == NULL || num_parts == 0 ||
-      memory == NULL || (size <= tinyexr::kEXRVersionSize)) {
-    tinyexr::SetErrorMessage(
-        "Invalid argument for LoadEXRMultipartImageFromMemory()", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  // compute total header size.
-  size_t total_header_size = 0;
-  for (unsigned int i = 0; i < num_parts; i++) {
-    if (exr_headers[i]->header_len == 0) {
-      tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err);
-      return TINYEXR_ERROR_INVALID_ARGUMENT;
-    }
-
-    total_header_size += exr_headers[i]->header_len;
-  }
-
-  const char *marker = reinterpret_cast<const char *>(
-      memory + total_header_size + 4 +
-      4);  // +8 for magic number and version header.
-
-  marker += 1;  // Skip empty header.
-
-  // NOTE 1:
-  //   In multipart image, There is 'part number' before chunk data.
-  //   4 byte : part number
-  //   4+     : chunk
-  //
-  // NOTE 2:
-  //   EXR spec says 'part number' is 'unsigned long' but actually this is
-  //   'unsigned int(4 bytes)' in OpenEXR implementation...
-  //   http://www.openexr.com/openexrfilelayout.pdf
-
-  // Load chunk offset table.
-  std::vector<std::vector<tinyexr::tinyexr_uint64> > chunk_offset_table_list;
-  for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) {
-    std::vector<tinyexr::tinyexr_uint64> offset_table(
-        static_cast<size_t>(exr_headers[i]->chunk_count));
-
-    for (size_t c = 0; c < offset_table.size(); c++) {
-      tinyexr::tinyexr_uint64 offset;
-      memcpy(&offset, marker, 8);
-      tinyexr::swap8(&offset);
-
-      if (offset >= size) {
-        tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.",
-                                 err);
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-
-      offset_table[c] = offset + 4;  // +4 to skip 'part number'
-      marker += 8;
-    }
-
-    chunk_offset_table_list.push_back(offset_table);
-  }
-
-  // Decode image.
-  for (size_t i = 0; i < static_cast<size_t>(num_parts); i++) {
-    std::vector<tinyexr::tinyexr_uint64> &offset_table =
-        chunk_offset_table_list[i];
-
-    // First check 'part number' is identitical to 'i'
-    for (size_t c = 0; c < offset_table.size(); c++) {
-      const unsigned char *part_number_addr =
-          memory + offset_table[c] - 4;  // -4 to move to 'part number' field.
-      unsigned int part_no;
-      memcpy(&part_no, part_number_addr, sizeof(unsigned int));  // 4
-      tinyexr::swap4(&part_no);
-
-      if (part_no != i) {
-        tinyexr::SetErrorMessage("Invalid `part number' in EXR header chunks.",
-                                 err);
-        return TINYEXR_ERROR_INVALID_DATA;
-      }
-    }
-
-    std::string e;
-    int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_table,
-                                   memory, size, &e);
-    if (ret != TINYEXR_SUCCESS) {
-      if (!e.empty()) {
-        tinyexr::SetErrorMessage(e, err);
-      }
-      return ret;
-    }
-  }
-
-  return TINYEXR_SUCCESS;
-}
-
-int LoadEXRMultipartImageFromFile(EXRImage *exr_images,
-                                  const EXRHeader **exr_headers,
-                                  unsigned int num_parts, const char *filename,
-                                  const char **err) {
-  if (exr_images == NULL || exr_headers == NULL || num_parts == 0) {
-    tinyexr::SetErrorMessage(
-        "Invalid argument for LoadEXRMultipartImageFromFile", err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-#ifdef _WIN32
-  FILE *fp = NULL;
-  fopen_s(&fp, filename, "rb");
-#else
-  FILE *fp = fopen(filename, "rb");
-#endif
-  if (!fp) {
-    tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err);
-    return TINYEXR_ERROR_CANT_OPEN_FILE;
-  }
-
-  size_t filesize;
-  // Compute size
-  fseek(fp, 0, SEEK_END);
-  filesize = static_cast<size_t>(ftell(fp));
-  fseek(fp, 0, SEEK_SET);
-
-  std::vector<unsigned char> buf(filesize);  //  @todo { use mmap }
-  {
-    size_t ret;
-    ret = fread(&buf[0], 1, filesize, fp);
-    assert(ret == filesize);
-    fclose(fp);
-    (void)ret;
-  }
-
-  return LoadEXRMultipartImageFromMemory(exr_images, exr_headers, num_parts,
-                                         &buf.at(0), filesize, err);
-}
-
-int SaveEXR(const float *data, int width, int height, int components,
-            const int save_as_fp16, const char *outfilename, const char **err) {
-  if ((components == 1) || components == 3 || components == 4) {
-    // OK
-  } else {
-    std::stringstream ss;
-    ss << "Unsupported component value : " << components << std::endl;
-
-    tinyexr::SetErrorMessage(ss.str(), err);
-    return TINYEXR_ERROR_INVALID_ARGUMENT;
-  }
-
-  EXRHeader header;
-  InitEXRHeader(&header);
-
-  if ((width < 16) && (height < 16)) {
-    // No compression for small image.
-    header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE;
-  } else {
-    header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP;
-  }
-
-  EXRImage image;
-  InitEXRImage(&image);
-
-  image.num_channels = components;
-
-  std::vector<float> images[4];
-
-  if (components == 1) {
-    images[0].resize(static_cast<size_t>(width * height));
-    memcpy(images[0].data(), data, sizeof(float) * size_t(width * height));
-  } else {
-    images[0].resize(static_cast<size_t>(width * height));
-    images[1].resize(static_cast<size_t>(width * height));
-    images[2].resize(static_cast<size_t>(width * height));
-    images[3].resize(static_cast<size_t>(width * height));
-
-    // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers
-    for (size_t i = 0; i < static_cast<size_t>(width * height); i++) {
-      images[0][i] = data[static_cast<size_t>(components) * i + 0];
-      images[1][i] = data[static_cast<size_t>(components) * i + 1];
-      images[2][i] = data[static_cast<size_t>(components) * i + 2];
-      if (components == 4) {
-        images[3][i] = data[static_cast<size_t>(components) * i + 3];
-      }
-    }
-  }
-
-  float *image_ptr[4] = {0, 0, 0, 0};
-  if (components == 4) {
-    image_ptr[0] = &(images[3].at(0));  // A
-    image_ptr[1] = &(images[2].at(0));  // B
-    image_ptr[2] = &(images[1].at(0));  // G
-    image_ptr[3] = &(images[0].at(0));  // R
-  } else if (components == 3) {
-    image_ptr[0] = &(images[2].at(0));  // B
-    image_ptr[1] = &(images[1].at(0));  // G
-    image_ptr[2] = &(images[0].at(0));  // R
-  } else if (components == 1) {
-    image_ptr[0] = &(images[0].at(0));  // A
-  }
-
-  image.images = reinterpret_cast<unsigned char **>(image_ptr);
-  image.width = width;
-  image.height = height;
-
-  header.num_channels = components;
-  header.channels = static_cast<EXRChannelInfo *>(malloc(
-      sizeof(EXRChannelInfo) * static_cast<size_t>(header.num_channels)));
-  // Must be (A)BGR order, since most of EXR viewers expect this channel order.
-  if (components == 4) {
-#ifdef _MSC_VER
-    strncpy_s(header.channels[0].name, "A", 255);
-    strncpy_s(header.channels[1].name, "B", 255);
-    strncpy_s(header.channels[2].name, "G", 255);
-    strncpy_s(header.channels[3].name, "R", 255);
-#else
-    strncpy(header.channels[0].name, "A", 255);
-    strncpy(header.channels[1].name, "B", 255);
-    strncpy(header.channels[2].name, "G", 255);
-    strncpy(header.channels[3].name, "R", 255);
-#endif
-    header.channels[0].name[strlen("A")] = '\0';
-    header.channels[1].name[strlen("B")] = '\0';
-    header.channels[2].name[strlen("G")] = '\0';
-    header.channels[3].name[strlen("R")] = '\0';
-  } else if (components == 3) {
-#ifdef _MSC_VER
-    strncpy_s(header.channels[0].name, "B", 255);
-    strncpy_s(header.channels[1].name, "G", 255);
-    strncpy_s(header.channels[2].name, "R", 255);
-#else
-    strncpy(header.channels[0].name, "B", 255);
-    strncpy(header.channels[1].name, "G", 255);
-    strncpy(header.channels[2].name, "R", 255);
-#endif
-    header.channels[0].name[strlen("B")] = '\0';
-    header.channels[1].name[strlen("G")] = '\0';
-    header.channels[2].name[strlen("R")] = '\0';
-  } else {
-#ifdef _MSC_VER
-    strncpy_s(header.channels[0].name, "A", 255);
-#else
-    strncpy(header.channels[0].name, "A", 255);
-#endif
-    header.channels[0].name[strlen("A")] = '\0';
-  }
-
-  header.pixel_types = static_cast<int *>(
-      malloc(sizeof(int) * static_cast<size_t>(header.num_channels)));
-  header.requested_pixel_types = static_cast<int *>(
-      malloc(sizeof(int) * static_cast<size_t>(header.num_channels)));
-  for (int i = 0; i < header.num_channels; i++) {
-    header.pixel_types[i] =
-        TINYEXR_PIXELTYPE_FLOAT;  // pixel type of input image
-
-    if (save_as_fp16 > 0) {
-      header.requested_pixel_types[i] =
-          TINYEXR_PIXELTYPE_HALF;  // save with half(fp16) pixel format
-    } else {
-      header.requested_pixel_types[i] =
-          TINYEXR_PIXELTYPE_FLOAT;  // save with float(fp32) pixel format(i.e.
-                                    // no precision reduction)
-    }
-  }
-
-  int ret = SaveEXRImageToFile(&image, &header, outfilename, err);
-  if (ret != TINYEXR_SUCCESS) {
-    return ret;
-  }
-
-  free(header.channels);
-  free(header.pixel_types);
-  free(header.requested_pixel_types);
-
-  return ret;
-}
-
-#ifdef __clang__
-// zero-as-null-ppinter-constant
-#pragma clang diagnostic pop
-#endif
-
-#endif  // TINYEXR_IMPLEMENTATION_DEIFNED
-#endif  // TINYEXR_IMPLEMENTATION
diff --git a/zenovis/stbi/src/stbi.c b/zenovis/stbi/src/stbi.c
index aa7c528a9c..72ee413543 100644
--- a/zenovis/stbi/src/stbi.c
+++ b/zenovis/stbi/src/stbi.c
@@ -2,3 +2,6 @@
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include <stb_image.h>
 #include <stb_image_write.h>
+
+#define STB_DXT_IMPLEMENTATION
+#include "stb_dxt.h"
diff --git a/zenovis/stbi/src/tinyexr.cpp b/zenovis/stbi/src/tinyexr.cpp
deleted file mode 100644
index a1a456ac61..0000000000
--- a/zenovis/stbi/src/tinyexr.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-#define TINYEXR_IMPLEMENTATION
-#include <tinyexr.h>
diff --git a/zenovis/xinxinoptix/BCX.h b/zenovis/xinxinoptix/BCX.h
new file mode 100644
index 0000000000..05e2a353c4
--- /dev/null
+++ b/zenovis/xinxinoptix/BCX.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include <stb_dxt.h>
+#include <tbb/task.h>
+#include <tbb/task_group.h>
+
+#include <map>
+#include <vector>
+#include <vector_types.h>
+
+template <char N>  // N = bytes per texel: 1 -> BC4, 2 -> BC5, 4 -> BC3 (DXT5 with alpha)
+inline void compress(std::vector<unsigned char> &packed, std::vector<unsigned char> &block) {
+    static_assert(N == 1 || N == 2 || N == 4, "compress<N>: N must be 1 (BC4), 2 (BC5) or 4 (BC3)");
+    if constexpr (N == 1)
+        stb_compress_bc4_block(packed.data(), block.data());
+    else if constexpr (N == 2)
+        stb_compress_bc5_block(packed.data(), block.data());
+    else  // N == 4: the third argument (1) tells stb_dxt that the block carries alpha
+        stb_compress_dxt_block(packed.data(), block.data(), 1, STB_DXT_HIGHQUAL);
+}
+
+// Block-compress a tightly packed, row-major nx-by-ny image with N bytes per texel
+// (N = 1/2/4 selects BC4/BC5/BC3). Returns the encoded 4x4 blocks in row-major block order.
+// Only complete blocks are encoded: when nx or ny is not a multiple of 4 the trailing
+// rows/columns are dropped instead of being read out of bounds.
+template <char N>
+inline std::vector<unsigned char> compressBCx(unsigned char* img, uint32_t nx, uint32_t ny) {
+
+    static_assert(N == 1 || N == 2 || N == 4, "compressBCx<N>: N must be 1, 2 or 4");
+
+    // Bytes per encoded block: 8 for BC4, 16 for BC5 and BC3.
+    constexpr size_t size_per_packed = (N == 1) ? 8 : 16;
+
+    const size_t blocks_x = nx / 4;
+    const size_t blocks_y = ny / 4;
+
+    // size_t arithmetic, so the byte count cannot wrap around at 32 bits for large images.
+    std::vector<unsigned char> result(size_per_packed * blocks_x * blocks_y);
+
+    tbb::task_group bc_group;
+
+    for (size_t by=0; by<blocks_y; ++by) { // one task per row of blocks
+
+        bc_group.run([&, by]{
+
+            // Scratch buffers are per task; tasks only share disjoint slices of `result`.
+            std::vector<unsigned char> block(16 * N, 0);
+            std::vector<unsigned char> packed(size_per_packed, 0);
+
+            for (size_t bx=0; bx<blocks_x; ++bx) {
+
+                for (size_t row=0; row<4; ++row) { // gather the four texel rows of this block
+                    const size_t index = size_t(nx) * (by*4 + row) + bx*4;
+                    memcpy(block.data() + row*4*N, img + index*N, N * 4);
+                }
+                compress<N>(packed, block);
+
+                // Block (bx, by) is stored at block index blocks_x*by + bx.
+                const size_t offset = size_per_packed * (blocks_x * by + bx);
+                memcpy(result.data()+offset, packed.data(), packed.size());
+            }
+
+        }); // run
+    }
+
+    bc_group.wait();
+    return result;
+}
+
+inline std::vector<unsigned char> compressBC4(unsigned char* one_byte_per_pixel, uint32_t nx, uint32_t ny) {
+    return compressBCx<1>(one_byte_per_pixel, nx, ny); // 1 byte per texel -> BC4, 8 bytes per 4x4 block
+}
+
+inline std::vector<unsigned char> compressBC5(unsigned char* two_byte_per_pixel, uint32_t nx, uint32_t ny) {
+    return compressBCx<2>(two_byte_per_pixel, nx, ny); // 2 bytes per texel -> BC5, 16 bytes per 4x4 block
+}
+
+inline std::vector<unsigned char> compressBC3(unsigned char* four_byte_per_pixel, uint32_t nx, uint32_t ny) {
+    return compressBCx<4>(four_byte_per_pixel, nx, ny); // 4 bytes per texel -> BC3 (DXT with alpha), 16 bytes per 4x4 block
+}
\ No newline at end of file
diff --git a/zenovis/xinxinoptix/CMakeLists.txt b/zenovis/xinxinoptix/CMakeLists.txt
index 43e552e726..0a927a9b76 100644
--- a/zenovis/xinxinoptix/CMakeLists.txt
+++ b/zenovis/xinxinoptix/CMakeLists.txt
@@ -15,6 +15,8 @@ target_sources(zenovis PRIVATE
   Shape.h
   XAS.h
 
+  BCX.h
+
   LightTree.cpp
   LightTree.h
   LightBounds.cpp
diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h
index 310d015263..8a0ee9e52c 100644
--- a/zenovis/xinxinoptix/OptiXStuff.h
+++ b/zenovis/xinxinoptix/OptiXStuff.h
@@ -48,6 +48,7 @@
 #include <string>
 #include <filesystem>
 
+#include "BCX.h"
 #include "ies/ies.h"
 
 #include "zeno/utils/fileio.h"
@@ -338,6 +339,7 @@ inline void createRTProgramGroups(OptixDeviceContext &context, OptixModule &_mod
 }
 struct cuTexture{
     std::string md5;
+    bool blockCompression = false;
     
     cudaArray_t gpuImageArray = nullptr;
     cudaTextureObject_t texture = 0llu;
@@ -367,40 +369,83 @@ inline sutil::Texture loadCubeMap(const std::string& ppm_filename)
 
     return loadPPMTexture( ppm_filename, make_float3(1,1,1), nullptr );
 }
-inline std::shared_ptr<cuTexture> makeCudaTexture(unsigned char* img, int nx, int ny, int nc)
+
+
+
+inline std::shared_ptr<cuTexture> makeCudaTexture(unsigned char* img, int nx, int ny, int nc, bool blockCompression)
 {
     auto texture = std::make_shared<cuTexture>(nx, ny);
-    std::vector<uchar4> data;
-    data.resize(nx*ny);
-    for(int j=0;j<ny;j++)
-    for(int i=0;i<nx;i++)
-    {
-        size_t idx = j*nx + i;
-        data[idx] = {
-            nc>=1?(img[idx*nc + 0]):(unsigned char)0,
-            nc>=2?(img[idx*nc + 1]):(unsigned char)0,
-            nc>=3?(img[idx*nc + 2]):(unsigned char)0,
-            nc>=4?(img[idx*nc + 3]):(unsigned char)0,
-        };
-    }
+
+    std::vector<uchar4> alt;
     
-    cudaChannelFormatDesc channelDescriptor = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
-    cudaError_t rc = cudaMallocArray(&texture->gpuImageArray, &channelDescriptor, nx, ny, 0);
-    if (rc != cudaSuccess) {
-        std::cout<<"texture space alloc failed\n";
-        return 0;
+    if (nc == 3) { // cuda doesn't rgb, should be rgba 
+        auto count = nx * ny;    
+        alt.resize(count);
+
+        for (size_t i=0; i<count; ++i) {
+            alt[i] = { img[i*nc + 0], img[i*nc + 1], img[i*nc + 2], 255u };
+        }
+        nc = 4;
+        img = (unsigned char*)alt.data();
     }
-    rc = cudaMemcpy2DToArray(texture->gpuImageArray, 0, 0, data.data(), 
-                             nx * sizeof(unsigned char) * 4,
-                             nx * sizeof(unsigned char) * 4,
-                             ny, 
-                             cudaMemcpyHostToDevice);
+
+    if (nx%4 || ny%4) {
+        blockCompression = false;
+    }
+
+    cudaError_t rc;
+ 
+    if (blockCompression == false) {
+        std::vector<int> xyzw(4, 0);
+        for (int i=0; i<nc; ++i) {xyzw[i] = 8;}
+
+        cudaChannelFormatDesc channelDescriptor = cudaCreateChannelDesc(xyzw[0], xyzw[1], xyzw[2], xyzw[3], cudaChannelFormatKindUnsigned);
+        rc = cudaMallocArray(&texture->gpuImageArray, &channelDescriptor, nx, ny, 0);
+        if (rc != cudaSuccess) {
+            std::cout<<"texture space alloc failed\n";
+            return 0;
+        }
+        // rc = cudaMemcpy2DToArray(texture->gpuImageArray, 0, 0, img, 
+        //                         nx * sizeof(unsigned char) * nc,
+        //                         nx * sizeof(unsigned char) * nc,
+        //                         ny, 
+        //                         cudaMemcpyHostToDevice);
+
+        rc = cudaMemcpyToArray(texture->gpuImageArray, 0, 0, img, sizeof(unsigned char) * nc * nx * ny, cudaMemcpyHostToDevice);
+
+    } else {
+
+        std::vector<unsigned char> bc_data;
+        cudaChannelFormatDesc channelDescriptor;
+
+        if (nc == 1) {
+            bc_data = compressBC4(img, nx, ny);
+            channelDescriptor = cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed4>();
+        } else if (nc == 2) {
+            bc_data = compressBC5(img, nx, ny);
+            channelDescriptor = cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed5>();
+        } else if (nc == 4) {
+            bc_data = compressBC3(img, nx, ny);
+            channelDescriptor = cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed3>();
+        }
+        
+        rc = cudaMallocArray(&texture->gpuImageArray, &channelDescriptor, nx, ny, 0);
+
+        if (rc != cudaSuccess) {
+            std::cout<<"texture space alloc failed\n";
+            return 0;
+        }
+
+        rc = cudaMemcpyToArray(texture->gpuImageArray, 0, 0, bc_data.data(), bc_data.size(), cudaMemcpyHostToDevice);
+    }
+
     if (rc != cudaSuccess) {
         std::cout<<"texture data copy failed\n";
         cudaFreeArray(texture->gpuImageArray);
         texture->gpuImageArray = nullptr;
         return 0;
     }
+
     cudaResourceDesc resourceDescriptor = { };
     resourceDescriptor.resType = cudaResourceTypeArray;
     resourceDescriptor.res.array.array = texture->gpuImageArray;
@@ -422,6 +467,8 @@ inline std::shared_ptr<cuTexture> makeCudaTexture(unsigned char* img, int nx, in
         texture->gpuImageArray = nullptr;
         return 0;
     }
+
+    texture->blockCompression = blockCompression;
     return texture;
 
 }
@@ -721,14 +768,22 @@ namespace detail {
 }
 
 template<typename TaskType=void>
-inline void addTexture(std::string path, TaskType* task=nullptr)
+inline void addTexture(std::string path, bool blockCompression=false, TaskType* task=nullptr)
 {
     zeno::log_debug("loading texture :{}", path);
     std::string native_path = std::filesystem::u8path(path).string();
+
+    bool should_reload = false;
+
     if (std::filesystem::exists(native_path)) {
         std::filesystem::file_time_type ftime = std::filesystem::last_write_time(native_path);
-        if(g_tex.count(path) && g_tex_last_write_time[path] == ftime) {
-            return;
+
+        if(g_tex_last_write_time[path] == ftime && g_tex.count(path) ) {
+
+            if (blockCompression == g_tex[path]->blockCompression) {
+                return;
+            }
+            should_reload = true;
         }
         g_tex_last_write_time[path] = ftime;
     }
@@ -740,7 +795,7 @@ inline void addTexture(std::string path, TaskType* task=nullptr)
     auto input = readData(native_path);
     std::string md5Hash = calculateMD5(input);
 
-    if (md5_path_mapping.count(md5Hash)) {
+    if (md5_path_mapping.count(md5Hash) && !should_reload) {
         g_tex[path] = g_tex[md5_path_mapping[md5Hash]];
         zeno::log_info("path {} reuse {} tex", path, md5_path_mapping[md5Hash]);
         return;
@@ -816,19 +871,24 @@ inline void addTexture(std::string path, TaskType* task=nullptr)
         }
         nx = std::max(img->userData().get2<int>("w"), 1);
         ny = std::max(img->userData().get2<int>("h"), 1);
-        int channels = std::max(img->userData().get2<int>("channels"), 3);
-        nc = 3;
+        nc = std::max(img->userData().get2<int>("channels"), 1);
+
+        if (nc < 4) {
 
-        if (channels == 3) {
             std::vector<unsigned char> ucdata;
-            ucdata.resize(img->verts.size()*3);
-            for(int i=0;i<img->verts.size()*3;i++)
-            {
-              ucdata[i] = (unsigned char)(((float*)img->verts.data())[i]*255.0);
+            ucdata.resize(img->verts.size() * nc);
+
+            for(size_t i=0; i<img->verts.size(); i+=1 ) {
+
+                for (int c=0; c<nc; ++c) {
+                    ucdata[i*nc+c] = (img->verts[i][c] * 255.0);
+                }
             }
-            g_tex[path] = makeCudaTexture(ucdata.data(), nx, ny, 3);
-        }
-        else {
+            g_tex[path] = makeCudaTexture(ucdata.data(), nx, ny, nc, blockCompression);
+
+        } else {
+
+            assert(nc == 4);
             std::vector<uchar4> data(nx * ny);
             auto &alpha = img->verts.attr<float>("alpha");
             for (auto i = 0; i < nx * ny; i++) {
@@ -838,7 +898,7 @@ inline void addTexture(std::string path, TaskType* task=nullptr)
                 data[i].w = (unsigned char)(alpha[i]        *255.0);
 
             }
-            g_tex[path] = makeCudaTexture((unsigned char *)data.data(), nx, ny, 4);
+            g_tex[path] = makeCudaTexture((unsigned char *)data.data(), nx, ny, 4, blockCompression);
         }
         
         lookupTexture = [&img](uint32_t idx) {
@@ -876,7 +936,7 @@ inline void addTexture(std::string path, TaskType* task=nullptr)
         nx = std::max(nx, 1);
         ny = std::max(ny, 1);
         
-        g_tex[path] = makeCudaTexture(img, nx, ny, nc);
+        g_tex[path] = makeCudaTexture(img, nx, ny, nc, blockCompression);
 
         lookupTexture = [img](uint32_t idx) {
             return (float)img[idx] / 255;
@@ -925,7 +985,7 @@ inline void addSkyTexture(std::string path) {
         calc_sky_cdf_map(tex, nx, ny, nc, lookupTexture);
     };
 
-    addTexture(path, &task);
+    addTexture(path, false, &task);
 }
 
 struct OptixShaderCore {

From f619e0ac6d305d89554916681f064f078fafb57c Mon Sep 17 00:00:00 2001
From: littlemine <wxlwxl1993@zju.edu.cn>
Date: Tue, 2 Jul 2024 18:19:06 +0800
Subject: [PATCH 23/24] fix build

---
 projects/CUDA/zpc                                               | 2 +-
 projects/CuLagrange/pbd/ConstraintsBuilder.cu                   | 2 +-
 .../CuLagrange/pbd/constraint_function_kernel/constraint.cuh    | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc
index 740079464b..282e0b1f19 160000
--- a/projects/CUDA/zpc
+++ b/projects/CUDA/zpc
@@ -1 +1 @@
-Subproject commit 740079464b56bfd84269610509bb5ab941710826
+Subproject commit 282e0b1f197fdf37b1c4a496b502b85d4d24f359
diff --git a/projects/CuLagrange/pbd/ConstraintsBuilder.cu b/projects/CuLagrange/pbd/ConstraintsBuilder.cu
index d3f86b7f60..b025c7e676 100644
--- a/projects/CuLagrange/pbd/ConstraintsBuilder.cu
+++ b/projects/CuLagrange/pbd/ConstraintsBuilder.cu
@@ -470,7 +470,7 @@ virtual void apply() override {
             rest_scale = rest_scale,
             eles = proxy<space>({},eles),
             verts = proxy<space>({},verts)] ZS_LAMBDA(auto ai,const auto& pair) mutable {
-                eles.tuple(dim_c<2>,"inds",ai) = pair.reinterpret_bits<float>();
+                eles.tuple(dim_c<2>,"inds",ai) = pair.template reinterpret_bits<float>();
                 auto v0 = verts.pack(dim_c<3>,"x",pair[0]);
                 auto v1 = verts.pack(dim_c<3>,"x",pair[1]);
                 eles("r",ai) = (v0 - v1).norm() * rest_scale;
diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh
index 3572e62c43..e7c55a7971 100644
--- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh
+++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh
@@ -976,6 +976,7 @@ constexpr bool solve_BendTwistConstraint(
 }
 
 // ----------------------------------------------------------------------------------------------
+#if 0
 template<typename VECTOR3d,typename SCALER>
 constexpr bool solve_PerpendiculaBisectorConstraint(
     const VECTOR3d &p0, SCALER invMass0,
@@ -1002,6 +1003,7 @@ constexpr bool solve_PerpendiculaBisectorConstraint(
 
     return true;
 }
+#endif
 
 //     // ----------------------------------------------------------------------------------------------
 //     template<typename VECTOR3d,typename SCALER>

From 4a431bc84399152604b813128307b81a51ac8054 Mon Sep 17 00:00:00 2001
From: littlemine <wxlwxl1993@zju.edu.cn>
Date: Thu, 4 Jul 2024 16:43:53 +0800
Subject: [PATCH 24/24] zsparticleperlinnoise

---
 projects/CUDA/utils/Primitives.cu | 94 +++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/projects/CUDA/utils/Primitives.cu b/projects/CUDA/utils/Primitives.cu
index 694bbeefff..0598963937 100644
--- a/projects/CUDA/utils/Primitives.cu
+++ b/projects/CUDA/utils/Primitives.cu
@@ -14,6 +14,8 @@
 #include <zeno/utils/vec.h>
 #include <zeno/zeno.h>
 
+#include "Noise.cuh"
+
 namespace zeno {
 
 /// utilities
@@ -74,6 +76,98 @@ float prim_reduce(typename ZenoParticles::particles_t &verts, float e, TransOp t
     return ret.getVal();
 }
 
+// Writes multi-octave (fBm) Perlin noise into a 1- or 3-channel attribute of a ZenoParticles object.
+struct ZSParticlePerlinNoise : INode {
+    virtual void apply() override {
+        auto zspars = get_input<ZenoParticles>("zspars");
+        auto attrTag = get_input2<std::string>("Attribute");
+        auto opType = get_input2<std::string>("OpType");
+        auto frequency = get_input2<vec3f>("Frequency");
+        auto offset = get_input2<vec3f>("Offset");
+        auto roughness = get_input2<float>("Roughness");
+        auto turbulence = get_input2<int>("Turbulence");
+        auto amplitude = get_input2<float>("Amplitude");
+        auto attenuation = get_input2<float>("Attenuation");
+        auto mean = get_input2<vec3f>("MeanNoise");
+
+        // "accumulate" adds the noise to the stored value; any other OpType overwrites it.
+        const bool isAccumulate = opType == "accumulate";
+        zs::SmallString tag = attrTag;
+
+        auto &tv = zspars->getParticles();
+        if (!tv.hasProperty(tag))
+            throw std::runtime_error(fmt::format("Attribute [{}] doesn't exist!", attrTag));
+        // The kernel only handles 1- and 3-channel attributes; fail loudly instead of silently doing nothing.
+        const int nchns = tv.getPropertySize(tag);
+        if (nchns != 1 && nchns != 3)
+            throw std::runtime_error(fmt::format("Attribute [{}] has {} channels, expected 1 or 3!", attrTag, nchns));
+
+        auto pol = zs::cuda_exec();
+        constexpr auto space = zs::execspace_e::cuda;
+
+        pol(zs::range(tv.size()),
+            [tvv = zs::proxy<space>({}, tv), tag, nchns, isAccumulate,
+             frequency = zs::vec<float, 3>::from_array(frequency), offset = zs::vec<float, 3>::from_array(offset),
+             roughness, turbulence, amplitude, attenuation,
+             mean = zs::vec<float, 3>::from_array(mean)] __device__(int no) mutable {
+                // Sample position: the particle's "x" attribute scaled by Frequency, then shifted by Offset.
+                auto wcoord = tvv.pack(zs::dim_c<3>, "x", no);
+                auto pp = frequency * wcoord - offset;
+                float scale = amplitude;
+
+                if (nchns == 3) {
+                    // fractal Brownian motion: each octave doubles pp and scales the weight by roughness
+                    auto fbm = zs::vec<float, 3>::uniform(0);
+                    for (int i = 0; i < turbulence; ++i, pp *= 2.f, scale *= roughness) {
+                        zs::vec<float, 3> pln{ZSPerlinNoise1::perlin(pp[0], pp[1], pp[2]),
+                                              ZSPerlinNoise1::perlin(pp[1], pp[2], pp[0]),
+                                              ZSPerlinNoise1::perlin(pp[2], pp[0], pp[1])};
+                        fbm += scale * pln;
+                    }
+                    auto noise = zs::vec<float, 3>{zs::pow(fbm[0], attenuation), zs::pow(fbm[1], attenuation),
+                                                   zs::pow(fbm[2], attenuation)} +
+                                 mean;
+                    if (isAccumulate)
+                        tvv.tuple(zs::dim_c<3>, tag, no) =
+                            tvv.pack(zs::dim_c<3>, tag, no) + noise;
+                    else
+                        tvv.tuple(zs::dim_c<3>, tag, no) = noise;
+                } else if (nchns == 1) {
+                    float fbm = 0;
+                    for (int i = 0; i < turbulence; ++i, pp *= 2.f, scale *= roughness) {
+                        float pln = ZSPerlinNoise1::perlin(pp[0], pp[1], pp[2]);
+                        fbm += scale * pln;
+                    }
+                    auto noise = zs::pow(fbm, attenuation) + mean[0];  // scalar case uses only MeanNoise[0]
+                    if (isAccumulate)
+                        tvv(tag, no) += noise;
+                    else
+                        tvv(tag, no) = noise;
+                }
+            });
+
+        set_output("zspars", zspars);
+    }
+};
+
+ZENDEFNODE(ZSParticlePerlinNoise, {/* inputs: */
+                               {"zspars",
+                                {"string", "Attribute", "v"},  // particle attribute to write (1 or 3 channels)
+                                {"enum replace accumulate", "OpType", "accumulate"},  // accumulate: add to the stored value; replace: overwrite it
+                                {"vec3f", "Frequency", "1, 1, 1"},  // per-axis multiplier applied to the position "x"
+                                {"vec3f", "Offset", "0, 0, 0"},  // subtracted from the scaled position
+                                {"float", "Roughness", "0.5"},  // weight multiplier applied after each octave
+                                {"int", "Turbulence", "4"},  // number of octaves summed
+                                {"float", "Amplitude", "1.0"},  // weight of the first octave
+                                {"float", "Attenuation", "1.0"},  // exponent applied to the summed noise
+                                {"vec3f", "MeanNoise", "0, 0, 0"}},  // added to the result (first component only for 1-channel attributes)
+                               /* outputs: */
+                               {"zspars"},
+                               /* params: */
+                               {},
+                               /* category: */
+                               {"Eulerian"}});
+
 struct ZSPrimitiveReduction : zeno::INode {
     struct pass_on {
         template <typename T>